aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-06 10:59:36 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-06 10:59:36 -0500
commit203b6609e0ede49eb0b97008b1150c69e9d2ffd3 (patch)
tree7d9c1227eeec17f75b2a827e385387f640a365a6 /tools/perf
parent3478588b5136966c80c571cf0006f08e9e5b8f04 (diff)
parentc978b9460fe1d4a1e1effa0abd6bd69b18a098a8 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Lots of tooling updates - too many to list, here's a few highlights: - Various subcommand updates to 'perf trace', 'perf report', 'perf record', 'perf annotate', 'perf script', 'perf test', etc. - CPU and NUMA topology and affinity handling improvements, - HW tracing and HW support updates: - Intel PT updates - ARM CoreSight updates - vendor HW event updates - BPF updates - Tons of infrastructure updates, both on the build system and the library support side - Documentation updates. - ... and lots of other changes, see the changelog for details. Kernel side updates: - Tighten up kprobes blacklist handling, reduce the number of places where developers can install a kprobe and hang/crash the system. - Fix/enhance vma address filter handling. - Various PMU driver updates, small fixes and additions. - refcount_t conversions - BPF updates - error code propagation enhancements - misc other changes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (238 commits) perf script python: Add Python3 support to syscall-counts-by-pid.py perf script python: Add Python3 support to syscall-counts.py perf script python: Add Python3 support to stat-cpi.py perf script python: Add Python3 support to stackcollapse.py perf script python: Add Python3 support to sctop.py perf script python: Add Python3 support to powerpc-hcalls.py perf script python: Add Python3 support to net_dropmonitor.py perf script python: Add Python3 support to mem-phys-addr.py perf script python: Add Python3 support to failed-syscalls-by-pid.py perf script python: Add Python3 support to netdev-times.py perf tools: Add perf_exe() helper to find perf binary perf script: Handle missing fields with -F +.. 
perf data: Add perf_data__open_dir_data function perf data: Add perf_data__(create_dir|close_dir) functions perf data: Fail check_backup in case of error perf data: Make check_backup work over directories perf tools: Add rm_rf_perf_data function perf tools: Add pattern name checking to rm_rf perf tools: Add depth checking to rm_rf perf data: Add global path holder ...
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Build10
-rw-r--r--tools/perf/Documentation/perf-config.txt31
-rw-r--r--tools/perf/Documentation/perf-record.txt19
-rw-r--r--tools/perf/Documentation/perf-script.txt6
-rw-r--r--tools/perf/Documentation/perf-trace.txt8
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt11
-rw-r--r--tools/perf/Makefile.config14
-rw-r--r--tools/perf/Makefile.perf28
-rw-r--r--tools/perf/arch/Build4
-rw-r--r--tools/perf/arch/arm/Build4
-rw-r--r--tools/perf/arch/arm/tests/Build8
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/arch/arm/util/Build8
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c98
-rw-r--r--tools/perf/arch/arm/util/cs-etm.h3
-rw-r--r--tools/perf/arch/arm/util/pmu.c3
-rw-r--r--tools/perf/arch/arm64/Build4
-rw-r--r--tools/perf/arch/arm64/tests/Build6
-rw-r--r--tools/perf/arch/arm64/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/arch/arm64/util/Build12
-rw-r--r--tools/perf/arch/nds32/Build2
-rw-r--r--tools/perf/arch/nds32/util/Build2
-rw-r--r--tools/perf/arch/powerpc/Build4
-rw-r--r--tools/perf/arch/powerpc/tests/Build6
-rw-r--r--tools/perf/arch/powerpc/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/arch/powerpc/util/Build18
-rw-r--r--tools/perf/arch/powerpc/util/kvm-stat.c2
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c3
-rw-r--r--tools/perf/arch/s390/Build2
-rw-r--r--tools/perf/arch/s390/util/Build12
-rw-r--r--tools/perf/arch/s390/util/kvm-stat.c1
-rw-r--r--tools/perf/arch/sh/Build2
-rw-r--r--tools/perf/arch/sh/util/Build2
-rw-r--r--tools/perf/arch/sparc/Build2
-rw-r--r--tools/perf/arch/sparc/util/Build2
-rw-r--r--tools/perf/arch/x86/Build4
-rw-r--r--tools/perf/arch/x86/tests/Build14
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/arch/x86/util/Build30
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c1
-rw-r--r--tools/perf/arch/xtensa/Build2
-rw-r--r--tools/perf/arch/xtensa/util/Build2
-rw-r--r--tools/perf/builtin-annotate.c9
-rw-r--r--tools/perf/builtin-buildid-cache.c4
-rw-r--r--tools/perf/builtin-buildid-list.c8
-rw-r--r--tools/perf/builtin-c2c.c23
-rw-r--r--tools/perf/builtin-diff.c22
-rw-r--r--tools/perf/builtin-evlist.c4
-rw-r--r--tools/perf/builtin-inject.c12
-rw-r--r--tools/perf/builtin-kallsyms.c1
-rw-r--r--tools/perf/builtin-kmem.c7
-rw-r--r--tools/perf/builtin-kvm.c8
-rw-r--r--tools/perf/builtin-list.c8
-rw-r--r--tools/perf/builtin-lock.c8
-rw-r--r--tools/perf/builtin-mem.c9
-rw-r--r--tools/perf/builtin-probe.c1
-rw-r--r--tools/perf/builtin-record.c74
-rw-r--r--tools/perf/builtin-report.c37
-rw-r--r--tools/perf/builtin-sched.c63
-rw-r--r--tools/perf/builtin-script.c31
-rw-r--r--tools/perf/builtin-stat.c16
-rw-r--r--tools/perf/builtin-timechart.c8
-rw-r--r--tools/perf/builtin-top.c23
-rw-r--r--tools/perf/builtin-trace.c38
-rw-r--r--tools/perf/design.txt4
-rw-r--r--tools/perf/examples/bpf/augmented_raw_syscalls.c18
-rw-r--r--tools/perf/examples/bpf/augmented_syscalls.c22
-rw-r--r--tools/perf/examples/bpf/etcsnoop.c18
-rw-r--r--tools/perf/include/bpf/bpf.h16
-rw-r--r--tools/perf/perf.h9
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/metrics.json2245
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/metrics.json1982
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json2
-rw-r--r--tools/perf/scripts/Build4
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Build2
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Build2
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py2
-rw-r--r--tools/perf/scripts/python/export-to-sqlite.py2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py511
-rw-r--r--tools/perf/scripts/python/failed-syscalls-by-pid.py21
-rw-r--r--tools/perf/scripts/python/mem-phys-addr.py24
-rwxr-xr-xtools/perf/scripts/python/net_dropmonitor.py10
-rw-r--r--tools/perf/scripts/python/netdev-times.py82
-rw-r--r--tools/perf/scripts/python/powerpc-hcalls.py18
-rw-r--r--tools/perf/scripts/python/sched-migration.py2
-rw-r--r--tools/perf/scripts/python/sctop.py24
-rwxr-xr-xtools/perf/scripts/python/stackcollapse.py7
-rw-r--r--tools/perf/scripts/python/stat-cpi.py11
-rw-r--r--tools/perf/scripts/python/syscall-counts-by-pid.py22
-rw-r--r--tools/perf/scripts/python/syscall-counts.py18
-rw-r--r--tools/perf/tests/attr.py1
-rw-r--r--tools/perf/tests/bp_account.c1
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/dwarf-unwind.c1
-rw-r--r--tools/perf/tests/evsel-tp-sched.c6
-rw-r--r--tools/perf/tests/hists_common.c9
-rw-r--r--tools/perf/tests/hists_cumulate.c15
-rw-r--r--tools/perf/tests/hists_filter.c1
-rw-r--r--tools/perf/tests/hists_link.c8
-rw-r--r--tools/perf/tests/hists_output.c33
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c1
-rw-r--r--tools/perf/tests/parse-events.c30
-rw-r--r--tools/perf/tests/pmu.c2
-rw-r--r--tools/perf/tests/sample-parsing.c2
-rw-r--r--tools/perf/tests/sdt.c1
-rw-r--r--tools/perf/tests/shell/lib/probe.sh5
-rwxr-xr-xtools/perf/tests/shell/trace+probe_vfs_getname.sh1
-rw-r--r--tools/perf/trace/beauty/Build26
-rw-r--r--tools/perf/trace/beauty/ioctl.c2
-rw-r--r--tools/perf/trace/beauty/waitid_options.c2
-rw-r--r--tools/perf/ui/Build18
-rw-r--r--tools/perf/ui/browsers/Build10
-rw-r--r--tools/perf/ui/browsers/annotate.c1
-rw-r--r--tools/perf/ui/browsers/header.c2
-rw-r--r--tools/perf/ui/browsers/hists.c23
-rw-r--r--tools/perf/ui/browsers/map.c1
-rw-r--r--tools/perf/ui/gtk/annotate.c3
-rw-r--r--tools/perf/ui/gtk/hists.c7
-rw-r--r--tools/perf/ui/hist.c1
-rw-r--r--tools/perf/ui/stdio/hist.c7
-rw-r--r--tools/perf/ui/tui/Build8
-rw-r--r--tools/perf/util/Build276
-rw-r--r--tools/perf/util/annotate.c6
-rw-r--r--tools/perf/util/annotate.h14
-rw-r--r--tools/perf/util/auxtrace.c27
-rw-r--r--tools/perf/util/auxtrace.h5
-rw-r--r--tools/perf/util/block-range.c2
-rw-r--r--tools/perf/util/block-range.h6
-rw-r--r--tools/perf/util/bpf-event.c263
-rw-r--r--tools/perf/util/bpf-event.h38
-rw-r--r--tools/perf/util/bpf-loader.c1
-rw-r--r--tools/perf/util/bpf-loader.h7
-rw-r--r--tools/perf/util/bpf_map.c72
-rw-r--r--tools/perf/util/bpf_map.h22
-rw-r--r--tools/perf/util/branch.h27
-rw-r--r--tools/perf/util/build-id.c14
-rw-r--r--tools/perf/util/build-id.h3
-rw-r--r--tools/perf/util/c++/Build4
-rw-r--r--tools/perf/util/callchain.c17
-rw-r--r--tools/perf/util/callchain.h21
-rw-r--r--tools/perf/util/color.c39
-rw-r--r--tools/perf/util/color.h1
-rw-r--r--tools/perf/util/color_config.c47
-rw-r--r--tools/perf/util/comm.c1
-rw-r--r--tools/perf/util/comm.h4
-rw-r--r--tools/perf/util/config.c1
-rw-r--r--tools/perf/util/cpu-set-sched.h50
-rw-r--r--tools/perf/util/cpumap.c12
-rw-r--r--tools/perf/util/cpumap.h1
-rw-r--r--tools/perf/util/cputopo.c277
-rw-r--r--tools/perf/util/cputopo.h33
-rw-r--r--tools/perf/util/cs-etm-decoder/Build2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c41
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h16
-rw-r--r--tools/perf/util/cs-etm.c832
-rw-r--r--tools/perf/util/cs-etm.h57
-rw-r--r--tools/perf/util/data-convert-bt.c4
-rw-r--r--tools/perf/util/data.c175
-rw-r--r--tools/perf/util/data.h16
-rw-r--r--tools/perf/util/db-export.c1
-rw-r--r--tools/perf/util/drv_configs.c78
-rw-r--r--tools/perf/util/drv_configs.h26
-rw-r--r--tools/perf/util/dso.c11
-rw-r--r--tools/perf/util/dso.h17
-rw-r--r--tools/perf/util/event.c43
-rw-r--r--tools/perf/util/event.h60
-rw-r--r--tools/perf/util/evlist.c6
-rw-r--r--tools/perf/util/evlist.h6
-rw-r--r--tools/perf/util/evsel.c27
-rw-r--r--tools/perf/util/evsel.h4
-rw-r--r--tools/perf/util/header.c289
-rw-r--r--tools/perf/util/hist.c220
-rw-r--r--tools/perf/util/hist.h19
-rw-r--r--tools/perf/util/intel-bts.c6
-rw-r--r--tools/perf/util/intel-pt-decoder/Build2
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c39
-rw-r--r--tools/perf/util/intel-pt.c23
-rw-r--r--tools/perf/util/intlist.h2
-rw-r--r--tools/perf/util/jitdump.c1
-rw-r--r--tools/perf/util/kvm-stat.h7
-rw-r--r--tools/perf/util/machine.c112
-rw-r--r--tools/perf/util/machine.h17
-rw-r--r--tools/perf/util/map.c14
-rw-r--r--tools/perf/util/map.h100
-rw-r--r--tools/perf/util/map_groups.h91
-rw-r--r--tools/perf/util/map_symbol.h22
-rw-r--r--tools/perf/util/metricgroup.c10
-rw-r--r--tools/perf/util/metricgroup.h3
-rw-r--r--tools/perf/util/mmap.c105
-rw-r--r--tools/perf/util/mmap.h3
-rw-r--r--tools/perf/util/parse-events.c2
-rw-r--r--tools/perf/util/parse-events.y4
-rw-r--r--tools/perf/util/pmu.c2
-rw-r--r--tools/perf/util/pmu.h5
-rw-r--r--tools/perf/util/probe-event.c6
-rw-r--r--tools/perf/util/probe-event.h5
-rw-r--r--tools/perf/util/probe-file.c1
-rw-r--r--tools/perf/util/rb_resort.h8
-rw-r--r--tools/perf/util/rblist.c28
-rw-r--r--tools/perf/util/rblist.h2
-rw-r--r--tools/perf/util/s390-cpumcf-kernel.h62
-rw-r--r--tools/perf/util/s390-cpumsf.c89
-rw-r--r--tools/perf/util/s390-sample-raw.c222
-rw-r--r--tools/perf/util/sample-raw.c18
-rw-r--r--tools/perf/util/sample-raw.h14
-rw-r--r--tools/perf/util/scripting-engines/Build4
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c17
-rw-r--r--tools/perf/util/session.c110
-rw-r--r--tools/perf/util/setup.py7
-rw-r--r--tools/perf/util/sort.c18
-rw-r--r--tools/perf/util/sort.h7
-rw-r--r--tools/perf/util/srccode.h13
-rw-r--r--tools/perf/util/srcline.c45
-rw-r--r--tools/perf/util/srcline.h13
-rw-r--r--tools/perf/util/stat-display.c1
-rw-r--r--tools/perf/util/stat-shadow.c2
-rw-r--r--tools/perf/util/strlist.h2
-rw-r--r--tools/perf/util/symbol-elf.c2
-rw-r--r--tools/perf/util/symbol-minimal.c1
-rw-r--r--tools/perf/util/symbol.c90
-rw-r--r--tools/perf/util/symbol.h102
-rw-r--r--tools/perf/util/symbol_conf.h73
-rw-r--r--tools/perf/util/symbol_fprintf.c3
-rw-r--r--tools/perf/util/thread-stack.c235
-rw-r--r--tools/perf/util/thread-stack.h3
-rw-r--r--tools/perf/util/thread.c1
-rw-r--r--tools/perf/util/thread.h8
-rw-r--r--tools/perf/util/tool.h5
-rw-r--r--tools/perf/util/unwind-libdw.c2
-rw-r--r--tools/perf/util/unwind-libunwind-local.c1
-rw-r--r--tools/perf/util/unwind-libunwind.c1
-rw-r--r--tools/perf/util/util.c82
-rw-r--r--tools/perf/util/util.h3
-rw-r--r--tools/perf/util/vdso.c1
-rw-r--r--tools/perf/util/zlib.c1
236 files changed, 9100 insertions, 2137 deletions
diff --git a/tools/perf/Build b/tools/perf/Build
index e5232d567611..5f392dbb88fc 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_
46CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" 46CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
47CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" 47CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
48 48
49libperf-y += util/ 49perf-y += util/
50libperf-y += arch/ 50perf-y += arch/
51libperf-y += ui/ 51perf-y += ui/
52libperf-y += scripts/ 52perf-y += scripts/
53libperf-$(CONFIG_TRACE) += trace/beauty/ 53perf-$(CONFIG_TRACE) += trace/beauty/
54 54
55gtk-y += ui/gtk/ 55gtk-y += ui/gtk/
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 4ac7775fbc11..86f3dcc15f83 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -120,6 +120,10 @@ Given a $HOME/.perfconfig like this:
120 children = true 120 children = true
121 group = true 121 group = true
122 122
123 [llvm]
124 dump-obj = true
125 clang-opt = -g
126
123You can hide source code of annotate feature setting the config to false with 127You can hide source code of annotate feature setting the config to false with
124 128
125 % perf config annotate.hide_src_code=true 129 % perf config annotate.hide_src_code=true
@@ -553,6 +557,33 @@ trace.*::
553 trace.show_zeros:: 557 trace.show_zeros::
554 Do not suppress syscall arguments that are equal to zero. 558 Do not suppress syscall arguments that are equal to zero.
555 559
560llvm.*::
561 llvm.clang-path::
562 Path to clang. If omit, search it from $PATH.
563
564 llvm.clang-bpf-cmd-template::
565 Cmdline template. Below lines show its default value. Environment
566 variable is used to pass options.
567 "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \
568 -Wno-unused-value -Wno-pointer-sign -working-directory \
569 $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -"
570
571 llvm.clang-opt::
572 Options passed to clang.
573
574 llvm.kbuild-dir::
575 kbuild directory. If not set, use /lib/modules/`uname -r`/build.
576 If set to "" deliberately, skip kernel header auto-detector.
577
578 llvm.kbuild-opts::
579 Options passed to 'make' when detecting kernel header options.
580
581 llvm.dump-obj::
582 Enable perf dump BPF object files compiled by LLVM.
583
584 llvm.opts::
585 Options passed to llc.
586
556SEE ALSO 587SEE ALSO
557-------- 588--------
558linkperf:perf[1] 589linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d232b13ea713..8f0c2be34848 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -88,6 +88,20 @@ OPTIONS
88 If you want to profile write accesses in [0x1000~1008), just set 88 If you want to profile write accesses in [0x1000~1008), just set
89 'mem:0x1000/8:w'. 89 'mem:0x1000/8:w'.
90 90
91 - a BPF source file (ending in .c) or a precompiled object file (ending
92 in .o) selects one or more BPF events.
93 The BPF program can attach to various perf events based on the ELF section
94 names.
95
96 When processing a '.c' file, perf searches an installed LLVM to compile it
97 into an object file first. Optional clang options can be passed via the
98 '--clang-opt' command line option, e.g.:
99
100 perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \
101 -e tests/bpf-script-example.c
102
103 Note: '--clang-opt' must be placed before '--event/-e'.
104
91 - a group of events surrounded by a pair of brace ("{event1,event2,...}"). 105 - a group of events surrounded by a pair of brace ("{event1,event2,...}").
92 Each event is separated by commas and the group should be quoted to 106 Each event is separated by commas and the group should be quoted to
93 prevent the shell interpretation. You also need to use --group on 107 prevent the shell interpretation. You also need to use --group on
@@ -440,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default:
440Asynchronous mode is supported only when linking Perf tool with libc library 454Asynchronous mode is supported only when linking Perf tool with libc library
441providing implementation for Posix AIO API. 455providing implementation for Posix AIO API.
442 456
457--affinity=mode::
458Set affinity mask of trace reading thread according to the policy defined by 'mode' value:
459 node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer
460 cpu - thread affinity mask is set to cpu of the processed mmap buffer
461
443--all-kernel:: 462--all-kernel::
444Configure all used events to run in kernel space. 463Configure all used events to run in kernel space.
445 464
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 9e4def08d569..2e19fd7ffe35 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -159,6 +159,12 @@ OPTIONS
159 the override, and the result of the above is that only S/W and H/W 159 the override, and the result of the above is that only S/W and H/W
160 events are displayed with the given fields. 160 events are displayed with the given fields.
161 161
162 It's possible tp add/remove fields only for specific event type:
163
164 -Fsw:-cpu,-period
165
166 removes cpu and period from software events.
167
162 For the 'wildcard' option if a user selected field is invalid for an 168 For the 'wildcard' option if a user selected field is invalid for an
163 event type, a message is displayed to the user that the option is 169 event type, a message is displayed to the user that the option is
164 ignored for that type. For example: 170 ignored for that type. For example:
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 631e687be4eb..fc6e43262c41 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -210,6 +210,14 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
210 may happen, for instance, when a thread gets migrated to a different CPU 210 may happen, for instance, when a thread gets migrated to a different CPU
211 while processing a syscall. 211 while processing a syscall.
212 212
213--map-dump::
214 Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls
215 living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this
216 dumps just boolean map values and integer keys, in time this will print in hex
217 by default and use BTF when available, as well as use functions to do pretty
218 printing using the existing 'perf trace' syscall arg beautifiers to map integer
219 arguments to strings (pid to comm, syscall id to syscall name, etc).
220
213 221
214PAGEFAULTS 222PAGEFAULTS
215---------- 223----------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index dfb218feaad9..593ef49b273c 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -43,11 +43,10 @@ struct perf_file_section {
43 43
44Flags section: 44Flags section:
45 45
46The header is followed by different optional headers, described by the bits set 46For each of the optional features a perf_file_section it placed after the data
47in flags. Only headers for which the bit is set are included. Each header 47section if the feature bit is set in the perf_header flags bitset. The
48consists of a perf_file_section located after the initial header. 48respective perf_file_section points to the data of the additional header and
49The respective perf_file_section points to the data of the additional 49defines its size.
50header and defines its size.
51 50
52Some headers consist of strings, which are defined like this: 51Some headers consist of strings, which are defined like this:
53 52
@@ -131,7 +130,7 @@ An uint64_t with the total memory in bytes.
131 130
132 HEADER_CMDLINE = 11, 131 HEADER_CMDLINE = 11,
133 132
134A perf_header_string with the perf command line used to collect the data. 133A perf_header_string_list with the perf arg-vector used to collect the data.
135 134
136 HEADER_EVENT_DESC = 12, 135 HEADER_EVENT_DESC = 12,
137 136
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index b441c88cafa1..0f11d5891301 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
109FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) 109FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
110FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) 110FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
111 111
112FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm
113FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64
114FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86
115FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64
116
117FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
118
112ifdef CSINCLUDES 119ifdef CSINCLUDES
113 LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) 120 LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
114endif 121endif
@@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
218FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) 225FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS)
219FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) 226FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS)
220 227
228FEATURE_CHECK_LDFLAGS-libaio = -lrt
229
221CFLAGS += -fno-omit-frame-pointer 230CFLAGS += -fno-omit-frame-pointer
222CFLAGS += -ggdb3 231CFLAGS += -ggdb3
223CFLAGS += -funwind-tables 232CFLAGS += -funwind-tables
@@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1)
386 $(call detected,CONFIG_SETNS) 395 $(call detected,CONFIG_SETNS)
387endif 396endif
388 397
389ifndef NO_CORESIGHT 398ifdef CORESIGHT
399 $(call feature_check,libopencsd)
390 ifeq ($(feature-libopencsd), 1) 400 ifeq ($(feature-libopencsd), 1)
391 CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) 401 CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
392 LDFLAGS += $(LIBOPENCSD_LDFLAGS) 402 LDFLAGS += $(LIBOPENCSD_LDFLAGS)
@@ -482,6 +492,7 @@ endif
482ifndef NO_LIBUNWIND 492ifndef NO_LIBUNWIND
483 have_libunwind := 493 have_libunwind :=
484 494
495 $(call feature_check,libunwind-x86)
485 ifeq ($(feature-libunwind-x86), 1) 496 ifeq ($(feature-libunwind-x86), 1)
486 $(call detected,CONFIG_LIBUNWIND_X86) 497 $(call detected,CONFIG_LIBUNWIND_X86)
487 CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT 498 CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
@@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND
490 have_libunwind = 1 501 have_libunwind = 1
491 endif 502 endif
492 503
504 $(call feature_check,libunwind-aarch64)
493 ifeq ($(feature-libunwind-aarch64), 1) 505 ifeq ($(feature-libunwind-aarch64), 1)
494 $(call detected,CONFIG_LIBUNWIND_AARCH64) 506 $(call detected,CONFIG_LIBUNWIND_AARCH64)
495 CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT 507 CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 0ee6795d82cc..01f7555fd933 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -102,7 +102,7 @@ include ../scripts/utilities.mak
102# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if 102# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
103# llvm-config is not in $PATH. 103# llvm-config is not in $PATH.
104# 104#
105# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. 105# Define CORESIGHT if you DO WANT support for CoreSight trace decoding.
106# 106#
107# Define NO_AIO if you do not want support of Posix AIO based trace 107# Define NO_AIO if you do not want support of Posix AIO based trace
108# streaming for record mode. Currently Posix AIO trace streaming is 108# streaming for record mode. Currently Posix AIO trace streaming is
@@ -344,9 +344,9 @@ endif
344 344
345export PERL_PATH 345export PERL_PATH
346 346
347LIB_FILE=$(OUTPUT)libperf.a 347LIBPERF_A=$(OUTPUT)libperf.a
348 348
349PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) 349PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
350ifndef NO_LIBBPF 350ifndef NO_LIBBPF
351 PERFLIBS += $(LIBBPF) 351 PERFLIBS += $(LIBBPF)
352endif 352endif
@@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
549 549
550PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o 550PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
551 551
552LIBPERF_IN := $(OUTPUT)libperf-in.o
553
552export JEVENTS 554export JEVENTS
553 555
554build := -f $(srctree)/tools/build/Makefile.build dir=. obj 556build := -f $(srctree)/tools/build/Makefile.build dir=. obj
@@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN)
565$(PMU_EVENTS_IN): $(JEVENTS) FORCE 567$(PMU_EVENTS_IN): $(JEVENTS) FORCE
566 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events 568 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
567 569
568$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) 570$(LIBPERF_IN): prepare FORCE
571 $(Q)$(MAKE) $(build)=libperf
572
573$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
569 $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ 574 $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
570 $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ 575 $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@
571 576
572$(GTK_IN): FORCE 577$(GTK_IN): FORCE
573 $(Q)$(MAKE) $(build)=gtk 578 $(Q)$(MAKE) $(build)=gtk
@@ -683,12 +688,7 @@ endif
683 688
684$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) 689$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
685 690
686LIBPERF_IN := $(OUTPUT)libperf-in.o 691$(LIBPERF_A): $(LIBPERF_IN)
687
688$(LIBPERF_IN): prepare FORCE
689 $(Q)$(MAKE) $(build)=libperf
690
691$(LIB_FILE): $(LIBPERF_IN)
692 $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) 692 $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
693 693
694LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' 694LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
@@ -863,8 +863,8 @@ ifndef NO_LIBPYTHON
863 $(call QUIET_INSTALL, python-scripts) \ 863 $(call QUIET_INSTALL, python-scripts) \
864 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ 864 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
865 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \ 865 $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
866 $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ 866 $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
867 $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ 867 $(INSTALL) scripts/python/*.py -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
868 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' 868 $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
869endif 869endif
870 $(call QUIET_INSTALL, perf_completion-script) \ 870 $(call QUIET_INSTALL, perf_completion-script) \
@@ -910,7 +910,7 @@ python-clean:
910 $(python-clean) 910 $(python-clean)
911 911
912clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean 912clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
913 $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) 913 $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
914 $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete 914 $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
915 $(Q)$(RM) $(OUTPUT).config-detected 915 $(Q)$(RM) $(OUTPUT).config-detected
916 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so 916 $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build
index d9b6af837c7d..688818844c11 100644
--- a/tools/perf/arch/Build
+++ b/tools/perf/arch/Build
@@ -1,2 +1,2 @@
1libperf-y += common.o 1perf-y += common.o
2libperf-y += $(SRCARCH)/ 2perf-y += $(SRCARCH)/
diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build
index 41bf61da476a..36222e64bbf7 100644
--- a/tools/perf/arch/arm/Build
+++ b/tools/perf/arch/arm/Build
@@ -1,2 +1,2 @@
1libperf-y += util/ 1perf-y += util/
2libperf-$(CONFIG_DWARF_UNWIND) += tests/ 2perf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build
index d9ae2733f9cc..bc8e97380c82 100644
--- a/tools/perf/arch/arm/tests/Build
+++ b/tools/perf/arch/arm/tests/Build
@@ -1,5 +1,5 @@
1libperf-y += regs_load.o 1perf-y += regs_load.o
2libperf-y += dwarf-unwind.o 2perf-y += dwarf-unwind.o
3libperf-y += vectors-page.o 3perf-y += vectors-page.o
4 4
5libperf-y += arch-tests.o 5perf-y += arch-tests.o
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
index 9a0242e74cfc..2c35e532bc9a 100644
--- a/tools/perf/arch/arm/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -3,6 +3,7 @@
3#include "perf_regs.h" 3#include "perf_regs.h"
4#include "thread.h" 4#include "thread.h"
5#include "map.h" 5#include "map.h"
6#include "map_groups.h"
6#include "event.h" 7#include "event.h"
7#include "debug.h" 8#include "debug.h"
8#include "tests/tests.h" 9#include "tests/tests.h"
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index e64c5f216448..296f0eac5e18 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -1,6 +1,6 @@
1libperf-$(CONFIG_DWARF) += dwarf-regs.o 1perf-$(CONFIG_DWARF) += dwarf-regs.o
2 2
3libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o 3perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
4libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 4perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
5 5
6libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o 6perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 2f595cd73da6..911426721170 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -5,6 +5,7 @@
5 */ 5 */
6 6
7#include <api/fs/fs.h> 7#include <api/fs/fs.h>
8#include <linux/bits.h>
8#include <linux/bitops.h> 9#include <linux/bitops.h>
9#include <linux/compiler.h> 10#include <linux/compiler.h>
10#include <linux/coresight-pmu.h> 11#include <linux/coresight-pmu.h>
@@ -22,12 +23,10 @@
22#include "../../util/thread_map.h" 23#include "../../util/thread_map.h"
23#include "../../util/cs-etm.h" 24#include "../../util/cs-etm.h"
24 25
26#include <errno.h>
25#include <stdlib.h> 27#include <stdlib.h>
26#include <sys/stat.h> 28#include <sys/stat.h>
27 29
28#define ENABLE_SINK_MAX 128
29#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
30
31struct cs_etm_recording { 30struct cs_etm_recording {
32 struct auxtrace_record itr; 31 struct auxtrace_record itr;
33 struct perf_pmu *cs_etm_pmu; 32 struct perf_pmu *cs_etm_pmu;
@@ -60,10 +59,48 @@ static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
60 return 0; 59 return 0;
61} 60}
62 61
62static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
63 struct perf_evsel *evsel)
64{
65 char msg[BUFSIZ], path[PATH_MAX], *sink;
66 struct perf_evsel_config_term *term;
67 int ret = -EINVAL;
68 u32 hash;
69
70 if (evsel->attr.config2 & GENMASK(31, 0))
71 return 0;
72
73 list_for_each_entry(term, &evsel->config_terms, list) {
74 if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
75 continue;
76
77 sink = term->val.drv_cfg;
78 snprintf(path, PATH_MAX, "sinks/%s", sink);
79
80 ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
81 if (ret != 1) {
82 pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n",
83 sink, perf_evsel__name(evsel), errno,
84 str_error_r(errno, msg, sizeof(msg)));
85 return ret;
86 }
87
88 evsel->attr.config2 |= hash;
89 return 0;
90 }
91
92 /*
93 * No sink was provided on the command line - for _now_ treat
94 * this as an error.
95 */
96 return ret;
97}
98
63static int cs_etm_recording_options(struct auxtrace_record *itr, 99static int cs_etm_recording_options(struct auxtrace_record *itr,
64 struct perf_evlist *evlist, 100 struct perf_evlist *evlist,
65 struct record_opts *opts) 101 struct record_opts *opts)
66{ 102{
103 int ret;
67 struct cs_etm_recording *ptr = 104 struct cs_etm_recording *ptr =
68 container_of(itr, struct cs_etm_recording, itr); 105 container_of(itr, struct cs_etm_recording, itr);
69 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; 106 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
@@ -92,6 +129,10 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
92 if (!cs_etm_evsel) 129 if (!cs_etm_evsel)
93 return 0; 130 return 0;
94 131
132 ret = cs_etm_set_sink_attr(cs_etm_pmu, cs_etm_evsel);
133 if (ret)
134 return ret;
135
95 if (opts->use_clockid) { 136 if (opts->use_clockid) {
96 pr_err("Cannot use clockid (-k option) with %s\n", 137 pr_err("Cannot use clockid (-k option) with %s\n",
97 CORESIGHT_ETM_PMU_NAME); 138 CORESIGHT_ETM_PMU_NAME);
@@ -598,54 +639,3 @@ struct auxtrace_record *cs_etm_record_init(int *err)
598out: 639out:
599 return NULL; 640 return NULL;
600} 641}
601
602static FILE *cs_device__open_file(const char *name)
603{
604 struct stat st;
605 char path[PATH_MAX];
606 const char *sysfs;
607
608 sysfs = sysfs__mountpoint();
609 if (!sysfs)
610 return NULL;
611
612 snprintf(path, PATH_MAX,
613 "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
614
615 if (stat(path, &st) < 0)
616 return NULL;
617
618 return fopen(path, "w");
619
620}
621
622static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
623{
624 va_list args;
625 FILE *file;
626 int ret = -EINVAL;
627
628 va_start(args, fmt);
629 file = cs_device__open_file(name);
630 if (file) {
631 ret = vfprintf(file, fmt, args);
632 fclose(file);
633 }
634 va_end(args);
635 return ret;
636}
637
638int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
639{
640 int ret;
641 char enable_sink[ENABLE_SINK_MAX];
642
643 snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
644 term->val.drv_cfg, "enable_sink");
645
646 ret = cs_device__print_file(enable_sink, "%d", 1);
647 if (ret < 0)
648 return ret;
649
650 return 0;
651}
diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h
index 1a12e64f5127..a3354bda4fe8 100644
--- a/tools/perf/arch/arm/util/cs-etm.h
+++ b/tools/perf/arch/arm/util/cs-etm.h
@@ -7,9 +7,6 @@
7#ifndef INCLUDE__PERF_CS_ETM_H__ 7#ifndef INCLUDE__PERF_CS_ETM_H__
8#define INCLUDE__PERF_CS_ETM_H__ 8#define INCLUDE__PERF_CS_ETM_H__
9 9
10#include "../../util/evsel.h"
11
12struct auxtrace_record *cs_etm_record_init(int *err); 10struct auxtrace_record *cs_etm_record_init(int *err);
13int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
14 11
15#endif 12#endif
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index e047571e6080..bbc297a7e2e3 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -7,8 +7,8 @@
7#include <string.h> 7#include <string.h>
8#include <linux/coresight-pmu.h> 8#include <linux/coresight-pmu.h>
9#include <linux/perf_event.h> 9#include <linux/perf_event.h>
10#include <linux/string.h>
10 11
11#include "cs-etm.h"
12#include "arm-spe.h" 12#include "arm-spe.h"
13#include "../../util/pmu.h" 13#include "../../util/pmu.h"
14 14
@@ -19,7 +19,6 @@ struct perf_event_attr
19 if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { 19 if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
20 /* add ETM default config here */ 20 /* add ETM default config here */
21 pmu->selectable = true; 21 pmu->selectable = true;
22 pmu->set_drv_config = cs_etm_set_drv_config;
23#if defined(__aarch64__) 22#if defined(__aarch64__)
24 } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { 23 } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
25 return arm_spe_pmu_default_config(pmu); 24 return arm_spe_pmu_default_config(pmu);
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
index 41bf61da476a..36222e64bbf7 100644
--- a/tools/perf/arch/arm64/Build
+++ b/tools/perf/arch/arm64/Build
@@ -1,2 +1,2 @@
1libperf-y += util/ 1perf-y += util/
2libperf-$(CONFIG_DWARF_UNWIND) += tests/ 2perf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
index 883c57ff0c08..41707fea74b3 100644
--- a/tools/perf/arch/arm64/tests/Build
+++ b/tools/perf/arch/arm64/tests/Build
@@ -1,4 +1,4 @@
1libperf-y += regs_load.o 1perf-y += regs_load.o
2libperf-y += dwarf-unwind.o 2perf-y += dwarf-unwind.o
3 3
4libperf-y += arch-tests.o 4perf-y += arch-tests.o
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
index 5522ce384723..a6a407fa1b8b 100644
--- a/tools/perf/arch/arm64/tests/dwarf-unwind.c
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -3,6 +3,7 @@
3#include "perf_regs.h" 3#include "perf_regs.h"
4#include "thread.h" 4#include "thread.h"
5#include "map.h" 5#include "map.h"
6#include "map_groups.h"
6#include "event.h" 7#include "event.h"
7#include "debug.h" 8#include "debug.h"
8#include "tests/tests.h" 9#include "tests/tests.h"
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 68f8a8eb3ad0..3cde540d2fcf 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,10 +1,10 @@
1libperf-y += header.o 1perf-y += header.o
2libperf-y += sym-handling.o 2perf-y += sym-handling.o
3libperf-$(CONFIG_DWARF) += dwarf-regs.o 3perf-$(CONFIG_DWARF) += dwarf-regs.o
4libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o 4perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
5libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 5perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
6 6
7libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ 7perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
8 ../../arm/util/auxtrace.o \ 8 ../../arm/util/auxtrace.o \
9 ../../arm/util/cs-etm.o \ 9 ../../arm/util/cs-etm.o \
10 arm-spe.o 10 arm-spe.o
diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build
index 54afe4a467e7..e4e5f33c84d8 100644
--- a/tools/perf/arch/nds32/Build
+++ b/tools/perf/arch/nds32/Build
@@ -1 +1 @@
libperf-y += util/ perf-y += util/
diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build
index ca623bbf993c..d0bc205fe49a 100644
--- a/tools/perf/arch/nds32/util/Build
+++ b/tools/perf/arch/nds32/util/Build
@@ -1 +1 @@
libperf-y += header.o perf-y += header.o
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
index db52fa22d3a1..a7dd46a5b678 100644
--- a/tools/perf/arch/powerpc/Build
+++ b/tools/perf/arch/powerpc/Build
@@ -1,2 +1,2 @@
1libperf-y += util/ 1perf-y += util/
2libperf-y += tests/ 2perf-y += tests/
diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build
index d827ef384b33..3526ab0af9f9 100644
--- a/tools/perf/arch/powerpc/tests/Build
+++ b/tools/perf/arch/powerpc/tests/Build
@@ -1,4 +1,4 @@
1libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o 1perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
2libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o 2perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
3 3
4libperf-y += arch-tests.o 4perf-y += arch-tests.o
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
index 5f39efef0856..5c178e4a1995 100644
--- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -3,6 +3,7 @@
3#include "perf_regs.h" 3#include "perf_regs.h"
4#include "thread.h" 4#include "thread.h"
5#include "map.h" 5#include "map.h"
6#include "map_groups.h"
6#include "event.h" 7#include "event.h"
7#include "debug.h" 8#include "debug.h"
8#include "tests/tests.h" 9#include "tests/tests.h"
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index ba98bd006488..7cf0b8803097 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,11 +1,11 @@
1libperf-y += header.o 1perf-y += header.o
2libperf-y += sym-handling.o 2perf-y += sym-handling.o
3libperf-y += kvm-stat.o 3perf-y += kvm-stat.o
4libperf-y += perf_regs.o 4perf-y += perf_regs.o
5libperf-y += mem-events.o 5perf-y += mem-events.o
6 6
7libperf-$(CONFIG_DWARF) += dwarf-regs.o 7perf-$(CONFIG_DWARF) += dwarf-regs.o
8libperf-$(CONFIG_DWARF) += skip-callchain-idx.o 8perf-$(CONFIG_DWARF) += skip-callchain-idx.o
9 9
10libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o 10perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
11libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 11perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
index 596ad6aedaac..f9db341c47b6 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -3,6 +3,8 @@
3#include "util/kvm-stat.h" 3#include "util/kvm-stat.h"
4#include "util/parse-events.h" 4#include "util/parse-events.h"
5#include "util/debug.h" 5#include "util/debug.h"
6#include "util/evsel.h"
7#include "util/evlist.h"
6 8
7#include "book3s_hv_exits.h" 9#include "book3s_hv_exits.h"
8#include "book3s_hcalls.h" 10#include "book3s_hcalls.h"
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index 7c6eeb4633fe..2918bb16c892 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -16,6 +16,9 @@
16#include "util/thread.h" 16#include "util/thread.h"
17#include "util/callchain.h" 17#include "util/callchain.h"
18#include "util/debug.h" 18#include "util/debug.h"
19#include "util/dso.h"
20#include "util/map.h"
21#include "util/symbol.h"
19 22
20/* 23/*
21 * When saving the callchain on Power, the kernel conservatively saves 24 * When saving the callchain on Power, the kernel conservatively saves
diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build
index 54afe4a467e7..e4e5f33c84d8 100644
--- a/tools/perf/arch/s390/Build
+++ b/tools/perf/arch/s390/Build
@@ -1 +1 @@
libperf-y += util/ perf-y += util/
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index 4a233683c684..22797f043b84 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -1,9 +1,9 @@
1libperf-y += header.o 1perf-y += header.o
2libperf-y += kvm-stat.o 2perf-y += kvm-stat.o
3 3
4libperf-$(CONFIG_DWARF) += dwarf-regs.o 4perf-$(CONFIG_DWARF) += dwarf-regs.o
5libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 5perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
6 6
7libperf-y += machine.o 7perf-y += machine.o
8 8
9libperf-$(CONFIG_AUXTRACE) += auxtrace.o 9perf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index aaabab5e2830..7e3961a4b292 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -11,6 +11,7 @@
11 11
12#include <errno.h> 12#include <errno.h>
13#include "../../util/kvm-stat.h" 13#include "../../util/kvm-stat.h"
14#include "../../util/evsel.h"
14#include <asm/sie.h> 15#include <asm/sie.h>
15 16
16define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); 17define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build
index 54afe4a467e7..e4e5f33c84d8 100644
--- a/tools/perf/arch/sh/Build
+++ b/tools/perf/arch/sh/Build
@@ -1 +1 @@
libperf-y += util/ perf-y += util/
diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build
index 954e287bbb89..e813e618954b 100644
--- a/tools/perf/arch/sh/util/Build
+++ b/tools/perf/arch/sh/util/Build
@@ -1 +1 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build
index 54afe4a467e7..e4e5f33c84d8 100644
--- a/tools/perf/arch/sparc/Build
+++ b/tools/perf/arch/sparc/Build
@@ -1 +1 @@
libperf-y += util/ perf-y += util/
diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build
index 954e287bbb89..e813e618954b 100644
--- a/tools/perf/arch/sparc/util/Build
+++ b/tools/perf/arch/sparc/util/Build
@@ -1 +1 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build
index db52fa22d3a1..a7dd46a5b678 100644
--- a/tools/perf/arch/x86/Build
+++ b/tools/perf/arch/x86/Build
@@ -1,2 +1,2 @@
1libperf-y += util/ 1perf-y += util/
2libperf-y += tests/ 2perf-y += tests/
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 586849ff83a0..3d83d0c6982d 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -1,8 +1,8 @@
1libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o 1perf-$(CONFIG_DWARF_UNWIND) += regs_load.o
2libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o 2perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
3 3
4libperf-y += arch-tests.o 4perf-y += arch-tests.o
5libperf-y += rdpmc.o 5perf-y += rdpmc.o
6libperf-y += perf-time-to-tsc.o 6perf-y += perf-time-to-tsc.o
7libperf-$(CONFIG_AUXTRACE) += insn-x86.o 7perf-$(CONFIG_AUXTRACE) += insn-x86.o
8libperf-$(CONFIG_X86_64) += bp-modify.o 8perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index 7879df34569a..6ad0a1cedb13 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -3,6 +3,7 @@
3#include "perf_regs.h" 3#include "perf_regs.h"
4#include "thread.h" 4#include "thread.h"
5#include "map.h" 5#include "map.h"
6#include "map_groups.h"
6#include "event.h" 7#include "event.h"
7#include "debug.h" 8#include "debug.h"
8#include "tests/tests.h" 9#include "tests/tests.h"
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 844b8f335532..7aab0be5fc5f 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,18 +1,18 @@
1libperf-y += header.o 1perf-y += header.o
2libperf-y += tsc.o 2perf-y += tsc.o
3libperf-y += pmu.o 3perf-y += pmu.o
4libperf-y += kvm-stat.o 4perf-y += kvm-stat.o
5libperf-y += perf_regs.o 5perf-y += perf_regs.o
6libperf-y += group.o 6perf-y += group.o
7libperf-y += machine.o 7perf-y += machine.o
8libperf-y += event.o 8perf-y += event.o
9 9
10libperf-$(CONFIG_DWARF) += dwarf-regs.o 10perf-$(CONFIG_DWARF) += dwarf-regs.o
11libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o 11perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
12 12
13libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o 13perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
14libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 14perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
15 15
16libperf-$(CONFIG_AUXTRACE) += auxtrace.o 16perf-$(CONFIG_AUXTRACE) += auxtrace.o
17libperf-$(CONFIG_AUXTRACE) += intel-pt.o 17perf-$(CONFIG_AUXTRACE) += intel-pt.o
18libperf-$(CONFIG_AUXTRACE) += intel-bts.o 18perf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 081353d7b095..865a9762f22e 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <errno.h> 2#include <errno.h>
3#include "../../util/kvm-stat.h" 3#include "../../util/kvm-stat.h"
4#include "../../util/evsel.h"
4#include <asm/svm.h> 5#include <asm/svm.h>
5#include <asm/vmx.h> 6#include <asm/vmx.h>
6#include <asm/kvm.h> 7#include <asm/kvm.h>
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
index 54afe4a467e7..e4e5f33c84d8 100644
--- a/tools/perf/arch/xtensa/Build
+++ b/tools/perf/arch/xtensa/Build
@@ -1 +1 @@
libperf-y += util/ perf-y += util/
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
index 954e287bbb89..e813e618954b 100644
--- a/tools/perf/arch/xtensa/util/Build
+++ b/tools/perf/arch/xtensa/util/Build
@@ -1 +1 @@
libperf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 93d679eaf1f4..67f9d9ffacfb 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -27,6 +27,7 @@
27#include "util/thread.h" 27#include "util/thread.h"
28#include "util/sort.h" 28#include "util/sort.h"
29#include "util/hist.h" 29#include "util/hist.h"
30#include "util/map.h"
30#include "util/session.h" 31#include "util/session.h"
31#include "util/tool.h" 32#include "util/tool.h"
32#include "util/data.h" 33#include "util/data.h"
@@ -227,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
227 * the DSO? 228 * the DSO?
228 */ 229 */
229 if (al->sym != NULL) { 230 if (al->sym != NULL) {
230 rb_erase(&al->sym->rb_node, 231 rb_erase_cached(&al->sym->rb_node,
231 &al->map->dso->symbols); 232 &al->map->dso->symbols);
232 symbol__delete(al->sym); 233 symbol__delete(al->sym);
233 dso__reset_find_symbol_cache(al->map->dso); 234 dso__reset_find_symbol_cache(al->map->dso);
@@ -305,7 +306,7 @@ static void hists__find_annotations(struct hists *hists,
305 struct perf_evsel *evsel, 306 struct perf_evsel *evsel,
306 struct perf_annotate *ann) 307 struct perf_annotate *ann)
307{ 308{
308 struct rb_node *nd = rb_first(&hists->entries), *next; 309 struct rb_node *nd = rb_first_cached(&hists->entries), *next;
309 int key = K_RIGHT; 310 int key = K_RIGHT;
310 311
311 while (nd) { 312 while (nd) {
@@ -440,7 +441,7 @@ static int __cmd_annotate(struct perf_annotate *ann)
440 } 441 }
441 442
442 if (total_nr_samples == 0) { 443 if (total_nr_samples == 0) {
443 ui__error("The %s file has no samples!\n", session->data->file.path); 444 ui__error("The %s data has no samples!\n", session->data->path);
444 goto out; 445 goto out;
445 } 446 }
446 447
@@ -577,7 +578,7 @@ int cmd_annotate(int argc, const char **argv)
577 if (quiet) 578 if (quiet)
578 perf_quiet_option(); 579 perf_quiet_option();
579 580
580 data.file.path = input_name; 581 data.path = input_name;
581 582
582 annotate.session = perf_session__new(&data, false, &annotate.tool); 583 annotate.session = perf_session__new(&data, false, &annotate.tool);
583 if (annotate.session == NULL) 584 if (annotate.session == NULL)
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 115110a4796a..10457b10e568 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -416,8 +416,8 @@ int cmd_buildid_cache(int argc, const char **argv)
416 nsi = nsinfo__new(ns_id); 416 nsi = nsinfo__new(ns_id);
417 417
418 if (missing_filename) { 418 if (missing_filename) {
419 data.file.path = missing_filename; 419 data.path = missing_filename;
420 data.force = force; 420 data.force = force;
421 421
422 session = perf_session__new(&data, false, NULL); 422 session = perf_session__new(&data, false, NULL);
423 if (session == NULL) 423 if (session == NULL)
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index 78abbe8d9d5f..f403e19488b5 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -52,11 +52,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
52{ 52{
53 struct perf_session *session; 53 struct perf_session *session;
54 struct perf_data data = { 54 struct perf_data data = {
55 .file = { 55 .path = input_name,
56 .path = input_name, 56 .mode = PERF_DATA_MODE_READ,
57 }, 57 .force = force,
58 .mode = PERF_DATA_MODE_READ,
59 .force = force,
60 }; 58 };
61 59
62 symbol__elf_init(); 60 symbol__elf_init();
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index d340d2e42776..4272763a5e96 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -33,6 +33,7 @@
33#include "ui/browsers/hists.h" 33#include "ui/browsers/hists.h"
34#include "thread.h" 34#include "thread.h"
35#include "mem2node.h" 35#include "mem2node.h"
36#include "symbol.h"
36 37
37struct c2c_hists { 38struct c2c_hists {
38 struct hists hists; 39 struct hists hists;
@@ -1969,7 +1970,7 @@ static void calc_width(struct c2c_hist_entry *c2c_he)
1969 set_nodestr(c2c_he); 1970 set_nodestr(c2c_he);
1970} 1971}
1971 1972
1972static int filter_cb(struct hist_entry *he) 1973static int filter_cb(struct hist_entry *he, void *arg __maybe_unused)
1973{ 1974{
1974 struct c2c_hist_entry *c2c_he; 1975 struct c2c_hist_entry *c2c_he;
1975 1976
@@ -1986,7 +1987,7 @@ static int filter_cb(struct hist_entry *he)
1986 return 0; 1987 return 0;
1987} 1988}
1988 1989
1989static int resort_cl_cb(struct hist_entry *he) 1990static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused)
1990{ 1991{
1991 struct c2c_hist_entry *c2c_he; 1992 struct c2c_hist_entry *c2c_he;
1992 struct c2c_hists *c2c_hists; 1993 struct c2c_hists *c2c_hists;
@@ -2073,7 +2074,7 @@ static int setup_nodes(struct perf_session *session)
2073 2074
2074#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) 2075#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
2075 2076
2076static int resort_hitm_cb(struct hist_entry *he) 2077static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused)
2077{ 2078{
2078 struct c2c_hist_entry *c2c_he; 2079 struct c2c_hist_entry *c2c_he;
2079 c2c_he = container_of(he, struct c2c_hist_entry, he); 2080 c2c_he = container_of(he, struct c2c_hist_entry, he);
@@ -2088,14 +2089,14 @@ static int resort_hitm_cb(struct hist_entry *he)
2088 2089
2089static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) 2090static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb)
2090{ 2091{
2091 struct rb_node *next = rb_first(&hists->entries); 2092 struct rb_node *next = rb_first_cached(&hists->entries);
2092 int ret = 0; 2093 int ret = 0;
2093 2094
2094 while (next) { 2095 while (next) {
2095 struct hist_entry *he; 2096 struct hist_entry *he;
2096 2097
2097 he = rb_entry(next, struct hist_entry, rb_node); 2098 he = rb_entry(next, struct hist_entry, rb_node);
2098 ret = cb(he); 2099 ret = cb(he, NULL);
2099 if (ret) 2100 if (ret)
2100 break; 2101 break;
2101 next = rb_next(&he->rb_node); 2102 next = rb_next(&he->rb_node);
@@ -2215,7 +2216,7 @@ static void print_pareto(FILE *out)
2215 if (WARN_ONCE(ret, "failed to setup sort entries\n")) 2216 if (WARN_ONCE(ret, "failed to setup sort entries\n"))
2216 return; 2217 return;
2217 2218
2218 nd = rb_first(&c2c.hists.hists.entries); 2219 nd = rb_first_cached(&c2c.hists.hists.entries);
2219 2220
2220 for (; nd; nd = rb_next(nd)) { 2221 for (; nd; nd = rb_next(nd)) {
2221 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 2222 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
@@ -2283,7 +2284,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
2283static void c2c_browser__update_nr_entries(struct hist_browser *hb) 2284static void c2c_browser__update_nr_entries(struct hist_browser *hb)
2284{ 2285{
2285 u64 nr_entries = 0; 2286 u64 nr_entries = 0;
2286 struct rb_node *nd = rb_first(&hb->hists->entries); 2287 struct rb_node *nd = rb_first_cached(&hb->hists->entries);
2287 2288
2288 while (nd) { 2289 while (nd) {
2289 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); 2290 struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
@@ -2343,7 +2344,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
2343 struct c2c_cacheline_browser *cl_browser; 2344 struct c2c_cacheline_browser *cl_browser;
2344 struct hist_browser *browser; 2345 struct hist_browser *browser;
2345 int key = -1; 2346 int key = -1;
2346 const char help[] = 2347 static const char help[] =
2347 " ENTER Toggle callchains (if present) \n" 2348 " ENTER Toggle callchains (if present) \n"
2348 " n Toggle Node details info \n" 2349 " n Toggle Node details info \n"
2349 " s Toggle full length of symbol and source line columns \n" 2350 " s Toggle full length of symbol and source line columns \n"
@@ -2424,7 +2425,7 @@ static int perf_c2c__hists_browse(struct hists *hists)
2424{ 2425{
2425 struct hist_browser *browser; 2426 struct hist_browser *browser;
2426 int key = -1; 2427 int key = -1;
2427 const char help[] = 2428 static const char help[] =
2428 " d Display cacheline details \n" 2429 " d Display cacheline details \n"
2429 " ENTER Toggle callchains (if present) \n" 2430 " ENTER Toggle callchains (if present) \n"
2430 " q Quit \n"; 2431 " q Quit \n";
@@ -2749,8 +2750,8 @@ static int perf_c2c__report(int argc, const char **argv)
2749 if (!input_name || !strlen(input_name)) 2750 if (!input_name || !strlen(input_name))
2750 input_name = "perf.data"; 2751 input_name = "perf.data";
2751 2752
2752 data.file.path = input_name; 2753 data.path = input_name;
2753 data.force = symbol_conf.force; 2754 data.force = symbol_conf.force;
2754 2755
2755 err = setup_display(display); 2756 err = setup_display(display);
2756 if (err) 2757 if (err)
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 39db2ee32d48..58fe0e88215c 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -429,7 +429,7 @@ get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt)
429 429
430static void hists__baseline_only(struct hists *hists) 430static void hists__baseline_only(struct hists *hists)
431{ 431{
432 struct rb_root *root; 432 struct rb_root_cached *root;
433 struct rb_node *next; 433 struct rb_node *next;
434 434
435 if (hists__has(hists, need_collapse)) 435 if (hists__has(hists, need_collapse))
@@ -437,13 +437,13 @@ static void hists__baseline_only(struct hists *hists)
437 else 437 else
438 root = hists->entries_in; 438 root = hists->entries_in;
439 439
440 next = rb_first(root); 440 next = rb_first_cached(root);
441 while (next != NULL) { 441 while (next != NULL) {
442 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in); 442 struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
443 443
444 next = rb_next(&he->rb_node_in); 444 next = rb_next(&he->rb_node_in);
445 if (!hist_entry__next_pair(he)) { 445 if (!hist_entry__next_pair(he)) {
446 rb_erase(&he->rb_node_in, root); 446 rb_erase_cached(&he->rb_node_in, root);
447 hist_entry__delete(he); 447 hist_entry__delete(he);
448 } 448 }
449 } 449 }
@@ -451,7 +451,7 @@ static void hists__baseline_only(struct hists *hists)
451 451
452static void hists__precompute(struct hists *hists) 452static void hists__precompute(struct hists *hists)
453{ 453{
454 struct rb_root *root; 454 struct rb_root_cached *root;
455 struct rb_node *next; 455 struct rb_node *next;
456 456
457 if (hists__has(hists, need_collapse)) 457 if (hists__has(hists, need_collapse))
@@ -459,7 +459,7 @@ static void hists__precompute(struct hists *hists)
459 else 459 else
460 root = hists->entries_in; 460 root = hists->entries_in;
461 461
462 next = rb_first(root); 462 next = rb_first_cached(root);
463 while (next != NULL) { 463 while (next != NULL) {
464 struct hist_entry *he, *pair; 464 struct hist_entry *he, *pair;
465 struct data__file *d; 465 struct data__file *d;
@@ -708,7 +708,7 @@ static void data__fprintf(void)
708 708
709 data__for_each_file(i, d) 709 data__for_each_file(i, d)
710 fprintf(stdout, "# [%d] %s %s\n", 710 fprintf(stdout, "# [%d] %s %s\n",
711 d->idx, d->data.file.path, 711 d->idx, d->data.path,
712 !d->idx ? "(Baseline)" : ""); 712 !d->idx ? "(Baseline)" : "");
713 713
714 fprintf(stdout, "#\n"); 714 fprintf(stdout, "#\n");
@@ -779,14 +779,14 @@ static int __cmd_diff(void)
779 data__for_each_file(i, d) { 779 data__for_each_file(i, d) {
780 d->session = perf_session__new(&d->data, false, &tool); 780 d->session = perf_session__new(&d->data, false, &tool);
781 if (!d->session) { 781 if (!d->session) {
782 pr_err("Failed to open %s\n", d->data.file.path); 782 pr_err("Failed to open %s\n", d->data.path);
783 ret = -1; 783 ret = -1;
784 goto out_delete; 784 goto out_delete;
785 } 785 }
786 786
787 ret = perf_session__process_events(d->session); 787 ret = perf_session__process_events(d->session);
788 if (ret) { 788 if (ret) {
789 pr_err("Failed to process %s\n", d->data.file.path); 789 pr_err("Failed to process %s\n", d->data.path);
790 goto out_delete; 790 goto out_delete;
791 } 791 }
792 792
@@ -1289,9 +1289,9 @@ static int data_init(int argc, const char **argv)
1289 data__for_each_file(i, d) { 1289 data__for_each_file(i, d) {
1290 struct perf_data *data = &d->data; 1290 struct perf_data *data = &d->data;
1291 1291
1292 data->file.path = use_default ? defaults[i] : argv[i]; 1292 data->path = use_default ? defaults[i] : argv[i];
1293 data->mode = PERF_DATA_MODE_READ, 1293 data->mode = PERF_DATA_MODE_READ,
1294 data->force = force, 1294 data->force = force,
1295 1295
1296 d->idx = i; 1296 d->idx = i;
1297 } 1297 }
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index e06e822ce634..6e4f63b0da4a 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -23,9 +23,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
23 struct perf_session *session; 23 struct perf_session *session;
24 struct perf_evsel *pos; 24 struct perf_evsel *pos;
25 struct perf_data data = { 25 struct perf_data data = {
26 .file = { 26 .path = file_name,
27 .path = file_name,
28 },
29 .mode = PERF_DATA_MODE_READ, 27 .mode = PERF_DATA_MODE_READ,
30 .force = details->force, 28 .force = details->force,
31 }; 29 };
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index eda41673c4f3..24086b7f1b14 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -12,6 +12,7 @@
12#include "util/color.h" 12#include "util/color.h"
13#include "util/evlist.h" 13#include "util/evlist.h"
14#include "util/evsel.h" 14#include "util/evsel.h"
15#include "util/map.h"
15#include "util/session.h" 16#include "util/session.h"
16#include "util/tool.h" 17#include "util/tool.h"
17#include "util/debug.h" 18#include "util/debug.h"
@@ -19,6 +20,7 @@
19#include "util/data.h" 20#include "util/data.h"
20#include "util/auxtrace.h" 21#include "util/auxtrace.h"
21#include "util/jit.h" 22#include "util/jit.h"
23#include "util/symbol.h"
22#include "util/thread.h" 24#include "util/thread.h"
23 25
24#include <subcmd/parse-options.h> 26#include <subcmd/parse-options.h>
@@ -768,10 +770,8 @@ int cmd_inject(int argc, const char **argv)
768 .input_name = "-", 770 .input_name = "-",
769 .samples = LIST_HEAD_INIT(inject.samples), 771 .samples = LIST_HEAD_INIT(inject.samples),
770 .output = { 772 .output = {
771 .file = { 773 .path = "-",
772 .path = "-", 774 .mode = PERF_DATA_MODE_WRITE,
773 },
774 .mode = PERF_DATA_MODE_WRITE,
775 }, 775 },
776 }; 776 };
777 struct perf_data data = { 777 struct perf_data data = {
@@ -784,7 +784,7 @@ int cmd_inject(int argc, const char **argv)
784 "Inject build-ids into the output stream"), 784 "Inject build-ids into the output stream"),
785 OPT_STRING('i', "input", &inject.input_name, "file", 785 OPT_STRING('i', "input", &inject.input_name, "file",
786 "input file name"), 786 "input file name"),
787 OPT_STRING('o', "output", &inject.output.file.path, "file", 787 OPT_STRING('o', "output", &inject.output.path, "file",
788 "output file name"), 788 "output file name"),
789 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, 789 OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
790 "Merge sched-stat and sched-switch for getting events " 790 "Merge sched-stat and sched-switch for getting events "
@@ -832,7 +832,7 @@ int cmd_inject(int argc, const char **argv)
832 832
833 inject.tool.ordered_events = inject.sched_stat; 833 inject.tool.ordered_events = inject.sched_stat;
834 834
835 data.file.path = inject.input_name; 835 data.path = inject.input_name;
836 inject.session = perf_session__new(&data, true, &inject.tool); 836 inject.session = perf_session__new(&data, true, &inject.tool);
837 if (inject.session == NULL) 837 if (inject.session == NULL)
838 return -1; 838 return -1;
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
index 90d1a2305b72..bc7a2bc7aed7 100644
--- a/tools/perf/builtin-kallsyms.c
+++ b/tools/perf/builtin-kallsyms.c
@@ -13,6 +13,7 @@
13#include <subcmd/parse-options.h> 13#include <subcmd/parse-options.h>
14#include "debug.h" 14#include "debug.h"
15#include "machine.h" 15#include "machine.h"
16#include "map.h"
16#include "symbol.h" 17#include "symbol.h"
17 18
18static int __cmd_kallsyms(int argc, const char **argv) 19static int __cmd_kallsyms(int argc, const char **argv)
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index b63bca4b0c2a..fa520f4b8095 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -6,6 +6,7 @@
6#include "util/evsel.h" 6#include "util/evsel.h"
7#include "util/util.h" 7#include "util/util.h"
8#include "util/config.h" 8#include "util/config.h"
9#include "util/map.h"
9#include "util/symbol.h" 10#include "util/symbol.h"
10#include "util/thread.h" 11#include "util/thread.h"
11#include "util/header.h" 12#include "util/header.h"
@@ -334,7 +335,7 @@ static int build_alloc_func_list(void)
334 struct alloc_func *func; 335 struct alloc_func *func;
335 struct machine *machine = &kmem_session->machines.host; 336 struct machine *machine = &kmem_session->machines.host;
336 regex_t alloc_func_regex; 337 regex_t alloc_func_regex;
337 const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; 338 static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
338 339
339 ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); 340 ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
340 if (ret) { 341 if (ret) {
@@ -1924,7 +1925,7 @@ int cmd_kmem(int argc, const char **argv)
1924 NULL 1925 NULL
1925 }; 1926 };
1926 struct perf_session *session; 1927 struct perf_session *session;
1927 const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n"; 1928 static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n";
1928 int ret = perf_config(kmem_config, NULL); 1929 int ret = perf_config(kmem_config, NULL);
1929 1930
1930 if (ret) 1931 if (ret)
@@ -1948,7 +1949,7 @@ int cmd_kmem(int argc, const char **argv)
1948 return __cmd_record(argc, argv); 1949 return __cmd_record(argc, argv);
1949 } 1950 }
1950 1951
1951 data.file.path = input_name; 1952 data.path = input_name;
1952 1953
1953 kmem_session = session = perf_session__new(&data, false, &perf_kmem); 1954 kmem_session = session = perf_session__new(&data, false, &perf_kmem);
1954 if (session == NULL) 1955 if (session == NULL)
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 3d4cbc4e87c7..dbb6f737a3e2 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1080,11 +1080,9 @@ static int read_events(struct perf_kvm_stat *kvm)
1080 .ordered_events = true, 1080 .ordered_events = true,
1081 }; 1081 };
1082 struct perf_data file = { 1082 struct perf_data file = {
1083 .file = { 1083 .path = kvm->file_name,
1084 .path = kvm->file_name, 1084 .mode = PERF_DATA_MODE_READ,
1085 }, 1085 .force = kvm->force,
1086 .mode = PERF_DATA_MODE_READ,
1087 .force = kvm->force,
1088 }; 1086 };
1089 1087
1090 kvm->tool = eops; 1088 kvm->tool = eops;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index ead221e49f00..c9f98d00c0e9 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv)
82 else if (strcmp(argv[i], "sdt") == 0) 82 else if (strcmp(argv[i], "sdt") == 0)
83 print_sdt_events(NULL, NULL, raw_dump); 83 print_sdt_events(NULL, NULL, raw_dump);
84 else if (strcmp(argv[i], "metric") == 0) 84 else if (strcmp(argv[i], "metric") == 0)
85 metricgroup__print(true, false, NULL, raw_dump); 85 metricgroup__print(true, false, NULL, raw_dump, details_flag);
86 else if (strcmp(argv[i], "metricgroup") == 0) 86 else if (strcmp(argv[i], "metricgroup") == 0)
87 metricgroup__print(false, true, NULL, raw_dump); 87 metricgroup__print(false, true, NULL, raw_dump, details_flag);
88 else if ((sep = strchr(argv[i], ':')) != NULL) { 88 else if ((sep = strchr(argv[i], ':')) != NULL) {
89 int sep_idx; 89 int sep_idx;
90 90
@@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv)
102 s[sep_idx] = '\0'; 102 s[sep_idx] = '\0';
103 print_tracepoint_events(s, s + sep_idx + 1, raw_dump); 103 print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
104 print_sdt_events(s, s + sep_idx + 1, raw_dump); 104 print_sdt_events(s, s + sep_idx + 1, raw_dump);
105 metricgroup__print(true, true, s, raw_dump); 105 metricgroup__print(true, true, s, raw_dump, details_flag);
106 free(s); 106 free(s);
107 } else { 107 } else {
108 if (asprintf(&s, "*%s*", argv[i]) < 0) { 108 if (asprintf(&s, "*%s*", argv[i]) < 0) {
@@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv)
119 details_flag); 119 details_flag);
120 print_tracepoint_events(NULL, s, raw_dump); 120 print_tracepoint_events(NULL, s, raw_dump);
121 print_sdt_events(NULL, s, raw_dump); 121 print_sdt_events(NULL, s, raw_dump);
122 metricgroup__print(true, true, NULL, raw_dump); 122 metricgroup__print(true, true, NULL, raw_dump, details_flag);
123 free(s); 123 free(s);
124 } 124 }
125 } 125 }
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6e0189df2b3b..b9810a8d350a 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -866,11 +866,9 @@ static int __cmd_report(bool display_info)
866 .ordered_events = true, 866 .ordered_events = true,
867 }; 867 };
868 struct perf_data data = { 868 struct perf_data data = {
869 .file = { 869 .path = input_name,
870 .path = input_name, 870 .mode = PERF_DATA_MODE_READ,
871 }, 871 .force = force,
872 .mode = PERF_DATA_MODE_READ,
873 .force = force,
874 }; 872 };
875 873
876 session = perf_session__new(&data, false, &eops); 874 session = perf_session__new(&data, false, &eops);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 57393e94d156..f45c8b502f63 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -13,6 +13,7 @@
13#include "util/data.h" 13#include "util/data.h"
14#include "util/mem-events.h" 14#include "util/mem-events.h"
15#include "util/debug.h" 15#include "util/debug.h"
16#include "util/map.h"
16#include "util/symbol.h" 17#include "util/symbol.h"
17 18
18#define MEM_OPERATION_LOAD 0x1 19#define MEM_OPERATION_LOAD 0x1
@@ -238,11 +239,9 @@ static int process_sample_event(struct perf_tool *tool,
238static int report_raw_events(struct perf_mem *mem) 239static int report_raw_events(struct perf_mem *mem)
239{ 240{
240 struct perf_data data = { 241 struct perf_data data = {
241 .file = { 242 .path = input_name,
242 .path = input_name, 243 .mode = PERF_DATA_MODE_READ,
243 }, 244 .force = mem->force,
244 .mode = PERF_DATA_MODE_READ,
245 .force = mem->force,
246 }; 245 };
247 int ret; 246 int ret;
248 struct perf_session *session = perf_session__new(&data, false, 247 struct perf_session *session = perf_session__new(&data, false,
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 99de91698de1..46d3c2deeb40 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -32,6 +32,7 @@
32 32
33#include "perf.h" 33#include "perf.h"
34#include "builtin.h" 34#include "builtin.h"
35#include "namespaces.h"
35#include "util/util.h" 36#include "util/util.h"
36#include "util/strlist.h" 37#include "util/strlist.h"
37#include "util/strfilter.h" 38#include "util/strfilter.h"
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 882285fb9f64..f3f7f3100336 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -23,7 +23,6 @@
23#include "util/evlist.h" 23#include "util/evlist.h"
24#include "util/evsel.h" 24#include "util/evsel.h"
25#include "util/debug.h" 25#include "util/debug.h"
26#include "util/drv_configs.h"
27#include "util/session.h" 26#include "util/session.h"
28#include "util/tool.h" 27#include "util/tool.h"
29#include "util/symbol.h" 28#include "util/symbol.h"
@@ -39,8 +38,10 @@
39#include "util/bpf-loader.h" 38#include "util/bpf-loader.h"
40#include "util/trigger.h" 39#include "util/trigger.h"
41#include "util/perf-hooks.h" 40#include "util/perf-hooks.h"
41#include "util/cpu-set-sched.h"
42#include "util/time-utils.h" 42#include "util/time-utils.h"
43#include "util/units.h" 43#include "util/units.h"
44#include "util/bpf-event.h"
44#include "asm/bug.h" 45#include "asm/bug.h"
45 46
46#include <errno.h> 47#include <errno.h>
@@ -81,12 +82,17 @@ struct record {
81 bool timestamp_boundary; 82 bool timestamp_boundary;
82 struct switch_output switch_output; 83 struct switch_output switch_output;
83 unsigned long long samples; 84 unsigned long long samples;
85 cpu_set_t affinity_mask;
84}; 86};
85 87
86static volatile int auxtrace_record__snapshot_started; 88static volatile int auxtrace_record__snapshot_started;
87static DEFINE_TRIGGER(auxtrace_snapshot_trigger); 89static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
88static DEFINE_TRIGGER(switch_output_trigger); 90static DEFINE_TRIGGER(switch_output_trigger);
89 91
92static const char *affinity_tags[PERF_AFFINITY_MAX] = {
93 "SYS", "NODE", "CPU"
94};
95
90static bool switch_output_signal(struct record *rec) 96static bool switch_output_signal(struct record *rec)
91{ 97{
92 return rec->switch_output.signal && 98 return rec->switch_output.signal &&
@@ -531,9 +537,13 @@ static int record__mmap_evlist(struct record *rec,
531 struct record_opts *opts = &rec->opts; 537 struct record_opts *opts = &rec->opts;
532 char msg[512]; 538 char msg[512];
533 539
540 if (opts->affinity != PERF_AFFINITY_SYS)
541 cpu__setup_cpunode_map();
542
534 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, 543 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
535 opts->auxtrace_mmap_pages, 544 opts->auxtrace_mmap_pages,
536 opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) { 545 opts->auxtrace_snapshot_mode,
546 opts->nr_cblocks, opts->affinity) < 0) {
537 if (errno == EPERM) { 547 if (errno == EPERM) {
538 pr_err("Permission error mapping pages.\n" 548 pr_err("Permission error mapping pages.\n"
539 "Consider increasing " 549 "Consider increasing "
@@ -566,7 +576,6 @@ static int record__open(struct record *rec)
566 struct perf_evlist *evlist = rec->evlist; 576 struct perf_evlist *evlist = rec->evlist;
567 struct perf_session *session = rec->session; 577 struct perf_session *session = rec->session;
568 struct record_opts *opts = &rec->opts; 578 struct record_opts *opts = &rec->opts;
569 struct perf_evsel_config_term *err_term;
570 int rc = 0; 579 int rc = 0;
571 580
572 /* 581 /*
@@ -619,14 +628,6 @@ try_again:
619 goto out; 628 goto out;
620 } 629 }
621 630
622 if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
623 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
624 err_term->val.drv_cfg, perf_evsel__name(pos), errno,
625 str_error_r(errno, msg, sizeof(msg)));
626 rc = -1;
627 goto out;
628 }
629
630 rc = record__mmap(rec); 631 rc = record__mmap(rec);
631 if (rc) 632 if (rc)
632 goto out; 633 goto out;
@@ -659,10 +660,9 @@ static int process_sample_event(struct perf_tool *tool,
659 660
660static int process_buildids(struct record *rec) 661static int process_buildids(struct record *rec)
661{ 662{
662 struct perf_data *data = &rec->data;
663 struct perf_session *session = rec->session; 663 struct perf_session *session = rec->session;
664 664
665 if (data->size == 0) 665 if (perf_data__size(&rec->data) == 0)
666 return 0; 666 return 0;
667 667
668 /* 668 /*
@@ -722,6 +722,16 @@ static struct perf_event_header finished_round_event = {
722 .type = PERF_RECORD_FINISHED_ROUND, 722 .type = PERF_RECORD_FINISHED_ROUND,
723}; 723};
724 724
725static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
726{
727 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
728 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
729 CPU_ZERO(&rec->affinity_mask);
730 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
731 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
732 }
733}
734
725static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 735static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
726 bool overwrite) 736 bool overwrite)
727{ 737{
@@ -749,6 +759,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
749 struct perf_mmap *map = &maps[i]; 759 struct perf_mmap *map = &maps[i];
750 760
751 if (map->base) { 761 if (map->base) {
762 record__adjust_affinity(rec, map);
752 if (!record__aio_enabled(rec)) { 763 if (!record__aio_enabled(rec)) {
753 if (perf_mmap__push(map, rec, record__pushfn) != 0) { 764 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
754 rc = -1; 765 rc = -1;
@@ -839,7 +850,7 @@ record__finish_output(struct record *rec)
839 return; 850 return;
840 851
841 rec->session->header.data_size += rec->bytes_written; 852 rec->session->header.data_size += rec->bytes_written;
842 data->size = lseek(perf_data__fd(data), 0, SEEK_CUR); 853 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
843 854
844 if (!rec->no_buildid) { 855 if (!rec->no_buildid) {
845 process_buildids(rec); 856 process_buildids(rec);
@@ -907,7 +918,7 @@ record__switch_output(struct record *rec, bool at_exit)
907 918
908 if (!quiet) 919 if (!quiet)
909 fprintf(stderr, "[ perf record: Dump %s.%s ]\n", 920 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
910 data->file.path, timestamp); 921 data->path, timestamp);
911 922
912 /* Output tracking events */ 923 /* Output tracking events */
913 if (!at_exit) { 924 if (!at_exit) {
@@ -1082,6 +1093,11 @@ static int record__synthesize(struct record *rec, bool tail)
1082 return err; 1093 return err;
1083 } 1094 }
1084 1095
1096 err = perf_event__synthesize_bpf_events(tool, process_synthesized_event,
1097 machine, opts);
1098 if (err < 0)
1099 pr_warning("Couldn't synthesize bpf events.\n");
1100
1085 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, 1101 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1086 process_synthesized_event, opts->sample_address, 1102 process_synthesized_event, opts->sample_address,
1087 1); 1103 1);
@@ -1445,7 +1461,7 @@ out_child:
1445 1461
1446 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1462 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
1447 perf_data__size(data) / 1024.0 / 1024.0, 1463 perf_data__size(data) / 1024.0 / 1024.0,
1448 data->file.path, postfix, samples); 1464 data->path, postfix, samples);
1449 } 1465 }
1450 1466
1451out_delete_session: 1467out_delete_session:
@@ -1639,6 +1655,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
1639 return -1; 1655 return -1;
1640} 1656}
1641 1657
1658static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1659{
1660 struct record_opts *opts = (struct record_opts *)opt->value;
1661
1662 if (unset || !str)
1663 return 0;
1664
1665 if (!strcasecmp(str, "node"))
1666 opts->affinity = PERF_AFFINITY_NODE;
1667 else if (!strcasecmp(str, "cpu"))
1668 opts->affinity = PERF_AFFINITY_CPU;
1669
1670 return 0;
1671}
1672
1642static int record__parse_mmap_pages(const struct option *opt, 1673static int record__parse_mmap_pages(const struct option *opt,
1643 const char *str, 1674 const char *str,
1644 int unset __maybe_unused) 1675 int unset __maybe_unused)
@@ -1831,7 +1862,7 @@ static struct option __record_options[] = {
1831 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", 1862 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
1832 "list of cpus to monitor"), 1863 "list of cpus to monitor"),
1833 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), 1864 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
1834 OPT_STRING('o', "output", &record.data.file.path, "file", 1865 OPT_STRING('o', "output", &record.data.path, "file",
1835 "output file name"), 1866 "output file name"),
1836 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, 1867 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1837 &record.opts.no_inherit_set, 1868 &record.opts.no_inherit_set,
@@ -1839,6 +1870,7 @@ static struct option __record_options[] = {
1839 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, 1870 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1840 "synthesize non-sample events at the end of output"), 1871 "synthesize non-sample events at the end of output"),
1841 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), 1872 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
1873 OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"),
1842 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, 1874 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1843 "Fail if the specified frequency can't be used"), 1875 "Fail if the specified frequency can't be used"),
1844 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", 1876 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
@@ -1946,6 +1978,9 @@ static struct option __record_options[] = {
1946 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", 1978 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
1947 record__aio_parse), 1979 record__aio_parse),
1948#endif 1980#endif
1981 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
1982 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
1983 record__parse_affinity),
1949 OPT_END() 1984 OPT_END()
1950}; 1985};
1951 1986
@@ -1980,6 +2015,9 @@ int cmd_record(int argc, const char **argv)
1980# undef REASON 2015# undef REASON
1981#endif 2016#endif
1982 2017
2018 CPU_ZERO(&rec->affinity_mask);
2019 rec->opts.affinity = PERF_AFFINITY_SYS;
2020
1983 rec->evlist = perf_evlist__new(); 2021 rec->evlist = perf_evlist__new();
1984 if (rec->evlist == NULL) 2022 if (rec->evlist == NULL)
1985 return -ENOMEM; 2023 return -ENOMEM;
@@ -2143,6 +2181,8 @@ int cmd_record(int argc, const char **argv)
2143 if (verbose > 0) 2181 if (verbose > 0)
2144 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); 2182 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2145 2183
2184 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2185
2146 err = __cmd_record(&record, argc, argv); 2186 err = __cmd_record(&record, argc, argv);
2147out: 2187out:
2148 perf_evlist__delete(rec->evlist); 2188 perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4958095be4fc..1532ebde6c4b 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -16,6 +16,7 @@
16#include <linux/list.h> 16#include <linux/list.h>
17#include <linux/rbtree.h> 17#include <linux/rbtree.h>
18#include <linux/err.h> 18#include <linux/err.h>
19#include "util/map.h"
19#include "util/symbol.h" 20#include "util/symbol.h"
20#include "util/callchain.h" 21#include "util/callchain.h"
21#include "util/values.h" 22#include "util/values.h"
@@ -615,6 +616,21 @@ static int report__collapse_hists(struct report *rep)
615 return ret; 616 return ret;
616} 617}
617 618
619static int hists__resort_cb(struct hist_entry *he, void *arg)
620{
621 struct report *rep = arg;
622 struct symbol *sym = he->ms.sym;
623
624 if (rep->symbol_ipc && sym && !sym->annotate2) {
625 struct perf_evsel *evsel = hists_to_evsel(he->hists);
626
627 symbol__annotate2(sym, he->ms.map, evsel,
628 &annotation__default_options, NULL);
629 }
630
631 return 0;
632}
633
618static void report__output_resort(struct report *rep) 634static void report__output_resort(struct report *rep)
619{ 635{
620 struct ui_progress prog; 636 struct ui_progress prog;
@@ -622,8 +638,10 @@ static void report__output_resort(struct report *rep)
622 638
623 ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); 639 ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
624 640
625 evlist__for_each_entry(rep->session->evlist, pos) 641 evlist__for_each_entry(rep->session->evlist, pos) {
626 perf_evsel__output_resort(pos, &prog); 642 perf_evsel__output_resort_cb(pos, &prog,
643 hists__resort_cb, rep);
644 }
627 645
628 ui_progress__finish(); 646 ui_progress__finish();
629} 647}
@@ -753,7 +771,8 @@ static int tasks_print(struct report *rep, FILE *fp)
753 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 771 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
754 struct threads *threads = &machine->threads[i]; 772 struct threads *threads = &machine->threads[i];
755 773
756 for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { 774 for (nd = rb_first_cached(&threads->entries); nd;
775 nd = rb_next(nd)) {
757 task = tasks + itask++; 776 task = tasks + itask++;
758 777
759 task->thread = rb_entry(nd, struct thread, rb_node); 778 task->thread = rb_entry(nd, struct thread, rb_node);
@@ -880,7 +899,7 @@ static int __cmd_report(struct report *rep)
880 rep->nr_entries += evsel__hists(pos)->nr_entries; 899 rep->nr_entries += evsel__hists(pos)->nr_entries;
881 900
882 if (rep->nr_entries == 0) { 901 if (rep->nr_entries == 0) {
883 ui__error("The %s file has no samples!\n", data->file.path); 902 ui__error("The %s data has no samples!\n", data->path);
884 return 0; 903 return 0;
885 } 904 }
886 905
@@ -956,9 +975,9 @@ int cmd_report(int argc, const char **argv)
956 int branch_mode = -1; 975 int branch_mode = -1;
957 bool branch_call_mode = false; 976 bool branch_call_mode = false;
958#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" 977#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
959 const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" 978 static const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
960 CALLCHAIN_REPORT_HELP 979 CALLCHAIN_REPORT_HELP
961 "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; 980 "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
962 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; 981 char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
963 const char * const report_usage[] = { 982 const char * const report_usage[] = {
964 "perf report [<options>]", 983 "perf report [<options>]",
@@ -1188,8 +1207,8 @@ int cmd_report(int argc, const char **argv)
1188 input_name = "perf.data"; 1207 input_name = "perf.data";
1189 } 1208 }
1190 1209
1191 data.file.path = input_name; 1210 data.path = input_name;
1192 data.force = symbol_conf.force; 1211 data.force = symbol_conf.force;
1193 1212
1194repeat: 1213repeat:
1195 session = perf_session__new(&data, false, &report.tool); 1214 session = perf_session__new(&data, false, &report.tool);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index cbf39dab19c1..275f2d92a7bf 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -213,7 +213,7 @@ struct perf_sched {
213 u64 all_runtime; 213 u64 all_runtime;
214 u64 all_count; 214 u64 all_count;
215 u64 cpu_last_switched[MAX_CPUS]; 215 u64 cpu_last_switched[MAX_CPUS];
216 struct rb_root atom_root, sorted_atom_root, merged_atom_root; 216 struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root;
217 struct list_head sort_list, cmp_pid; 217 struct list_head sort_list, cmp_pid;
218 bool force; 218 bool force;
219 bool skip_merge; 219 bool skip_merge;
@@ -271,7 +271,7 @@ struct evsel_runtime {
271struct idle_thread_runtime { 271struct idle_thread_runtime {
272 struct thread_runtime tr; 272 struct thread_runtime tr;
273 struct thread *last_thread; 273 struct thread *last_thread;
274 struct rb_root sorted_root; 274 struct rb_root_cached sorted_root;
275 struct callchain_root callchain; 275 struct callchain_root callchain;
276 struct callchain_cursor cursor; 276 struct callchain_cursor cursor;
277}; 277};
@@ -950,10 +950,10 @@ thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *
950} 950}
951 951
952static struct work_atoms * 952static struct work_atoms *
953thread_atoms_search(struct rb_root *root, struct thread *thread, 953thread_atoms_search(struct rb_root_cached *root, struct thread *thread,
954 struct list_head *sort_list) 954 struct list_head *sort_list)
955{ 955{
956 struct rb_node *node = root->rb_node; 956 struct rb_node *node = root->rb_root.rb_node;
957 struct work_atoms key = { .thread = thread }; 957 struct work_atoms key = { .thread = thread };
958 958
959 while (node) { 959 while (node) {
@@ -976,10 +976,11 @@ thread_atoms_search(struct rb_root *root, struct thread *thread,
976} 976}
977 977
978static void 978static void
979__thread_latency_insert(struct rb_root *root, struct work_atoms *data, 979__thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data,
980 struct list_head *sort_list) 980 struct list_head *sort_list)
981{ 981{
982 struct rb_node **new = &(root->rb_node), *parent = NULL; 982 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
983 bool leftmost = true;
983 984
984 while (*new) { 985 while (*new) {
985 struct work_atoms *this; 986 struct work_atoms *this;
@@ -992,12 +993,14 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data,
992 993
993 if (cmp > 0) 994 if (cmp > 0)
994 new = &((*new)->rb_left); 995 new = &((*new)->rb_left);
995 else 996 else {
996 new = &((*new)->rb_right); 997 new = &((*new)->rb_right);
998 leftmost = false;
999 }
997 } 1000 }
998 1001
999 rb_link_node(&data->node, parent, new); 1002 rb_link_node(&data->node, parent, new);
1000 rb_insert_color(&data->node, root); 1003 rb_insert_color_cached(&data->node, root, leftmost);
1001} 1004}
1002 1005
1003static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) 1006static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
@@ -1447,15 +1450,15 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
1447static void perf_sched__sort_lat(struct perf_sched *sched) 1450static void perf_sched__sort_lat(struct perf_sched *sched)
1448{ 1451{
1449 struct rb_node *node; 1452 struct rb_node *node;
1450 struct rb_root *root = &sched->atom_root; 1453 struct rb_root_cached *root = &sched->atom_root;
1451again: 1454again:
1452 for (;;) { 1455 for (;;) {
1453 struct work_atoms *data; 1456 struct work_atoms *data;
1454 node = rb_first(root); 1457 node = rb_first_cached(root);
1455 if (!node) 1458 if (!node)
1456 break; 1459 break;
1457 1460
1458 rb_erase(node, root); 1461 rb_erase_cached(node, root);
1459 data = rb_entry(node, struct work_atoms, node); 1462 data = rb_entry(node, struct work_atoms, node);
1460 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); 1463 __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
1461 } 1464 }
@@ -1782,11 +1785,9 @@ static int perf_sched__read_events(struct perf_sched *sched)
1782 }; 1785 };
1783 struct perf_session *session; 1786 struct perf_session *session;
1784 struct perf_data data = { 1787 struct perf_data data = {
1785 .file = { 1788 .path = input_name,
1786 .path = input_name, 1789 .mode = PERF_DATA_MODE_READ,
1787 }, 1790 .force = sched->force,
1788 .mode = PERF_DATA_MODE_READ,
1789 .force = sched->force,
1790 }; 1791 };
1791 int rc = -1; 1792 int rc = -1;
1792 1793
@@ -2762,12 +2763,12 @@ static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
2762 return ret; 2763 return ret;
2763} 2764}
2764 2765
2765static size_t timehist_print_idlehist_callchain(struct rb_root *root) 2766static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root)
2766{ 2767{
2767 size_t ret = 0; 2768 size_t ret = 0;
2768 FILE *fp = stdout; 2769 FILE *fp = stdout;
2769 struct callchain_node *chain; 2770 struct callchain_node *chain;
2770 struct rb_node *rb_node = rb_first(root); 2771 struct rb_node *rb_node = rb_first_cached(root);
2771 2772
2772 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); 2773 printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
2773 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, 2774 printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
@@ -2868,7 +2869,7 @@ static void timehist_print_summary(struct perf_sched *sched,
2868 if (itr == NULL) 2869 if (itr == NULL)
2869 continue; 2870 continue;
2870 2871
2871 callchain_param.sort(&itr->sorted_root, &itr->callchain, 2872 callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain,
2872 0, &callchain_param); 2873 0, &callchain_param);
2873 2874
2874 printf(" CPU %2d:", i); 2875 printf(" CPU %2d:", i);
@@ -2955,11 +2956,9 @@ static int perf_sched__timehist(struct perf_sched *sched)
2955 { "sched:sched_migrate_task", timehist_migrate_task_event, }, 2956 { "sched:sched_migrate_task", timehist_migrate_task_event, },
2956 }; 2957 };
2957 struct perf_data data = { 2958 struct perf_data data = {
2958 .file = { 2959 .path = input_name,
2959 .path = input_name, 2960 .mode = PERF_DATA_MODE_READ,
2960 }, 2961 .force = sched->force,
2961 .mode = PERF_DATA_MODE_READ,
2962 .force = sched->force,
2963 }; 2962 };
2964 2963
2965 struct perf_session *session; 2964 struct perf_session *session;
@@ -3074,11 +3073,12 @@ static void print_bad_events(struct perf_sched *sched)
3074 } 3073 }
3075} 3074}
3076 3075
3077static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) 3076static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data)
3078{ 3077{
3079 struct rb_node **new = &(root->rb_node), *parent = NULL; 3078 struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL;
3080 struct work_atoms *this; 3079 struct work_atoms *this;
3081 const char *comm = thread__comm_str(data->thread), *this_comm; 3080 const char *comm = thread__comm_str(data->thread), *this_comm;
3081 bool leftmost = true;
3082 3082
3083 while (*new) { 3083 while (*new) {
3084 int cmp; 3084 int cmp;
@@ -3092,6 +3092,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
3092 new = &((*new)->rb_left); 3092 new = &((*new)->rb_left);
3093 } else if (cmp < 0) { 3093 } else if (cmp < 0) {
3094 new = &((*new)->rb_right); 3094 new = &((*new)->rb_right);
3095 leftmost = false;
3095 } else { 3096 } else {
3096 this->num_merged++; 3097 this->num_merged++;
3097 this->total_runtime += data->total_runtime; 3098 this->total_runtime += data->total_runtime;
@@ -3109,7 +3110,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
3109 3110
3110 data->num_merged++; 3111 data->num_merged++;
3111 rb_link_node(&data->node, parent, new); 3112 rb_link_node(&data->node, parent, new);
3112 rb_insert_color(&data->node, root); 3113 rb_insert_color_cached(&data->node, root, leftmost);
3113} 3114}
3114 3115
3115static void perf_sched__merge_lat(struct perf_sched *sched) 3116static void perf_sched__merge_lat(struct perf_sched *sched)
@@ -3120,8 +3121,8 @@ static void perf_sched__merge_lat(struct perf_sched *sched)
3120 if (sched->skip_merge) 3121 if (sched->skip_merge)
3121 return; 3122 return;
3122 3123
3123 while ((node = rb_first(&sched->atom_root))) { 3124 while ((node = rb_first_cached(&sched->atom_root))) {
3124 rb_erase(node, &sched->atom_root); 3125 rb_erase_cached(node, &sched->atom_root);
3125 data = rb_entry(node, struct work_atoms, node); 3126 data = rb_entry(node, struct work_atoms, node);
3126 __merge_work_atoms(&sched->merged_atom_root, data); 3127 __merge_work_atoms(&sched->merged_atom_root, data);
3127 } 3128 }
@@ -3143,7 +3144,7 @@ static int perf_sched__lat(struct perf_sched *sched)
3143 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); 3144 printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
3144 printf(" -----------------------------------------------------------------------------------------------------------------\n"); 3145 printf(" -----------------------------------------------------------------------------------------------------------------\n");
3145 3146
3146 next = rb_first(&sched->sorted_atom_root); 3147 next = rb_first_cached(&sched->sorted_atom_root);
3147 3148
3148 while (next) { 3149 while (next) {
3149 struct work_atoms *work_list; 3150 struct work_atoms *work_list;
@@ -3336,7 +3337,7 @@ static int __cmd_record(int argc, const char **argv)
3336 3337
3337int cmd_sched(int argc, const char **argv) 3338int cmd_sched(int argc, const char **argv)
3338{ 3339{
3339 const char default_sort_order[] = "avg, max, switch, runtime"; 3340 static const char default_sort_order[] = "avg, max, switch, runtime";
3340 struct perf_sched sched = { 3341 struct perf_sched sched = {
3341 .tool = { 3342 .tool = {
3342 .sample = perf_sched__process_tracepoint_sample, 3343 .sample = perf_sched__process_tracepoint_sample,
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ac221f137ed2..2d8cb1d1682c 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -10,6 +10,7 @@
10#include "util/perf_regs.h" 10#include "util/perf_regs.h"
11#include "util/session.h" 11#include "util/session.h"
12#include "util/tool.h" 12#include "util/tool.h"
13#include "util/map.h"
13#include "util/symbol.h" 14#include "util/symbol.h"
14#include "util/thread.h" 15#include "util/thread.h"
15#include "util/trace-event.h" 16#include "util/trace-event.h"
@@ -148,6 +149,7 @@ static struct {
148 unsigned int print_ip_opts; 149 unsigned int print_ip_opts;
149 u64 fields; 150 u64 fields;
150 u64 invalid_fields; 151 u64 invalid_fields;
152 u64 user_set_fields;
151} output[OUTPUT_TYPE_MAX] = { 153} output[OUTPUT_TYPE_MAX] = {
152 154
153 [PERF_TYPE_HARDWARE] = { 155 [PERF_TYPE_HARDWARE] = {
@@ -344,7 +346,7 @@ static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
344 if (attr->sample_type & sample_type) 346 if (attr->sample_type & sample_type)
345 return 0; 347 return 0;
346 348
347 if (output[type].user_set) { 349 if (output[type].user_set_fields & field) {
348 if (allow_user_set) 350 if (allow_user_set)
349 return 0; 351 return 0;
350 evname = perf_evsel__name(evsel); 352 evname = perf_evsel__name(evsel);
@@ -2559,6 +2561,10 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
2559 pr_warning("Overriding previous field request for %s events.\n", 2561 pr_warning("Overriding previous field request for %s events.\n",
2560 event_type(type)); 2562 event_type(type));
2561 2563
2564 /* Don't override defaults for +- */
2565 if (strchr(tok, '+') || strchr(tok, '-'))
2566 goto parse;
2567
2562 output[type].fields = 0; 2568 output[type].fields = 0;
2563 output[type].user_set = true; 2569 output[type].user_set = true;
2564 output[type].wildcard_set = false; 2570 output[type].wildcard_set = false;
@@ -2627,10 +2633,13 @@ parse:
2627 pr_warning("\'%s\' not valid for %s events. Ignoring.\n", 2633 pr_warning("\'%s\' not valid for %s events. Ignoring.\n",
2628 all_output_options[i].str, event_type(j)); 2634 all_output_options[i].str, event_type(j));
2629 } else { 2635 } else {
2630 if (change == REMOVE) 2636 if (change == REMOVE) {
2631 output[j].fields &= ~all_output_options[i].field; 2637 output[j].fields &= ~all_output_options[i].field;
2632 else 2638 output[j].user_set_fields &= ~all_output_options[i].field;
2639 } else {
2633 output[j].fields |= all_output_options[i].field; 2640 output[j].fields |= all_output_options[i].field;
2641 output[j].user_set_fields |= all_output_options[i].field;
2642 }
2634 output[j].user_set = true; 2643 output[j].user_set = true;
2635 output[j].wildcard_set = true; 2644 output[j].wildcard_set = true;
2636 } 2645 }
@@ -2643,6 +2652,10 @@ parse:
2643 rc = -EINVAL; 2652 rc = -EINVAL;
2644 goto out; 2653 goto out;
2645 } 2654 }
2655 if (change == REMOVE)
2656 output[type].fields &= ~all_output_options[i].field;
2657 else
2658 output[type].fields |= all_output_options[i].field;
2646 output[type].user_set = true; 2659 output[type].user_set = true;
2647 output[type].wildcard_set = true; 2660 output[type].wildcard_set = true;
2648 } 2661 }
@@ -2942,10 +2955,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array)
2942 DIR *scripts_dir, *lang_dir; 2955 DIR *scripts_dir, *lang_dir;
2943 struct perf_session *session; 2956 struct perf_session *session;
2944 struct perf_data data = { 2957 struct perf_data data = {
2945 .file = { 2958 .path = input_name,
2946 .path = input_name, 2959 .mode = PERF_DATA_MODE_READ,
2947 },
2948 .mode = PERF_DATA_MODE_READ,
2949 }; 2960 };
2950 char *temp; 2961 char *temp;
2951 int i = 0; 2962 int i = 0;
@@ -3418,8 +3429,8 @@ int cmd_script(int argc, const char **argv)
3418 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, 3429 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
3419 PARSE_OPT_STOP_AT_NON_OPTION); 3430 PARSE_OPT_STOP_AT_NON_OPTION);
3420 3431
3421 data.file.path = input_name; 3432 data.path = input_name;
3422 data.force = symbol_conf.force; 3433 data.force = symbol_conf.force;
3423 3434
3424 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { 3435 if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
3425 rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); 3436 rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
@@ -3645,7 +3656,7 @@ int cmd_script(int argc, const char **argv)
3645 goto out_delete; 3656 goto out_delete;
3646 } 3657 }
3647 3658
3648 input = open(data.file.path, O_RDONLY); /* input_name */ 3659 input = open(data.path, O_RDONLY); /* input_name */
3649 if (input < 0) { 3660 if (input < 0) {
3650 err = -errno; 3661 err = -errno;
3651 perror("failed to open file"); 3662 perror("failed to open file");
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 63a3afc7f32b..7b8f09b0b8bf 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -52,7 +52,6 @@
52#include "util/evlist.h" 52#include "util/evlist.h"
53#include "util/evsel.h" 53#include "util/evsel.h"
54#include "util/debug.h" 54#include "util/debug.h"
55#include "util/drv_configs.h"
56#include "util/color.h" 55#include "util/color.h"
57#include "util/stat.h" 56#include "util/stat.h"
58#include "util/header.h" 57#include "util/header.h"
@@ -83,7 +82,6 @@
83#include <unistd.h> 82#include <unistd.h>
84#include <sys/time.h> 83#include <sys/time.h>
85#include <sys/resource.h> 84#include <sys/resource.h>
86#include <sys/wait.h>
87 85
88#include "sane_ctype.h" 86#include "sane_ctype.h"
89 87
@@ -418,7 +416,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
418 int status = 0; 416 int status = 0;
419 const bool forks = (argc > 0); 417 const bool forks = (argc > 0);
420 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; 418 bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
421 struct perf_evsel_config_term *err_term;
422 419
423 if (interval) { 420 if (interval) {
424 ts.tv_sec = interval / USEC_PER_MSEC; 421 ts.tv_sec = interval / USEC_PER_MSEC;
@@ -515,13 +512,6 @@ try_again:
515 return -1; 512 return -1;
516 } 513 }
517 514
518 if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
519 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
520 err_term->val.drv_cfg, perf_evsel__name(counter), errno,
521 str_error_r(errno, msg, sizeof(msg)));
522 return -1;
523 }
524
525 if (STAT_RECORD) { 515 if (STAT_RECORD) {
526 int err, fd = perf_data__fd(&perf_stat.data); 516 int err, fd = perf_data__fd(&perf_stat.data);
527 517
@@ -1332,7 +1322,7 @@ static int __cmd_record(int argc, const char **argv)
1332 PARSE_OPT_STOP_AT_NON_OPTION); 1322 PARSE_OPT_STOP_AT_NON_OPTION);
1333 1323
1334 if (output_name) 1324 if (output_name)
1335 data->file.path = output_name; 1325 data->path = output_name;
1336 1326
1337 if (stat_config.run_count != 1 || forever) { 1327 if (stat_config.run_count != 1 || forever) {
1338 pr_err("Cannot use -r option with perf stat record.\n"); 1328 pr_err("Cannot use -r option with perf stat record.\n");
@@ -1533,8 +1523,8 @@ static int __cmd_report(int argc, const char **argv)
1533 input_name = "perf.data"; 1523 input_name = "perf.data";
1534 } 1524 }
1535 1525
1536 perf_stat.data.file.path = input_name; 1526 perf_stat.data.path = input_name;
1537 perf_stat.data.mode = PERF_DATA_MODE_READ; 1527 perf_stat.data.mode = PERF_DATA_MODE_READ;
1538 1528
1539 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); 1529 session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
1540 if (session == NULL) 1530 if (session == NULL)
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 775b99833e51..9b98687a27b9 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1602,11 +1602,9 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name)
1602 { "syscalls:sys_exit_select", process_exit_poll }, 1602 { "syscalls:sys_exit_select", process_exit_poll },
1603 }; 1603 };
1604 struct perf_data data = { 1604 struct perf_data data = {
1605 .file = { 1605 .path = input_name,
1606 .path = input_name, 1606 .mode = PERF_DATA_MODE_READ,
1607 }, 1607 .force = tchart->force,
1608 .mode = PERF_DATA_MODE_READ,
1609 .force = tchart->force,
1610 }; 1608 };
1611 1609
1612 struct perf_session *session = perf_session__new(&data, false, 1610 struct perf_session *session = perf_session__new(&data, false,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f64e312db787..231a90daa958 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -22,13 +22,14 @@
22#include "perf.h" 22#include "perf.h"
23 23
24#include "util/annotate.h" 24#include "util/annotate.h"
25#include "util/bpf-event.h"
25#include "util/config.h" 26#include "util/config.h"
26#include "util/color.h" 27#include "util/color.h"
27#include "util/drv_configs.h"
28#include "util/evlist.h" 28#include "util/evlist.h"
29#include "util/evsel.h" 29#include "util/evsel.h"
30#include "util/event.h" 30#include "util/event.h"
31#include "util/machine.h" 31#include "util/machine.h"
32#include "util/map.h"
32#include "util/session.h" 33#include "util/session.h"
33#include "util/symbol.h" 34#include "util/symbol.h"
34#include "util/thread.h" 35#include "util/thread.h"
@@ -366,7 +367,7 @@ static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
366 if (p) 367 if (p)
367 *p = 0; 368 *p = 0;
368 369
369 next = rb_first(&hists->entries); 370 next = rb_first_cached(&hists->entries);
370 while (next) { 371 while (next) {
371 n = rb_entry(next, struct hist_entry, rb_node); 372 n = rb_entry(next, struct hist_entry, rb_node);
372 if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { 373 if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
@@ -1184,10 +1185,6 @@ static void init_process_thread(struct perf_top *top)
1184 1185
1185static int __cmd_top(struct perf_top *top) 1186static int __cmd_top(struct perf_top *top)
1186{ 1187{
1187 char msg[512];
1188 struct perf_evsel *pos;
1189 struct perf_evsel_config_term *err_term;
1190 struct perf_evlist *evlist = top->evlist;
1191 struct record_opts *opts = &top->record_opts; 1188 struct record_opts *opts = &top->record_opts;
1192 pthread_t thread, thread_process; 1189 pthread_t thread, thread_process;
1193 int ret; 1190 int ret;
@@ -1215,6 +1212,12 @@ static int __cmd_top(struct perf_top *top)
1215 1212
1216 init_process_thread(top); 1213 init_process_thread(top);
1217 1214
1215 ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process,
1216 &top->session->machines.host,
1217 &top->record_opts);
1218 if (ret < 0)
1219 pr_warning("Couldn't synthesize bpf events.\n");
1220
1218 machine__synthesize_threads(&top->session->machines.host, &opts->target, 1221 machine__synthesize_threads(&top->session->machines.host, &opts->target,
1219 top->evlist->threads, false, 1222 top->evlist->threads, false,
1220 top->nr_threads_synthesize); 1223 top->nr_threads_synthesize);
@@ -1232,14 +1235,6 @@ static int __cmd_top(struct perf_top *top)
1232 if (ret) 1235 if (ret)
1233 goto out_delete; 1236 goto out_delete;
1234 1237
1235 ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
1236 if (ret) {
1237 pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
1238 err_term->val.drv_cfg, perf_evsel__name(pos), errno,
1239 str_error_r(errno, msg, sizeof(msg)));
1240 goto out_delete;
1241 }
1242
1243 top->session->evlist = top->evlist; 1238 top->session->evlist = top->evlist;
1244 perf_session__set_id_hdr_size(top->session); 1239 perf_session__set_id_hdr_size(top->session);
1245 1240
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index b36061cd1ab8..f5b3a1e9c1dd 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -19,6 +19,7 @@
19#include <traceevent/event-parse.h> 19#include <traceevent/event-parse.h>
20#include <api/fs/tracing_path.h> 20#include <api/fs/tracing_path.h>
21#include <bpf/bpf.h> 21#include <bpf/bpf.h>
22#include "util/bpf_map.h"
22#include "builtin.h" 23#include "builtin.h"
23#include "util/cgroup.h" 24#include "util/cgroup.h"
24#include "util/color.h" 25#include "util/color.h"
@@ -29,6 +30,8 @@
29#include "util/evlist.h" 30#include "util/evlist.h"
30#include <subcmd/exec-cmd.h> 31#include <subcmd/exec-cmd.h>
31#include "util/machine.h" 32#include "util/machine.h"
33#include "util/map.h"
34#include "util/symbol.h"
32#include "util/path.h" 35#include "util/path.h"
33#include "util/session.h" 36#include "util/session.h"
34#include "util/thread.h" 37#include "util/thread.h"
@@ -85,6 +88,9 @@ struct trace {
85 *augmented; 88 *augmented;
86 } events; 89 } events;
87 } syscalls; 90 } syscalls;
91 struct {
92 struct bpf_map *map;
93 } dump;
88 struct record_opts opts; 94 struct record_opts opts;
89 struct perf_evlist *evlist; 95 struct perf_evlist *evlist;
90 struct machine *host; 96 struct machine *host;
@@ -1039,6 +1045,9 @@ static const size_t trace__entry_str_size = 2048;
1039 1045
1040static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) 1046static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
1041{ 1047{
1048 if (fd < 0)
1049 return NULL;
1050
1042 if (fd > ttrace->files.max) { 1051 if (fd > ttrace->files.max) {
1043 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); 1052 struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file));
1044 1053
@@ -2766,7 +2775,8 @@ static int trace__set_filter_loop_pids(struct trace *trace)
2766 if (parent == NULL) 2775 if (parent == NULL)
2767 break; 2776 break;
2768 2777
2769 if (!strcmp(thread__comm_str(parent), "sshd")) { 2778 if (!strcmp(thread__comm_str(parent), "sshd") ||
2779 strstarts(thread__comm_str(parent), "gnome-terminal")) {
2770 pids[nr++] = parent->tid; 2780 pids[nr++] = parent->tid;
2771 break; 2781 break;
2772 } 2782 }
@@ -2991,6 +3001,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
2991 if (err < 0) 3001 if (err < 0)
2992 goto out_error_apply_filters; 3002 goto out_error_apply_filters;
2993 3003
3004 if (trace->dump.map)
3005 bpf_map__fprintf(trace->dump.map, trace->output);
3006
2994 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); 3007 err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
2995 if (err < 0) 3008 if (err < 0)
2996 goto out_error_mmap; 3009 goto out_error_mmap;
@@ -3141,11 +3154,9 @@ static int trace__replay(struct trace *trace)
3141 { "probe:vfs_getname", trace__vfs_getname, }, 3154 { "probe:vfs_getname", trace__vfs_getname, },
3142 }; 3155 };
3143 struct perf_data data = { 3156 struct perf_data data = {
3144 .file = { 3157 .path = input_name,
3145 .path = input_name, 3158 .mode = PERF_DATA_MODE_READ,
3146 }, 3159 .force = trace->force,
3147 .mode = PERF_DATA_MODE_READ,
3148 .force = trace->force,
3149 }; 3160 };
3150 struct perf_session *session; 3161 struct perf_session *session;
3151 struct perf_evsel *evsel; 3162 struct perf_evsel *evsel;
@@ -3680,6 +3691,7 @@ int cmd_trace(int argc, const char **argv)
3680 .max_stack = UINT_MAX, 3691 .max_stack = UINT_MAX,
3681 .max_events = ULONG_MAX, 3692 .max_events = ULONG_MAX,
3682 }; 3693 };
3694 const char *map_dump_str = NULL;
3683 const char *output_name = NULL; 3695 const char *output_name = NULL;
3684 const struct option trace_options[] = { 3696 const struct option trace_options[] = {
3685 OPT_CALLBACK('e', "event", &trace, "event", 3697 OPT_CALLBACK('e', "event", &trace, "event",
@@ -3712,6 +3724,9 @@ int cmd_trace(int argc, const char **argv)
3712 OPT_CALLBACK(0, "duration", &trace, "float", 3724 OPT_CALLBACK(0, "duration", &trace, "float",
3713 "show only events with duration > N.M ms", 3725 "show only events with duration > N.M ms",
3714 trace__set_duration), 3726 trace__set_duration),
3727#ifdef HAVE_LIBBPF_SUPPORT
3728 OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"),
3729#endif
3715 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), 3730 OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
3716 OPT_INCR('v', "verbose", &verbose, "be more verbose"), 3731 OPT_INCR('v', "verbose", &verbose, "be more verbose"),
3717 OPT_BOOLEAN('T', "time", &trace.full_time, 3732 OPT_BOOLEAN('T', "time", &trace.full_time,
@@ -3806,6 +3821,14 @@ int cmd_trace(int argc, const char **argv)
3806 3821
3807 err = -1; 3822 err = -1;
3808 3823
3824 if (map_dump_str) {
3825 trace.dump.map = bpf__find_map_by_name(map_dump_str);
3826 if (trace.dump.map == NULL) {
3827 pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str);
3828 goto out;
3829 }
3830 }
3831
3809 if (trace.trace_pgfaults) { 3832 if (trace.trace_pgfaults) {
3810 trace.opts.sample_address = true; 3833 trace.opts.sample_address = true;
3811 trace.opts.sample_time = true; 3834 trace.opts.sample_time = true;
@@ -3865,7 +3888,8 @@ int cmd_trace(int argc, const char **argv)
3865 goto init_augmented_syscall_tp; 3888 goto init_augmented_syscall_tp;
3866 } 3889 }
3867 3890
3868 if (strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_enter") == 0) { 3891 if (trace.syscalls.events.augmented->priv == NULL &&
3892 strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
3869 struct perf_evsel *augmented = trace.syscalls.events.augmented; 3893 struct perf_evsel *augmented = trace.syscalls.events.augmented;
3870 if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || 3894 if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
3871 perf_evsel__init_augmented_syscall_tp_args(augmented)) 3895 perf_evsel__init_augmented_syscall_tp_args(augmented))
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index a28dca2582aa..0453ba26cdbd 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -222,6 +222,10 @@ The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
222way to request that counting of events be restricted to times when the 222way to request that counting of events be restricted to times when the
223CPU is in user, kernel and/or hypervisor mode. 223CPU is in user, kernel and/or hypervisor mode.
224 224
225Furthermore the 'exclude_host' and 'exclude_guest' bits provide a way
226to request counting of events restricted to guest and host contexts when
227using Linux as the hypervisor.
228
225The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap 229The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
226operations, these can be used to relate userspace IP addresses to actual 230operations, these can be used to relate userspace IP addresses to actual
227code, even after the mapping (or even the whole process) is gone, 231code, even after the mapping (or even the whole process) is gone,
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 53c233370fae..f9b2161e1ca4 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -18,23 +18,13 @@
18#include <pid_filter.h> 18#include <pid_filter.h>
19 19
20/* bpf-output associated map */ 20/* bpf-output associated map */
21struct bpf_map SEC("maps") __augmented_syscalls__ = { 21bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
22 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
23 .key_size = sizeof(int),
24 .value_size = sizeof(u32),
25 .max_entries = __NR_CPUS__,
26};
27 22
28struct syscall { 23struct syscall {
29 bool enabled; 24 bool enabled;
30}; 25};
31 26
32struct bpf_map SEC("maps") syscalls = { 27bpf_map(syscalls, ARRAY, int, struct syscall, 512);
33 .type = BPF_MAP_TYPE_ARRAY,
34 .key_size = sizeof(int),
35 .value_size = sizeof(struct syscall),
36 .max_entries = 512,
37};
38 28
39struct syscall_enter_args { 29struct syscall_enter_args {
40 unsigned long long common_tp_fields; 30 unsigned long long common_tp_fields;
@@ -141,8 +131,8 @@ int sys_enter(struct syscall_enter_args *args)
141 len = sizeof(augmented_args.args); 131 len = sizeof(augmented_args.args);
142 } 132 }
143 133
144 perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); 134 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
145 return 0; 135 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
146} 136}
147 137
148SEC("raw_syscalls:sys_exit") 138SEC("raw_syscalls:sys_exit")
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
index 2ae44813ef2d..524fdb8534b3 100644
--- a/tools/perf/examples/bpf/augmented_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -19,12 +19,8 @@
19#include <stdio.h> 19#include <stdio.h>
20#include <linux/socket.h> 20#include <linux/socket.h>
21 21
22struct bpf_map SEC("maps") __augmented_syscalls__ = { 22/* bpf-output associated map */
23 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 23bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
24 .key_size = sizeof(int),
25 .value_size = sizeof(u32),
26 .max_entries = __NR_CPUS__,
27};
28 24
29struct syscall_exit_args { 25struct syscall_exit_args {
30 unsigned long long common_tp_fields; 26 unsigned long long common_tp_fields;
@@ -55,9 +51,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
55 len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ 51 len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \
56 len &= sizeof(augmented_args.filename.value) - 1; \ 52 len &= sizeof(augmented_args.filename.value) - 1; \
57 } \ 53 } \
58 perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ 54 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
59 &augmented_args, len); \ 55 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
60 return 0; \ 56 &augmented_args, len); \
61} \ 57} \
62int syscall_exit(syscall)(struct syscall_exit_args *args) \ 58int syscall_exit(syscall)(struct syscall_exit_args *args) \
63{ \ 59{ \
@@ -125,10 +121,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
125/* addrlen = augmented_args.args.addrlen; */ \ 121/* addrlen = augmented_args.args.addrlen; */ \
126/* */ \ 122/* */ \
127 probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ 123 probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \
128 perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ 124 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
129 &augmented_args, \ 125 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
130 sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ 126 &augmented_args, \
131 return 0; \ 127 sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\
132} \ 128} \
133int syscall_exit(syscall)(struct syscall_exit_args *args) \ 129int syscall_exit(syscall)(struct syscall_exit_args *args) \
134{ \ 130{ \
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
index b59e8812ee8c..e81b535346c0 100644
--- a/tools/perf/examples/bpf/etcsnoop.c
+++ b/tools/perf/examples/bpf/etcsnoop.c
@@ -21,12 +21,8 @@
21 21
22#include <stdio.h> 22#include <stdio.h>
23 23
24struct bpf_map SEC("maps") __augmented_syscalls__ = { 24/* bpf-output associated map */
25 .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, 25bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
26 .key_size = sizeof(int),
27 .value_size = sizeof(u32),
28 .max_entries = __NR_CPUS__,
29};
30 26
31struct augmented_filename { 27struct augmented_filename {
32 int size; 28 int size;
@@ -49,11 +45,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \
49 args->filename_ptr); \ 45 args->filename_ptr); \
50 if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ 46 if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \
51 return 0; \ 47 return 0; \
52 perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ 48 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \
53 &augmented_args, \ 49 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \
54 (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ 50 &augmented_args, \
55 augmented_args.filename.size)); \ 51 (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \
56 return 0; \ 52 augmented_args.filename.size)); \
57} 53}
58 54
59struct syscall_enter_openat_args { 55struct syscall_enter_openat_args {
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
index e667577207dc..5df7ed9d9020 100644
--- a/tools/perf/include/bpf/bpf.h
+++ b/tools/perf/include/bpf/bpf.h
@@ -18,6 +18,14 @@ struct bpf_map {
18 unsigned int numa_node; 18 unsigned int numa_node;
19}; 19};
20 20
21#define bpf_map(name, _type, type_key, type_val, _max_entries) \
22struct bpf_map SEC("maps") name = { \
23 .type = BPF_MAP_TYPE_##_type, \
24 .key_size = sizeof(type_key), \
25 .value_size = sizeof(type_val), \
26 .max_entries = _max_entries, \
27}
28
21/* 29/*
22 * FIXME: this should receive .max_entries as a parameter, as careful 30 * FIXME: this should receive .max_entries as a parameter, as careful
23 * tuning of these limits is needed to avoid hitting limits that 31 * tuning of these limits is needed to avoid hitting limits that
@@ -26,13 +34,7 @@ struct bpf_map {
26 * For the current need, 'perf trace --filter-pids', 64 should 34 * For the current need, 'perf trace --filter-pids', 64 should
27 * be good enough, but this surely needs to be revisited. 35 * be good enough, but this surely needs to be revisited.
28 */ 36 */
29#define pid_map(name, value_type) \ 37#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64)
30struct bpf_map SEC("maps") name = { \
31 .type = BPF_MAP_TYPE_HASH, \
32 .key_size = sizeof(pid_t), \
33 .value_size = sizeof(value_type), \
34 .max_entries = 64, \
35}
36 38
37static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; 39static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem;
38static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; 40static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 388c6dd128b8..b120e547ddc7 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -66,6 +66,7 @@ struct record_opts {
66 bool ignore_missing_thread; 66 bool ignore_missing_thread;
67 bool strict_freq; 67 bool strict_freq;
68 bool sample_id; 68 bool sample_id;
69 bool bpf_event;
69 unsigned int freq; 70 unsigned int freq;
70 unsigned int mmap_pages; 71 unsigned int mmap_pages;
71 unsigned int auxtrace_mmap_pages; 72 unsigned int auxtrace_mmap_pages;
@@ -83,6 +84,14 @@ struct record_opts {
83 clockid_t clockid; 84 clockid_t clockid;
84 u64 clockid_res_ns; 85 u64 clockid_res_ns;
85 int nr_cblocks; 86 int nr_cblocks;
87 int affinity;
88};
89
90enum perf_affinity {
91 PERF_AFFINITY_SYS = 0,
92 PERF_AFFINITY_NODE,
93 PERF_AFFINITY_CPU,
94 PERF_AFFINITY_MAX
86}; 95};
87 96
88struct option; 97struct option;
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
new file mode 100644
index 000000000000..bffb2d4a6420
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
@@ -0,0 +1,2245 @@
1[
2 {
3 "BriefDescription": "% of finished branches that were treated as BC+8",
4 "MetricExpr": "PM_BR_BC_8_CONV / PM_BRU_FIN * 100",
5 "MetricGroup": "branch_prediction",
6 "MetricName": "bc_8_branch_ratio_percent"
7 },
8 {
9 "BriefDescription": "% of finished branches that were pairable but not treated as BC+8",
10 "MetricExpr": "PM_BR_BC_8 / PM_BRU_FIN * 100",
11 "MetricGroup": "branch_prediction",
12 "MetricName": "bc_8_not_converted_branch_ratio_percent"
13 },
14 {
15 "BriefDescription": "Percent of mispredicted branches out of all predicted (correctly and incorrectly) branches that completed",
16 "MetricExpr": "PM_BR_MPRED_CMPL / (PM_BR_PRED_BR0 + PM_BR_PRED_BR1) * 100",
17 "MetricGroup": "branch_prediction",
18 "MetricName": "br_misprediction_percent"
19 },
20 {
21 "BriefDescription": "% of Branch miss predictions per instruction",
22 "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL * 100",
23 "MetricGroup": "branch_prediction",
24 "MetricName": "branch_mispredict_rate_percent"
25 },
26 {
27 "BriefDescription": "Count cache branch misprediction per instruction",
28 "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100",
29 "MetricGroup": "branch_prediction",
30 "MetricName": "ccache_mispredict_rate_percent"
31 },
32 {
33 "BriefDescription": "Percent of count cache mispredictions out of all completed branches that required count cache prediction",
34 "MetricExpr": "PM_BR_MPRED_CCACHE / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1) * 100",
35 "MetricGroup": "branch_prediction",
36 "MetricName": "ccache_misprediction_percent"
37 },
38 {
39 "BriefDescription": "CR MisPredictions per Instruction",
40 "MetricExpr": "PM_BR_MPRED_CR / PM_RUN_INST_CMPL * 100",
41 "MetricGroup": "branch_prediction",
42 "MetricName": "cr_mispredict_rate_percent"
43 },
44 {
45 "BriefDescription": "Link stack branch misprediction",
46 "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / PM_RUN_INST_CMPL * 100",
47 "MetricGroup": "branch_prediction",
48 "MetricName": "lstack_mispredict_rate_percent"
49 },
50 {
51 "BriefDescription": "Percent of link stack mispredictions out of all completed branches that required link stack prediction",
52 "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / (PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100",
53 "MetricGroup": "branch_prediction",
54 "MetricName": "lstack_misprediction_percent"
55 },
56 {
57 "BriefDescription": "TA MisPredictions per Instruction",
58 "MetricExpr": "PM_BR_MPRED_TA / PM_RUN_INST_CMPL * 100",
59 "MetricGroup": "branch_prediction",
60 "MetricName": "ta_mispredict_rate_percent"
61 },
62 {
63 "BriefDescription": "Percent of target address mispredictions out of all completed branches that required address prediction",
64 "MetricExpr": "PM_BR_MPRED_TA / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1 + PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100",
65 "MetricGroup": "branch_prediction",
66 "MetricName": "ta_misprediction_percent"
67 },
68 {
69 "BriefDescription": "Percent of branches completed that were taken",
70 "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BR_CMPL",
71 "MetricGroup": "branch_prediction",
72 "MetricName": "taken_branches_percent"
73 },
74 {
75 "BriefDescription": "Percent of chip+group+sys pumps that were incorrectly predicted",
76 "MetricExpr": "PM_PUMP_MPRED * 100 / (PM_PUMP_CPRED + PM_PUMP_MPRED)",
77 "MetricGroup": "bus_stats",
78 "MetricName": "any_pump_mpred_percent"
79 },
80 {
81 "BriefDescription": "Percent of chip pumps that were correctly predicted as chip pumps the first time",
82 "MetricExpr": "PM_CHIP_PUMP_CPRED * 100 / PM_L2_CHIP_PUMP",
83 "MetricGroup": "bus_stats",
84 "MetricName": "chip_pump_cpred_percent"
85 },
86 {
87 "BriefDescription": "Percent of group pumps that were correctly predicted as group pumps the first time",
88 "MetricExpr": "PM_GRP_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP",
89 "MetricGroup": "bus_stats",
90 "MetricName": "group_pump_cpred_percent"
91 },
92 {
93 "BriefDescription": "Percent of system pumps that were correctly predicted as group pumps the first time",
94 "MetricExpr": "PM_SYS_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP",
95 "MetricGroup": "bus_stats",
96 "MetricName": "sys_pump_cpred_percent"
97 },
98 {
99 "BriefDescription": "Cycles stalled due to CRU or BRU operations",
100 "MetricExpr": "PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL",
101 "MetricGroup": "cpi_breakdown",
102 "MetricName": "bru_cru_stall_cpi"
103 },
104 {
105 "BriefDescription": "Cycles stalled due to ISU Branch Operations",
106 "MetricExpr": "PM_CMPLU_STALL_BRU / PM_RUN_INST_CMPL",
107 "MetricGroup": "cpi_breakdown",
108 "MetricName": "bru_stall_cpi"
109 },
110 {
111 "BriefDescription": "Cycles in which a Group Completed",
112 "MetricExpr": "PM_GRP_CMPL / PM_RUN_INST_CMPL",
113 "MetricGroup": "cpi_breakdown",
114 "MetricName": "completion_cpi"
115 },
116 {
117 "BriefDescription": "Cycles stalled by CO queue full",
118 "MetricExpr": "PM_CMPLU_STALL_COQ_FULL / PM_RUN_INST_CMPL",
119 "MetricGroup": "cpi_breakdown",
120 "MetricName": "coq_full_stall_cpi"
121 },
122 {
123 "BriefDescription": "Cycles stalled due to CRU Operations",
124 "MetricExpr": "(PM_CMPLU_STALL_BRU_CRU - PM_CMPLU_STALL_BRU) / PM_RUN_INST_CMPL",
125 "MetricGroup": "cpi_breakdown",
126 "MetricName": "cru_stall_cpi"
127 },
128 {
129 "BriefDescription": "Cycles stalled by flushes",
130 "MetricExpr": "PM_CMPLU_STALL_FLUSH / PM_RUN_INST_CMPL",
131 "MetricGroup": "cpi_breakdown",
132 "MetricName": "flush_stall_cpi"
133 },
134 {
135 "BriefDescription": "Cycles stalled by FXU Multi-Cycle Instructions",
136 "MetricExpr": "PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL",
137 "MetricGroup": "cpi_breakdown",
138 "MetricName": "fxu_multi_cyc_cpi"
139 },
140 {
141 "BriefDescription": "Cycles stalled by FXU",
142 "MetricExpr": "PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL",
143 "MetricGroup": "cpi_breakdown",
144 "MetricName": "fxu_stall_cpi"
145 },
146 {
147 "BriefDescription": "Other cycles stalled by FXU",
148 "MetricExpr": "(PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL)",
149 "MetricGroup": "cpi_breakdown",
150 "MetricName": "fxu_stall_other_cpi"
151 },
152 {
153 "BriefDescription": "Cycles GCT empty due to Branch Mispredicts",
154 "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL",
155 "MetricGroup": "cpi_breakdown",
156 "MetricName": "gct_empty_br_mpred_cpi"
157 },
158 {
159 "BriefDescription": "Cycles GCT empty due to Branch Mispredicts and Icache Misses",
160 "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL",
161 "MetricGroup": "cpi_breakdown",
162 "MetricName": "gct_empty_br_mpred_ic_miss_cpi"
163 },
164 {
165 "BriefDescription": "GCT empty cycles",
166 "MetricExpr": "PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL",
167 "MetricGroup": "cpi_breakdown",
168 "MetricName": "gct_empty_cpi"
169 },
170 {
171 "BriefDescription": "Cycles GCT empty where dispatch was held",
172 "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL",
173 "MetricGroup": "cpi_breakdown",
174 "MetricName": "gct_empty_disp_held_cpi"
175 },
176 {
177 "BriefDescription": "Cycles GCT empty where dispatch was held due to issue queue",
178 "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL",
179 "MetricGroup": "cpi_breakdown",
180 "MetricName": "gct_empty_disp_held_issq_cpi"
181 },
182 {
183 "BriefDescription": "Cycles GCT empty where dispatch was held due to maps",
184 "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL",
185 "MetricGroup": "cpi_breakdown",
186 "MetricName": "gct_empty_disp_held_map_cpi"
187 },
188 {
189 "BriefDescription": "Cycles GCT empty where dispatch was held due to syncs and other effects",
190 "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL",
191 "MetricGroup": "cpi_breakdown",
192 "MetricName": "gct_empty_disp_held_other_cpi"
193 },
194 {
195 "BriefDescription": "Cycles GCT empty where dispatch was held due to SRQ",
196 "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL",
197 "MetricGroup": "cpi_breakdown",
198 "MetricName": "gct_empty_disp_held_srq_cpi"
199 },
200 {
201 "BriefDescription": "Cycles stalled by GCT empty due to Icache misses",
202 "MetricExpr": "PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL",
203 "MetricGroup": "cpi_breakdown",
204 "MetricName": "gct_empty_ic_miss_cpi"
205 },
206 {
207 "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve in the local L2 or L3",
208 "MetricExpr": "(PM_GCT_NOSLOT_IC_MISS - PM_GCT_NOSLOT_IC_L3MISS) / PM_RUN_INST_CMPL",
209 "MetricGroup": "cpi_breakdown",
210 "MetricName": "gct_empty_ic_miss_l2l3_cpi"
211 },
212 {
213 "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve off-chip",
214 "MetricExpr": "PM_GCT_NOSLOT_IC_L3MISS / PM_RUN_INST_CMPL",
215 "MetricGroup": "cpi_breakdown",
216 "MetricName": "gct_empty_ic_miss_l3miss_cpi"
217 },
218 {
219 "BriefDescription": "Other GCT empty cycles",
220 "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL) - ((PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL))",
221 "MetricGroup": "cpi_breakdown",
222 "MetricName": "gct_empty_other_cpi"
223 },
224 {
225 "BriefDescription": "Cycles stalled by heavyweight syncs",
226 "MetricExpr": "PM_CMPLU_STALL_HWSYNC / PM_RUN_INST_CMPL",
227 "MetricGroup": "cpi_breakdown",
228 "MetricName": "hwsync_stall_cpi"
229 },
230 {
231 "BriefDescription": "Cycles stalled by LSU",
232 "MetricExpr": "PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL",
233 "MetricGroup": "cpi_breakdown",
234 "MetricName": "lsu_stall_cpi"
235 },
236 {
237 "BriefDescription": "Cycles stalled by D-Cache Misses",
238 "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL",
239 "MetricGroup": "cpi_breakdown",
240 "MetricName": "lsu_stall_dcache_miss_cpi"
241 },
242 {
243 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in distant interventions and memory",
244 "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_REMOTE) / PM_RUN_INST_CMPL",
245 "MetricGroup": "cpi_breakdown",
246 "MetricName": "lsu_stall_dcache_miss_distant_cpi"
247 },
248 {
249 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote or distant caches",
250 "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL",
251 "MetricGroup": "cpi_breakdown",
252 "MetricName": "lsu_stall_dcache_miss_l21l31_cpi"
253 },
254 {
255 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was a conflict",
256 "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL",
257 "MetricGroup": "cpi_breakdown",
258 "MetricName": "lsu_stall_dcache_miss_l2l3_conflict_cpi"
259 },
260 {
261 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3",
262 "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL",
263 "MetricGroup": "cpi_breakdown",
264 "MetricName": "lsu_stall_dcache_miss_l2l3_cpi"
265 },
266 {
267 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was no conflict",
268 "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT) / PM_RUN_INST_CMPL",
269 "MetricGroup": "cpi_breakdown",
270 "MetricName": "lsu_stall_dcache_miss_l2l3_noconflict_cpi"
271 },
272 {
273 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in other core's caches or memory",
274 "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL",
275 "MetricGroup": "cpi_breakdown",
276 "MetricName": "lsu_stall_dcache_miss_l3miss_cpi"
277 },
278 {
279 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in local memory or local L4",
280 "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM / PM_RUN_INST_CMPL",
281 "MetricGroup": "cpi_breakdown",
282 "MetricName": "lsu_stall_dcache_miss_lmem_cpi"
283 },
284 {
285 "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote interventions and memory",
286 "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE / PM_RUN_INST_CMPL",
287 "MetricGroup": "cpi_breakdown",
288 "MetricName": "lsu_stall_dcache_miss_remote_cpi"
289 },
290 {
291 "BriefDescription": "Cycles stalled by ERAT Translation rejects",
292 "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL",
293 "MetricGroup": "cpi_breakdown",
294 "MetricName": "lsu_stall_erat_miss_cpi"
295 },
296 {
297 "BriefDescription": "Cycles stalled by LSU load finishes",
298 "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL",
299 "MetricGroup": "cpi_breakdown",
300 "MetricName": "lsu_stall_ld_fin_cpi"
301 },
302 {
303 "BriefDescription": "Cycles stalled by LHS rejects",
304 "MetricExpr": "PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL",
305 "MetricGroup": "cpi_breakdown",
306 "MetricName": "lsu_stall_lhs_cpi"
307 },
308 {
309 "BriefDescription": "Cycles stalled by LMQ Full rejects",
310 "MetricExpr": "PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL",
311 "MetricGroup": "cpi_breakdown",
312 "MetricName": "lsu_stall_lmq_full_cpi"
313 },
314 {
315 "BriefDescription": "Cycles stalled by Other LSU Operations",
316 "MetricExpr": "(PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL)",
317 "MetricGroup": "cpi_breakdown",
318 "MetricName": "lsu_stall_other_cpi"
319 },
320 {
321 "BriefDescription": "Cycles stalled by LSU Rejects",
322 "MetricExpr": "PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL",
323 "MetricGroup": "cpi_breakdown",
324 "MetricName": "lsu_stall_reject_cpi"
325 },
326 {
327 "BriefDescription": "Cycles stalled by Other LSU Rejects",
328 "MetricExpr": "(PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL)",
329 "MetricGroup": "cpi_breakdown",
330 "MetricName": "lsu_stall_reject_other_cpi"
331 },
332 {
333 "BriefDescription": "Cycles stalled by LSU store forwarding",
334 "MetricExpr": "PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL",
335 "MetricGroup": "cpi_breakdown",
336 "MetricName": "lsu_stall_st_fwd_cpi"
337 },
338 {
339 "BriefDescription": "Cycles stalled by LSU Stores",
340 "MetricExpr": "PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL",
341 "MetricGroup": "cpi_breakdown",
342 "MetricName": "lsu_stall_store_cpi"
343 },
344 {
345 "BriefDescription": "Cycles stalled by lightweight syncs",
346 "MetricExpr": "PM_CMPLU_STALL_LWSYNC / PM_RUN_INST_CMPL",
347 "MetricGroup": "cpi_breakdown",
348 "MetricName": "lwsync_stall_cpi"
349 },
350 {
351 "MetricExpr": "PM_CMPLU_STALL_MEM_ECC_DELAY / PM_RUN_INST_CMPL",
352 "MetricGroup": "cpi_breakdown",
353 "MetricName": "mem_ecc_delay_stall_cpi"
354 },
355 {
356 "BriefDescription": "Cycles stalled by nops (nothing next to finish)",
357 "MetricExpr": "PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL",
358 "MetricGroup": "cpi_breakdown",
359 "MetricName": "no_ntf_stall_cpi"
360 },
361 {
362 "MetricExpr": "PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL",
363 "MetricGroup": "cpi_breakdown",
364 "MetricName": "ntcg_all_fin_cpi"
365 },
366 {
367 "MetricExpr": "PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL",
368 "MetricGroup": "cpi_breakdown",
369 "MetricName": "ntcg_flush_cpi"
370 },
371 {
372 "BriefDescription": "Other thread block stall cycles",
373 "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_LWSYNC - PM_CMPLU_STALL_HWSYNC - PM_CMPLU_STALL_MEM_ECC_DELAY - PM_CMPLU_STALL_FLUSH - PM_CMPLU_STALL_COQ_FULL) / PM_RUN_INST_CMPL",
374 "MetricGroup": "cpi_breakdown",
375 "MetricName": "other_block_stall_cpi"
376 },
377 {
378 "BriefDescription": "Cycles unaccounted for",
379 "MetricExpr": "(PM_RUN_CYC / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL) - (PM_GRP_CMPL / PM_RUN_INST_CMPL)",
380 "MetricGroup": "cpi_breakdown",
381 "MetricName": "other_cpi"
382 },
383 {
384 "BriefDescription": "Stall cycles unaccounted for",
385 "MetricExpr": "(PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL)",
386 "MetricGroup": "cpi_breakdown",
387 "MetricName": "other_stall_cpi"
388 },
389 {
390 "BriefDescription": "Run cycles per run instruction",
391 "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL",
392 "MetricGroup": "cpi_breakdown",
393 "MetricName": "run_cpi"
394 },
395 {
396 "BriefDescription": "Completion Stall Cycles",
397 "MetricExpr": "PM_CMPLU_STALL / PM_RUN_INST_CMPL",
398 "MetricGroup": "cpi_breakdown",
399 "MetricName": "stall_cpi"
400 },
401 {
402 "BriefDescription": "Cycles a thread was blocked",
403 "MetricExpr": "PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL",
404 "MetricGroup": "cpi_breakdown",
405 "MetricName": "thread_block_stall_cpi"
406 },
407 {
408 "BriefDescription": "Cycles stalled by VSU",
409 "MetricExpr": "PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL",
410 "MetricGroup": "cpi_breakdown",
411 "MetricName": "vsu_stall_cpi"
412 },
413 {
414 "BriefDescription": "Cycles stalled by other VSU Operations",
415 "MetricExpr": "(PM_CMPLU_STALL_VSU - PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_SCALAR) / PM_RUN_INST_CMPL",
416 "MetricGroup": "cpi_breakdown",
417 "MetricName": "vsu_stall_other_cpi"
418 },
419 {
420 "BriefDescription": "Cycles stalled by VSU Scalar Operations",
421 "MetricExpr": "PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL",
422 "MetricGroup": "cpi_breakdown",
423 "MetricName": "vsu_stall_scalar_cpi"
424 },
425 {
426 "BriefDescription": "Cycles stalled by VSU Scalar Long Operations",
427 "MetricExpr": "PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL",
428 "MetricGroup": "cpi_breakdown",
429 "MetricName": "vsu_stall_scalar_long_cpi"
430 },
431 {
432 "BriefDescription": "Cycles stalled by Other VSU Scalar Operations",
433 "MetricExpr": "(PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL)",
434 "MetricGroup": "cpi_breakdown",
435 "MetricName": "vsu_stall_scalar_other_cpi"
436 },
437 {
438 "BriefDescription": "Cycles stalled by VSU Vector Operations",
439 "MetricExpr": "PM_CMPLU_STALL_VECTOR / PM_RUN_INST_CMPL",
440 "MetricGroup": "cpi_breakdown",
441 "MetricName": "vsu_stall_vector_cpi"
442 },
443 {
444 "BriefDescription": "Cycles stalled by VSU Vector Long Operations",
445 "MetricExpr": "PM_CMPLU_STALL_VECTOR_LONG / PM_RUN_INST_CMPL",
446 "MetricGroup": "cpi_breakdown",
447 "MetricName": "vsu_stall_vector_long_cpi"
448 },
449 {
450 "BriefDescription": "Cycles stalled by other VSU Vector Operations",
451 "MetricExpr": "(PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_VECTOR_LONG) / PM_RUN_INST_CMPL",
452 "MetricGroup": "cpi_breakdown",
453 "MetricName": "vsu_stall_vector_other_cpi"
454 },
455 {
456 "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst",
457 "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
458 "MetricGroup": "dl1_reloads_percent_per_inst",
459 "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent"
460 },
461 {
462 "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst",
463 "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
464 "MetricGroup": "dl1_reloads_percent_per_inst",
465 "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent"
466 },
467 {
468 "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst",
469 "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
470 "MetricGroup": "dl1_reloads_percent_per_inst",
471 "MetricName": "dl1_reload_from_dl4_rate_percent"
472 },
473 {
474 "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst",
475 "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
476 "MetricGroup": "dl1_reloads_percent_per_inst",
477 "MetricName": "dl1_reload_from_dmem_rate_percent"
478 },
479 {
480 "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
481 "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
482 "MetricGroup": "dl1_reloads_percent_per_inst",
483 "MetricName": "dl1_reload_from_l21_mod_rate_percent"
484 },
485 {
486 "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
487 "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
488 "MetricGroup": "dl1_reloads_percent_per_inst",
489 "MetricName": "dl1_reload_from_l21_shr_rate_percent"
490 },
491 {
492 "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced a Load-Hit-Store conflict",
493 "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_RUN_INST_CMPL",
494 "MetricGroup": "dl1_reloads_percent_per_inst",
495 "MetricName": "dl1_reload_from_l2_lhs_rate_percent"
496 },
497 {
498 "BriefDescription": "% of DL1 reloads from L2 per Inst",
499 "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
500 "MetricGroup": "dl1_reloads_percent_per_inst",
501 "MetricName": "dl1_reload_from_l2_miss_rate_percent"
502 },
503 {
504 "BriefDescription": "Percentage of L2 load hits per instruction where the L2 did not experience a conflict",
505 "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_RUN_INST_CMPL",
506 "MetricGroup": "dl1_reloads_percent_per_inst",
507 "MetricName": "dl1_reload_from_l2_no_conflict_rate_percent"
508 },
509 {
510 "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced some conflict other than Load-Hit-Store",
511 "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_RUN_INST_CMPL",
512 "MetricGroup": "dl1_reloads_percent_per_inst",
513 "MetricName": "dl1_reload_from_l2_other_conflict_rate_percent"
514 },
515 {
516 "BriefDescription": "% of DL1 reloads from L2 per Inst",
517 "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL",
518 "MetricGroup": "dl1_reloads_percent_per_inst",
519 "MetricName": "dl1_reload_from_l2_rate_percent"
520 },
521 {
522 "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst",
523 "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
524 "MetricGroup": "dl1_reloads_percent_per_inst",
525 "MetricName": "dl1_reload_from_l31_mod_rate_percent"
526 },
527 {
528 "BriefDescription": "% of DL1 reloads from Private L3 S state, other core per Inst",
529 "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
530 "MetricGroup": "dl1_reloads_percent_per_inst",
531 "MetricName": "dl1_reload_from_l31_shr_rate_percent"
532 },
533 {
534 "BriefDescription": "Percentage of L3 load hits per instruction where the load collided with a pending prefetch",
535 "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_RUN_INST_CMPL",
536 "MetricGroup": "dl1_reloads_percent_per_inst",
537 "MetricName": "dl1_reload_from_l3_conflict_rate_percent"
538 },
539 {
540 "BriefDescription": "% of DL1 reloads from L3 per Inst",
541 "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
542 "MetricGroup": "dl1_reloads_percent_per_inst",
543 "MetricName": "dl1_reload_from_l3_miss_rate_percent"
544 },
545 {
546 "BriefDescription": "Percentage of L3 load hits per instruction where the L3 did not experience a conflict",
547 "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_RUN_INST_CMPL",
548 "MetricGroup": "dl1_reloads_percent_per_inst",
549 "MetricName": "dl1_reload_from_l3_no_conflict_rate_percent"
550 },
551 {
552 "BriefDescription": "% of DL1 Reloads from L3 per Inst",
553 "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL",
554 "MetricGroup": "dl1_reloads_percent_per_inst",
555 "MetricName": "dl1_reload_from_l3_rate_percent"
556 },
557 {
558 "BriefDescription": "% of DL1 Reloads from Local L4 per Inst",
559 "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
560 "MetricGroup": "dl1_reloads_percent_per_inst",
561 "MetricName": "dl1_reload_from_ll4_rate_percent"
562 },
563 {
564 "BriefDescription": "% of DL1 Reloads from Local Memory per Inst",
565 "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
566 "MetricGroup": "dl1_reloads_percent_per_inst",
567 "MetricName": "dl1_reload_from_lmem_rate_percent"
568 },
569 {
570 "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
571 "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
572 "MetricGroup": "dl1_reloads_percent_per_inst",
573 "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent"
574 },
575 {
576 "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
577 "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
578 "MetricGroup": "dl1_reloads_percent_per_inst",
579 "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent"
580 },
581 {
582 "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
583 "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
584 "MetricGroup": "dl1_reloads_percent_per_inst",
585 "MetricName": "dl1_reload_from_rl4_rate_percent"
586 },
587 {
588 "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
589 "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
590 "MetricGroup": "dl1_reloads_percent_per_inst",
591 "MetricName": "dl1_reload_from_rmem_rate_percent"
592 },
593 {
594 "BriefDescription": "Percentage of L1 demand load misses per run instruction",
595 "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL",
596 "MetricGroup": "dl1_reloads_percent_per_inst",
597 "MetricName": "l1_ld_miss_rate_percent"
598 },
599 {
600 "BriefDescription": "% of DL1 misses that result in a cache reload",
601 "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1",
602 "MetricGroup": "dl1_reloads_percent_per_ref",
603 "MetricName": "dl1_miss_reloads_percent"
604 },
605 {
606 "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)",
607 "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
608 "MetricGroup": "dl1_reloads_percent_per_ref",
609 "MetricName": "dl1_reload_from_dl2l3_mod_percent"
610 },
611 {
612 "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)",
613 "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
614 "MetricGroup": "dl1_reloads_percent_per_ref",
615 "MetricName": "dl1_reload_from_dl2l3_shr_percent"
616 },
617 {
618 "BriefDescription": "% of DL1 dL1_Reloads from Distant L4",
619 "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
620 "MetricGroup": "dl1_reloads_percent_per_ref",
621 "MetricName": "dl1_reload_from_dl4_percent"
622 },
623 {
624 "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory",
625 "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
626 "MetricGroup": "dl1_reloads_percent_per_ref",
627 "MetricName": "dl1_reload_from_dmem_percent"
628 },
629 {
630 "BriefDescription": "% of DL1 reloads from Private L2, other core",
631 "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
632 "MetricGroup": "dl1_reloads_percent_per_ref",
633 "MetricName": "dl1_reload_from_l21_mod_percent"
634 },
635 {
636 "BriefDescription": "% of DL1 reloads from Private L2, other core",
637 "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
638 "MetricGroup": "dl1_reloads_percent_per_ref",
639 "MetricName": "dl1_reload_from_l21_shr_percent"
640 },
641 {
642 "BriefDescription": "Percentage of DL1 reloads from L2 with a Load-Hit-Store conflict",
643 "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_L1_DCACHE_RELOAD_VALID",
644 "MetricGroup": "dl1_reloads_percent_per_ref",
645 "MetricName": "dl1_reload_from_l2_lhs_percent"
646 },
647 {
648 "BriefDescription": "Percentage of DL1 reloads from L2 with no conflicts",
649 "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID",
650 "MetricGroup": "dl1_reloads_percent_per_ref",
651 "MetricName": "dl1_reload_from_l2_no_conflict_percent"
652 },
653 {
654 "BriefDescription": "Percentage of DL1 reloads from L2 with some conflict other than Load-Hit-Store",
655 "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_L1_DCACHE_RELOAD_VALID",
656 "MetricGroup": "dl1_reloads_percent_per_ref",
657 "MetricName": "dl1_reload_from_l2_other_conflict_percent"
658 },
659 {
660 "BriefDescription": "% of DL1 reloads from L2",
661 "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID",
662 "MetricGroup": "dl1_reloads_percent_per_ref",
663 "MetricName": "dl1_reload_from_l2_percent"
664 },
665 {
666 "BriefDescription": "% of DL1 reloads from Private L3, other core",
667 "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
668 "MetricGroup": "dl1_reloads_percent_per_ref",
669 "MetricName": "dl1_reload_from_l31_mod_percent"
670 },
671 {
672 "BriefDescription": "% of DL1 reloads from Private L3, other core",
673 "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
674 "MetricGroup": "dl1_reloads_percent_per_ref",
675 "MetricName": "dl1_reload_from_l31_shr_percent"
676 },
677 {
678 "BriefDescription": "Percentage of DL1 reloads from L3 where the load collided with a pending prefetch",
679 "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID",
680 "MetricGroup": "dl1_reloads_percent_per_ref",
681 "MetricName": "dl1_reload_from_l3_conflict_percent"
682 },
683 {
684 "BriefDescription": "Percentage of L3 load hits per instruction where the line was brought into the L3 by a prefetch operation",
685 "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL",
686 "MetricGroup": "dl1_reloads_percent_per_ref",
687 "MetricName": "dl1_reload_from_l3_mepf_rate_percent"
688 },
689 {
690 "BriefDescription": "Percentage of DL1 reloads from L3 without conflicts",
691 "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID",
692 "MetricGroup": "dl1_reloads_percent_per_ref",
693 "MetricName": "dl1_reload_from_l3_no_conflict_percent"
694 },
695 {
696 "BriefDescription": "% of DL1 Reloads from L3",
697 "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID",
698 "MetricGroup": "dl1_reloads_percent_per_ref",
699 "MetricName": "dl1_reload_from_l3_percent"
700 },
701 {
702 "BriefDescription": "% of DL1 dL1_Reloads from Local L4",
703 "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
704 "MetricGroup": "dl1_reloads_percent_per_ref",
705 "MetricName": "dl1_reload_from_ll4_percent"
706 },
707 {
708 "BriefDescription": "% of DL1 dL1_Reloads from Local Memory",
709 "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
710 "MetricGroup": "dl1_reloads_percent_per_ref",
711 "MetricName": "dl1_reload_from_lmem_percent"
712 },
713 {
714 "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)",
715 "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
716 "MetricGroup": "dl1_reloads_percent_per_ref",
717 "MetricName": "dl1_reload_from_rl2l3_mod_percent"
718 },
719 {
720 "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)",
721 "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
722 "MetricGroup": "dl1_reloads_percent_per_ref",
723 "MetricName": "dl1_reload_from_rl2l3_shr_percent"
724 },
725 {
726 "BriefDescription": "% of DL1 dL1_Reloads from Remote L4",
727 "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
728 "MetricGroup": "dl1_reloads_percent_per_ref",
729 "MetricName": "dl1_reload_from_rl4_percent"
730 },
731 {
732 "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory",
733 "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
734 "MetricGroup": "dl1_reloads_percent_per_ref",
735 "MetricName": "dl1_reload_from_rmem_percent"
736 },
737 {
738 "BriefDescription": "dL1 miss portion of CPI",
739 "MetricExpr": "( (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)/ (PM_RUN_CYC / PM_RUN_INST_CMPL)) * 100",
740 "MetricGroup": "estimated_dcache_miss_cpi",
741 "MetricName": "dcache_miss_cpi_percent"
742 },
743 {
744 "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi",
745 "MetricExpr": "(((PM_DATA_FROM_DL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
746 "MetricGroup": "estimated_dcache_miss_cpi",
747 "MetricName": "dl2l3_mod_cpi_percent"
748 },
749 {
750 "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi",
751 "MetricExpr": "(((PM_DATA_FROM_DL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
752 "MetricGroup": "estimated_dcache_miss_cpi",
753 "MetricName": "dl2l3_shr_cpi_percent"
754 },
755 {
756 "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi",
757 "MetricExpr": "(((PM_DATA_FROM_DL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
758 "MetricGroup": "estimated_dcache_miss_cpi",
759 "MetricName": "dl4_cpi_percent"
760 },
761 {
762 "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi",
763 "MetricExpr": "(((PM_DATA_FROM_DMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
764 "MetricGroup": "estimated_dcache_miss_cpi",
765 "MetricName": "dmem_cpi_percent"
766 },
767 {
768 "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi",
769 "MetricExpr": "(((PM_DATA_FROM_L21_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
770 "MetricGroup": "estimated_dcache_miss_cpi",
771 "MetricName": "l21_mod_cpi_percent"
772 },
773 {
774 "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi",
775 "MetricExpr": "(((PM_DATA_FROM_L21_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
776 "MetricGroup": "estimated_dcache_miss_cpi",
777 "MetricName": "l21_shr_cpi_percent"
778 },
779 {
780 "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi",
781 "MetricExpr": "(((PM_DATA_FROM_L2 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) ) *100",
782 "MetricGroup": "estimated_dcache_miss_cpi",
783 "MetricName": "l2_cpi_percent"
784 },
785 {
786 "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi",
787 "MetricExpr": "(((PM_DATA_FROM_L31_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
788 "MetricGroup": "estimated_dcache_miss_cpi",
789 "MetricName": "l31_mod_cpi_percent"
790 },
791 {
792 "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi",
793 "MetricExpr": "(((PM_DATA_FROM_L31_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
794 "MetricGroup": "estimated_dcache_miss_cpi",
795 "MetricName": "l31_shr_cpi_percent"
796 },
797 {
798 "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi",
799 "MetricExpr": "(((PM_DATA_FROM_L3 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100",
800 "MetricGroup": "estimated_dcache_miss_cpi",
801 "MetricName": "l3_cpi_percent"
802 },
803 {
804 "BriefDescription": "estimate of Local L4 miss rates with measured LL4 latency as a %of dcache miss cpi",
805 "MetricExpr": "(((PM_DATA_FROM_LL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
806 "MetricGroup": "estimated_dcache_miss_cpi",
807 "MetricName": "ll4_cpi_percent"
808 },
809 {
810 "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi",
811 "MetricExpr": "(((PM_DATA_FROM_LMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
812 "MetricGroup": "estimated_dcache_miss_cpi",
813 "MetricName": "lmem_cpi_percent"
814 },
815 {
816 "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi",
817 "MetricExpr": "(((PM_DATA_FROM_RL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
818 "MetricGroup": "estimated_dcache_miss_cpi",
819 "MetricName": "rl2l3_mod_cpi_percent"
820 },
821 {
822 "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi",
823 "MetricExpr": "(((PM_DATA_FROM_RL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100",
824 "MetricGroup": "estimated_dcache_miss_cpi",
825 "MetricName": "rl2l3_shr_cpi_percent"
826 },
827 {
828 "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi",
829 "MetricExpr": "(((PM_DATA_FROM_RL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
830 "MetricGroup": "estimated_dcache_miss_cpi",
831 "MetricName": "rl4_cpi_percent"
832 },
833 {
834 "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi",
835 "MetricExpr": "(((PM_DATA_FROM_RMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100",
836 "MetricGroup": "estimated_dcache_miss_cpi",
837 "MetricName": "rmem_cpi_percent"
838 },
839 {
840 "BriefDescription": "Branch Mispredict flushes per instruction",
841 "MetricExpr": "PM_FLUSH_BR_MPRED / PM_RUN_INST_CMPL * 100",
842 "MetricGroup": "general",
843 "MetricName": "br_mpred_flush_rate_percent"
844 },
845 {
846 "BriefDescription": "Cycles per instruction",
847 "MetricExpr": "PM_CYC / PM_INST_CMPL",
848 "MetricGroup": "general",
849 "MetricName": "cpi"
850 },
851 {
852 "BriefDescription": "Percentage Cycles a group completed",
853 "MetricExpr": "PM_GRP_CMPL / PM_CYC * 100",
854 "MetricGroup": "general",
855 "MetricName": "cyc_grp_completed_percent"
856 },
857 {
858 "BriefDescription": "Percentage Cycles a group dispatched",
859 "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100",
860 "MetricGroup": "general",
861 "MetricName": "cyc_grp_dispatched_percent"
862 },
863 {
864 "BriefDescription": "Cycles per group",
865 "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL",
866 "MetricGroup": "general",
867 "MetricName": "cyc_per_group"
868 },
869 {
870 "BriefDescription": "Percentage of flushes at dispatch per run instruction",
871 "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100",
872 "MetricGroup": "general",
873 "MetricName": "disp_flush_rate_percent"
874 },
875 {
876 "BriefDescription": "% DTLB miss rate per inst",
877 "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100",
878 "MetricGroup": "general",
879 "MetricName": "dtlb_miss_rate_percent"
880 },
881 {
882 "BriefDescription": "Flush rate (%)",
883 "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL",
884 "MetricGroup": "general",
885 "MetricName": "flush_rate_percent"
886 },
887 {
888 "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid",
889 "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
890 "MetricGroup": "general",
891 "MetricName": "gct_util_11to14_slots_percent"
892 },
893 {
894 "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid",
895 "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
896 "MetricGroup": "general",
897 "MetricName": "gct_util_15to17_slots_percent"
898 },
899 {
900 "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid",
901 "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
902 "MetricGroup": "general",
903 "MetricName": "gct_util_18plus_slots_percent"
904 },
905 {
906 "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid",
907 "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
908 "MetricGroup": "general",
909 "MetricName": "gct_util_1to2_slots_percent"
910 },
911 {
912 "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid",
913 "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
914 "MetricGroup": "general",
915 "MetricName": "gct_util_3to6_slots_percent"
916 },
917 {
918 "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid",
919 "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100",
920 "MetricGroup": "general",
921 "MetricName": "gct_util_7to10_slots_percent"
922 },
923 {
924 "BriefDescription": "Avg. group size",
925 "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL",
926 "MetricGroup": "general",
927 "MetricName": "group_size"
928 },
929 {
930 "BriefDescription": "Instructions per group",
931 "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL",
932 "MetricGroup": "general",
933 "MetricName": "inst_per_group"
934 },
935 {
936 "BriefDescription": "Instructions per cycle",
937 "MetricExpr": "PM_INST_CMPL / PM_CYC",
938 "MetricGroup": "general",
939 "MetricName": "ipc"
940 },
941 {
942 "BriefDescription": "% ITLB miss rate per inst",
943 "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100",
944 "MetricGroup": "general",
945 "MetricName": "itlb_miss_rate_percent"
946 },
947 {
948 "BriefDescription": "Percentage of L1 load misses per L1 load ref",
949 "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100",
950 "MetricGroup": "general",
951 "MetricName": "l1_ld_miss_ratio_percent"
952 },
953 {
954 "BriefDescription": "Percentage of L1 store misses per run instruction",
955 "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
956 "MetricGroup": "general",
957 "MetricName": "l1_st_miss_rate_percent"
958 },
959 {
960 "BriefDescription": "Percentage of L1 store misses per L1 store ref",
961 "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100",
962 "MetricGroup": "general",
963 "MetricName": "l1_st_miss_ratio_percent"
964 },
965 {
966 "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)",
967 "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
968 "MetricGroup": "general",
969 "MetricName": "l2_inst_miss_rate_percent"
970 },
971 {
972 "BriefDescription": "L2 demand Load Miss Rate (per run instruction)(%)",
973 "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
974 "MetricGroup": "general",
975 "MetricName": "l2_ld_miss_rate_percent"
976 },
977 {
978 "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)",
979 "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
980 "MetricGroup": "general",
981 "MetricName": "l2_pteg_miss_rate_percent"
982 },
983 {
984 "BriefDescription": "Percentage of L2 store misses per run instruction",
985 "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
986 "MetricGroup": "general",
987 "MetricName": "l2_st_miss_rate_percent"
988 },
989 {
990 "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)",
991 "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
992 "MetricGroup": "general",
993 "MetricName": "l3_inst_miss_rate_percent"
994 },
995 {
996 "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)",
997 "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
998 "MetricGroup": "general",
999 "MetricName": "l3_ld_miss_rate_percent"
1000 },
1001 {
1002 "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)",
1003 "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
1004 "MetricGroup": "general",
1005 "MetricName": "l3_pteg_miss_rate_percent"
1006 },
1007 {
1008 "BriefDescription": "Run cycles per cycle (%)",
1009 "MetricExpr": "PM_RUN_CYC / PM_CYC*100",
1010 "MetricGroup": "general",
1011 "MetricName": "run_cycles_percent"
1012 },
1013 {
1014 "BriefDescription": "Percentage of cycles spent in SMT2 Mode",
1015 "MetricExpr": "(PM_RUN_CYC_SMT2_MODE/PM_RUN_CYC) * 100",
1016 "MetricGroup": "general",
1017 "MetricName": "smt2_cycles_percent"
1018 },
1019 {
1020 "BriefDescription": "Percentage of cycles spent in SMT4 Mode",
1021 "MetricExpr": "(PM_RUN_CYC_SMT4_MODE/PM_RUN_CYC) * 100",
1022 "MetricGroup": "general",
1023 "MetricName": "smt4_cycles_percent"
1024 },
1025 {
1026 "BriefDescription": "Percentage of cycles spent in SMT8 Mode",
1027 "MetricExpr": "(PM_RUN_CYC_SMT8_MODE/PM_RUN_CYC) * 100",
1028 "MetricGroup": "general",
1029 "MetricName": "smt8_cycles_percent"
1030 },
1031 {
1032 "BriefDescription": "IPC of all instructions completed by the core while this thread was stalled",
1033 "MetricExpr": "PM_CMPLU_STALL_OTHER_CMPL/PM_RUN_CYC",
1034 "MetricGroup": "general",
1035 "MetricName": "smt_benefit"
1036 },
1037 {
1038 "BriefDescription": "Instruction dispatch-to-completion ratio",
1039 "MetricExpr": "PM_INST_DISP / PM_INST_CMPL",
1040 "MetricGroup": "general",
1041 "MetricName": "speculation"
1042 },
1043 {
1044 "BriefDescription": "Percentage of cycles spent in Single Thread Mode",
1045 "MetricExpr": "(PM_RUN_CYC_ST_MODE/PM_RUN_CYC) * 100",
1046 "MetricGroup": "general",
1047 "MetricName": "st_cycles_percent"
1048 },
1049 {
1050 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst",
1051 "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
1052 "MetricGroup": "instruction_misses_percent_per_inst",
1053 "MetricName": "inst_from_dl2l3_mod_rate_percent"
1054 },
1055 {
1056 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst",
1057 "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1058 "MetricGroup": "instruction_misses_percent_per_inst",
1059 "MetricName": "inst_from_dl2l3_shr_rate_percent"
1060 },
1061 {
1062 "BriefDescription": "% of ICache reloads from Distant L4 per Inst",
1063 "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
1064 "MetricGroup": "instruction_misses_percent_per_inst",
1065 "MetricName": "inst_from_dl4_rate_percent"
1066 },
1067 {
1068 "BriefDescription": "% of ICache reloads from Distant Memory per Inst",
1069 "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
1070 "MetricGroup": "instruction_misses_percent_per_inst",
1071 "MetricName": "inst_from_dmem_rate_percent"
1072 },
1073 {
1074 "BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
1075 "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
1076 "MetricGroup": "instruction_misses_percent_per_inst",
1077 "MetricName": "inst_from_l21_mod_rate_percent"
1078 },
1079 {
1080 "BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
1081 "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
1082 "MetricGroup": "instruction_misses_percent_per_inst",
1083 "MetricName": "inst_from_l21_shr_rate_percent"
1084 },
1085 {
1086 "BriefDescription": "% of ICache reloads from L2 per Inst",
1087 "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL",
1088 "MetricGroup": "instruction_misses_percent_per_inst",
1089 "MetricName": "inst_from_l2_rate_percent"
1090 },
1091 {
1092 "BriefDescription": "% of ICache reloads from Private L3, other core per Inst",
1093 "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
1094 "MetricGroup": "instruction_misses_percent_per_inst",
1095 "MetricName": "inst_from_l31_mod_rate_percent"
1096 },
1097 {
1098 "BriefDescription": "% of ICache reloads from Private L3 other core per Inst",
1099 "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
1100 "MetricGroup": "instruction_misses_percent_per_inst",
1101 "MetricName": "inst_from_l31_shr_rate_percent"
1102 },
1103 {
1104 "BriefDescription": "% of ICache reloads from L3 per Inst",
1105 "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL",
1106 "MetricGroup": "instruction_misses_percent_per_inst",
1107 "MetricName": "inst_from_l3_rate_percent"
1108 },
1109 {
1110 "BriefDescription": "% of ICache reloads from Local L4 per Inst",
1111 "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
1112 "MetricGroup": "instruction_misses_percent_per_inst",
1113 "MetricName": "inst_from_ll4_rate_percent"
1114 },
1115 {
1116 "BriefDescription": "% of ICache reloads from Local Memory per Inst",
1117 "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
1118 "MetricGroup": "instruction_misses_percent_per_inst",
1119 "MetricName": "inst_from_lmem_rate_percent"
1120 },
1121 {
1122 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst",
1123 "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
1124 "MetricGroup": "instruction_misses_percent_per_inst",
1125 "MetricName": "inst_from_rl2l3_mod_rate_percent"
1126 },
1127 {
1128 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst",
1129 "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1130 "MetricGroup": "instruction_misses_percent_per_inst",
1131 "MetricName": "inst_from_rl2l3_shr_rate_percent"
1132 },
1133 {
1134 "BriefDescription": "% of ICache reloads from Remote L4 per Inst",
1135 "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
1136 "MetricGroup": "instruction_misses_percent_per_inst",
1137 "MetricName": "inst_from_rl4_rate_percent"
1138 },
1139 {
1140 "BriefDescription": "% of ICache reloads from Remote Memory per Inst",
1141 "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
1142 "MetricGroup": "instruction_misses_percent_per_inst",
1143 "MetricName": "inst_from_rmem_rate_percent"
1144 },
1145 {
1146 "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)",
1147 "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL",
1148 "MetricGroup": "instruction_misses_percent_per_inst",
1149 "MetricName": "l1_inst_miss_rate_percent"
1150 },
1151 {
1152 "BriefDescription": "Branches per instruction",
1153 "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL",
1154 "MetricGroup": "instruction_mix",
1155 "MetricName": "branches_per_inst"
1156 },
1157 {
1158 "BriefDescription": "Total Fixed point operations",
1159 "MetricExpr": "(PM_FXU0_FIN + PM_FXU1_FIN)/PM_RUN_INST_CMPL",
1160 "MetricGroup": "instruction_mix",
1161 "MetricName": "fixed_per_inst"
1162 },
1163 {
1164 "BriefDescription": "FXU0 balance",
1165 "MetricExpr": "PM_FXU0_FIN / (PM_FXU0_FIN + PM_FXU1_FIN)",
1166 "MetricGroup": "instruction_mix",
1167 "MetricName": "fxu0_balance"
1168 },
1169 {
1170 "BriefDescription": "Fraction of cycles that FXU0 is in use",
1171 "MetricExpr": "PM_FXU0_FIN / PM_RUN_CYC",
1172 "MetricGroup": "instruction_mix",
1173 "MetricName": "fxu0_fin"
1174 },
1175 {
1176 "BriefDescription": "FXU0 only Busy",
1177 "MetricExpr": "PM_FXU0_BUSY_FXU1_IDLE / PM_CYC",
1178 "MetricGroup": "instruction_mix",
1179 "MetricName": "fxu0_only_busy"
1180 },
1181 {
1182 "BriefDescription": "Fraction of cycles that FXU1 is in use",
1183 "MetricExpr": "PM_FXU1_FIN / PM_RUN_CYC",
1184 "MetricGroup": "instruction_mix",
1185 "MetricName": "fxu1_fin"
1186 },
1187 {
1188 "BriefDescription": "FXU1 only Busy",
1189 "MetricExpr": "PM_FXU1_BUSY_FXU0_IDLE / PM_CYC",
1190 "MetricGroup": "instruction_mix",
1191 "MetricName": "fxu1_only_busy"
1192 },
1193 {
1194 "BriefDescription": "Both FXU Busy",
1195 "MetricExpr": "PM_FXU_BUSY / PM_CYC",
1196 "MetricGroup": "instruction_mix",
1197 "MetricName": "fxu_both_busy"
1198 },
1199 {
1200 "BriefDescription": "Both FXU Idle",
1201 "MetricExpr": "PM_FXU_IDLE / PM_CYC",
1202 "MetricGroup": "instruction_mix",
1203 "MetricName": "fxu_both_idle"
1204 },
1205 {
1206 "BriefDescription": "PCT instruction loads",
1207 "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL",
1208 "MetricGroup": "instruction_mix",
1209 "MetricName": "loads_per_inst"
1210 },
1211 {
1212 "BriefDescription": "PCT instruction stores",
1213 "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
1214 "MetricGroup": "instruction_mix",
1215 "MetricName": "stores_per_inst"
1216 },
1217 {
1218 "BriefDescription": "Icache Fetches per Icache Miss",
1219 "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS",
1220 "MetricGroup": "instruction_stats_percent_per_ref",
1221 "MetricName": "icache_miss_reload"
1222 },
1223 {
1224 "BriefDescription": "% of ICache reloads due to prefetch",
1225 "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS",
1226 "MetricGroup": "instruction_stats_percent_per_ref",
1227 "MetricName": "icache_pref_percent"
1228 },
1229 {
1230 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)",
1231 "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
1232 "MetricGroup": "instruction_stats_percent_per_ref",
1233 "MetricName": "inst_from_dl2l3_mod_percent"
1234 },
1235 {
1236 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)",
1237 "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
1238 "MetricGroup": "instruction_stats_percent_per_ref",
1239 "MetricName": "inst_from_dl2l3_shr_percent"
1240 },
1241 {
1242 "BriefDescription": "% of ICache reloads from Distant L4",
1243 "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS",
1244 "MetricGroup": "instruction_stats_percent_per_ref",
1245 "MetricName": "inst_from_dl4_percent"
1246 },
1247 {
1248 "BriefDescription": "% of ICache reloads from Distant Memory",
1249 "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS",
1250 "MetricGroup": "instruction_stats_percent_per_ref",
1251 "MetricName": "inst_from_dmem_percent"
1252 },
1253 {
1254 "BriefDescription": "% of ICache reloads from Private L2, other core",
1255 "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS",
1256 "MetricGroup": "instruction_stats_percent_per_ref",
1257 "MetricName": "inst_from_l21_mod_percent"
1258 },
1259 {
1260 "BriefDescription": "% of ICache reloads from Private L2, other core",
1261 "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS",
1262 "MetricGroup": "instruction_stats_percent_per_ref",
1263 "MetricName": "inst_from_l21_shr_percent"
1264 },
1265 {
1266 "BriefDescription": "% of ICache reloads from L2",
1267 "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS",
1268 "MetricGroup": "instruction_stats_percent_per_ref",
1269 "MetricName": "inst_from_l2_percent"
1270 },
1271 {
1272 "BriefDescription": "% of ICache reloads from Private L3, other core",
1273 "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS",
1274 "MetricGroup": "instruction_stats_percent_per_ref",
1275 "MetricName": "inst_from_l31_mod_percent"
1276 },
1277 {
1278 "BriefDescription": "% of ICache reloads from Private L3, other core",
1279 "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS",
1280 "MetricGroup": "instruction_stats_percent_per_ref",
1281 "MetricName": "inst_from_l31_shr_percent"
1282 },
1283 {
1284 "BriefDescription": "% of ICache reloads from L3",
1285 "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS",
1286 "MetricGroup": "instruction_stats_percent_per_ref",
1287 "MetricName": "inst_from_l3_percent"
1288 },
1289 {
1290 "BriefDescription": "% of ICache reloads from Local L4",
1291 "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS",
1292 "MetricGroup": "instruction_stats_percent_per_ref",
1293 "MetricName": "inst_from_ll4_percent"
1294 },
1295 {
1296 "BriefDescription": "% of ICache reloads from Local Memory",
1297 "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS",
1298 "MetricGroup": "instruction_stats_percent_per_ref",
1299 "MetricName": "inst_from_lmem_percent"
1300 },
1301 {
1302 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)",
1303 "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
1304 "MetricGroup": "instruction_stats_percent_per_ref",
1305 "MetricName": "inst_from_rl2l3_mod_percent"
1306 },
1307 {
1308 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)",
1309 "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
1310 "MetricGroup": "instruction_stats_percent_per_ref",
1311 "MetricName": "inst_from_rl2l3_shr_percent"
1312 },
1313 {
1314 "BriefDescription": "% of ICache reloads from Remote L4",
1315 "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS",
1316 "MetricGroup": "instruction_stats_percent_per_ref",
1317 "MetricName": "inst_from_rl4_percent"
1318 },
1319 {
1320 "BriefDescription": "% of ICache reloads from Remote Memory",
1321 "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS",
1322 "MetricGroup": "instruction_stats_percent_per_ref",
1323 "MetricName": "inst_from_rmem_percent"
1324 },
1325 {
1326 "BriefDescription": "Average number of stores that gather in the store buffer before being sent to an L2 RC machine",
1327 "MetricExpr": "PM_ST_CMPL / (PM_L2_ST / 2)",
1328 "MetricGroup": "l2_stats",
1329 "MetricName": "avg_stores_gathered"
1330 },
1331 {
1332 "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)",
1333 "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100",
1334 "MetricGroup": "l2_stats",
1335 "MetricName": "l2_st_miss_ratio_percent"
1336 },
1337 {
1338 "BriefDescription": "Percentage of L2 store misses per drained store. A drained store may contain multiple individual stores if they target the same line",
1339 "MetricExpr": "PM_L2_ST_MISS / (PM_L2_ST / 2)",
1340 "MetricGroup": "l2_stats",
1341 "MetricName": "l2_store_miss_ratio_percent"
1342 },
1343 {
1344 "BriefDescription": "average L1 miss latency using marked events",
1345 "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1",
1346 "MetricGroup": "latency",
1347 "MetricName": "average_dl1miss_latency"
1348 },
1349 {
1350 "BriefDescription": "Average icache miss latency",
1351 "MetricExpr": "(PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ)",
1352 "MetricGroup": "latency",
1353 "MetricName": "average_il1_miss_latency"
1354 },
1355 {
1356 "BriefDescription": "average service time for SYNC",
1357 "MetricExpr": "PM_LSU_SRQ_SYNC_CYC / PM_LSU_SRQ_SYNC",
1358 "MetricGroup": "latency",
1359 "MetricName": "average_sync_cyc"
1360 },
1361 {
1362 "BriefDescription": "Cycles LMQ slot0 was active on an average",
1363 "MetricExpr": "PM_LSU_LMQ_S0_VALID / PM_LSU_LMQ_S0_ALLOC",
1364 "MetricGroup": "latency",
1365 "MetricName": "avg_lmq_life_time"
1366 },
1367 {
1368 "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS",
1369 "MetricExpr": "PM_LSU_LRQ_S0_VALID / PM_LSU_LRQ_S0_ALLOC",
1370 "MetricGroup": "latency",
1371 "MetricName": "avg_lrq_life_time_even"
1372 },
1373 {
1374 "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 43 is valid ONLY FOR ODD THREADS",
1375 "MetricExpr": "PM_LSU_LRQ_S43_VALID / PM_LSU_LRQ_S43_ALLOC",
1376 "MetricGroup": "latency",
1377 "MetricName": "avg_lrq_life_time_odd"
1378 },
1379 {
1380 "BriefDescription": "Average number of cycles SRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS",
1381 "MetricExpr": "PM_LSU_SRQ_S0_VALID / PM_LSU_SRQ_S0_ALLOC",
1382 "MetricGroup": "latency",
1383 "MetricName": "avg_srq_life_time_even"
1384 },
1385 {
1386 "BriefDescription": "Average number of cycles SRQ stays active for one load. Slot 39 is valid ONLY FOR ODD THREADS",
1387 "MetricExpr": "PM_LSU_SRQ_S39_VALID / PM_LSU_SRQ_S39_ALLOC",
1388 "MetricGroup": "latency",
1389 "MetricName": "avg_srq_life_time_odd"
1390 },
1391 {
1392 "BriefDescription": "Marked background kill latency, measured in L2",
1393 "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL",
1394 "MetricGroup": "latency",
1395 "MetricName": "bkill_latency"
1396 },
1397 {
1398 "BriefDescription": "Marked dclaim latency, measured in L2",
1399 "MetricExpr": "PM_MRK_FAB_RSP_DCLAIM_CYC / PM_MRK_FAB_RSP_DCLAIM",
1400 "MetricGroup": "latency",
1401 "MetricName": "dclaim_latency"
1402 },
1403 {
1404 "BriefDescription": "Marked L2L3 remote Load latency",
1405 "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD",
1406 "MetricGroup": "latency",
1407 "MetricName": "dl2l3_mod_latency"
1408 },
1409 {
1410 "BriefDescription": "Marked L2L3 distant Load latency",
1411 "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR",
1412 "MetricGroup": "latency",
1413 "MetricName": "dl2l3_shr_latency"
1414 },
1415 {
1416 "BriefDescription": "Distant L4 average load latency",
1417 "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4",
1418 "MetricGroup": "latency",
1419 "MetricName": "dl4_latency"
1420 },
1421 {
1422 "BriefDescription": "Marked Dmem Load latency",
1423 "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM",
1424 "MetricGroup": "latency",
1425 "MetricName": "dmem_latency"
1426 },
1427 {
1428 "BriefDescription": "estimated exposed miss latency for dL1 misses, ie load miss when we were NTC",
1429 "MetricExpr": "PM_MRK_LD_MISS_EXPOSED_CYC / PM_MRK_LD_MISS_EXPOSED",
1430 "MetricGroup": "latency",
1431 "MetricName": "exposed_dl1miss_latency"
1432 },
1433 {
1434 "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the M state",
1435 "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD",
1436 "MetricGroup": "latency",
1437 "MetricName": "l21_mod_latency"
1438 },
1439 {
1440 "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the S state",
1441 "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR",
1442 "MetricGroup": "latency",
1443 "MetricName": "l21_shr_latency"
1444 },
1445 {
1446 "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time due to load-hit-store",
1447 "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
1448 "MetricGroup": "latency",
1449 "MetricName": "l2_disp_conflict_ldhitst_latency"
1450 },
1451 {
1452 "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time NOT due load-hit-store",
1453 "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER",
1454 "MetricGroup": "latency",
1455 "MetricName": "l2_disp_conflict_other_latency"
1456 },
1457 {
1458 "BriefDescription": "Average load latency for all marked demand loads that came from the L2",
1459 "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2",
1460 "MetricGroup": "latency",
1461 "MetricName": "l2_latency"
1462 },
1463 {
1464 "BriefDescription": "Average load latency for all marked demand loads that were satisfied by lines prefetched into the L3. This information is forwarded from the L3",
1465 "MetricExpr": "PM_MRK_DATA_FROM_L2_MEPF_CYC/ PM_MRK_DATA_FROM_L2",
1466 "MetricGroup": "latency",
1467 "MetricName": "l2_mepf_latency"
1468 },
1469 {
1470 "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered no conflicts",
1471 "MetricExpr": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2_NO_CONFLICT",
1472 "MetricGroup": "latency",
1473 "MetricName": "l2_no_conflict_latency"
1474 },
1475 {
1476 "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and beyond",
1477 "MetricExpr": "PM_MRK_DATA_FROM_L2MISS_CYC/ PM_MRK_DATA_FROM_L2MISS",
1478 "MetricGroup": "latency",
1479 "MetricName": "l2miss_latency"
1480 },
1481 {
1482 "BriefDescription": "Marked L31 Load latency",
1483 "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD",
1484 "MetricGroup": "latency",
1485 "MetricName": "l31_mod_latency"
1486 },
1487 {
1488 "BriefDescription": "Marked L31 Load latency",
1489 "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR",
1490 "MetricGroup": "latency",
1491 "MetricName": "l31_shr_latency"
1492 },
1493 {
1494 "BriefDescription": "Average load latency for all marked demand loads that came from the L3",
1495 "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3",
1496 "MetricGroup": "latency",
1497 "MetricName": "l3_latency"
1498 },
1499 {
1500 "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and suffered no conflicts",
1501 "MetricExpr": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L3_NO_CONFLICT",
1502 "MetricGroup": "latency",
1503 "MetricName": "l3_no_conflict_latency"
1504 },
1505 {
1506 "BriefDescription": "Average load latency for all marked demand loads that come from beyond the L3",
1507 "MetricExpr": "PM_MRK_DATA_FROM_L3MISS_CYC/ PM_MRK_DATA_FROM_L3MISS",
1508 "MetricGroup": "latency",
1509 "MetricName": "l3miss_latency"
1510 },
1511 {
1512 "BriefDescription": "Average latency for marked reloads that hit in the L3 on the MEPF state. i.e. lines that were prefetched into the L3",
1513 "MetricExpr": "PM_MRK_DATA_FROM_L3_MEPF_CYC/ PM_MRK_DATA_FROM_L3_MEPF",
1514 "MetricGroup": "latency",
1515 "MetricName": "l3pref_latency"
1516 },
1517 {
1518 "BriefDescription": "Local L4 average load latency",
1519 "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4",
1520 "MetricGroup": "latency",
1521 "MetricName": "ll4_latency"
1522 },
1523 {
1524 "BriefDescription": "Marked Lmem Load latency",
1525 "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM",
1526 "MetricGroup": "latency",
1527 "MetricName": "lmem_latency"
1528 },
1529 {
1530 "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on a different chip",
1531 "MetricExpr": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_OFF_CHIP_CACHE",
1532 "MetricGroup": "latency",
1533 "MetricName": "off_chip_cache_latency"
1534 },
1535 {
1536 "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on the same chip",
1537 "MetricExpr": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_ON_CHIP_CACHE",
1538 "MetricGroup": "latency",
1539 "MetricName": "on_chip_cache_latency"
1540 },
1541 {
1542 "BriefDescription": "Marked L2L3 remote Load latency",
1543 "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD",
1544 "MetricGroup": "latency",
1545 "MetricName": "rl2l3_mod_latency"
1546 },
1547 {
1548 "BriefDescription": "Marked L2L3 remote Load latency",
1549 "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR",
1550 "MetricGroup": "latency",
1551 "MetricName": "rl2l3_shr_latency"
1552 },
1553 {
1554 "BriefDescription": "Remote L4 average load latency",
1555 "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4",
1556 "MetricGroup": "latency",
1557 "MetricName": "rl4_latency"
1558 },
1559 {
1560 "BriefDescription": "Marked Rmem Load latency",
1561 "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM",
1562 "MetricGroup": "latency",
1563 "MetricName": "rmem_latency"
1564 },
1565 {
1566 "BriefDescription": "ERAT miss reject ratio",
1567 "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL",
1568 "MetricGroup": "lsu_rejects",
1569 "MetricName": "erat_reject_rate_percent"
1570 },
1571 {
1572 "BriefDescription": "ERAT miss reject ratio",
1573 "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / (PM_LSU_FIN - PM_LSU_FX_FIN)",
1574 "MetricGroup": "lsu_rejects",
1575 "MetricName": "erat_reject_ratio_percent"
1576 },
1577 {
1578 "BriefDescription": "LHS reject ratio",
1579 "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL",
1580 "MetricGroup": "lsu_rejects",
1581 "MetricName": "lhs_reject_rate_percent"
1582 },
1583 {
1584 "BriefDescription": "LHS reject ratio",
1585 "MetricExpr": "PM_LSU_REJECT_LHS *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)",
1586 "MetricGroup": "lsu_rejects",
1587 "MetricName": "lhs_reject_ratio_percent"
1588 },
1589 {
1590 "BriefDescription": "LMQ full reject ratio",
1591 "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL",
1592 "MetricGroup": "lsu_rejects",
1593 "MetricName": "lmq_full_reject_rate_percent"
1594 },
1595 {
1596 "BriefDescription": "LMQ full reject ratio",
1597 "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1",
1598 "MetricGroup": "lsu_rejects",
1599 "MetricName": "lmq_full_reject_ratio_percent"
1600 },
1601 {
1602 "BriefDescription": "LSU reject ratio",
1603 "MetricExpr": "PM_LSU_REJECT *100/ PM_RUN_INST_CMPL",
1604 "MetricGroup": "lsu_rejects",
1605 "MetricName": "lsu_reject_rate_percent"
1606 },
1607 {
1608 "BriefDescription": "LSU reject ratio",
1609 "MetricExpr": "PM_LSU_REJECT *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)",
1610 "MetricGroup": "lsu_rejects",
1611 "MetricName": "lsu_reject_ratio_percent"
1612 },
1613 {
1614 "BriefDescription": "Ratio of reloads from local L4 to distant L4",
1615 "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4",
1616 "MetricGroup": "memory",
1617 "MetricName": "ld_ll4_per_ld_dmem"
1618 },
1619 {
1620 "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4",
1621 "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)",
1622 "MetricGroup": "memory",
1623 "MetricName": "ld_ll4_per_ld_mem"
1624 },
1625 {
1626 "BriefDescription": "Ratio of reloads from local L4 to remote L4",
1627 "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4",
1628 "MetricGroup": "memory",
1629 "MetricName": "ld_ll4_per_ld_rl4"
1630 },
1631 {
1632 "BriefDescription": "Number of loads from local memory per loads from distant memory",
1633 "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM",
1634 "MetricGroup": "memory",
1635 "MetricName": "ld_lmem_per_ld_dmem"
1636 },
1637 {
1638 "BriefDescription": "Number of loads from local memory per loads from remote and distant memory",
1639 "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)",
1640 "MetricGroup": "memory",
1641 "MetricName": "ld_lmem_per_ld_mem"
1642 },
1643 {
1644 "BriefDescription": "Number of loads from local memory per loads from remote memory",
1645 "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM",
1646 "MetricGroup": "memory",
1647 "MetricName": "ld_lmem_per_ld_rmem"
1648 },
1649 {
1650 "BriefDescription": "Number of loads from remote memory per loads from distant memory",
1651 "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM",
1652 "MetricGroup": "memory",
1653 "MetricName": "ld_rmem_per_ld_dmem"
1654 },
1655 {
1656 "BriefDescription": "Memory locality",
1657 "MetricExpr": "(PM_DATA_FROM_LL4 + PM_DATA_FROM_LMEM) * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4 + PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4 + PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4)",
1658 "MetricGroup": "memory",
1659 "MetricName": "mem_locality_percent"
1660 },
1661 {
1662 "BriefDescription": "DERAT Miss Rate (per run instruction)(%)",
1663 "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL",
1664 "MetricGroup": "pteg_reloads_percent_per_inst",
1665 "MetricName": "derat_miss_rate_percent"
1666 },
1667 {
1668 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst",
1669 "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
1670 "MetricGroup": "pteg_reloads_percent_per_inst",
1671 "MetricName": "pteg_from_dl2l3_mod_rate_percent"
1672 },
1673 {
1674 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst",
1675 "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1676 "MetricGroup": "pteg_reloads_percent_per_inst",
1677 "MetricName": "pteg_from_dl2l3_shr_rate_percent"
1678 },
1679 {
1680 "BriefDescription": "% of DERAT reloads from Distant L4 per inst",
1681 "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
1682 "MetricGroup": "pteg_reloads_percent_per_inst",
1683 "MetricName": "pteg_from_dl4_rate_percent"
1684 },
1685 {
1686 "BriefDescription": "% of DERAT reloads from Distant Memory per inst",
1687 "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
1688 "MetricGroup": "pteg_reloads_percent_per_inst",
1689 "MetricName": "pteg_from_dmem_rate_percent"
1690 },
1691 {
1692 "BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
1693 "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
1694 "MetricGroup": "pteg_reloads_percent_per_inst",
1695 "MetricName": "pteg_from_l21_mod_rate_percent"
1696 },
1697 {
1698 "BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
1699 "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
1700 "MetricGroup": "pteg_reloads_percent_per_inst",
1701 "MetricName": "pteg_from_l21_shr_rate_percent"
1702 },
1703 {
1704 "BriefDescription": "% of DERAT reloads from L2 per inst",
1705 "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
1706 "MetricGroup": "pteg_reloads_percent_per_inst",
1707 "MetricName": "pteg_from_l2_rate_percent"
1708 },
1709 {
1710 "BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
1711 "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
1712 "MetricGroup": "pteg_reloads_percent_per_inst",
1713 "MetricName": "pteg_from_l31_mod_rate_percent"
1714 },
1715 {
1716 "BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
1717 "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
1718 "MetricGroup": "pteg_reloads_percent_per_inst",
1719 "MetricName": "pteg_from_l31_shr_rate_percent"
1720 },
1721 {
1722 "BriefDescription": "% of DERAT reloads from L3 per inst",
1723 "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
1724 "MetricGroup": "pteg_reloads_percent_per_inst",
1725 "MetricName": "pteg_from_l3_rate_percent"
1726 },
1727 {
1728 "BriefDescription": "% of DERAT reloads from Local L4 per inst",
1729 "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
1730 "MetricGroup": "pteg_reloads_percent_per_inst",
1731 "MetricName": "pteg_from_ll4_rate_percent"
1732 },
1733 {
1734 "BriefDescription": "% of DERAT reloads from Local Memory per inst",
1735 "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
1736 "MetricGroup": "pteg_reloads_percent_per_inst",
1737 "MetricName": "pteg_from_lmem_rate_percent"
1738 },
1739 {
1740 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst",
1741 "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
1742 "MetricGroup": "pteg_reloads_percent_per_inst",
1743 "MetricName": "pteg_from_rl2l3_mod_rate_percent"
1744 },
1745 {
1746 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst",
1747 "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1748 "MetricGroup": "pteg_reloads_percent_per_inst",
1749 "MetricName": "pteg_from_rl2l3_shr_rate_percent"
1750 },
1751 {
1752 "BriefDescription": "% of DERAT reloads from Remote L4 per inst",
1753 "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
1754 "MetricGroup": "pteg_reloads_percent_per_inst",
1755 "MetricName": "pteg_from_rl4_rate_percent"
1756 },
1757 {
1758 "BriefDescription": "% of DERAT reloads from Remote Memory per inst",
1759 "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
1760 "MetricGroup": "pteg_reloads_percent_per_inst",
1761 "MetricName": "pteg_from_rmem_rate_percent"
1762 },
1763 {
1764 "BriefDescription": "% of DERAT misses that result in an ERAT reload",
1765 "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS",
1766 "MetricGroup": "pteg_reloads_percent_per_ref",
1767 "MetricName": "derat_miss_reload_percent"
1768 },
1769 {
1770 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)",
1771 "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS",
1772 "MetricGroup": "pteg_reloads_percent_per_ref",
1773 "MetricName": "pteg_from_dl2l3_mod_percent"
1774 },
1775 {
1776 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)",
1777 "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS",
1778 "MetricGroup": "pteg_reloads_percent_per_ref",
1779 "MetricName": "pteg_from_dl2l3_shr_percent"
1780 },
1781 {
1782 "BriefDescription": "% of DERAT reloads from Distant L4",
1783 "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS",
1784 "MetricGroup": "pteg_reloads_percent_per_ref",
1785 "MetricName": "pteg_from_dl4_percent"
1786 },
1787 {
1788 "BriefDescription": "% of DERAT reloads from Distant Memory",
1789 "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS",
1790 "MetricGroup": "pteg_reloads_percent_per_ref",
1791 "MetricName": "pteg_from_dmem_percent"
1792 },
1793 {
1794 "BriefDescription": "% of DERAT reloads from Private L2, other core",
1795 "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS",
1796 "MetricGroup": "pteg_reloads_percent_per_ref",
1797 "MetricName": "pteg_from_l21_mod_percent"
1798 },
1799 {
1800 "BriefDescription": "% of DERAT reloads from Private L2, other core",
1801 "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS",
1802 "MetricGroup": "pteg_reloads_percent_per_ref",
1803 "MetricName": "pteg_from_l21_shr_percent"
1804 },
1805 {
1806 "BriefDescription": "% of DERAT reloads from L2",
1807 "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS",
1808 "MetricGroup": "pteg_reloads_percent_per_ref",
1809 "MetricName": "pteg_from_l2_percent"
1810 },
1811 {
1812 "BriefDescription": "% of DERAT reloads from Private L3, other core",
1813 "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS",
1814 "MetricGroup": "pteg_reloads_percent_per_ref",
1815 "MetricName": "pteg_from_l31_mod_percent"
1816 },
1817 {
1818 "BriefDescription": "% of DERAT reloads from Private L3, other core",
1819 "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS",
1820 "MetricGroup": "pteg_reloads_percent_per_ref",
1821 "MetricName": "pteg_from_l31_shr_percent"
1822 },
1823 {
1824 "BriefDescription": "% of DERAT reloads from L3",
1825 "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS",
1826 "MetricGroup": "pteg_reloads_percent_per_ref",
1827 "MetricName": "pteg_from_l3_percent"
1828 },
1829 {
1830 "BriefDescription": "% of DERAT reloads from Local L4",
1831 "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS",
1832 "MetricGroup": "pteg_reloads_percent_per_ref",
1833 "MetricName": "pteg_from_ll4_percent"
1834 },
1835 {
1836 "BriefDescription": "% of DERAT reloads from Local Memory",
1837 "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS",
1838 "MetricGroup": "pteg_reloads_percent_per_ref",
1839 "MetricName": "pteg_from_lmem_percent"
1840 },
1841 {
1842 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)",
1843 "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS",
1844 "MetricGroup": "pteg_reloads_percent_per_ref",
1845 "MetricName": "pteg_from_rl2l3_mod_percent"
1846 },
1847 {
1848 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)",
1849 "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS",
1850 "MetricGroup": "pteg_reloads_percent_per_ref",
1851 "MetricName": "pteg_from_rl2l3_shr_percent"
1852 },
1853 {
1854 "BriefDescription": "% of DERAT reloads from Remote L4",
1855 "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS",
1856 "MetricGroup": "pteg_reloads_percent_per_ref",
1857 "MetricName": "pteg_from_rl4_percent"
1858 },
1859 {
1860 "BriefDescription": "% of DERAT reloads from Remote Memory",
1861 "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS",
1862 "MetricGroup": "pteg_reloads_percent_per_ref",
1863 "MetricName": "pteg_from_rmem_percent"
1864 },
1865 {
1866 "BriefDescription": "% DERAT miss rate for 16G page per inst",
1867 "MetricExpr": "100 * PM_DERAT_MISS_16G / PM_RUN_INST_CMPL",
1868 "MetricGroup": "translation",
1869 "MetricName": "derat_16g_miss_rate_percent"
1870 },
1871 {
1872 "BriefDescription": "DERAT miss ratio for 16G page",
1873 "MetricExpr": "PM_DERAT_MISS_16G / PM_LSU_DERAT_MISS",
1874 "MetricGroup": "translation",
1875 "MetricName": "derat_16g_miss_ratio"
1876 },
1877 {
1878 "BriefDescription": "% DERAT miss rate for 16M page per inst",
1879 "MetricExpr": "PM_DERAT_MISS_16M * 100 / PM_RUN_INST_CMPL",
1880 "MetricGroup": "translation",
1881 "MetricName": "derat_16m_miss_rate_percent"
1882 },
1883 {
1884 "BriefDescription": "DERAT miss ratio for 16M page",
1885 "MetricExpr": "PM_DERAT_MISS_16M / PM_LSU_DERAT_MISS",
1886 "MetricGroup": "translation",
1887 "MetricName": "derat_16m_miss_ratio"
1888 },
1889 {
1890 "BriefDescription": "% DERAT miss rate for 4K page per inst",
1891 "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL",
1892 "MetricGroup": "translation",
1893 "MetricName": "derat_4k_miss_rate_percent"
1894 },
1895 {
1896 "BriefDescription": "DERAT miss ratio for 4K page",
1897 "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS",
1898 "MetricGroup": "translation",
1899 "MetricName": "derat_4k_miss_ratio"
1900 },
1901 {
1902 "BriefDescription": "% DERAT miss rate for 64K page per inst",
1903 "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL",
1904 "MetricGroup": "translation",
1905 "MetricName": "derat_64k_miss_rate_percent"
1906 },
1907 {
1908 "BriefDescription": "DERAT miss ratio for 64K page",
1909 "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS",
1910 "MetricGroup": "translation",
1911 "MetricName": "derat_64k_miss_ratio"
1912 },
1913 {
1914 "BriefDescription": "% DSLB_Miss_Rate per inst",
1915 "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL",
1916 "MetricGroup": "translation",
1917 "MetricName": "dslb_miss_rate_percent"
1918 },
1919 {
1920 "BriefDescription": "% ISLB miss rate per inst",
1921 "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL",
1922 "MetricGroup": "translation",
1923 "MetricName": "islb_miss_rate_percent"
1924 },
1925 {
1926 "BriefDescription": "Fraction of hits on any Centaur (local, remote, or distant) on either L4 or DRAM per L1 load ref",
1927 "MetricExpr": "PM_DATA_FROM_MEMORY / PM_LD_REF_L1",
1928 "MetricName": "any_centaur_ld_hit_ratio"
1929 },
1930 {
1931 "BriefDescription": "Base Completion Cycles",
1932 "MetricExpr": "PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL",
1933 "MetricName": "base_completion_cpi"
1934 },
1935 {
1936 "BriefDescription": "Marked background kill latency, measured in L2",
1937 "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL",
1938 "MetricName": "bkill_ratio_percent"
1939 },
1940 {
1941 "BriefDescription": "cycles",
1942 "MetricExpr": "PM_RUN_CYC",
1943 "MetricName": "custom_secs"
1944 },
1945 {
1946 "BriefDescription": "Fraction of hits on a distant chip's Centaur (L4 or DRAM) per L1 load ref",
1947 "MetricExpr": "(PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4) / PM_LD_REF_L1",
1948 "MetricName": "distant_centaur_ld_hit_ratio"
1949 },
1950 {
1951 "BriefDescription": "% of DL1 reloads that came from the L3 and beyond",
1952 "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
1953 "MetricName": "dl1_reload_from_l2_miss_percent"
1954 },
1955 {
1956 "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
1957 "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
1958 "MetricName": "dl1_reload_from_l31_rate_percent"
1959 },
1960 {
1961 "BriefDescription": "Percentage of DL1 reloads from L3 where the lines were brought into the L3 by a prefetch operation",
1962 "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID",
1963 "MetricName": "dl1_reload_from_l3_mepf_percent"
1964 },
1965 {
1966 "BriefDescription": "% of DL1 Reloads from beyond the local L3",
1967 "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
1968 "MetricName": "dl1_reload_from_l3_miss_percent"
1969 },
1970 {
1971 "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a distant chip per L1 load ref",
1972 "MetricExpr": "PM_DATA_FROM_DL2L3_MOD / PM_LD_REF_L1",
1973 "MetricName": "dl2l3_mod_ld_hit_ratio"
1974 },
1975 {
1976 "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a distant chip per L1 load ref",
1977 "MetricExpr": "PM_DATA_FROM_DL2L3_SHR / PM_LD_REF_L1",
1978 "MetricName": "dl2l3_shr_ld_hit_ratio"
1979 },
1980 {
1981 "BriefDescription": "Fraction of hits on a distant Centaur's cache per L1 load ref",
1982 "MetricExpr": "PM_DATA_FROM_DL4 / PM_LD_REF_L1",
1983 "MetricName": "dl4_ld_hit_ratio"
1984 },
1985 {
1986 "BriefDescription": "Fraction of hits on a distant Centaur's DRAM per L1 load ref",
1987 "MetricExpr": "PM_DATA_FROM_DMEM / PM_LD_REF_L1",
1988 "MetricName": "dmem_ld_hit_ratio"
1989 },
1990 {
1991 "BriefDescription": "Rate of DERAT reloads from L2",
1992 "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
1993 "MetricName": "dpteg_from_l2_rate_percent"
1994 },
1995 {
1996 "BriefDescription": "Rate of DERAT reloads from L3",
1997 "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
1998 "MetricName": "dpteg_from_l3_rate_percent"
1999 },
2000 {
2001 "BriefDescription": "Overhead of expansion cycles",
2002 "MetricExpr": "(PM_GRP_CMPL / PM_RUN_INST_CMPL) - (PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL)",
2003 "MetricName": "expansion_overhead_cpi"
2004 },
2005 {
2006 "BriefDescription": "Total Fixed point operations executed in the Load/Store Unit following a load/store operation",
2007 "MetricExpr": "PM_LSU_FX_FIN/PM_RUN_INST_CMPL",
2008 "MetricName": "fixed_in_lsu_per_inst"
2009 },
2010 {
2011 "BriefDescription": "GCT empty cycles",
2012 "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_CYC) * 100",
2013 "MetricName": "gct_empty_percent"
2014 },
2015 {
2016 "BriefDescription": "Rate of IERAT reloads from L2",
2017 "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
2018 "MetricName": "ipteg_from_l2_rate_percent"
2019 },
2020 {
2021 "BriefDescription": "Rate of IERAT reloads from L3",
2022 "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
2023 "MetricName": "ipteg_from_l3_rate_percent"
2024 },
2025 {
2026 "BriefDescription": "Rate of IERAT reloads from Local L4",
2027 "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
2028 "MetricName": "ipteg_from_ll4_rate_percent"
2029 },
2030 {
2031 "BriefDescription": "Rate of IERAT reloads from local memory",
2032 "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
2033 "MetricName": "ipteg_from_lmem_rate_percent"
2034 },
2035 {
2036 "BriefDescription": "Fraction of L1 hits per load ref",
2037 "MetricExpr": "(PM_LD_REF_L1 - PM_LD_MISS_L1) / PM_LD_REF_L1",
2038 "MetricName": "l1_ld_hit_ratio"
2039 },
2040 {
2041 "BriefDescription": "Fraction of L1 load misses per L1 load ref",
2042 "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1",
2043 "MetricName": "l1_ld_miss_ratio"
2044 },
2045 {
2046 "BriefDescription": "Fraction of hits on another core's L2 on the same chip per L1 load ref",
2047 "MetricExpr": "(PM_DATA_FROM_L21_MOD + PM_DATA_FROM_L21_SHR) / PM_LD_REF_L1",
2048 "MetricName": "l2_1_ld_hit_ratio"
2049 },
2050 {
2051 "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L2 on the same chip per L1 load ref",
2052 "MetricExpr": "PM_DATA_FROM_L21_MOD / PM_LD_REF_L1",
2053 "MetricName": "l2_1_mod_ld_hit_ratio"
2054 },
2055 {
2056 "BriefDescription": "Fraction of hits of a line in the S state on another core's L2 on the same chip per L1 load ref",
2057 "MetricExpr": "PM_DATA_FROM_L21_SHR / PM_LD_REF_L1",
2058 "MetricName": "l2_1_shr_ld_hit_ratio"
2059 },
2060 {
2061 "BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle",
2062 "MetricExpr": "(PM_CO_USAGE / PM_RUN_CYC) * 16",
2063 "MetricName": "l2_co_usage"
2064 },
2065 {
2066 "BriefDescription": "Fraction of L2 load hits per L1 load ref",
2067 "MetricExpr": "PM_DATA_FROM_L2 / PM_LD_REF_L1",
2068 "MetricName": "l2_ld_hit_ratio"
2069 },
2070 {
2071 "BriefDescription": "Fraction of L2 load misses per L1 load ref",
2072 "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_REF_L1",
2073 "MetricName": "l2_ld_miss_ratio"
2074 },
2075 {
2076 "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced a Load-Hit-Store conflict",
2077 "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST / PM_LD_REF_L1",
2078 "MetricName": "l2_lhs_ld_hit_ratio"
2079 },
2080 {
2081 "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 did not experience a conflict",
2082 "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT / PM_LD_REF_L1",
2083 "MetricName": "l2_no_conflict_ld_hit_ratio"
2084 },
2085 {
2086 "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced some conflict other than Load-Hit-Store",
2087 "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER / PM_LD_REF_L1",
2088 "MetricName": "l2_other_conflict_ld_hit_ratio"
2089 },
2090 {
2091 "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle",
2092 "MetricExpr": "(PM_RC_USAGE / PM_RUN_CYC) * 16",
2093 "MetricName": "l2_rc_usage"
2094 },
2095 {
2096 "BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle",
2097 "MetricExpr": "(PM_SN_USAGE / PM_RUN_CYC) * 8",
2098 "MetricName": "l2_sn_usage"
2099 },
2100 {
2101 "BriefDescription": "Marked L31 Load latency",
2102 "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)",
2103 "MetricName": "l31_latency"
2104 },
2105 {
2106 "BriefDescription": "Fraction of hits on another core's L3 on the same chip per L1 load ref",
2107 "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) / PM_LD_REF_L1",
2108 "MetricName": "l3_1_ld_hit_ratio"
2109 },
2110 {
2111 "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L3 on the same chip per L1 load ref",
2112 "MetricExpr": "PM_DATA_FROM_L31_MOD / PM_LD_REF_L1",
2113 "MetricName": "l3_1_mod_ld_hit_ratio"
2114 },
2115 {
2116 "BriefDescription": "Fraction of hits of a line in the S state on another core's L3 on the same chip per L1 load ref",
2117 "MetricExpr": "PM_DATA_FROM_L31_SHR / PM_LD_REF_L1",
2118 "MetricName": "l3_1_shr_ld_hit_ratio"
2119 },
2120 {
2121 "BriefDescription": "Fraction of L3 load hits per load ref where the demand load collided with a pending prefetch",
2122 "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT / PM_LD_REF_L1",
2123 "MetricName": "l3_conflict_ld_hit_ratio"
2124 },
2125 {
2126 "BriefDescription": "Fraction of L3 load hits per L1 load ref",
2127 "MetricExpr": "PM_DATA_FROM_L3 / PM_LD_REF_L1",
2128 "MetricName": "l3_ld_hit_ratio"
2129 },
2130 {
2131 "BriefDescription": "Fraction of L3 load misses per L1 load ref",
2132 "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_REF_L1",
2133 "MetricName": "l3_ld_miss_ratio"
2134 },
2135 {
2136 "BriefDescription": "Fraction of L3 load hits per load ref where the L3 did not experience a conflict",
2137 "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT / PM_LD_REF_L1",
2138 "MetricName": "l3_no_conflict_ld_hit_ratio"
2139 },
2140 {
2141 "BriefDescription": "Fraction of L3 hits on lines that were not in the MEPF state per L1 load ref",
2142 "MetricExpr": "(PM_DATA_FROM_L3 - PM_DATA_FROM_L3_MEPF) / PM_LD_REF_L1",
2143 "MetricName": "l3other_ld_hit_ratio"
2144 },
2145 {
2146 "BriefDescription": "Fraction of L3 hits on lines that were recently prefetched into the L3 (MEPF state) per L1 load ref",
2147 "MetricExpr": "PM_DATA_FROM_L3_MEPF / PM_LD_REF_L1",
2148 "MetricName": "l3pref_ld_hit_ratio"
2149 },
2150 {
2151 "BriefDescription": "Fraction of hits on a local Centaur's cache per L1 load ref",
2152 "MetricExpr": "PM_DATA_FROM_LL4 / PM_LD_REF_L1",
2153 "MetricName": "ll4_ld_hit_ratio"
2154 },
2155 {
2156 "BriefDescription": "Fraction of hits on a local Centaur's DRAM per L1 load ref",
2157 "MetricExpr": "PM_DATA_FROM_LMEM / PM_LD_REF_L1",
2158 "MetricName": "lmem_ld_hit_ratio"
2159 },
2160 {
2161 "BriefDescription": "Fraction of hits on a local Centaur (L4 or DRAM) per L1 load ref",
2162 "MetricExpr": "(PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4) / PM_LD_REF_L1",
2163 "MetricName": "local_centaur_ld_hit_ratio"
2164 },
2165 {
2166 "BriefDescription": "Cycles stalled by Other LSU Operations",
2167 "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_REJECT - PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_STORE) / (PM_LD_REF_L1 - PM_LD_MISS_L1)",
2168 "MetricName": "lsu_stall_avg_cyc_per_l1hit_stfw"
2169 },
2170 {
2171 "BriefDescription": "Fraction of hits on another core's L2 or L3 on a different chip (remote or distant) per L1 load ref",
2172 "MetricExpr": "PM_DATA_FROM_OFF_CHIP_CACHE / PM_LD_REF_L1",
2173 "MetricName": "off_chip_cache_ld_hit_ratio"
2174 },
2175 {
2176 "BriefDescription": "Fraction of hits on another core's L2 or L3 on the same chip per L1 load ref",
2177 "MetricExpr": "PM_DATA_FROM_ON_CHIP_CACHE / PM_LD_REF_L1",
2178 "MetricName": "on_chip_cache_ld_hit_ratio"
2179 },
2180 {
2181 "BriefDescription": "Fraction of hits on a remote chip's Centaur (L4 or DRAM) per L1 load ref",
2182 "MetricExpr": "(PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4) / PM_LD_REF_L1",
2183 "MetricName": "remote_centaur_ld_hit_ratio"
2184 },
2185 {
2186 "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable resources or facilities",
2187 "MetricExpr": "PM_ISU_REJECT_RES_NA *100/ PM_RUN_INST_CMPL",
2188 "MetricName": "resource_na_reject_rate_percent"
2189 },
2190 {
2191 "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a remote chip per L1 load ref",
2192 "MetricExpr": "PM_DATA_FROM_RL2L3_MOD / PM_LD_REF_L1",
2193 "MetricName": "rl2l3_mod_ld_hit_ratio"
2194 },
2195 {
2196 "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a remote chip per L1 load ref",
2197 "MetricExpr": "PM_DATA_FROM_RL2L3_SHR / PM_LD_REF_L1",
2198 "MetricName": "rl2l3_shr_ld_hit_ratio"
2199 },
2200 {
2201 "BriefDescription": "Fraction of hits on a remote Centaur's cache per L1 load ref",
2202 "MetricExpr": "PM_DATA_FROM_RL4 / PM_LD_REF_L1",
2203 "MetricName": "rl4_ld_hit_ratio"
2204 },
2205 {
2206 "BriefDescription": "Fraction of hits on a remote Centaur's DRAM per L1 load ref",
2207 "MetricExpr": "PM_DATA_FROM_RMEM / PM_LD_REF_L1",
2208 "MetricName": "rmem_ld_hit_ratio"
2209 },
2210 {
2211 "BriefDescription": "Percent of all FXU/VSU instructions that got rejected due to SAR Bypass",
2212 "MetricExpr": "PM_ISU_REJECT_SAR_BYPASS *100/ PM_RUN_INST_CMPL",
2213 "MetricName": "sar_bypass_reject_rate_percent"
2214 },
2215 {
2216 "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable sources",
2217 "MetricExpr": "PM_ISU_REJECT_SRC_NA *100/ PM_RUN_INST_CMPL",
2218 "MetricName": "source_na_reject_rate_percent"
2219 },
2220 {
2221 "BriefDescription": "Store forward rate",
2222 "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / PM_RUN_INST_CMPL",
2223 "MetricName": "store_forward_rate_percent"
2224 },
2225 {
2226 "BriefDescription": "Store forward rate",
2227 "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / (PM_LD_REF_L1 - PM_LD_MISS_L1)",
2228 "MetricName": "store_forward_ratio_percent"
2229 },
2230 {
2231 "BriefDescription": "Marked store latency, from core completion to L2 RC machine completion",
2232 "MetricExpr": "(PM_MRK_ST_L2DISP_TO_CMPL_CYC + PM_MRK_ST_DRAIN_TO_L2DISP_CYC) / PM_MRK_ST_NEST",
2233 "MetricName": "store_latency"
2234 },
2235 {
2236 "BriefDescription": "Cycles stalled by any sync",
2237 "MetricExpr": "(PM_CMPLU_STALL_LWSYNC + PM_CMPLU_STALL_HWSYNC) / PM_RUN_INST_CMPL",
2238 "MetricName": "sync_stall_cpi"
2239 },
2240 {
2241 "BriefDescription": "Percentage of lines that were prefetched into the L3 and evicted before they were consumed",
2242 "MetricExpr": "(PM_L3_CO_MEPF / 2) / PM_L3_PREF_ALL * 100",
2243 "MetricName": "wasted_l3_prefetch_percent"
2244 }
2245]
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
new file mode 100644
index 000000000000..811c2a8c1c9e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
@@ -0,0 +1,1982 @@
1[
2 {
3 "MetricExpr": "PM_BR_MPRED_CMPL / PM_BR_PRED * 100",
4 "MetricGroup": "branch_prediction",
5 "MetricName": "br_misprediction_percent"
6 },
7 {
8 "BriefDescription": "Count cache branch misprediction per instruction",
9 "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100",
10 "MetricGroup": "branch_prediction",
11 "MetricName": "ccache_mispredict_rate_percent"
12 },
13 {
14 "BriefDescription": "Count cache branch misprediction",
15 "MetricExpr": "PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100",
16 "MetricGroup": "branch_prediction",
17 "MetricName": "ccache_misprediction_percent"
18 },
19 {
20 "BriefDescription": "Link stack branch misprediction",
21 "MetricExpr": "PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100",
22 "MetricGroup": "branch_prediction",
23 "MetricName": "lstack_mispredict_rate_percent"
24 },
25 {
26 "BriefDescription": "Link stack branch misprediction",
27 "MetricExpr": "PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100",
28 "MetricGroup": "branch_prediction",
29 "MetricName": "lstack_misprediction_percent"
30 },
31 {
32 "BriefDescription": "% Branches Taken",
33 "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN",
34 "MetricGroup": "branch_prediction",
35 "MetricName": "taken_branches_percent"
36 },
37 {
38 "BriefDescription": "Completion stall due to a Branch Unit",
39 "MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL",
40 "MetricGroup": "cpi_breakdown",
41 "MetricName": "bru_stall_cpi"
42 },
43 {
44 "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish",
45 "MetricExpr": "PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL",
46 "MetricGroup": "cpi_breakdown",
47 "MetricName": "crypto_stall_cpi"
48 },
49 {
50 "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest",
51 "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL",
52 "MetricGroup": "cpi_breakdown",
53 "MetricName": "dcache_miss_stall_cpi"
54 },
55 {
56 "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.",
57 "MetricExpr": "PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL",
58 "MetricGroup": "cpi_breakdown",
59 "MetricName": "dflong_stall_cpi"
60 },
61 {
62 "BriefDescription": "Stalls due to short latency decimal floating ops.",
63 "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL",
64 "MetricGroup": "cpi_breakdown",
65 "MetricName": "dfu_other_stall_cpi"
66 },
67 {
68 "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.",
69 "MetricExpr": "PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL",
70 "MetricGroup": "cpi_breakdown",
71 "MetricName": "dfu_stall_cpi"
72 },
73 {
74 "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache",
75 "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL",
76 "MetricGroup": "cpi_breakdown",
77 "MetricName": "dmiss_distant_stall_cpi"
78 },
79 {
80 "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)",
81 "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL",
82 "MetricGroup": "cpi_breakdown",
83 "MetricName": "dmiss_l21_l31_stall_cpi"
84 },
85 {
86 "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict",
87 "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL",
88 "MetricGroup": "cpi_breakdown",
89 "MetricName": "dmiss_l2l3_conflict_stall_cpi"
90 },
91 {
92 "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict",
93 "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL",
94 "MetricGroup": "cpi_breakdown",
95 "MetricName": "dmiss_l2l3_noconflict_stall_cpi"
96 },
97 {
98 "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3",
99 "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL",
100 "MetricGroup": "cpi_breakdown",
101 "MetricName": "dmiss_l2l3_stall_cpi"
102 },
103 {
104 "BriefDescription": "Completion stall due to cache miss resolving missed the L3",
105 "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL",
106 "MetricGroup": "cpi_breakdown",
107 "MetricName": "dmiss_l3miss_stall_cpi"
108 },
109 {
110 "BriefDescription": "Completion stall due to cache miss that resolves in local memory",
111 "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL",
112 "MetricGroup": "cpi_breakdown",
113 "MetricName": "dmiss_lmem_stall_cpi"
114 },
115 {
116 "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory",
117 "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL",
118 "MetricGroup": "cpi_breakdown",
119 "MetricName": "dmiss_non_local_stall_cpi"
120 },
121 {
122 "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)",
123 "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL",
124 "MetricGroup": "cpi_breakdown",
125 "MetricName": "dmiss_remote_stall_cpi"
126 },
127 {
128 "BriefDescription": "Stalls due to short latency double precision ops.",
129 "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL",
130 "MetricGroup": "cpi_breakdown",
131 "MetricName": "dp_other_stall_cpi"
132 },
133 {
134 "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
135 "MetricExpr": "PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL",
136 "MetricGroup": "cpi_breakdown",
137 "MetricName": "dp_stall_cpi"
138 },
139 {
140 "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
141 "MetricExpr": "PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL",
142 "MetricGroup": "cpi_breakdown",
143 "MetricName": "dplong_stall_cpi"
144 },
145 {
146 "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2",
147 "MetricExpr": "PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL",
148 "MetricGroup": "cpi_breakdown",
149 "MetricName": "eieio_stall_cpi"
150 },
151 {
152 "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full",
153 "MetricExpr": "PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL",
154 "MetricGroup": "cpi_breakdown",
155 "MetricName": "emq_full_stall_cpi"
156 },
157 {
158 "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL",
159 "MetricGroup": "cpi_breakdown",
160 "MetricName": "emq_stall_cpi"
161 },
162 {
163 "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss",
164 "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL",
165 "MetricGroup": "cpi_breakdown",
166 "MetricName": "erat_miss_stall_cpi"
167 },
168 {
169 "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete",
170 "MetricExpr": "PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL",
171 "MetricGroup": "cpi_breakdown",
172 "MetricName": "exception_stall_cpi"
173 },
174 {
175 "BriefDescription": "Completion stall due to execution units for other reasons.",
176 "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
177 "MetricGroup": "cpi_breakdown",
178 "MetricName": "exec_unit_other_stall_cpi"
179 },
180 {
181 "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)",
182 "MetricExpr": "PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL",
183 "MetricGroup": "cpi_breakdown",
184 "MetricName": "exec_unit_stall_cpi"
185 },
186 {
187 "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion",
188 "MetricExpr": "PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL",
189 "MetricGroup": "cpi_breakdown",
190 "MetricName": "flush_any_thread_stall_cpi"
191 },
192 {
193 "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)",
194 "MetricExpr": "PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL",
195 "MetricGroup": "cpi_breakdown",
196 "MetricName": "fxlong_stall_cpi"
197 },
198 {
199 "BriefDescription": "Stalls due to short latency integer ops",
200 "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL",
201 "MetricGroup": "cpi_breakdown",
202 "MetricName": "fxu_other_stall_cpi"
203 },
204 {
205 "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
206 "MetricExpr": "PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL",
207 "MetricGroup": "cpi_breakdown",
208 "MetricName": "fxu_stall_cpi"
209 },
210 {
211 "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
212 "MetricGroup": "cpi_breakdown",
213 "MetricName": "issue_hold_cpi"
214 },
215 {
216 "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied",
217 "MetricExpr": "PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL",
218 "MetricGroup": "cpi_breakdown",
219 "MetricName": "larx_stall_cpi"
220 },
221 {
222 "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data",
223 "MetricExpr": "PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL",
224 "MetricGroup": "cpi_breakdown",
225 "MetricName": "lhs_stall_cpi"
226 },
227 {
228 "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full",
229 "MetricExpr": "PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL",
230 "MetricGroup": "cpi_breakdown",
231 "MetricName": "lmq_full_stall_cpi"
232 },
233 {
234 "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish",
235 "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL",
236 "MetricGroup": "cpi_breakdown",
237 "MetricName": "load_finish_stall_cpi"
238 },
239 {
240 "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full",
241 "MetricExpr": "PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL",
242 "MetricGroup": "cpi_breakdown",
243 "MetricName": "lrq_full_stall_cpi"
244 },
245 {
246 "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others",
247 "MetricExpr": "PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL",
248 "MetricGroup": "cpi_breakdown",
249 "MetricName": "lrq_other_stall_cpi"
250 },
251 {
252 "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL",
253 "MetricGroup": "cpi_breakdown",
254 "MetricName": "lrq_stall_cpi"
255 },
256 {
257 "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch",
258 "MetricExpr": "PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL",
259 "MetricGroup": "cpi_breakdown",
260 "MetricName": "lsaq_arb_stall_cpi"
261 },
262 {
263 "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL",
264 "MetricGroup": "cpi_breakdown",
265 "MetricName": "lsaq_stall_cpi"
266 },
267 {
268 "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish",
269 "MetricExpr": "PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL",
270 "MetricGroup": "cpi_breakdown",
271 "MetricName": "lsu_fin_stall_cpi"
272 },
273 {
274 "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete",
275 "MetricExpr": "PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL",
276 "MetricGroup": "cpi_breakdown",
277 "MetricName": "lsu_flush_next_stall_cpi"
278 },
279 {
280 "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned",
281 "MetricExpr": "PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL",
282 "MetricGroup": "cpi_breakdown",
283 "MetricName": "lsu_mfspr_stall_cpi"
284 },
285 {
286 "BriefDescription": "Completion LSU stall for other reasons",
287 "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL",
288 "MetricGroup": "cpi_breakdown",
289 "MetricName": "lsu_other_stall_cpi"
290 },
291 {
292 "BriefDescription": "Completion stall by LSU instruction",
293 "MetricExpr": "PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL",
294 "MetricGroup": "cpi_breakdown",
295 "MetricName": "lsu_stall_cpi"
296 },
297 {
298 "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)",
299 "MetricExpr": "PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL",
300 "MetricGroup": "cpi_breakdown",
301 "MetricName": "mtfpscr_stall_cpi"
302 },
303 {
304 "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT",
305 "MetricExpr": "PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL",
306 "MetricGroup": "cpi_breakdown",
307 "MetricName": "nested_tbegin_stall_cpi"
308 },
309 {
310 "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay",
311 "MetricExpr": "PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL",
312 "MetricGroup": "cpi_breakdown",
313 "MetricName": "nested_tend_stall_cpi"
314 },
315 {
316 "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread",
317 "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL",
318 "MetricGroup": "cpi_breakdown",
319 "MetricName": "nothing_dispatched_cpi"
320 },
321 {
322 "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.",
323 "MetricExpr": "PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL",
324 "MetricGroup": "cpi_breakdown",
325 "MetricName": "ntc_disp_fin_stall_cpi"
326 },
327 {
328 "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack",
329 "MetricExpr": "PM_NTC_FIN/PM_RUN_INST_CMPL",
330 "MetricGroup": "cpi_breakdown",
331 "MetricName": "ntc_fin_cpi"
332 },
333 {
334 "BriefDescription": "Completion stall due to ntc flush",
335 "MetricExpr": "PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL",
336 "MetricGroup": "cpi_breakdown",
337 "MetricName": "ntc_flush_stall_cpi"
338 },
339 {
340 "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)",
341 "MetricExpr": "PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL",
342 "MetricGroup": "cpi_breakdown",
343 "MetricName": "ntc_issue_held_arb_cpi"
344 },
345 {
346 "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it",
347 "MetricExpr": "PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL",
348 "MetricGroup": "cpi_breakdown",
349 "MetricName": "ntc_issue_held_darq_full_cpi"
350 },
351 {
352 "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU",
353 "MetricExpr": "PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL",
354 "MetricGroup": "cpi_breakdown",
355 "MetricName": "ntc_issue_held_other_cpi"
356 },
357 {
358 "BriefDescription": "Cycles unaccounted for.",
359 "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL",
360 "MetricGroup": "cpi_breakdown",
361 "MetricName": "other_cpi"
362 },
363 {
364 "BriefDescription": "Completion stall for other reasons",
365 "MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
366 "MetricGroup": "cpi_breakdown",
367 "MetricName": "other_stall_cpi"
368 },
369 {
370 "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2",
371 "MetricExpr": "PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL",
372 "MetricGroup": "cpi_breakdown",
373 "MetricName": "paste_stall_cpi"
374 },
375 {
376 "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.",
377 "MetricExpr": "PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL",
378 "MetricGroup": "cpi_breakdown",
379 "MetricName": "pm_stall_cpi"
380 },
381 {
382 "BriefDescription": "Run cycles per run instruction",
383 "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL",
384 "MetricGroup": "cpi_breakdown",
385 "MetricName": "run_cpi"
386 },
387 {
388 "BriefDescription": "Run_cycles",
389 "MetricExpr": "PM_RUN_CYC/PM_RUN_INST_CMPL",
390 "MetricGroup": "cpi_breakdown",
391 "MetricName": "run_cyc_cpi"
392 },
393 {
394 "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL",
395 "MetricGroup": "cpi_breakdown",
396 "MetricName": "scalar_stall_cpi"
397 },
398 {
399 "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB",
400 "MetricExpr": "PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL",
401 "MetricGroup": "cpi_breakdown",
402 "MetricName": "slb_stall_cpi"
403 },
404 {
405 "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC",
406 "MetricExpr": "PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL",
407 "MetricGroup": "cpi_breakdown",
408 "MetricName": "spec_finish_stall_cpi"
409 },
410 {
411 "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full",
412 "MetricExpr": "PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL",
413 "MetricGroup": "cpi_breakdown",
414 "MetricName": "srq_full_stall_cpi"
415 },
416 {
417 "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL",
418 "MetricGroup": "cpi_breakdown",
419 "MetricName": "srq_stall_cpi"
420 },
421 {
422 "BriefDescription": "Completion stall due to store forward",
423 "MetricExpr": "PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL",
424 "MetricGroup": "cpi_breakdown",
425 "MetricName": "st_fwd_stall_cpi"
426 },
427 {
428 "BriefDescription": "Nothing completed and ICT not empty",
429 "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL",
430 "MetricGroup": "cpi_breakdown",
431 "MetricName": "stall_cpi"
432 },
433 {
434 "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2",
435 "MetricExpr": "PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL",
436 "MetricGroup": "cpi_breakdown",
437 "MetricName": "stcx_stall_cpi"
438 },
439 {
440 "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data",
441 "MetricExpr": "PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL",
442 "MetricGroup": "cpi_breakdown",
443 "MetricName": "store_data_stall_cpi"
444 },
445 {
446 "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe",
447 "MetricExpr": "PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL",
448 "MetricGroup": "cpi_breakdown",
449 "MetricName": "store_fin_arb_stall_cpi"
450 },
451 {
452 "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish",
453 "MetricExpr": "PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL",
454 "MetricGroup": "cpi_breakdown",
455 "MetricName": "store_finish_stall_cpi"
456 },
457 {
458 "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration",
459 "MetricExpr": "PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL",
460 "MetricGroup": "cpi_breakdown",
461 "MetricName": "store_pipe_arb_stall_cpi"
462 },
463 {
464 "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2",
465 "MetricExpr": "PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL",
466 "MetricGroup": "cpi_breakdown",
467 "MetricName": "tend_stall_cpi"
468 },
469 {
470 "BriefDescription": "Completion Stalled because the thread was blocked",
471 "MetricExpr": "PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL",
472 "MetricGroup": "cpi_breakdown",
473 "MetricName": "thread_block_stall_cpi"
474 },
475 {
476 "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2",
477 "MetricExpr": "PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL",
478 "MetricGroup": "cpi_breakdown",
479 "MetricName": "tlbie_stall_cpi"
480 },
481 {
482 "BriefDescription": "Vector stalls due to small latency double precision ops",
483 "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL",
484 "MetricGroup": "cpi_breakdown",
485 "MetricName": "vdp_other_stall_cpi"
486 },
487 {
488 "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.",
489 "MetricExpr": "PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL",
490 "MetricGroup": "cpi_breakdown",
491 "MetricName": "vdp_stall_cpi"
492 },
493 {
494 "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.",
495 "MetricExpr": "PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL",
496 "MetricGroup": "cpi_breakdown",
497 "MetricName": "vdplong_stall_cpi"
498 },
499 {
500 "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
501 "MetricGroup": "cpi_breakdown",
502 "MetricName": "vector_stall_cpi"
503 },
504 {
505 "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)",
506 "MetricExpr": "PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL",
507 "MetricGroup": "cpi_breakdown",
508 "MetricName": "vfxlong_stall_cpi"
509 },
510 {
511 "BriefDescription": "Vector stalls due to small latency integer ops",
512 "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL",
513 "MetricGroup": "cpi_breakdown",
514 "MetricName": "vfxu_other_stall_cpi"
515 },
516 {
517 "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes",
518 "MetricExpr": "PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL",
519 "MetricGroup": "cpi_breakdown",
520 "MetricName": "vfxu_stall_cpi"
521 },
522 {
523 "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst",
524 "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
525 "MetricGroup": "dl1_reloads_percent_per_inst",
526 "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent"
527 },
528 {
529 "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst",
530 "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
531 "MetricGroup": "dl1_reloads_percent_per_inst",
532 "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent"
533 },
534 {
535 "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst",
536 "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
537 "MetricGroup": "dl1_reloads_percent_per_inst",
538 "MetricName": "dl1_reload_from_dmem_rate_percent"
539 },
540 {
541 "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
542 "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
543 "MetricGroup": "dl1_reloads_percent_per_inst",
544 "MetricName": "dl1_reload_from_l21_mod_rate_percent"
545 },
546 {
547 "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst",
548 "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
549 "MetricGroup": "dl1_reloads_percent_per_inst",
550 "MetricName": "dl1_reload_from_l21_shr_rate_percent"
551 },
552 {
553 "BriefDescription": "% of DL1 reloads from L2 per Inst",
554 "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
555 "MetricGroup": "dl1_reloads_percent_per_inst",
556 "MetricName": "dl1_reload_from_l2_miss_rate_percent"
557 },
558 {
559 "BriefDescription": "% of DL1 reloads from L2 per Inst",
560 "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL",
561 "MetricGroup": "dl1_reloads_percent_per_inst",
562 "MetricName": "dl1_reload_from_l2_rate_percent"
563 },
564 {
565 "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst",
566 "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
567 "MetricGroup": "dl1_reloads_percent_per_inst",
568 "MetricName": "dl1_reload_from_l31_mod_rate_percent"
569 },
570 {
571 "BriefDescription": "% of DL1 reloads from Private L3 S tate, other core per Inst",
572 "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
573 "MetricGroup": "dl1_reloads_percent_per_inst",
574 "MetricName": "dl1_reload_from_l31_shr_rate_percent"
575 },
576 {
577 "BriefDescription": "% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed",
578 "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL",
579 "MetricGroup": "dl1_reloads_percent_per_inst",
580 "MetricName": "dl1_reload_from_l3_mepf_rate_percent"
581 },
582 {
583 "BriefDescription": "% of DL1 reloads from L3 per Inst",
584 "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
585 "MetricGroup": "dl1_reloads_percent_per_inst",
586 "MetricName": "dl1_reload_from_l3_miss_rate_percent"
587 },
588 {
589 "BriefDescription": "% of DL1 Reloads from L3 per Inst",
590 "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL",
591 "MetricGroup": "dl1_reloads_percent_per_inst",
592 "MetricName": "dl1_reload_from_l3_rate_percent"
593 },
594 {
595 "BriefDescription": "% of DL1 Reloads from Local Memory per Inst",
596 "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
597 "MetricGroup": "dl1_reloads_percent_per_inst",
598 "MetricName": "dl1_reload_from_lmem_rate_percent"
599 },
600 {
601 "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
602 "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
603 "MetricGroup": "dl1_reloads_percent_per_inst",
604 "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent"
605 },
606 {
607 "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
608 "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
609 "MetricGroup": "dl1_reloads_percent_per_inst",
610 "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent"
611 },
612 {
613 "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst",
614 "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
615 "MetricGroup": "dl1_reloads_percent_per_inst",
616 "MetricName": "dl1_reload_from_rmem_rate_percent"
617 },
618 {
619 "BriefDescription": "Percentage of L1 demand load misses per run instruction",
620 "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL",
621 "MetricGroup": "dl1_reloads_percent_per_inst",
622 "MetricName": "l1_ld_miss_rate_percent"
623 },
624 {
625 "BriefDescription": "% of DL1 misses that result in a cache reload",
626 "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1",
627 "MetricGroup": "dl1_reloads_percent_per_ref",
628 "MetricName": "dl1_miss_reloads_percent"
629 },
630 {
631 "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)",
632 "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
633 "MetricGroup": "dl1_reloads_percent_per_ref",
634 "MetricName": "dl1_reload_from_dl2l3_mod_percent"
635 },
636 {
637 "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)",
638 "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
639 "MetricGroup": "dl1_reloads_percent_per_ref",
640 "MetricName": "dl1_reload_from_dl2l3_shr_percent"
641 },
642 {
643 "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory",
644 "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
645 "MetricGroup": "dl1_reloads_percent_per_ref",
646 "MetricName": "dl1_reload_from_dmem_percent"
647 },
648 {
649 "BriefDescription": "% of DL1 reloads from Private L2, other core",
650 "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
651 "MetricGroup": "dl1_reloads_percent_per_ref",
652 "MetricName": "dl1_reload_from_l21_mod_percent"
653 },
654 {
655 "BriefDescription": "% of DL1 reloads from Private L2, other core",
656 "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
657 "MetricGroup": "dl1_reloads_percent_per_ref",
658 "MetricName": "dl1_reload_from_l21_shr_percent"
659 },
660 {
661 "BriefDescription": "% of DL1 Reloads from sources beyond the local L2",
662 "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
663 "MetricGroup": "dl1_reloads_percent_per_ref",
664 "MetricName": "dl1_reload_from_l2_miss_percent"
665 },
666 {
667 "BriefDescription": "% of DL1 reloads from L2",
668 "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID",
669 "MetricGroup": "dl1_reloads_percent_per_ref",
670 "MetricName": "dl1_reload_from_l2_percent"
671 },
672 {
673 "BriefDescription": "% of DL1 reloads from Private L3, other core",
674 "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
675 "MetricGroup": "dl1_reloads_percent_per_ref",
676 "MetricName": "dl1_reload_from_l31_mod_percent"
677 },
678 {
679 "BriefDescription": "% of DL1 reloads from Private L3, other core",
680 "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
681 "MetricGroup": "dl1_reloads_percent_per_ref",
682 "MetricName": "dl1_reload_from_l31_shr_percent"
683 },
684 {
685 "BriefDescription": "% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch",
686 "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID",
687 "MetricGroup": "dl1_reloads_percent_per_ref",
688 "MetricName": "dl1_reload_from_l3_mepf_percent"
689 },
690 {
691 "BriefDescription": "% of DL1 Reloads from sources beyond the local L3",
692 "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID",
693 "MetricGroup": "dl1_reloads_percent_per_ref",
694 "MetricName": "dl1_reload_from_l3_miss_percent"
695 },
696 {
697 "BriefDescription": "% of DL1 Reloads from L3",
698 "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID",
699 "MetricGroup": "dl1_reloads_percent_per_ref",
700 "MetricName": "dl1_reload_from_l3_percent"
701 },
702 {
703 "BriefDescription": "% of DL1 dL1_Reloads from Local Memory",
704 "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
705 "MetricGroup": "dl1_reloads_percent_per_ref",
706 "MetricName": "dl1_reload_from_lmem_percent"
707 },
708 {
709 "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)",
710 "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID",
711 "MetricGroup": "dl1_reloads_percent_per_ref",
712 "MetricName": "dl1_reload_from_rl2l3_mod_percent"
713 },
714 {
715 "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)",
716 "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID",
717 "MetricGroup": "dl1_reloads_percent_per_ref",
718 "MetricName": "dl1_reload_from_rl2l3_shr_percent"
719 },
720 {
721 "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory",
722 "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID",
723 "MetricGroup": "dl1_reloads_percent_per_ref",
724 "MetricName": "dl1_reload_from_rmem_percent"
725 },
726 {
727 "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi",
728 "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
729 "MetricGroup": "estimated_dcache_miss_cpi",
730 "MetricName": "dl2l3_mod_cpi_percent"
731 },
732 {
733 "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi",
734 "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * PM_MRK_DATA_FROM_DL2L3_SHR_CYC / PM_MRK_DATA_FROM_DL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
735 "MetricGroup": "estimated_dcache_miss_cpi",
736 "MetricName": "dl2l3_shr_cpi_percent"
737 },
738 {
739 "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi",
740 "MetricExpr": "PM_DATA_FROM_DL4 * PM_MRK_DATA_FROM_DL4_CYC / PM_MRK_DATA_FROM_DL4 / PM_CMPLU_STALL_DCACHE_MISS *100",
741 "MetricGroup": "estimated_dcache_miss_cpi",
742 "MetricName": "dl4_cpi_percent"
743 },
744 {
745 "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi",
746 "MetricExpr": "PM_DATA_FROM_DMEM * PM_MRK_DATA_FROM_DMEM_CYC / PM_MRK_DATA_FROM_DMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
747 "MetricGroup": "estimated_dcache_miss_cpi",
748 "MetricName": "dmem_cpi_percent"
749 },
750 {
751 "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi",
752 "MetricExpr": "PM_DATA_FROM_L21_MOD * PM_MRK_DATA_FROM_L21_MOD_CYC / PM_MRK_DATA_FROM_L21_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
753 "MetricGroup": "estimated_dcache_miss_cpi",
754 "MetricName": "l21_mod_cpi_percent"
755 },
756 {
757 "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi",
758 "MetricExpr": "PM_DATA_FROM_L21_SHR * PM_MRK_DATA_FROM_L21_SHR_CYC / PM_MRK_DATA_FROM_L21_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
759 "MetricGroup": "estimated_dcache_miss_cpi",
760 "MetricName": "l21_shr_cpi_percent"
761 },
762 {
763 "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi",
764 "MetricExpr": "PM_DATA_FROM_L2 * PM_MRK_DATA_FROM_L2_CYC / PM_MRK_DATA_FROM_L2 / PM_CMPLU_STALL_DCACHE_MISS *100",
765 "MetricGroup": "estimated_dcache_miss_cpi",
766 "MetricName": "l2_cpi_percent"
767 },
768 {
769 "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi",
770 "MetricExpr": "PM_DATA_FROM_L31_MOD * PM_MRK_DATA_FROM_L31_MOD_CYC / PM_MRK_DATA_FROM_L31_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
771 "MetricGroup": "estimated_dcache_miss_cpi",
772 "MetricName": "l31_mod_cpi_percent"
773 },
774 {
775 "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi",
776 "MetricExpr": "PM_DATA_FROM_L31_SHR * PM_MRK_DATA_FROM_L31_SHR_CYC / PM_MRK_DATA_FROM_L31_SHR / PM_CMPLU_STALL_DCACHE_MISS *100",
777 "MetricGroup": "estimated_dcache_miss_cpi",
778 "MetricName": "l31_shr_cpi_percent"
779 },
780 {
781 "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi",
782 "MetricExpr": "PM_DATA_FROM_L3 * PM_MRK_DATA_FROM_L3_CYC / PM_MRK_DATA_FROM_L3 / PM_CMPLU_STALL_DCACHE_MISS * 100",
783 "MetricGroup": "estimated_dcache_miss_cpi",
784 "MetricName": "l3_cpi_percent"
785 },
786 {
787 "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi",
788 "MetricExpr": "PM_DATA_FROM_LMEM * PM_MRK_DATA_FROM_LMEM_CYC / PM_MRK_DATA_FROM_LMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
789 "MetricGroup": "estimated_dcache_miss_cpi",
790 "MetricName": "lmem_cpi_percent"
791 },
792 {
793 "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi",
794 "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * PM_MRK_DATA_FROM_RL2L3_MOD_CYC / PM_MRK_DATA_FROM_RL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100",
795 "MetricGroup": "estimated_dcache_miss_cpi",
796 "MetricName": "rl2l3_mod_cpi_percent"
797 },
798 {
799 "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi",
800 "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * PM_MRK_DATA_FROM_RL2L3_SHR_CYC / PM_MRK_DATA_FROM_RL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS * 100",
801 "MetricGroup": "estimated_dcache_miss_cpi",
802 "MetricName": "rl2l3_shr_cpi_percent"
803 },
804 {
805 "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi",
806 "MetricExpr": "PM_DATA_FROM_RL4 * PM_MRK_DATA_FROM_RL4_CYC / PM_MRK_DATA_FROM_RL4 / PM_CMPLU_STALL_DCACHE_MISS *100",
807 "MetricGroup": "estimated_dcache_miss_cpi",
808 "MetricName": "rl4_cpi_percent"
809 },
810 {
811 "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi",
812 "MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100",
813 "MetricGroup": "estimated_dcache_miss_cpi",
814 "MetricName": "rmem_cpi_percent"
815 },
816 {
817 "BriefDescription": "Branch Mispredict flushes per instruction",
818 "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100",
819 "MetricGroup": "general",
820 "MetricName": "br_mpred_flush_rate_percent"
821 },
822 {
823 "BriefDescription": "Cycles per instruction",
824 "MetricExpr": "PM_CYC / PM_INST_CMPL",
825 "MetricGroup": "general",
826 "MetricName": "cpi"
827 },
828 {
829 "BriefDescription": "Dispatch flush rate (%)",
830 "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100",
831 "MetricGroup": "general",
832 "MetricName": "disp_flush_rate_percent"
833 },
834 {
835 "BriefDescription": "% DTLB miss rate per inst",
836 "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100",
837 "MetricGroup": "general",
838 "MetricName": "dtlb_miss_rate_percent"
839 },
840 {
841 "BriefDescription": "Flush rate (%)",
842 "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL",
843 "MetricGroup": "general",
844 "MetricName": "flush_rate_percent"
845 },
846 {
847 "BriefDescription": "Instructions per cycles",
848 "MetricExpr": "PM_INST_CMPL / PM_CYC",
849 "MetricGroup": "general",
850 "MetricName": "ipc"
851 },
852 {
853 "BriefDescription": "% ITLB miss rate per inst",
854 "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100",
855 "MetricGroup": "general",
856 "MetricName": "itlb_miss_rate_percent"
857 },
858 {
859 "BriefDescription": "Percentage of L1 load misses per L1 load ref",
860 "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100",
861 "MetricGroup": "general",
862 "MetricName": "l1_ld_miss_ratio_percent"
863 },
864 {
865 "BriefDescription": "Percentage of L1 store misses per run instruction",
866 "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL",
867 "MetricGroup": "general",
868 "MetricName": "l1_st_miss_rate_percent"
869 },
870 {
871 "BriefDescription": "Percentage of L1 store misses per L1 store ref",
872 "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100",
873 "MetricGroup": "general",
874 "MetricName": "l1_st_miss_ratio_percent"
875 },
876 {
877 "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)",
878 "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
879 "MetricGroup": "general",
880 "MetricName": "l2_inst_miss_rate_percent"
881 },
882 {
883 "BriefDescription": "L2 demand Load Miss Rate (per run instruction)(%)",
884 "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
885 "MetricGroup": "general",
886 "MetricName": "l2_ld_miss_rate_percent"
887 },
888 {
889 "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)",
890 "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL",
891 "MetricGroup": "general",
892 "MetricName": "l2_pteg_miss_rate_percent"
893 },
894 {
895 "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)",
896 "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
897 "MetricGroup": "general",
898 "MetricName": "l3_inst_miss_rate_percent"
899 },
900 {
901 "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)",
902 "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
903 "MetricGroup": "general",
904 "MetricName": "l3_ld_miss_rate_percent"
905 },
906 {
907 "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)",
908 "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL",
909 "MetricGroup": "general",
910 "MetricName": "l3_pteg_miss_rate_percent"
911 },
912 {
913 "BriefDescription": "Run cycles per cycle",
914 "MetricExpr": "PM_RUN_CYC / PM_CYC*100",
915 "MetricGroup": "general",
916 "MetricName": "run_cycles_percent"
917 },
918 {
919 "BriefDescription": "Instruction dispatch-to-completion ratio",
920 "MetricExpr": "PM_INST_DISP / PM_INST_CMPL",
921 "MetricGroup": "general",
922 "MetricName": "speculation"
923 },
924 {
925 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst",
926 "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
927 "MetricGroup": "instruction_misses_percent_per_inst",
928 "MetricName": "inst_from_dl2l3_mod_rate_percent"
929 },
930 {
931 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst",
932 "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
933 "MetricGroup": "instruction_misses_percent_per_inst",
934 "MetricName": "inst_from_dl2l3_shr_rate_percent"
935 },
936 {
937 "BriefDescription": "% of ICache reloads from Distant L4 per Inst",
938 "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
939 "MetricGroup": "instruction_misses_percent_per_inst",
940 "MetricName": "inst_from_dl4_rate_percent"
941 },
942 {
943 "BriefDescription": "% of ICache reloads from Distant Memory per Inst",
944 "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
945 "MetricGroup": "instruction_misses_percent_per_inst",
946 "MetricName": "inst_from_dmem_rate_percent"
947 },
948 {
949 "BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
950 "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
951 "MetricGroup": "instruction_misses_percent_per_inst",
952 "MetricName": "inst_from_l21_mod_rate_percent"
953 },
954 {
955 "BriefDescription": "% of ICache reloads from Private L2, other core per Inst",
956 "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
957 "MetricGroup": "instruction_misses_percent_per_inst",
958 "MetricName": "inst_from_l21_shr_rate_percent"
959 },
960 {
961 "BriefDescription": "% of ICache reloads from L2 per Inst",
962 "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL",
963 "MetricGroup": "instruction_misses_percent_per_inst",
964 "MetricName": "inst_from_l2_rate_percent"
965 },
966 {
967 "BriefDescription": "% of ICache reloads from Private L3, other core per Inst",
968 "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
969 "MetricGroup": "instruction_misses_percent_per_inst",
970 "MetricName": "inst_from_l31_mod_rate_percent"
971 },
972 {
973 "BriefDescription": "% of ICache reloads from Private L3 other core per Inst",
974 "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
975 "MetricGroup": "instruction_misses_percent_per_inst",
976 "MetricName": "inst_from_l31_shr_rate_percent"
977 },
978 {
979 "BriefDescription": "% of ICache reloads from L3 per Inst",
980 "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL",
981 "MetricGroup": "instruction_misses_percent_per_inst",
982 "MetricName": "inst_from_l3_rate_percent"
983 },
984 {
985 "BriefDescription": "% of ICache reloads from Local L4 per Inst",
986 "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
987 "MetricGroup": "instruction_misses_percent_per_inst",
988 "MetricName": "inst_from_ll4_rate_percent"
989 },
990 {
991 "BriefDescription": "% of ICache reloads from Local Memory per Inst",
992 "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
993 "MetricGroup": "instruction_misses_percent_per_inst",
994 "MetricName": "inst_from_lmem_rate_percent"
995 },
996 {
997 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst",
998 "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
999 "MetricGroup": "instruction_misses_percent_per_inst",
1000 "MetricName": "inst_from_rl2l3_mod_rate_percent"
1001 },
1002 {
1003 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst",
1004 "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1005 "MetricGroup": "instruction_misses_percent_per_inst",
1006 "MetricName": "inst_from_rl2l3_shr_rate_percent"
1007 },
1008 {
1009 "BriefDescription": "% of ICache reloads from Remote L4 per Inst",
1010 "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
1011 "MetricGroup": "instruction_misses_percent_per_inst",
1012 "MetricName": "inst_from_rl4_rate_percent"
1013 },
1014 {
1015 "BriefDescription": "% of ICache reloads from Remote Memory per Inst",
1016 "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
1017 "MetricGroup": "instruction_misses_percent_per_inst",
1018 "MetricName": "inst_from_rmem_rate_percent"
1019 },
1020 {
1021 "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)",
1022 "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL",
1023 "MetricGroup": "instruction_misses_percent_per_inst",
1024 "MetricName": "l1_inst_miss_rate_percent"
1025 },
1026 {
1027 "BriefDescription": "Icache Fetchs per Icache Miss",
1028 "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS",
1029 "MetricGroup": "instruction_stats_percent_per_ref",
1030 "MetricName": "icache_miss_reload"
1031 },
1032 {
1033 "BriefDescription": "% of ICache reloads due to prefetch",
1034 "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS",
1035 "MetricGroup": "instruction_stats_percent_per_ref",
1036 "MetricName": "icache_pref_percent"
1037 },
1038 {
1039 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)",
1040 "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
1041 "MetricGroup": "instruction_stats_percent_per_ref",
1042 "MetricName": "inst_from_dl2l3_mod_percent"
1043 },
1044 {
1045 "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)",
1046 "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
1047 "MetricGroup": "instruction_stats_percent_per_ref",
1048 "MetricName": "inst_from_dl2l3_shr_percent"
1049 },
1050 {
1051 "BriefDescription": "% of ICache reloads from Distant L4",
1052 "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS",
1053 "MetricGroup": "instruction_stats_percent_per_ref",
1054 "MetricName": "inst_from_dl4_percent"
1055 },
1056 {
1057 "BriefDescription": "% of ICache reloads from Distant Memory",
1058 "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS",
1059 "MetricGroup": "instruction_stats_percent_per_ref",
1060 "MetricName": "inst_from_dmem_percent"
1061 },
1062 {
1063 "BriefDescription": "% of ICache reloads from Private L2, other core",
1064 "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS",
1065 "MetricGroup": "instruction_stats_percent_per_ref",
1066 "MetricName": "inst_from_l21_mod_percent"
1067 },
1068 {
1069 "BriefDescription": "% of ICache reloads from Private L2, other core",
1070 "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS",
1071 "MetricGroup": "instruction_stats_percent_per_ref",
1072 "MetricName": "inst_from_l21_shr_percent"
1073 },
1074 {
1075 "BriefDescription": "% of ICache reloads from L2",
1076 "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS",
1077 "MetricGroup": "instruction_stats_percent_per_ref",
1078 "MetricName": "inst_from_l2_percent"
1079 },
1080 {
1081 "BriefDescription": "% of ICache reloads from Private L3, other core",
1082 "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS",
1083 "MetricGroup": "instruction_stats_percent_per_ref",
1084 "MetricName": "inst_from_l31_mod_percent"
1085 },
1086 {
1087 "BriefDescription": "% of ICache reloads from Private L3, other core",
1088 "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS",
1089 "MetricGroup": "instruction_stats_percent_per_ref",
1090 "MetricName": "inst_from_l31_shr_percent"
1091 },
1092 {
1093 "BriefDescription": "% of ICache reloads from L3",
1094 "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS",
1095 "MetricGroup": "instruction_stats_percent_per_ref",
1096 "MetricName": "inst_from_l3_percent"
1097 },
1098 {
1099 "BriefDescription": "% of ICache reloads from Local L4",
1100 "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS",
1101 "MetricGroup": "instruction_stats_percent_per_ref",
1102 "MetricName": "inst_from_ll4_percent"
1103 },
1104 {
1105 "BriefDescription": "% of ICache reloads from Local Memory",
1106 "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS",
1107 "MetricGroup": "instruction_stats_percent_per_ref",
1108 "MetricName": "inst_from_lmem_percent"
1109 },
1110 {
1111 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)",
1112 "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS",
1113 "MetricGroup": "instruction_stats_percent_per_ref",
1114 "MetricName": "inst_from_rl2l3_mod_percent"
1115 },
1116 {
1117 "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)",
1118 "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS",
1119 "MetricGroup": "instruction_stats_percent_per_ref",
1120 "MetricName": "inst_from_rl2l3_shr_percent"
1121 },
1122 {
1123 "BriefDescription": "% of ICache reloads from Remote L4",
1124 "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS",
1125 "MetricGroup": "instruction_stats_percent_per_ref",
1126 "MetricName": "inst_from_rl4_percent"
1127 },
1128 {
1129 "BriefDescription": "% of ICache reloads from Remote Memory",
1130 "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS",
1131 "MetricGroup": "instruction_stats_percent_per_ref",
1132 "MetricName": "inst_from_rmem_percent"
1133 },
1134 {
1135 "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)",
1136 "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100",
1137 "MetricGroup": "l2_stats",
1138 "MetricName": "l2_co_m_rd_util"
1139 },
1140 {
1141 "BriefDescription": "L2 dcache invalidates per run inst (per core)",
1142 "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100",
1143 "MetricGroup": "l2_stats",
1144 "MetricName": "l2_dc_inv_rate_percent"
1145 },
1146 {
1147 "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)",
1148 "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100",
1149 "MetricGroup": "l2_stats",
1150 "MetricName": "l2_dem_ld_disp_percent"
1151 },
1152 {
1153 "BriefDescription": "L2 Icache invalidates per run inst (per core)",
1154 "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100",
1155 "MetricGroup": "l2_stats",
1156 "MetricName": "l2_ic_inv_rate_percent"
1157 },
1158 {
1159 "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)",
1160 "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100",
1161 "MetricGroup": "l2_stats",
1162 "MetricName": "l2_inst_miss_ratio_percent"
1163 },
1164 {
1165 "BriefDescription": "Average number of cycles between L2 Load hits",
1166 "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2",
1167 "MetricGroup": "l2_stats",
1168 "MetricName": "l2_ld_hit_frequency"
1169 },
1170 {
1171 "BriefDescription": "Average number of cycles between L2 Load misses",
1172 "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2",
1173 "MetricGroup": "l2_stats",
1174 "MetricName": "l2_ld_miss_frequency"
1175 },
1176 {
1177 "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)",
1178 "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100",
1179 "MetricGroup": "l2_stats",
1180 "MetricName": "l2_ld_miss_ratio_percent"
1181 },
1182 {
1183 "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)",
1184 "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100",
1185 "MetricGroup": "l2_stats",
1186 "MetricName": "l2_ld_rd_util"
1187 },
1188 {
1189 "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks",
1190 "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100",
1191 "MetricGroup": "l2_stats",
1192 "MetricName": "l2_ldmiss_wr_util"
1193 },
1194 {
1195 "BriefDescription": "L2 local pump prediction success",
1196 "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100",
1197 "MetricGroup": "l2_stats",
1198 "MetricName": "l2_local_pred_correct_percent"
1199 },
1200 {
1201 "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs",
1202 "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
1203 "MetricGroup": "l2_stats",
1204 "MetricName": "l2_mod_co_percent"
1205 },
1206 {
1207 "BriefDescription": "% of L2 Load RC dispatch attempts that failed because of address collisions and cclass conflicts",
1208 "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100",
1209 "MetricGroup": "l2_stats",
1210 "MetricName": "l2_rc_ld_disp_addr_fail_percent"
1211 },
1212 {
1213 "BriefDescription": "% of L2 Load RC dispatch attempts that failed",
1214 "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100",
1215 "MetricGroup": "l2_stats",
1216 "MetricName": "l2_rc_ld_disp_fail_percent"
1217 },
1218 {
1219 "BriefDescription": "% of L2 Store RC dispatch attempts that failed because of address collisions and cclass conflicts",
1220 "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100",
1221 "MetricGroup": "l2_stats",
1222 "MetricName": "l2_rc_st_disp_addr_fail_percent"
1223 },
1224 {
1225 "BriefDescription": "% of L2 Store RC dispatch attempts that failed",
1226 "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100",
1227 "MetricGroup": "l2_stats",
1228 "MetricName": "l2_rc_st_disp_fail_percent"
1229 },
1230 {
1231 "BriefDescription": "L2 Cache Read Utilization (per core)",
1232 "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)",
1233 "MetricGroup": "l2_stats",
1234 "MetricName": "l2_rd_util_percent"
1235 },
1236 {
1237 "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs",
1238 "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100",
1239 "MetricGroup": "l2_stats",
1240 "MetricName": "l2_shr_co_percent"
1241 },
1242 {
1243 "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)",
1244 "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100",
1245 "MetricGroup": "l2_stats",
1246 "MetricName": "l2_st_miss_ratio_percent"
1247 },
1248 {
1249 "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)",
1250 "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100",
1251 "MetricGroup": "l2_stats",
1252 "MetricName": "l2_st_rd_util"
1253 },
1254 {
1255 "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks",
1256 "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100",
1257 "MetricGroup": "l2_stats",
1258 "MetricName": "l2_st_wr_util"
1259 },
1260 {
1261 "BriefDescription": "L2 Cache Write Utilization (per core)",
1262 "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)",
1263 "MetricGroup": "l2_stats",
1264 "MetricName": "l2_wr_util_percent"
1265 },
1266 {
1267 "BriefDescription": "Average number of cycles between L3 Load hits",
1268 "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2",
1269 "MetricGroup": "l3_stats",
1270 "MetricName": "l3_ld_hit_frequency"
1271 },
1272 {
1273 "BriefDescription": "Average number of cycles between L3 Load misses",
1274 "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2",
1275 "MetricGroup": "l3_stats",
1276 "MetricName": "l3_ld_miss_frequency"
1277 },
1278 {
1279 "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle",
1280 "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8",
1281 "MetricGroup": "l3_stats",
1282 "MetricName": "l3_wi_usage"
1283 },
1284 {
1285 "BriefDescription": "Average icache miss latency",
1286 "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ",
1287 "MetricGroup": "latency",
1288 "MetricName": "average_il1_miss_latency"
1289 },
1290 {
1291 "BriefDescription": "Marked L2L3 distant Load latency (Modified)",
1292 "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD",
1293 "MetricGroup": "latency",
1294 "MetricName": "dl2l3_mod_latency"
1295 },
1296 {
1297 "BriefDescription": "Marked L2L3 distant Load latency",
1298 "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR",
1299 "MetricGroup": "latency",
1300 "MetricName": "dl2l3_shr_latency"
1301 },
1302 {
1303 "BriefDescription": "Distant L4 average load latency",
1304 "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4",
1305 "MetricGroup": "latency",
1306 "MetricName": "dl4_latency"
1307 },
1308 {
1309 "BriefDescription": "Marked Dmem Load latency",
1310 "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM",
1311 "MetricGroup": "latency",
1312 "MetricName": "dmem_latency"
1313 },
1314 {
1315 "BriefDescription": "average L1 miss latency using marked events",
1316 "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1",
1317 "MetricGroup": "latency",
1318 "MetricName": "estimated_dl1miss_latency"
1319 },
1320 {
1321 "BriefDescription": "Marked L21 Load latency",
1322 "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD",
1323 "MetricGroup": "latency",
1324 "MetricName": "l21_mod_latency"
1325 },
1326 {
1327 "BriefDescription": "Marked L21 Load latency",
1328 "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR",
1329 "MetricGroup": "latency",
1330 "MetricName": "l21_shr_latency"
1331 },
1332 {
1333 "BriefDescription": "Marked L2 Load latency",
1334 "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2",
1335 "MetricGroup": "latency",
1336 "MetricName": "l2_latency"
1337 },
1338 {
1339 "BriefDescription": "Marked L31 Load latency",
1340 "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD",
1341 "MetricGroup": "latency",
1342 "MetricName": "l31_mod_latency"
1343 },
1344 {
1345 "BriefDescription": "Marked L31 Load latency",
1346 "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR",
1347 "MetricGroup": "latency",
1348 "MetricName": "l31_shr_latency"
1349 },
1350 {
1351 "BriefDescription": "Marked L3 Load latency",
1352 "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3",
1353 "MetricGroup": "latency",
1354 "MetricName": "l3_latency"
1355 },
1356 {
1357 "BriefDescription": "Local L4 average load latency",
1358 "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4",
1359 "MetricGroup": "latency",
1360 "MetricName": "ll4_latency"
1361 },
1362 {
1363 "BriefDescription": "Marked Lmem Load latency",
1364 "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM",
1365 "MetricGroup": "latency",
1366 "MetricName": "lmem_latency"
1367 },
1368 {
1369 "BriefDescription": "Marked L2L3 remote Load latency",
1370 "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD",
1371 "MetricGroup": "latency",
1372 "MetricName": "rl2l3_mod_latency"
1373 },
1374 {
1375 "BriefDescription": "Marked L2L3 remote Load latency",
1376 "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR",
1377 "MetricGroup": "latency",
1378 "MetricName": "rl2l3_shr_latency"
1379 },
1380 {
1381 "BriefDescription": "Remote L4 average load latency",
1382 "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4",
1383 "MetricGroup": "latency",
1384 "MetricName": "rl4_latency"
1385 },
1386 {
1387 "BriefDescription": "Marked Rmem Load latency",
1388 "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM",
1389 "MetricGroup": "latency",
1390 "MetricName": "rmem_latency"
1391 },
1392 {
1393 "BriefDescription": "ERAT miss reject ratio",
1394 "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL",
1395 "MetricGroup": "lsu_rejects",
1396 "MetricName": "erat_reject_rate_percent"
1397 },
1398 {
1399 "BriefDescription": "LHS reject ratio",
1400 "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL",
1401 "MetricGroup": "lsu_rejects",
1402 "MetricName": "lhs_reject_rate_percent"
1403 },
1404 {
1405 "BriefDescription": "LMQ full reject rate (per run instruction)(%)",
1406 "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL",
1407 "MetricGroup": "lsu_rejects",
1408 "MetricName": "lmq_full_reject_rate_percent"
1409 },
1410 {
1411 "BriefDescription": "LMQ full rejects as a % of L1 load refs",
1412 "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1",
1413 "MetricGroup": "lsu_rejects",
1414 "MetricName": "lmq_full_reject_ratio_percent"
1415 },
1416 {
1417 "BriefDescription": "L4 locality(%)",
1418 "MetricExpr": "PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4)",
1419 "MetricGroup": "memory",
1420 "MetricName": "l4_locality"
1421 },
1422 {
1423 "BriefDescription": "Ratio of reloads from local L4 to distant L4",
1424 "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4",
1425 "MetricGroup": "memory",
1426 "MetricName": "ld_ll4_per_ld_dmem"
1427 },
1428 {
1429 "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4",
1430 "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)",
1431 "MetricGroup": "memory",
1432 "MetricName": "ld_ll4_per_ld_mem"
1433 },
1434 {
1435 "BriefDescription": "Ratio of reloads from local L4 to remote L4",
1436 "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4",
1437 "MetricGroup": "memory",
1438 "MetricName": "ld_ll4_per_ld_rl4"
1439 },
1440 {
1441 "BriefDescription": "Number of loads from local memory per loads from distant memory",
1442 "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM",
1443 "MetricGroup": "memory",
1444 "MetricName": "ld_lmem_per_ld_dmem"
1445 },
1446 {
1447 "BriefDescription": "Number of loads from local memory per loads from remote and distant memory",
1448 "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)",
1449 "MetricGroup": "memory",
1450 "MetricName": "ld_lmem_per_ld_mem"
1451 },
1452 {
1453 "BriefDescription": "Number of loads from local memory per loads from remote memory",
1454 "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM",
1455 "MetricGroup": "memory",
1456 "MetricName": "ld_lmem_per_ld_rmem"
1457 },
1458 {
1459 "BriefDescription": "Number of loads from remote memory per loads from distant memory",
1460 "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM",
1461 "MetricGroup": "memory",
1462 "MetricName": "ld_rmem_per_ld_dmem"
1463 },
1464 {
1465 "BriefDescription": "Memory locality",
1466 "MetricExpr": "PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)",
1467 "MetricGroup": "memory",
1468 "MetricName": "mem_locality_percent"
1469 },
1470 {
1471 "BriefDescription": "L1 Prefetches issued by the prefetch machine per instruction (per thread)",
1472 "MetricExpr": "PM_L1_PREF / PM_RUN_INST_CMPL * 100",
1473 "MetricGroup": "prefetch",
1474 "MetricName": "l1_prefetch_rate_percent"
1475 },
1476 {
1477 "BriefDescription": "DERAT Miss Rate (per run instruction)(%)",
1478 "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL",
1479 "MetricGroup": "pteg_reloads_percent_per_inst",
1480 "MetricName": "derat_miss_rate_percent"
1481 },
1482 {
1483 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst",
1484 "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL",
1485 "MetricGroup": "pteg_reloads_percent_per_inst",
1486 "MetricName": "pteg_from_dl2l3_mod_rate_percent"
1487 },
1488 {
1489 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst",
1490 "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1491 "MetricGroup": "pteg_reloads_percent_per_inst",
1492 "MetricName": "pteg_from_dl2l3_shr_rate_percent"
1493 },
1494 {
1495 "BriefDescription": "% of DERAT reloads from Distant L4 per inst",
1496 "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
1497 "MetricGroup": "pteg_reloads_percent_per_inst",
1498 "MetricName": "pteg_from_dl4_rate_percent"
1499 },
1500 {
1501 "BriefDescription": "% of DERAT reloads from Distant Memory per inst",
1502 "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL",
1503 "MetricGroup": "pteg_reloads_percent_per_inst",
1504 "MetricName": "pteg_from_dmem_rate_percent"
1505 },
1506 {
1507 "BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
1508 "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL",
1509 "MetricGroup": "pteg_reloads_percent_per_inst",
1510 "MetricName": "pteg_from_l21_mod_rate_percent"
1511 },
1512 {
1513 "BriefDescription": "% of DERAT reloads from Private L2, other core per inst",
1514 "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL",
1515 "MetricGroup": "pteg_reloads_percent_per_inst",
1516 "MetricName": "pteg_from_l21_shr_rate_percent"
1517 },
1518 {
1519 "BriefDescription": "% of DERAT reloads from L2 per inst",
1520 "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
1521 "MetricGroup": "pteg_reloads_percent_per_inst",
1522 "MetricName": "pteg_from_l2_rate_percent"
1523 },
1524 {
1525 "BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
1526 "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL",
1527 "MetricGroup": "pteg_reloads_percent_per_inst",
1528 "MetricName": "pteg_from_l31_mod_rate_percent"
1529 },
1530 {
1531 "BriefDescription": "% of DERAT reloads from Private L3, other core per inst",
1532 "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL",
1533 "MetricGroup": "pteg_reloads_percent_per_inst",
1534 "MetricName": "pteg_from_l31_shr_rate_percent"
1535 },
1536 {
1537 "BriefDescription": "% of DERAT reloads from L3 per inst",
1538 "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
1539 "MetricGroup": "pteg_reloads_percent_per_inst",
1540 "MetricName": "pteg_from_l3_rate_percent"
1541 },
1542 {
1543 "BriefDescription": "% of DERAT reloads from Local L4 per inst",
1544 "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
1545 "MetricGroup": "pteg_reloads_percent_per_inst",
1546 "MetricName": "pteg_from_ll4_rate_percent"
1547 },
1548 {
1549 "BriefDescription": "% of DERAT reloads from Local Memory per inst",
1550 "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
1551 "MetricGroup": "pteg_reloads_percent_per_inst",
1552 "MetricName": "pteg_from_lmem_rate_percent"
1553 },
1554 {
1555 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst",
1556 "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL",
1557 "MetricGroup": "pteg_reloads_percent_per_inst",
1558 "MetricName": "pteg_from_rl2l3_mod_rate_percent"
1559 },
1560 {
1561 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst",
1562 "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL",
1563 "MetricGroup": "pteg_reloads_percent_per_inst",
1564 "MetricName": "pteg_from_rl2l3_shr_rate_percent"
1565 },
1566 {
1567 "BriefDescription": "% of DERAT reloads from Remote L4 per inst",
1568 "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
1569 "MetricGroup": "pteg_reloads_percent_per_inst",
1570 "MetricName": "pteg_from_rl4_rate_percent"
1571 },
1572 {
1573 "BriefDescription": "% of DERAT reloads from Remote Memory per inst",
1574 "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL",
1575 "MetricGroup": "pteg_reloads_percent_per_inst",
1576 "MetricName": "pteg_from_rmem_rate_percent"
1577 },
1578 {
1579 "BriefDescription": "% of DERAT misses that result in an ERAT reload",
1580 "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS",
1581 "MetricGroup": "pteg_reloads_percent_per_ref",
1582 "MetricName": "derat_miss_reload_percent"
1583 },
1584 {
1585 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)",
1586 "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS",
1587 "MetricGroup": "pteg_reloads_percent_per_ref",
1588 "MetricName": "pteg_from_dl2l3_mod_percent"
1589 },
1590 {
1591 "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)",
1592 "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS",
1593 "MetricGroup": "pteg_reloads_percent_per_ref",
1594 "MetricName": "pteg_from_dl2l3_shr_percent"
1595 },
1596 {
1597 "BriefDescription": "% of DERAT reloads from Distant L4",
1598 "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS",
1599 "MetricGroup": "pteg_reloads_percent_per_ref",
1600 "MetricName": "pteg_from_dl4_percent"
1601 },
1602 {
1603 "BriefDescription": "% of DERAT reloads from Distant Memory",
1604 "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS",
1605 "MetricGroup": "pteg_reloads_percent_per_ref",
1606 "MetricName": "pteg_from_dmem_percent"
1607 },
1608 {
1609 "BriefDescription": "% of DERAT reloads from Private L2, other core",
1610 "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS",
1611 "MetricGroup": "pteg_reloads_percent_per_ref",
1612 "MetricName": "pteg_from_l21_mod_percent"
1613 },
1614 {
1615 "BriefDescription": "% of DERAT reloads from Private L2, other core",
1616 "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS",
1617 "MetricGroup": "pteg_reloads_percent_per_ref",
1618 "MetricName": "pteg_from_l21_shr_percent"
1619 },
1620 {
1621 "BriefDescription": "% of DERAT reloads from L2",
1622 "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS",
1623 "MetricGroup": "pteg_reloads_percent_per_ref",
1624 "MetricName": "pteg_from_l2_percent"
1625 },
1626 {
1627 "BriefDescription": "% of DERAT reloads from Private L3, other core",
1628 "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS",
1629 "MetricGroup": "pteg_reloads_percent_per_ref",
1630 "MetricName": "pteg_from_l31_mod_percent"
1631 },
1632 {
1633 "BriefDescription": "% of DERAT reloads from Private L3, other core",
1634 "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS",
1635 "MetricGroup": "pteg_reloads_percent_per_ref",
1636 "MetricName": "pteg_from_l31_shr_percent"
1637 },
1638 {
1639 "BriefDescription": "% of DERAT reloads from L3",
1640 "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS",
1641 "MetricGroup": "pteg_reloads_percent_per_ref",
1642 "MetricName": "pteg_from_l3_percent"
1643 },
1644 {
1645 "BriefDescription": "% of DERAT reloads from Local L4",
1646 "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS",
1647 "MetricGroup": "pteg_reloads_percent_per_ref",
1648 "MetricName": "pteg_from_ll4_percent"
1649 },
1650 {
1651 "BriefDescription": "% of DERAT reloads from Local Memory",
1652 "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS",
1653 "MetricGroup": "pteg_reloads_percent_per_ref",
1654 "MetricName": "pteg_from_lmem_percent"
1655 },
1656 {
1657 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)",
1658 "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS",
1659 "MetricGroup": "pteg_reloads_percent_per_ref",
1660 "MetricName": "pteg_from_rl2l3_mod_percent"
1661 },
1662 {
1663 "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)",
1664 "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS",
1665 "MetricGroup": "pteg_reloads_percent_per_ref",
1666 "MetricName": "pteg_from_rl2l3_shr_percent"
1667 },
1668 {
1669 "BriefDescription": "% of DERAT reloads from Remote L4",
1670 "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS",
1671 "MetricGroup": "pteg_reloads_percent_per_ref",
1672 "MetricName": "pteg_from_rl4_percent"
1673 },
1674 {
1675 "BriefDescription": "% of DERAT reloads from Remote Memory",
1676 "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS",
1677 "MetricGroup": "pteg_reloads_percent_per_ref",
1678 "MetricName": "pteg_from_rmem_percent"
1679 },
1680 {
1681 "BriefDescription": "% DERAT miss rate for 4K page per inst",
1682 "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL",
1683 "MetricGroup": "translation",
1684 "MetricName": "derat_4k_miss_rate_percent"
1685 },
1686 {
1687 "BriefDescription": "DERAT miss ratio for 4K page",
1688 "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS",
1689 "MetricGroup": "translation",
1690 "MetricName": "derat_4k_miss_ratio"
1691 },
1692 {
1693 "BriefDescription": "% DERAT miss rate for 64K page per inst",
1694 "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL",
1695 "MetricGroup": "translation",
1696 "MetricName": "derat_64k_miss_rate_percent"
1697 },
1698 {
1699 "BriefDescription": "DERAT miss ratio for 64K page",
1700 "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS",
1701 "MetricGroup": "translation",
1702 "MetricName": "derat_64k_miss_ratio"
1703 },
1704 {
1705 "BriefDescription": "DERAT miss ratio",
1706 "MetricExpr": "PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS",
1707 "MetricGroup": "translation",
1708 "MetricName": "derat_miss_ratio"
1709 },
1710 {
1711 "BriefDescription": "% DSLB miss rate per inst",
1712 "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL",
1713 "MetricGroup": "translation",
1714 "MetricName": "dslb_miss_rate_percent"
1715 },
1716 {
1717 "BriefDescription": "% ISLB miss rate per inst",
1718 "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL",
1719 "MetricGroup": "translation",
1720 "MetricName": "islb_miss_rate_percent"
1721 },
1722 {
1723 "BriefDescription": "ANY_SYNC_STALL_CPI",
1724 "MetricExpr": "PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL",
1725 "MetricName": "any_sync_stall_cpi"
1726 },
1727 {
1728 "BriefDescription": "Average number of instructions completed per cycle in which at least one instruction completed",
1729 "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL",
1730 "MetricName": "average_completed_instruction_set_size"
1731 },
1732 {
1733 "BriefDescription": "Branches per instruction",
1734 "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL",
1735 "MetricName": "branches_per_inst"
1736 },
1737 {
1738 "BriefDescription": "Cycles in which at least one instruction completes in this thread",
1739 "MetricExpr": "PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL",
1740 "MetricName": "completion_cpi"
1741 },
1742 {
1743 "BriefDescription": "cycles",
1744 "MetricExpr": "PM_RUN_CYC",
1745 "MetricName": "custom_secs"
1746 },
1747 {
1748 "BriefDescription": "Percentage of cycles in which at least one instruction was dispatched",
1749 "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100",
1750 "MetricName": "cycles_atleast_one_inst_dispatched_percent"
1751 },
1752 {
1753 "BriefDescription": "Cycles per instruction group",
1754 "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL",
1755 "MetricName": "cycles_per_completed_instructions_set"
1756 },
1757 {
1758 "BriefDescription": "% of DL1 dL1_Reloads from Distant L4",
1759 "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
1760 "MetricName": "dl1_reload_from_dl4_percent"
1761 },
1762 {
1763 "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst",
1764 "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL",
1765 "MetricName": "dl1_reload_from_dl4_rate_percent"
1766 },
1767 {
1768 "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
1769 "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
1770 "MetricName": "dl1_reload_from_l31_rate_percent"
1771 },
1772 {
1773 "BriefDescription": "% of DL1 dL1_Reloads from Local L4",
1774 "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
1775 "MetricName": "dl1_reload_from_ll4_percent"
1776 },
1777 {
1778 "BriefDescription": "% of DL1 Reloads from Local L4 per Inst",
1779 "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
1780 "MetricName": "dl1_reload_from_ll4_rate_percent"
1781 },
1782 {
1783 "BriefDescription": "% of DL1 dL1_Reloads from Remote L4",
1784 "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID",
1785 "MetricName": "dl1_reload_from_rl4_percent"
1786 },
1787 {
1788 "BriefDescription": "% of DL1 Reloads from Remote L4 per Inst",
1789 "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL",
1790 "MetricName": "dl1_reload_from_rl4_rate_percent"
1791 },
1792 {
1793 "BriefDescription": "Rate of DERAT reloads from L2",
1794 "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
1795 "MetricName": "dpteg_from_l2_rate_percent"
1796 },
1797 {
1798 "BriefDescription": "Rate of DERAT reloads from L3",
1799 "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
1800 "MetricName": "dpteg_from_l3_rate_percent"
1801 },
1802 {
1803 "BriefDescription": "Cycles in which the oldest instruction is finished and ready to complete but is waiting to get through the completion pipe",
1804 "MetricExpr": "PM_NTC_ALL_FIN / PM_RUN_INST_CMPL",
1805 "MetricName": "finish_to_cmpl_cpi"
1806 },
1807 {
1808 "BriefDescription": "Total Fixed point operations",
1809 "MetricExpr": "PM_FXU_FIN/PM_RUN_INST_CMPL",
1810 "MetricName": "fixed_per_inst"
1811 },
1812 {
1813 "BriefDescription": "All FXU Busy",
1814 "MetricExpr": "PM_FXU_BUSY / PM_CYC",
1815 "MetricName": "fxu_all_busy"
1816 },
1817 {
1818 "BriefDescription": "All FXU Idle",
1819 "MetricExpr": "PM_FXU_IDLE / PM_CYC",
1820 "MetricName": "fxu_all_idle"
1821 },
1822 {
1823 "BriefDescription": "Ict empty for this thread due to branch mispred",
1824 "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
1825 "MetricName": "ict_noslot_br_mpred_cpi"
1826 },
1827 {
1828 "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred",
1829 "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
1830 "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
1831 },
1832 {
1833 "BriefDescription": "ICT other stalls",
1834 "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
1835 "MetricName": "ict_noslot_cyc_other_cpi"
1836 },
1837 {
1838 "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason",
1839 "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
1840 "MetricName": "ict_noslot_disp_held_cpi"
1841 },
1842 {
1843 "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
1844 "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
1845 "MetricName": "ict_noslot_disp_held_hb_full_cpi"
1846 },
1847 {
1848 "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
1849 "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
1850 "MetricName": "ict_noslot_disp_held_issq_cpi"
1851 },
1852 {
1853 "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
1854 "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
1855 "MetricName": "ict_noslot_disp_held_other_cpi"
1856 },
1857 {
1858 "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
1859 "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
1860 "MetricName": "ict_noslot_disp_held_sync_cpi"
1861 },
1862 {
1863 "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
1864 "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
1865 "MetricName": "ict_noslot_disp_held_tbegin_cpi"
1866 },
1867 {
1868 "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
1869 "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
1870 "MetricName": "ict_noslot_ic_l2_cpi"
1871 },
1872 {
1873 "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3",
1874 "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
1875 "MetricName": "ict_noslot_ic_l3_cpi"
1876 },
1877 {
1878 "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
1879 "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
1880 "MetricName": "ict_noslot_ic_l3miss_cpi"
1881 },
1882 {
1883 "BriefDescription": "Ict empty for this thread due to Icache Miss",
1884 "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
1885 "MetricName": "ict_noslot_ic_miss_cpi"
1886 },
1887 {
1888 "BriefDescription": "Rate of IERAT reloads from L2",
1889 "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
1890 "MetricName": "ipteg_from_l2_rate_percent"
1891 },
1892 {
1893 "BriefDescription": "Rate of IERAT reloads from L3",
1894 "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL",
1895 "MetricName": "ipteg_from_l3_rate_percent"
1896 },
1897 {
1898 "BriefDescription": "Rate of IERAT reloads from local memory",
1899 "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL",
1900 "MetricName": "ipteg_from_ll4_rate_percent"
1901 },
1902 {
1903 "BriefDescription": "Rate of IERAT reloads from local memory",
1904 "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL",
1905 "MetricName": "ipteg_from_lmem_rate_percent"
1906 },
1907 {
1908 "BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle",
1909 "MetricExpr": "PM_CO_USAGE / PM_RUN_CYC * 16",
1910 "MetricName": "l2_co_usage"
1911 },
1912 {
1913 "BriefDescription": "Percent of instruction reads out of all L2 commands",
1914 "MetricExpr": "PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
1915 "MetricName": "l2_instr_commands_percent"
1916 },
1917 {
1918 "BriefDescription": "Percent of loads out of all L2 commands",
1919 "MetricExpr": "PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
1920 "MetricName": "l2_ld_commands_percent"
1921 },
1922 {
1923 "BriefDescription": "Rate of L2 store dispatches that failed per core",
1924 "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL",
1925 "MetricName": "l2_rc_st_disp_fail_rate_percent"
1926 },
1927 {
1928 "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle",
1929 "MetricExpr": "PM_RC_USAGE / PM_RUN_CYC * 16",
1930 "MetricName": "l2_rc_usage"
1931 },
1932 {
1933 "BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle",
1934 "MetricExpr": "PM_SN_USAGE / PM_RUN_CYC * 8",
1935 "MetricName": "l2_sn_usage"
1936 },
1937 {
1938 "BriefDescription": "Percent of stores out of all L2 commands",
1939 "MetricExpr": "PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)",
1940 "MetricName": "l2_st_commands_percent"
1941 },
1942 {
1943 "BriefDescription": "Rate of L2 store dispatches that failed per core",
1944 "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL",
1945 "MetricName": "l2_st_disp_fail_rate_percent"
1946 },
1947 {
1948 "BriefDescription": "Rate of L2 dispatches per core",
1949 "MetricExpr": "100 * PM_L2_RCST_DISP/2 / PM_RUN_INST_CMPL",
1950 "MetricName": "l2_st_disp_rate_percent"
1951 },
1952 {
1953 "BriefDescription": "Marked L31 Load latency",
1954 "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)",
1955 "MetricName": "l31_latency"
1956 },
1957 {
1958 "BriefDescription": "PCT instruction loads",
1959 "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL",
1960 "MetricName": "loads_per_inst"
1961 },
1962 {
1963 "BriefDescription": "Cycles stalled by D-Cache Misses",
1964 "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL",
1965 "MetricName": "lsu_stall_dcache_miss_cpi"
1966 },
1967 {
1968 "BriefDescription": "Completion stall because a different thread was using the completion pipe",
1969 "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL",
1970 "MetricName": "other_thread_cmpl_stall"
1971 },
1972 {
1973 "BriefDescription": "PCT instruction stores",
1974 "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL",
1975 "MetricName": "stores_per_inst"
1976 },
1977 {
1978 "BriefDescription": "SYNC_PMU_INT_STALL_CPI",
1979 "MetricExpr": "PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL",
1980 "MetricName": "sync_pmu_int_stall_cpi"
1981 }
1982]
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 36c903faed0b..71e9737f4614 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -73,7 +73,7 @@
73 }, 73 },
74 { 74 {
75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", 75 "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", 76 "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
77 "MetricGroup": "Memory_Bound;Memory_Lat", 77 "MetricGroup": "Memory_Bound;Memory_Lat",
78 "MetricName": "Load_Miss_Real_Latency" 78 "MetricName": "Load_Miss_Real_Latency"
79 }, 79 },
diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build
index 41efd7e368b3..68d4b54574ad 100644
--- a/tools/perf/scripts/Build
+++ b/tools/perf/scripts/Build
@@ -1,2 +1,2 @@
1libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ 1perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/
2libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ 2perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build
index 34faecf774ae..db0036129307 100644
--- a/tools/perf/scripts/perl/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build
@@ -1,4 +1,4 @@
1libperf-y += Context.o 1perf-y += Context.o
2 2
3CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes 3CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes
4CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef 4CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
index aefc15c9444a..7d0e33ce6aba 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -1,3 +1,3 @@
1libperf-y += Context.o 1perf-y += Context.o
2 2
3CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs 3CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index 0564dd7377f2..30130213da7e 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -478,7 +478,7 @@ if perf_db_export_calls:
478 'branch_count,' 478 'branch_count,'
479 'call_id,' 479 'call_id,'
480 'return_id,' 480 'return_id,'
481 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' 481 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,'
482 'parent_call_path_id' 482 'parent_call_path_id'
483 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') 483 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
484 484
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index 245caf2643ed..ed237f2ed03f 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -320,7 +320,7 @@ if perf_db_export_calls:
320 'branch_count,' 320 'branch_count,'
321 'call_id,' 321 'call_id,'
322 'return_id,' 322 'return_id,'
323 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' 323 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,'
324 'parent_call_path_id' 324 'parent_call_path_id'
325 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') 325 ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
326 326
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index f278ce5ebab7..09ce73b07d35 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -1,4 +1,4 @@
1#!/usr/bin/python2 1#!/usr/bin/env python2
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3# exported-sql-viewer.py: view data from sql database 3# exported-sql-viewer.py: view data from sql database
4# Copyright (c) 2014-2018, Intel Corporation. 4# Copyright (c) 2014-2018, Intel Corporation.
@@ -1398,18 +1398,28 @@ class BranchModel(TreeModel):
1398 def HasMoreRecords(self): 1398 def HasMoreRecords(self):
1399 return self.more 1399 return self.more
1400 1400
1401# Report Variables
1402
1403class ReportVars():
1404
1405 def __init__(self, name = "", where_clause = "", limit = ""):
1406 self.name = name
1407 self.where_clause = where_clause
1408 self.limit = limit
1409
1410 def UniqueId(self):
1411 return str(self.where_clause + ";" + self.limit)
1412
1401# Branch window 1413# Branch window
1402 1414
1403class BranchWindow(QMdiSubWindow): 1415class BranchWindow(QMdiSubWindow):
1404 1416
1405 def __init__(self, glb, event_id, name, where_clause, parent=None): 1417 def __init__(self, glb, event_id, report_vars, parent=None):
1406 super(BranchWindow, self).__init__(parent) 1418 super(BranchWindow, self).__init__(parent)
1407 1419
1408 model_name = "Branch Events " + str(event_id) 1420 model_name = "Branch Events " + str(event_id) + " " + report_vars.UniqueId()
1409 if len(where_clause):
1410 model_name = where_clause + " " + model_name
1411 1421
1412 self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause)) 1422 self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause))
1413 1423
1414 self.view = QTreeView() 1424 self.view = QTreeView()
1415 self.view.setUniformRowHeights(True) 1425 self.view.setUniformRowHeights(True)
@@ -1427,7 +1437,7 @@ class BranchWindow(QMdiSubWindow):
1427 1437
1428 self.setWidget(self.vbox.Widget()) 1438 self.setWidget(self.vbox.Widget())
1429 1439
1430 AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events") 1440 AddSubWindow(glb.mainwindow.mdi_area, self, report_vars.name + " Branch Events")
1431 1441
1432 def ResizeColumnToContents(self, column, n): 1442 def ResizeColumnToContents(self, column, n):
1433 # Using the view's resizeColumnToContents() here is extrememly slow 1443 # Using the view's resizeColumnToContents() here is extrememly slow
@@ -1472,47 +1482,134 @@ class BranchWindow(QMdiSubWindow):
1472 else: 1482 else:
1473 self.find_bar.NotFound() 1483 self.find_bar.NotFound()
1474 1484
1475# Dialog data item converted and validated using a SQL table 1485# Line edit data item
1476 1486
1477class SQLTableDialogDataItem(): 1487class LineEditDataItem(object):
1478 1488
1479 def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): 1489 def __init__(self, glb, label, placeholder_text, parent, id = "", default = ""):
1480 self.glb = glb 1490 self.glb = glb
1481 self.label = label 1491 self.label = label
1482 self.placeholder_text = placeholder_text 1492 self.placeholder_text = placeholder_text
1483 self.table_name = table_name
1484 self.match_column = match_column
1485 self.column_name1 = column_name1
1486 self.column_name2 = column_name2
1487 self.parent = parent 1493 self.parent = parent
1494 self.id = id
1488 1495
1489 self.value = "" 1496 self.value = default
1490 1497
1491 self.widget = QLineEdit() 1498 self.widget = QLineEdit(default)
1492 self.widget.editingFinished.connect(self.Validate) 1499 self.widget.editingFinished.connect(self.Validate)
1493 self.widget.textChanged.connect(self.Invalidate) 1500 self.widget.textChanged.connect(self.Invalidate)
1494 self.red = False 1501 self.red = False
1495 self.error = "" 1502 self.error = ""
1496 self.validated = True 1503 self.validated = True
1497 1504
1498 self.last_id = 0
1499 self.first_time = 0
1500 self.last_time = 2 ** 64
1501 if self.table_name == "<timeranges>":
1502 query = QSqlQuery(self.glb.db)
1503 QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1")
1504 if query.next():
1505 self.last_id = int(query.value(0))
1506 self.last_time = int(query.value(1))
1507 QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1")
1508 if query.next():
1509 self.first_time = int(query.value(0))
1510 if placeholder_text:
1511 placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time)
1512
1513 if placeholder_text: 1505 if placeholder_text:
1514 self.widget.setPlaceholderText(placeholder_text) 1506 self.widget.setPlaceholderText(placeholder_text)
1515 1507
1508 def TurnTextRed(self):
1509 if not self.red:
1510 palette = QPalette()
1511 palette.setColor(QPalette.Text,Qt.red)
1512 self.widget.setPalette(palette)
1513 self.red = True
1514
1515 def TurnTextNormal(self):
1516 if self.red:
1517 palette = QPalette()
1518 self.widget.setPalette(palette)
1519 self.red = False
1520
1521 def InvalidValue(self, value):
1522 self.value = ""
1523 self.TurnTextRed()
1524 self.error = self.label + " invalid value '" + value + "'"
1525 self.parent.ShowMessage(self.error)
1526
1527 def Invalidate(self):
1528 self.validated = False
1529
1530 def DoValidate(self, input_string):
1531 self.value = input_string.strip()
1532
1533 def Validate(self):
1534 self.validated = True
1535 self.error = ""
1536 self.TurnTextNormal()
1537 self.parent.ClearMessage()
1538 input_string = self.widget.text()
1539 if not len(input_string.strip()):
1540 self.value = ""
1541 return
1542 self.DoValidate(input_string)
1543
1544 def IsValid(self):
1545 if not self.validated:
1546 self.Validate()
1547 if len(self.error):
1548 self.parent.ShowMessage(self.error)
1549 return False
1550 return True
1551
1552 def IsNumber(self, value):
1553 try:
1554 x = int(value)
1555 except:
1556 x = 0
1557 return str(x) == value
1558
1559# Non-negative integer ranges dialog data item
1560
1561class NonNegativeIntegerRangesDataItem(LineEditDataItem):
1562
1563 def __init__(self, glb, label, placeholder_text, column_name, parent):
1564 super(NonNegativeIntegerRangesDataItem, self).__init__(glb, label, placeholder_text, parent)
1565
1566 self.column_name = column_name
1567
1568 def DoValidate(self, input_string):
1569 singles = []
1570 ranges = []
1571 for value in [x.strip() for x in input_string.split(",")]:
1572 if "-" in value:
1573 vrange = value.split("-")
1574 if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
1575 return self.InvalidValue(value)
1576 ranges.append(vrange)
1577 else:
1578 if not self.IsNumber(value):
1579 return self.InvalidValue(value)
1580 singles.append(value)
1581 ranges = [("(" + self.column_name + " >= " + r[0] + " AND " + self.column_name + " <= " + r[1] + ")") for r in ranges]
1582 if len(singles):
1583 ranges.append(self.column_name + " IN (" + ",".join(singles) + ")")
1584 self.value = " OR ".join(ranges)
1585
1586# Positive integer dialog data item
1587
1588class PositiveIntegerDataItem(LineEditDataItem):
1589
1590 def __init__(self, glb, label, placeholder_text, parent, id = "", default = ""):
1591 super(PositiveIntegerDataItem, self).__init__(glb, label, placeholder_text, parent, id, default)
1592
1593 def DoValidate(self, input_string):
1594 if not self.IsNumber(input_string.strip()):
1595 return self.InvalidValue(input_string)
1596 value = int(input_string.strip())
1597 if value <= 0:
1598 return self.InvalidValue(input_string)
1599 self.value = str(value)
1600
1601# Dialog data item converted and validated using a SQL table
1602
1603class SQLTableDataItem(LineEditDataItem):
1604
1605 def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent):
1606 super(SQLTableDataItem, self).__init__(glb, label, placeholder_text, parent)
1607
1608 self.table_name = table_name
1609 self.match_column = match_column
1610 self.column_name1 = column_name1
1611 self.column_name2 = column_name2
1612
1516 def ValueToIds(self, value): 1613 def ValueToIds(self, value):
1517 ids = [] 1614 ids = []
1518 query = QSqlQuery(self.glb.db) 1615 query = QSqlQuery(self.glb.db)
@@ -1523,6 +1620,42 @@ class SQLTableDialogDataItem():
1523 ids.append(str(query.value(0))) 1620 ids.append(str(query.value(0)))
1524 return ids 1621 return ids
1525 1622
1623 def DoValidate(self, input_string):
1624 all_ids = []
1625 for value in [x.strip() for x in input_string.split(",")]:
1626 ids = self.ValueToIds(value)
1627 if len(ids):
1628 all_ids.extend(ids)
1629 else:
1630 return self.InvalidValue(value)
1631 self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")"
1632 if self.column_name2:
1633 self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )"
1634
1635# Sample time ranges dialog data item converted and validated using 'samples' SQL table
1636
1637class SampleTimeRangesDataItem(LineEditDataItem):
1638
1639 def __init__(self, glb, label, placeholder_text, column_name, parent):
1640 self.column_name = column_name
1641
1642 self.last_id = 0
1643 self.first_time = 0
1644 self.last_time = 2 ** 64
1645
1646 query = QSqlQuery(glb.db)
1647 QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1")
1648 if query.next():
1649 self.last_id = int(query.value(0))
1650 self.last_time = int(query.value(1))
1651 QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1")
1652 if query.next():
1653 self.first_time = int(query.value(0))
1654 if placeholder_text:
1655 placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time)
1656
1657 super(SampleTimeRangesDataItem, self).__init__(glb, label, placeholder_text, parent)
1658
1526 def IdBetween(self, query, lower_id, higher_id, order): 1659 def IdBetween(self, query, lower_id, higher_id, order):
1527 QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") 1660 QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1")
1528 if query.next(): 1661 if query.next():
@@ -1560,7 +1693,6 @@ class SQLTableDialogDataItem():
1560 return str(lower_id) 1693 return str(lower_id)
1561 1694
1562 def ConvertRelativeTime(self, val): 1695 def ConvertRelativeTime(self, val):
1563 print "val ", val
1564 mult = 1 1696 mult = 1
1565 suffix = val[-2:] 1697 suffix = val[-2:]
1566 if suffix == "ms": 1698 if suffix == "ms":
@@ -1582,29 +1714,23 @@ class SQLTableDialogDataItem():
1582 return str(val) 1714 return str(val)
1583 1715
1584 def ConvertTimeRange(self, vrange): 1716 def ConvertTimeRange(self, vrange):
1585 print "vrange ", vrange
1586 if vrange[0] == "": 1717 if vrange[0] == "":
1587 vrange[0] = str(self.first_time) 1718 vrange[0] = str(self.first_time)
1588 if vrange[1] == "": 1719 if vrange[1] == "":
1589 vrange[1] = str(self.last_time) 1720 vrange[1] = str(self.last_time)
1590 vrange[0] = self.ConvertRelativeTime(vrange[0]) 1721 vrange[0] = self.ConvertRelativeTime(vrange[0])
1591 vrange[1] = self.ConvertRelativeTime(vrange[1]) 1722 vrange[1] = self.ConvertRelativeTime(vrange[1])
1592 print "vrange2 ", vrange
1593 if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): 1723 if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
1594 return False 1724 return False
1595 print "ok1"
1596 beg_range = max(int(vrange[0]), self.first_time) 1725 beg_range = max(int(vrange[0]), self.first_time)
1597 end_range = min(int(vrange[1]), self.last_time) 1726 end_range = min(int(vrange[1]), self.last_time)
1598 if beg_range > self.last_time or end_range < self.first_time: 1727 if beg_range > self.last_time or end_range < self.first_time:
1599 return False 1728 return False
1600 print "ok2"
1601 vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) 1729 vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True)
1602 vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) 1730 vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False)
1603 print "vrange3 ", vrange
1604 return True 1731 return True
1605 1732
1606 def AddTimeRange(self, value, ranges): 1733 def AddTimeRange(self, value, ranges):
1607 print "value ", value
1608 n = value.count("-") 1734 n = value.count("-")
1609 if n == 1: 1735 if n == 1:
1610 pass 1736 pass
@@ -1622,111 +1748,31 @@ class SQLTableDialogDataItem():
1622 return True 1748 return True
1623 return False 1749 return False
1624 1750
1625 def InvalidValue(self, value): 1751 def DoValidate(self, input_string):
1626 self.value = "" 1752 ranges = []
1627 palette = QPalette() 1753 for value in [x.strip() for x in input_string.split(",")]:
1628 palette.setColor(QPalette.Text,Qt.red) 1754 if not self.AddTimeRange(value, ranges):
1629 self.widget.setPalette(palette) 1755 return self.InvalidValue(value)
1630 self.red = True 1756 ranges = [("(" + self.column_name + " >= " + r[0] + " AND " + self.column_name + " <= " + r[1] + ")") for r in ranges]
1631 self.error = self.label + " invalid value '" + value + "'" 1757 self.value = " OR ".join(ranges)
1632 self.parent.ShowMessage(self.error)
1633 1758
1634 def IsNumber(self, value): 1759# Report Dialog Base
1635 try:
1636 x = int(value)
1637 except:
1638 x = 0
1639 return str(x) == value
1640 1760
1641 def Invalidate(self): 1761class ReportDialogBase(QDialog):
1642 self.validated = False
1643 1762
1644 def Validate(self): 1763 def __init__(self, glb, title, items, partial, parent=None):
1645 input_string = self.widget.text() 1764 super(ReportDialogBase, self).__init__(parent)
1646 self.validated = True
1647 if self.red:
1648 palette = QPalette()
1649 self.widget.setPalette(palette)
1650 self.red = False
1651 if not len(input_string.strip()):
1652 self.error = ""
1653 self.value = ""
1654 return
1655 if self.table_name == "<timeranges>":
1656 ranges = []
1657 for value in [x.strip() for x in input_string.split(",")]:
1658 if not self.AddTimeRange(value, ranges):
1659 return self.InvalidValue(value)
1660 ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
1661 self.value = " OR ".join(ranges)
1662 elif self.table_name == "<ranges>":
1663 singles = []
1664 ranges = []
1665 for value in [x.strip() for x in input_string.split(",")]:
1666 if "-" in value:
1667 vrange = value.split("-")
1668 if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]):
1669 return self.InvalidValue(value)
1670 ranges.append(vrange)
1671 else:
1672 if not self.IsNumber(value):
1673 return self.InvalidValue(value)
1674 singles.append(value)
1675 ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges]
1676 if len(singles):
1677 ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")")
1678 self.value = " OR ".join(ranges)
1679 elif self.table_name:
1680 all_ids = []
1681 for value in [x.strip() for x in input_string.split(",")]:
1682 ids = self.ValueToIds(value)
1683 if len(ids):
1684 all_ids.extend(ids)
1685 else:
1686 return self.InvalidValue(value)
1687 self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")"
1688 if self.column_name2:
1689 self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )"
1690 else:
1691 self.value = input_string.strip()
1692 self.error = ""
1693 self.parent.ClearMessage()
1694
1695 def IsValid(self):
1696 if not self.validated:
1697 self.Validate()
1698 if len(self.error):
1699 self.parent.ShowMessage(self.error)
1700 return False
1701 return True
1702
1703# Selected branch report creation dialog
1704
1705class SelectedBranchDialog(QDialog):
1706
1707 def __init__(self, glb, parent=None):
1708 super(SelectedBranchDialog, self).__init__(parent)
1709 1765
1710 self.glb = glb 1766 self.glb = glb
1711 1767
1712 self.name = "" 1768 self.report_vars = ReportVars()
1713 self.where_clause = ""
1714 1769
1715 self.setWindowTitle("Selected Branches") 1770 self.setWindowTitle(title)
1716 self.setMinimumWidth(600) 1771 self.setMinimumWidth(600)
1717 1772
1718 items = ( 1773 self.data_items = [x(glb, self) for x in items]
1719 ("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""), 1774
1720 ("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""), 1775 self.partial = partial
1721 ("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""),
1722 ("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""),
1723 ("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""),
1724 ("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""),
1725 ("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"),
1726 ("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"),
1727 ("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""),
1728 )
1729 self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items]
1730 1776
1731 self.grid = QGridLayout() 1777 self.grid = QGridLayout()
1732 1778
@@ -1758,23 +1804,28 @@ class SelectedBranchDialog(QDialog):
1758 self.setLayout(self.vbox); 1804 self.setLayout(self.vbox);
1759 1805
1760 def Ok(self): 1806 def Ok(self):
1761 self.name = self.data_items[0].value 1807 vars = self.report_vars
1762 if not self.name: 1808 for d in self.data_items:
1809 if d.id == "REPORTNAME":
1810 vars.name = d.value
1811 if not vars.name:
1763 self.ShowMessage("Report name is required") 1812 self.ShowMessage("Report name is required")
1764 return 1813 return
1765 for d in self.data_items: 1814 for d in self.data_items:
1766 if not d.IsValid(): 1815 if not d.IsValid():
1767 return 1816 return
1768 for d in self.data_items[1:]: 1817 for d in self.data_items[1:]:
1769 if len(d.value): 1818 if d.id == "LIMIT":
1770 if len(self.where_clause): 1819 vars.limit = d.value
1771 self.where_clause += " AND " 1820 elif len(d.value):
1772 self.where_clause += d.value 1821 if len(vars.where_clause):
1773 if len(self.where_clause): 1822 vars.where_clause += " AND "
1774 self.where_clause = " AND ( " + self.where_clause + " ) " 1823 vars.where_clause += d.value
1775 else: 1824 if len(vars.where_clause):
1776 self.ShowMessage("No selection") 1825 if self.partial:
1777 return 1826 vars.where_clause = " AND ( " + vars.where_clause + " ) "
1827 else:
1828 vars.where_clause = " WHERE " + vars.where_clause + " "
1778 self.accept() 1829 self.accept()
1779 1830
1780 def ShowMessage(self, msg): 1831 def ShowMessage(self, msg):
@@ -1783,6 +1834,23 @@ class SelectedBranchDialog(QDialog):
1783 def ClearMessage(self): 1834 def ClearMessage(self):
1784 self.status.setText("") 1835 self.status.setText("")
1785 1836
1837# Selected branch report creation dialog
1838
1839class SelectedBranchDialog(ReportDialogBase):
1840
1841 def __init__(self, glb, parent=None):
1842 title = "Selected Branches"
1843 items = (lambda g, p: LineEditDataItem(g, "Report name:", "Enter a name to appear in the window title bar", p, "REPORTNAME"),
1844 lambda g, p: SampleTimeRangesDataItem(g, "Time ranges:", "Enter time ranges", "samples.id", p),
1845 lambda g, p: NonNegativeIntegerRangesDataItem(g, "CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "cpu", p),
1846 lambda g, p: SQLTableDataItem(g, "Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", "", p),
1847 lambda g, p: SQLTableDataItem(g, "PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", "", p),
1848 lambda g, p: SQLTableDataItem(g, "TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", "", p),
1849 lambda g, p: SQLTableDataItem(g, "DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id", p),
1850 lambda g, p: SQLTableDataItem(g, "Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id", p),
1851 lambda g, p: LineEditDataItem(g, "Raw SQL clause: ", "Enter a raw SQL WHERE clause", p))
1852 super(SelectedBranchDialog, self).__init__(glb, title, items, True, parent)
1853
1786# Event list 1854# Event list
1787 1855
1788def GetEventList(db): 1856def GetEventList(db):
@@ -1793,6 +1861,16 @@ def GetEventList(db):
1793 events.append(query.value(0)) 1861 events.append(query.value(0))
1794 return events 1862 return events
1795 1863
1864# Is a table selectable
1865
1866def IsSelectable(db, table):
1867 query = QSqlQuery(db)
1868 try:
1869 QueryExec(query, "SELECT * FROM " + table + " LIMIT 1")
1870 except:
1871 return False
1872 return True
1873
1796# SQL data preparation 1874# SQL data preparation
1797 1875
1798def SQLTableDataPrep(query, count): 1876def SQLTableDataPrep(query, count):
@@ -1818,12 +1896,13 @@ class SQLTableModel(TableModel):
1818 1896
1819 progress = Signal(object) 1897 progress = Signal(object)
1820 1898
1821 def __init__(self, glb, sql, column_count, parent=None): 1899 def __init__(self, glb, sql, column_headers, parent=None):
1822 super(SQLTableModel, self).__init__(parent) 1900 super(SQLTableModel, self).__init__(parent)
1823 self.glb = glb 1901 self.glb = glb
1824 self.more = True 1902 self.more = True
1825 self.populated = 0 1903 self.populated = 0
1826 self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample) 1904 self.column_headers = column_headers
1905 self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample)
1827 self.fetcher.done.connect(self.Update) 1906 self.fetcher.done.connect(self.Update)
1828 self.fetcher.Fetch(glb_chunk_sz) 1907 self.fetcher.Fetch(glb_chunk_sz)
1829 1908
@@ -1861,6 +1940,12 @@ class SQLTableModel(TableModel):
1861 def HasMoreRecords(self): 1940 def HasMoreRecords(self):
1862 return self.more 1941 return self.more
1863 1942
1943 def columnCount(self, parent=None):
1944 return len(self.column_headers)
1945
1946 def columnHeader(self, column):
1947 return self.column_headers[column]
1948
1864# SQL automatic table data model 1949# SQL automatic table data model
1865 1950
1866class SQLAutoTableModel(SQLTableModel): 1951class SQLAutoTableModel(SQLTableModel):
@@ -1870,12 +1955,12 @@ class SQLAutoTableModel(SQLTableModel):
1870 if table_name == "comm_threads_view": 1955 if table_name == "comm_threads_view":
1871 # For now, comm_threads_view has no id column 1956 # For now, comm_threads_view has no id column
1872 sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz) 1957 sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz)
1873 self.column_headers = [] 1958 column_headers = []
1874 query = QSqlQuery(glb.db) 1959 query = QSqlQuery(glb.db)
1875 if glb.dbref.is_sqlite3: 1960 if glb.dbref.is_sqlite3:
1876 QueryExec(query, "PRAGMA table_info(" + table_name + ")") 1961 QueryExec(query, "PRAGMA table_info(" + table_name + ")")
1877 while query.next(): 1962 while query.next():
1878 self.column_headers.append(query.value(1)) 1963 column_headers.append(query.value(1))
1879 if table_name == "sqlite_master": 1964 if table_name == "sqlite_master":
1880 sql = "SELECT * FROM " + table_name 1965 sql = "SELECT * FROM " + table_name
1881 else: 1966 else:
@@ -1888,14 +1973,8 @@ class SQLAutoTableModel(SQLTableModel):
1888 schema = "public" 1973 schema = "public"
1889 QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") 1974 QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'")
1890 while query.next(): 1975 while query.next():
1891 self.column_headers.append(query.value(0)) 1976 column_headers.append(query.value(0))
1892 super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent) 1977 super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent)
1893
1894 def columnCount(self, parent=None):
1895 return len(self.column_headers)
1896
1897 def columnHeader(self, column):
1898 return self.column_headers[column]
1899 1978
1900# Base class for custom ResizeColumnsToContents 1979# Base class for custom ResizeColumnsToContents
1901 1980
@@ -1998,6 +2077,103 @@ def GetTableList(glb):
1998 tables.append("information_schema.columns") 2077 tables.append("information_schema.columns")
1999 return tables 2078 return tables
2000 2079
2080# Top Calls data model
2081
2082class TopCallsModel(SQLTableModel):
2083
2084 def __init__(self, glb, report_vars, parent=None):
2085 text = ""
2086 if not glb.dbref.is_sqlite3:
2087 text = "::text"
2088 limit = ""
2089 if len(report_vars.limit):
2090 limit = " LIMIT " + report_vars.limit
2091 sql = ("SELECT comm, pid, tid, name,"
2092 " CASE"
2093 " WHEN (short_name = '[kernel.kallsyms]') THEN '[kernel]'" + text +
2094 " ELSE short_name"
2095 " END AS dso,"
2096 " call_time, return_time, (return_time - call_time) AS elapsed_time, branch_count, "
2097 " CASE"
2098 " WHEN (calls.flags = 1) THEN 'no call'" + text +
2099 " WHEN (calls.flags = 2) THEN 'no return'" + text +
2100 " WHEN (calls.flags = 3) THEN 'no call/return'" + text +
2101 " ELSE ''" + text +
2102 " END AS flags"
2103 " FROM calls"
2104 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
2105 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
2106 " INNER JOIN dsos ON symbols.dso_id = dsos.id"
2107 " INNER JOIN comms ON calls.comm_id = comms.id"
2108 " INNER JOIN threads ON calls.thread_id = threads.id" +
2109 report_vars.where_clause +
2110 " ORDER BY elapsed_time DESC" +
2111 limit
2112 )
2113 column_headers = ("Command", "PID", "TID", "Symbol", "Object", "Call Time", "Return Time", "Elapsed Time (ns)", "Branch Count", "Flags")
2114 self.alignment = (Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignLeft)
2115 super(TopCallsModel, self).__init__(glb, sql, column_headers, parent)
2116
2117 def columnAlignment(self, column):
2118 return self.alignment[column]
2119
2120# Top Calls report creation dialog
2121
2122class TopCallsDialog(ReportDialogBase):
2123
2124 def __init__(self, glb, parent=None):
2125 title = "Top Calls by Elapsed Time"
2126 items = (lambda g, p: LineEditDataItem(g, "Report name:", "Enter a name to appear in the window title bar", p, "REPORTNAME"),
2127 lambda g, p: SQLTableDataItem(g, "Commands:", "Only calls with these commands will be included", "comms", "comm", "comm_id", "", p),
2128 lambda g, p: SQLTableDataItem(g, "PIDs:", "Only calls with these process IDs will be included", "threads", "pid", "thread_id", "", p),
2129 lambda g, p: SQLTableDataItem(g, "TIDs:", "Only calls with these thread IDs will be included", "threads", "tid", "thread_id", "", p),
2130 lambda g, p: SQLTableDataItem(g, "DSOs:", "Only calls with these DSOs will be included", "dsos", "short_name", "dso_id", "", p),
2131 lambda g, p: SQLTableDataItem(g, "Symbols:", "Only calls with these symbols will be included", "symbols", "name", "symbol_id", "", p),
2132 lambda g, p: LineEditDataItem(g, "Raw SQL clause: ", "Enter a raw SQL WHERE clause", p),
2133 lambda g, p: PositiveIntegerDataItem(g, "Record limit:", "Limit selection to this number of records", p, "LIMIT", "100"))
2134 super(TopCallsDialog, self).__init__(glb, title, items, False, parent)
2135
2136# Top Calls window
2137
2138class TopCallsWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
2139
2140 def __init__(self, glb, report_vars, parent=None):
2141 super(TopCallsWindow, self).__init__(parent)
2142
2143 self.data_model = LookupCreateModel("Top Calls " + report_vars.UniqueId(), lambda: TopCallsModel(glb, report_vars))
2144 self.model = self.data_model
2145
2146 self.view = QTableView()
2147 self.view.setModel(self.model)
2148 self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
2149 self.view.verticalHeader().setVisible(False)
2150
2151 self.ResizeColumnsToContents()
2152
2153 self.find_bar = FindBar(self, self, True)
2154
2155 self.finder = ChildDataItemFinder(self.model)
2156
2157 self.fetch_bar = FetchMoreRecordsBar(self.data_model, self)
2158
2159 self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget())
2160
2161 self.setWidget(self.vbox.Widget())
2162
2163 AddSubWindow(glb.mainwindow.mdi_area, self, report_vars.name)
2164
2165 def Find(self, value, direction, pattern, context):
2166 self.view.setFocus()
2167 self.find_bar.Busy()
2168 self.finder.Find(value, direction, pattern, context, self.FindDone)
2169
2170 def FindDone(self, row):
2171 self.find_bar.Idle()
2172 if row >= 0:
2173 self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex()))
2174 else:
2175 self.find_bar.NotFound()
2176
2001# Action Definition 2177# Action Definition
2002 2178
2003def CreateAction(label, tip, callback, parent=None, shortcut=None): 2179def CreateAction(label, tip, callback, parent=None, shortcut=None):
@@ -2101,6 +2277,7 @@ p.c2 {
2101<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p> 2277<p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p>
2102<p class=c2><a href=#allbranches>1.2 All branches</a></p> 2278<p class=c2><a href=#allbranches>1.2 All branches</a></p>
2103<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p> 2279<p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p>
2280<p class=c2><a href=#topcallsbyelapsedtime>1.4 Top calls by elapsed time</a></p>
2104<p class=c1><a href=#tables>2. Tables</a></p> 2281<p class=c1><a href=#tables>2. Tables</a></p>
2105<h1 id=reports>1. Reports</h1> 2282<h1 id=reports>1. Reports</h1>
2106<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> 2283<h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2>
@@ -2176,6 +2353,10 @@ ms, us or ns. Also, negative values are relative to the end of trace. Examples:
2176 -10ms- The last 10ms 2353 -10ms- The last 10ms
2177</pre> 2354</pre>
2178N.B. Due to the granularity of timestamps, there could be no branches in any given time range. 2355N.B. Due to the granularity of timestamps, there could be no branches in any given time range.
2356<h2 id=topcallsbyelapsedtime>1.4 Top calls by elapsed time</h2>
2357The Top calls by elapsed time report displays calls in descending order of time elapsed between when the function was called and when it returned.
2358The data is reduced by various selection criteria. A dialog box displays available criteria which are AND'ed together.
2359If not all data is fetched, a Fetch bar is provided. Ctrl-F displays a Find bar.
2179<h1 id=tables>2. Tables</h1> 2360<h1 id=tables>2. Tables</h1>
2180The Tables menu shows all tables and views in the database. Most tables have an associated view 2361The Tables menu shows all tables and views in the database. Most tables have an associated view
2181which displays the information in a more friendly way. Not all data for large tables is fetched 2362which displays the information in a more friendly way. Not all data for large tables is fetched
@@ -2305,10 +2486,14 @@ class MainWindow(QMainWindow):
2305 edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) 2486 edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")]))
2306 2487
2307 reports_menu = menu.addMenu("&Reports") 2488 reports_menu = menu.addMenu("&Reports")
2308 reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) 2489 if IsSelectable(glb.db, "calls"):
2490 reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self))
2309 2491
2310 self.EventMenu(GetEventList(glb.db), reports_menu) 2492 self.EventMenu(GetEventList(glb.db), reports_menu)
2311 2493
2494 if IsSelectable(glb.db, "calls"):
2495 reports_menu.addAction(CreateAction("&Top calls by elapsed time", "Create a new window displaying top calls by elapsed time", self.NewTopCalls, self))
2496
2312 self.TableMenu(GetTableList(glb), menu) 2497 self.TableMenu(GetTableList(glb), menu)
2313 2498
2314 self.window_menu = WindowMenu(self.mdi_area, menu) 2499 self.window_menu = WindowMenu(self.mdi_area, menu)
@@ -2364,14 +2549,20 @@ class MainWindow(QMainWindow):
2364 def NewCallGraph(self): 2549 def NewCallGraph(self):
2365 CallGraphWindow(self.glb, self) 2550 CallGraphWindow(self.glb, self)
2366 2551
2552 def NewTopCalls(self):
2553 dialog = TopCallsDialog(self.glb, self)
2554 ret = dialog.exec_()
2555 if ret:
2556 TopCallsWindow(self.glb, dialog.report_vars, self)
2557
2367 def NewBranchView(self, event_id): 2558 def NewBranchView(self, event_id):
2368 BranchWindow(self.glb, event_id, "", "", self) 2559 BranchWindow(self.glb, event_id, ReportVars(), self)
2369 2560
2370 def NewSelectedBranchView(self, event_id): 2561 def NewSelectedBranchView(self, event_id):
2371 dialog = SelectedBranchDialog(self.glb, self) 2562 dialog = SelectedBranchDialog(self.glb, self)
2372 ret = dialog.exec_() 2563 ret = dialog.exec_()
2373 if ret: 2564 if ret:
2374 BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self) 2565 BranchWindow(self.glb, event_id, dialog.report_vars, self)
2375 2566
2376 def NewTableView(self, table_name): 2567 def NewTableView(self, table_name):
2377 TableWindow(self.glb, table_name, self) 2568 TableWindow(self.glb, table_name, self)
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
index cafeff3d74db..3648e8b986ec 100644
--- a/tools/perf/scripts/python/failed-syscalls-by-pid.py
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -5,6 +5,8 @@
5# Displays system-wide failed system call totals, broken down by pid. 5# Displays system-wide failed system call totals, broken down by pid.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed. 6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7 7
8from __future__ import print_function
9
8import os 10import os
9import sys 11import sys
10 12
@@ -32,7 +34,7 @@ if len(sys.argv) > 1:
32syscalls = autodict() 34syscalls = autodict()
33 35
34def trace_begin(): 36def trace_begin():
35 print "Press control+C to stop and show the summary" 37 print("Press control+C to stop and show the summary")
36 38
37def trace_end(): 39def trace_end():
38 print_error_totals() 40 print_error_totals()
@@ -57,22 +59,21 @@ def syscalls__sys_exit(event_name, context, common_cpu,
57 59
58def print_error_totals(): 60def print_error_totals():
59 if for_comm is not None: 61 if for_comm is not None:
60 print "\nsyscall errors for %s:\n\n" % (for_comm), 62 print("\nsyscall errors for %s:\n" % (for_comm))
61 else: 63 else:
62 print "\nsyscall errors:\n\n", 64 print("\nsyscall errors:\n")
63 65
64 print "%-30s %10s\n" % ("comm [pid]", "count"), 66 print("%-30s %10s" % ("comm [pid]", "count"))
65 print "%-30s %10s\n" % ("------------------------------", \ 67 print("%-30s %10s" % ("------------------------------", "----------"))
66 "----------"),
67 68
68 comm_keys = syscalls.keys() 69 comm_keys = syscalls.keys()
69 for comm in comm_keys: 70 for comm in comm_keys:
70 pid_keys = syscalls[comm].keys() 71 pid_keys = syscalls[comm].keys()
71 for pid in pid_keys: 72 for pid in pid_keys:
72 print "\n%s [%d]\n" % (comm, pid), 73 print("\n%s [%d]" % (comm, pid))
73 id_keys = syscalls[comm][pid].keys() 74 id_keys = syscalls[comm][pid].keys()
74 for id in id_keys: 75 for id in id_keys:
75 print " syscall: %-16s\n" % syscall_name(id), 76 print(" syscall: %-16s" % syscall_name(id))
76 ret_keys = syscalls[comm][pid][id].keys() 77 ret_keys = syscalls[comm][pid][id].keys()
77 for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True): 78 for ret, val in sorted(syscalls[comm][pid][id].items(), key = lambda kv: (kv[1], kv[0]), reverse = True):
78 print " err = %-20s %10d\n" % (strerror(ret), val), 79 print(" err = %-20s %10d" % (strerror(ret), val))
diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py
index ebee2c5ae496..fb0bbcbfa0f0 100644
--- a/tools/perf/scripts/python/mem-phys-addr.py
+++ b/tools/perf/scripts/python/mem-phys-addr.py
@@ -4,6 +4,8 @@
4# Copyright (c) 2018, Intel Corporation. 4# Copyright (c) 2018, Intel Corporation.
5 5
6from __future__ import division 6from __future__ import division
7from __future__ import print_function
8
7import os 9import os
8import sys 10import sys
9import struct 11import struct
@@ -31,21 +33,23 @@ def parse_iomem():
31 for i, j in enumerate(f): 33 for i, j in enumerate(f):
32 m = re.split('-|:',j,2) 34 m = re.split('-|:',j,2)
33 if m[2].strip() == 'System RAM': 35 if m[2].strip() == 'System RAM':
34 system_ram.append(long(m[0], 16)) 36 system_ram.append(int(m[0], 16))
35 system_ram.append(long(m[1], 16)) 37 system_ram.append(int(m[1], 16))
36 if m[2].strip() == 'Persistent Memory': 38 if m[2].strip() == 'Persistent Memory':
37 pmem.append(long(m[0], 16)) 39 pmem.append(int(m[0], 16))
38 pmem.append(long(m[1], 16)) 40 pmem.append(int(m[1], 16))
39 41
40def print_memory_type(): 42def print_memory_type():
41 print "Event: %s" % (event_name) 43 print("Event: %s" % (event_name))
42 print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), 44 print("%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), end='')
43 print "%-40s %10s %10s\n" % ("----------------------------------------", \ 45 print("%-40s %10s %10s\n" % ("----------------------------------------",
44 "-----------", "-----------"), 46 "-----------", "-----------"),
47 end='');
45 total = sum(load_mem_type_cnt.values()) 48 total = sum(load_mem_type_cnt.values())
46 for mem_type, count in sorted(load_mem_type_cnt.most_common(), \ 49 for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
47 key = lambda(k, v): (v, k), reverse = True): 50 key = lambda kv: (kv[1], kv[0]), reverse = True):
48 print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total), 51 print("%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total),
52 end='')
49 53
50def trace_begin(): 54def trace_begin():
51 parse_iomem() 55 parse_iomem()
@@ -80,7 +84,7 @@ def find_memory_type(phys_addr):
80 f.seek(0, 0) 84 f.seek(0, 0)
81 for j in f: 85 for j in f:
82 m = re.split('-|:',j,2) 86 m = re.split('-|:',j,2)
83 if long(m[0], 16) <= phys_addr <= long(m[1], 16): 87 if int(m[0], 16) <= phys_addr <= int(m[1], 16):
84 return m[2] 88 return m[2]
85 return "N/A" 89 return "N/A"
86 90
diff --git a/tools/perf/scripts/python/net_dropmonitor.py b/tools/perf/scripts/python/net_dropmonitor.py
index a150164b44a3..212557a02c50 100755
--- a/tools/perf/scripts/python/net_dropmonitor.py
+++ b/tools/perf/scripts/python/net_dropmonitor.py
@@ -1,6 +1,8 @@
1# Monitor the system for dropped packets and proudce a report of drop locations and counts 1# Monitor the system for dropped packets and proudce a report of drop locations and counts
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3 3
4from __future__ import print_function
5
4import os 6import os
5import sys 7import sys
6 8
@@ -50,19 +52,19 @@ def get_sym(sloc):
50 return (None, 0) 52 return (None, 0)
51 53
52def print_drop_table(): 54def print_drop_table():
53 print "%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT") 55 print("%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT"))
54 for i in drop_log.keys(): 56 for i in drop_log.keys():
55 (sym, off) = get_sym(i) 57 (sym, off) = get_sym(i)
56 if sym == None: 58 if sym == None:
57 sym = i 59 sym = i
58 print "%25s %25s %25s" % (sym, off, drop_log[i]) 60 print("%25s %25s %25s" % (sym, off, drop_log[i]))
59 61
60 62
61def trace_begin(): 63def trace_begin():
62 print "Starting trace (Ctrl-C to dump results)" 64 print("Starting trace (Ctrl-C to dump results)")
63 65
64def trace_end(): 66def trace_end():
65 print "Gathering kallsyms data" 67 print("Gathering kallsyms data")
66 get_kallsyms_table() 68 get_kallsyms_table()
67 print_drop_table() 69 print_drop_table()
68 70
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
index 9b2050f778f1..267bda49325d 100644
--- a/tools/perf/scripts/python/netdev-times.py
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -8,6 +8,8 @@
8# dev=: show only thing related to specified device 8# dev=: show only thing related to specified device
9# debug: work with debug mode. It shows buffer status. 9# debug: work with debug mode. It shows buffer status.
10 10
11from __future__ import print_function
12
11import os 13import os
12import sys 14import sys
13 15
@@ -17,6 +19,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
17from perf_trace_context import * 19from perf_trace_context import *
18from Core import * 20from Core import *
19from Util import * 21from Util import *
22from functools import cmp_to_key
20 23
21all_event_list = []; # insert all tracepoint event related with this script 24all_event_list = []; # insert all tracepoint event related with this script
22irq_dic = {}; # key is cpu and value is a list which stacks irqs 25irq_dic = {}; # key is cpu and value is a list which stacks irqs
@@ -61,12 +64,12 @@ def diff_msec(src, dst):
61def print_transmit(hunk): 64def print_transmit(hunk):
62 if dev != 0 and hunk['dev'].find(dev) < 0: 65 if dev != 0 and hunk['dev'].find(dev) < 0:
63 return 66 return
64 print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \ 67 print("%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" %
65 (hunk['dev'], hunk['len'], 68 (hunk['dev'], hunk['len'],
66 nsecs_secs(hunk['queue_t']), 69 nsecs_secs(hunk['queue_t']),
67 nsecs_nsecs(hunk['queue_t'])/1000, 70 nsecs_nsecs(hunk['queue_t'])/1000,
68 diff_msec(hunk['queue_t'], hunk['xmit_t']), 71 diff_msec(hunk['queue_t'], hunk['xmit_t']),
69 diff_msec(hunk['xmit_t'], hunk['free_t'])) 72 diff_msec(hunk['xmit_t'], hunk['free_t'])))
70 73
71# Format for displaying rx packet processing 74# Format for displaying rx packet processing
72PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)" 75PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)"
@@ -98,55 +101,55 @@ def print_receive(hunk):
98 if show_hunk == 0: 101 if show_hunk == 0:
99 return 102 return
100 103
101 print "%d.%06dsec cpu=%d" % \ 104 print("%d.%06dsec cpu=%d" %
102 (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu) 105 (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu))
103 for i in range(len(irq_list)): 106 for i in range(len(irq_list)):
104 print PF_IRQ_ENTRY % \ 107 print(PF_IRQ_ENTRY %
105 (diff_msec(base_t, irq_list[i]['irq_ent_t']), 108 (diff_msec(base_t, irq_list[i]['irq_ent_t']),
106 irq_list[i]['irq'], irq_list[i]['name']) 109 irq_list[i]['irq'], irq_list[i]['name']))
107 print PF_JOINT 110 print(PF_JOINT)
108 irq_event_list = irq_list[i]['event_list'] 111 irq_event_list = irq_list[i]['event_list']
109 for j in range(len(irq_event_list)): 112 for j in range(len(irq_event_list)):
110 irq_event = irq_event_list[j] 113 irq_event = irq_event_list[j]
111 if irq_event['event'] == 'netif_rx': 114 if irq_event['event'] == 'netif_rx':
112 print PF_NET_RX % \ 115 print(PF_NET_RX %
113 (diff_msec(base_t, irq_event['time']), 116 (diff_msec(base_t, irq_event['time']),
114 irq_event['skbaddr']) 117 irq_event['skbaddr']))
115 print PF_JOINT 118 print(PF_JOINT)
116 print PF_SOFT_ENTRY % \ 119 print(PF_SOFT_ENTRY %
117 diff_msec(base_t, hunk['sirq_ent_t']) 120 diff_msec(base_t, hunk['sirq_ent_t']))
118 print PF_JOINT 121 print(PF_JOINT)
119 event_list = hunk['event_list'] 122 event_list = hunk['event_list']
120 for i in range(len(event_list)): 123 for i in range(len(event_list)):
121 event = event_list[i] 124 event = event_list[i]
122 if event['event_name'] == 'napi_poll': 125 if event['event_name'] == 'napi_poll':
123 print PF_NAPI_POLL % \ 126 print(PF_NAPI_POLL %
124 (diff_msec(base_t, event['event_t']), event['dev']) 127 (diff_msec(base_t, event['event_t']), event['dev']))
125 if i == len(event_list) - 1: 128 if i == len(event_list) - 1:
126 print "" 129 print("")
127 else: 130 else:
128 print PF_JOINT 131 print(PF_JOINT)
129 else: 132 else:
130 print PF_NET_RECV % \ 133 print(PF_NET_RECV %
131 (diff_msec(base_t, event['event_t']), event['skbaddr'], 134 (diff_msec(base_t, event['event_t']), event['skbaddr'],
132 event['len']) 135 event['len']))
133 if 'comm' in event.keys(): 136 if 'comm' in event.keys():
134 print PF_WJOINT 137 print(PF_WJOINT)
135 print PF_CPY_DGRAM % \ 138 print(PF_CPY_DGRAM %
136 (diff_msec(base_t, event['comm_t']), 139 (diff_msec(base_t, event['comm_t']),
137 event['pid'], event['comm']) 140 event['pid'], event['comm']))
138 elif 'handle' in event.keys(): 141 elif 'handle' in event.keys():
139 print PF_WJOINT 142 print(PF_WJOINT)
140 if event['handle'] == "kfree_skb": 143 if event['handle'] == "kfree_skb":
141 print PF_KFREE_SKB % \ 144 print(PF_KFREE_SKB %
142 (diff_msec(base_t, 145 (diff_msec(base_t,
143 event['comm_t']), 146 event['comm_t']),
144 event['location']) 147 event['location']))
145 elif event['handle'] == "consume_skb": 148 elif event['handle'] == "consume_skb":
146 print PF_CONS_SKB % \ 149 print(PF_CONS_SKB %
147 diff_msec(base_t, 150 diff_msec(base_t,
148 event['comm_t']) 151 event['comm_t']))
149 print PF_JOINT 152 print(PF_JOINT)
150 153
151def trace_begin(): 154def trace_begin():
152 global show_tx 155 global show_tx
@@ -172,8 +175,7 @@ def trace_begin():
172 175
173def trace_end(): 176def trace_end():
174 # order all events in time 177 # order all events in time
175 all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME], 178 all_event_list.sort(key=cmp_to_key(lambda a,b :a[EINFO_IDX_TIME] < b[EINFO_IDX_TIME]))
176 b[EINFO_IDX_TIME]))
177 # process all events 179 # process all events
178 for i in range(len(all_event_list)): 180 for i in range(len(all_event_list)):
179 event_info = all_event_list[i] 181 event_info = all_event_list[i]
@@ -210,19 +212,19 @@ def trace_end():
210 print_receive(receive_hunk_list[i]) 212 print_receive(receive_hunk_list[i])
211 # display transmit hunks 213 # display transmit hunks
212 if show_tx: 214 if show_tx:
213 print " dev len Qdisc " \ 215 print(" dev len Qdisc "
214 " netdevice free" 216 " netdevice free")
215 for i in range(len(tx_free_list)): 217 for i in range(len(tx_free_list)):
216 print_transmit(tx_free_list[i]) 218 print_transmit(tx_free_list[i])
217 if debug: 219 if debug:
218 print "debug buffer status" 220 print("debug buffer status")
219 print "----------------------------" 221 print("----------------------------")
220 print "xmit Qdisc:remain:%d overflow:%d" % \ 222 print("xmit Qdisc:remain:%d overflow:%d" %
221 (len(tx_queue_list), of_count_tx_queue_list) 223 (len(tx_queue_list), of_count_tx_queue_list))
222 print "xmit netdevice:remain:%d overflow:%d" % \ 224 print("xmit netdevice:remain:%d overflow:%d" %
223 (len(tx_xmit_list), of_count_tx_xmit_list) 225 (len(tx_xmit_list), of_count_tx_xmit_list))
224 print "receive:remain:%d overflow:%d" % \ 226 print("receive:remain:%d overflow:%d" %
225 (len(rx_skb_list), of_count_rx_skb_list) 227 (len(rx_skb_list), of_count_rx_skb_list))
226 228
227# called from perf, when it finds a correspoinding event 229# called from perf, when it finds a correspoinding event
228def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec): 230def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py
index 00e0e7476e55..8b78dc790adb 100644
--- a/tools/perf/scripts/python/powerpc-hcalls.py
+++ b/tools/perf/scripts/python/powerpc-hcalls.py
@@ -4,6 +4,8 @@
4# 4#
5# Hypervisor call statisics 5# Hypervisor call statisics
6 6
7from __future__ import print_function
8
7import os 9import os
8import sys 10import sys
9 11
@@ -149,7 +151,7 @@ hcall_table = {
149} 151}
150 152
151def hcall_table_lookup(opcode): 153def hcall_table_lookup(opcode):
152 if (hcall_table.has_key(opcode)): 154 if (opcode in hcall_table):
153 return hcall_table[opcode] 155 return hcall_table[opcode]
154 else: 156 else:
155 return opcode 157 return opcode
@@ -157,8 +159,8 @@ def hcall_table_lookup(opcode):
157print_ptrn = '%-28s%10s%10s%10s%10s' 159print_ptrn = '%-28s%10s%10s%10s%10s'
158 160
159def trace_end(): 161def trace_end():
160 print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)') 162 print(print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)'))
161 print '-' * 68 163 print('-' * 68)
162 for opcode in output: 164 for opcode in output:
163 h_name = hcall_table_lookup(opcode) 165 h_name = hcall_table_lookup(opcode)
164 time = output[opcode]['time'] 166 time = output[opcode]['time']
@@ -166,14 +168,14 @@ def trace_end():
166 min_t = output[opcode]['min'] 168 min_t = output[opcode]['min']
167 max_t = output[opcode]['max'] 169 max_t = output[opcode]['max']
168 170
169 print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt) 171 print(print_ptrn % (h_name, cnt, min_t, max_t, time//cnt))
170 172
171def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain, 173def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain,
172 opcode, retval): 174 opcode, retval):
173 if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)): 175 if (cpu in d_enter and opcode in d_enter[cpu]):
174 diff = nsecs(sec, nsec) - d_enter[cpu][opcode] 176 diff = nsecs(sec, nsec) - d_enter[cpu][opcode]
175 177
176 if (output.has_key(opcode)): 178 if (opcode in output):
177 output[opcode]['time'] += diff 179 output[opcode]['time'] += diff
178 output[opcode]['cnt'] += 1 180 output[opcode]['cnt'] += 1
179 if (output[opcode]['min'] > diff): 181 if (output[opcode]['min'] > diff):
@@ -190,11 +192,11 @@ def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain,
190 192
191 del d_enter[cpu][opcode] 193 del d_enter[cpu][opcode]
192# else: 194# else:
193# print "Can't find matching hcall_enter event. Ignoring sample" 195# print("Can't find matching hcall_enter event. Ignoring sample")
194 196
195def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm, 197def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm,
196 callchain, opcode): 198 callchain, opcode):
197 if (d_enter.has_key(cpu)): 199 if (cpu in d_enter):
198 d_enter[cpu][opcode] = nsecs(sec, nsec) 200 d_enter[cpu][opcode] = nsecs(sec, nsec)
199 else: 201 else:
200 d_enter[cpu] = {opcode: nsecs(sec, nsec)} 202 d_enter[cpu] = {opcode: nsecs(sec, nsec)}
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
index 3473e7f66081..3984bf51f3c5 100644
--- a/tools/perf/scripts/python/sched-migration.py
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -1,5 +1,3 @@
1#!/usr/bin/python
2#
3# Cpu task migration overview toy 1# Cpu task migration overview toy
4# 2#
5# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> 3# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
index 61621b93affb..987ffae7c8ca 100644
--- a/tools/perf/scripts/python/sctop.py
+++ b/tools/perf/scripts/python/sctop.py
@@ -8,7 +8,14 @@
8# will be refreshed every [interval] seconds. The default interval is 8# will be refreshed every [interval] seconds. The default interval is
9# 3 seconds. 9# 3 seconds.
10 10
11import os, sys, thread, time 11from __future__ import print_function
12
13import os, sys, time
14
15try:
16 import thread
17except ImportError:
18 import _thread as thread
12 19
13sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 20sys.path.append(os.environ['PERF_EXEC_PATH'] + \
14 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 21 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -62,18 +69,19 @@ def print_syscall_totals(interval):
62 while 1: 69 while 1:
63 clear_term() 70 clear_term()
64 if for_comm is not None: 71 if for_comm is not None:
65 print "\nsyscall events for %s:\n\n" % (for_comm), 72 print("\nsyscall events for %s:\n" % (for_comm))
66 else: 73 else:
67 print "\nsyscall events:\n\n", 74 print("\nsyscall events:\n")
68 75
69 print "%-40s %10s\n" % ("event", "count"), 76 print("%-40s %10s" % ("event", "count"))
70 print "%-40s %10s\n" % ("----------------------------------------", \ 77 print("%-40s %10s" %
71 "----------"), 78 ("----------------------------------------",
79 "----------"))
72 80
73 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ 81 for id, val in sorted(syscalls.items(), key = lambda kv: (kv[1], kv[0]), \
74 reverse = True): 82 reverse = True):
75 try: 83 try:
76 print "%-40s %10d\n" % (syscall_name(id), val), 84 print("%-40s %10d" % (syscall_name(id), val))
77 except TypeError: 85 except TypeError:
78 pass 86 pass
79 syscalls.clear() 87 syscalls.clear()
diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py
index 1697b5e18c96..5e703efaddcc 100755
--- a/tools/perf/scripts/python/stackcollapse.py
+++ b/tools/perf/scripts/python/stackcollapse.py
@@ -19,6 +19,8 @@
19# Written by Paolo Bonzini <pbonzini@redhat.com> 19# Written by Paolo Bonzini <pbonzini@redhat.com>
20# Based on Brendan Gregg's stackcollapse-perf.pl script. 20# Based on Brendan Gregg's stackcollapse-perf.pl script.
21 21
22from __future__ import print_function
23
22import os 24import os
23import sys 25import sys
24from collections import defaultdict 26from collections import defaultdict
@@ -120,7 +122,6 @@ def process_event(param_dict):
120 lines[stack_string] = lines[stack_string] + 1 122 lines[stack_string] = lines[stack_string] + 1
121 123
122def trace_end(): 124def trace_end():
123 list = lines.keys() 125 list = sorted(lines)
124 list.sort()
125 for stack in list: 126 for stack in list:
126 print "%s %d" % (stack, lines[stack]) 127 print("%s %d" % (stack, lines[stack]))
diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
index 8410672efb8b..01fa933ff3cf 100644
--- a/tools/perf/scripts/python/stat-cpi.py
+++ b/tools/perf/scripts/python/stat-cpi.py
@@ -1,6 +1,7 @@
1#!/usr/bin/env python
2# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
3 2
3from __future__ import print_function
4
4data = {} 5data = {}
5times = [] 6times = []
6threads = [] 7threads = []
@@ -20,8 +21,8 @@ def store_key(time, cpu, thread):
20 threads.append(thread) 21 threads.append(thread)
21 22
22def store(time, event, cpu, thread, val, ena, run): 23def store(time, event, cpu, thread, val, ena, run):
23 #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \ 24 #print("event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" %
24 # (event, cpu, thread, time, val, ena, run) 25 # (event, cpu, thread, time, val, ena, run))
25 26
26 store_key(time, cpu, thread) 27 store_key(time, cpu, thread)
27 key = get_key(time, event, cpu, thread) 28 key = get_key(time, event, cpu, thread)
@@ -59,7 +60,7 @@ def stat__interval(time):
59 if ins != 0: 60 if ins != 0:
60 cpi = cyc/float(ins) 61 cpi = cyc/float(ins)
61 62
62 print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins) 63 print("%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins))
63 64
64def trace_end(): 65def trace_end():
65 pass 66 pass
@@ -75,4 +76,4 @@ def trace_end():
75# if ins != 0: 76# if ins != 0:
76# cpi = cyc/float(ins) 77# cpi = cyc/float(ins)
77# 78#
78# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi) 79# print("time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi))
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
index daf314cc5dd3..42782487b0e9 100644
--- a/tools/perf/scripts/python/syscall-counts-by-pid.py
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -5,6 +5,8 @@
5# Displays system-wide system call totals, broken down by syscall. 5# Displays system-wide system call totals, broken down by syscall.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed. 6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7 7
8from __future__ import print_function
9
8import os, sys 10import os, sys
9 11
10sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 12sys.path.append(os.environ['PERF_EXEC_PATH'] + \
@@ -31,7 +33,7 @@ if len(sys.argv) > 1:
31syscalls = autodict() 33syscalls = autodict()
32 34
33def trace_begin(): 35def trace_begin():
34 print "Press control+C to stop and show the summary" 36 print("Press control+C to stop and show the summary")
35 37
36def trace_end(): 38def trace_end():
37 print_syscall_totals() 39 print_syscall_totals()
@@ -55,20 +57,20 @@ def syscalls__sys_enter(event_name, context, common_cpu,
55 57
56def print_syscall_totals(): 58def print_syscall_totals():
57 if for_comm is not None: 59 if for_comm is not None:
58 print "\nsyscall events for %s:\n\n" % (for_comm), 60 print("\nsyscall events for %s:\n" % (for_comm))
59 else: 61 else:
60 print "\nsyscall events by comm/pid:\n\n", 62 print("\nsyscall events by comm/pid:\n")
61 63
62 print "%-40s %10s\n" % ("comm [pid]/syscalls", "count"), 64 print("%-40s %10s" % ("comm [pid]/syscalls", "count"))
63 print "%-40s %10s\n" % ("----------------------------------------", \ 65 print("%-40s %10s" % ("----------------------------------------",
64 "----------"), 66 "----------"))
65 67
66 comm_keys = syscalls.keys() 68 comm_keys = syscalls.keys()
67 for comm in comm_keys: 69 for comm in comm_keys:
68 pid_keys = syscalls[comm].keys() 70 pid_keys = syscalls[comm].keys()
69 for pid in pid_keys: 71 for pid in pid_keys:
70 print "\n%s [%d]\n" % (comm, pid), 72 print("\n%s [%d]" % (comm, pid))
71 id_keys = syscalls[comm][pid].keys() 73 id_keys = syscalls[comm][pid].keys()
72 for id, val in sorted(syscalls[comm][pid].iteritems(), \ 74 for id, val in sorted(syscalls[comm][pid].items(), \
73 key = lambda(k, v): (v, k), reverse = True): 75 key = lambda kv: (kv[1], kv[0]), reverse = True):
74 print " %-38s %10d\n" % (syscall_name(id), val), 76 print(" %-38s %10d" % (syscall_name(id), val))
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
index e66a7730aeb5..0ebd89cfd42c 100644
--- a/tools/perf/scripts/python/syscall-counts.py
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -5,6 +5,8 @@
5# Displays system-wide system call totals, broken down by syscall. 5# Displays system-wide system call totals, broken down by syscall.
6# If a [comm] arg is specified, only syscalls called by [comm] are displayed. 6# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
7 7
8from __future__ import print_function
9
8import os 10import os
9import sys 11import sys
10 12
@@ -28,7 +30,7 @@ if len(sys.argv) > 1:
28syscalls = autodict() 30syscalls = autodict()
29 31
30def trace_begin(): 32def trace_begin():
31 print "Press control+C to stop and show the summary" 33 print("Press control+C to stop and show the summary")
32 34
33def trace_end(): 35def trace_end():
34 print_syscall_totals() 36 print_syscall_totals()
@@ -51,14 +53,14 @@ def syscalls__sys_enter(event_name, context, common_cpu,
51 53
52def print_syscall_totals(): 54def print_syscall_totals():
53 if for_comm is not None: 55 if for_comm is not None:
54 print "\nsyscall events for %s:\n\n" % (for_comm), 56 print("\nsyscall events for %s:\n" % (for_comm))
55 else: 57 else:
56 print "\nsyscall events:\n\n", 58 print("\nsyscall events:\n")
57 59
58 print "%-40s %10s\n" % ("event", "count"), 60 print("%-40s %10s" % ("event", "count"))
59 print "%-40s %10s\n" % ("----------------------------------------", \ 61 print("%-40s %10s" % ("----------------------------------------",
60 "-----------"), 62 "-----------"))
61 63
62 for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ 64 for id, val in sorted(syscalls.items(), key = lambda kv: (kv[1], kv[0]), \
63 reverse = True): 65 reverse = True):
64 print "%-40s %10d\n" % (syscall_name(id), val), 66 print("%-40s %10d" % (syscall_name(id), val))
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index e952127e4fb0..cb39ac46bc73 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -1,4 +1,3 @@
1#! /usr/bin/python
2# SPDX-License-Identifier: GPL-2.0 1# SPDX-License-Identifier: GPL-2.0
3 2
4from __future__ import print_function 3from __future__ import print_function
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index a20cbc445426..57fc544aedb0 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -15,7 +15,6 @@
15#include <sys/mman.h> 15#include <sys/mman.h>
16#include <linux/compiler.h> 16#include <linux/compiler.h>
17#include <linux/hw_breakpoint.h> 17#include <linux/hw_breakpoint.h>
18#include <sys/ioctl.h>
19 18
20#include "tests.h" 19#include "tests.h"
21#include "debug.h" 20#include "debug.h"
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index dbf2c69944d2..4ebd2681e760 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -15,6 +15,8 @@
15#include "thread_map.h" 15#include "thread_map.h"
16#include "cpumap.h" 16#include "cpumap.h"
17#include "machine.h" 17#include "machine.h"
18#include "map.h"
19#include "symbol.h"
18#include "event.h" 20#include "event.h"
19#include "thread.h" 21#include "thread.h"
20 22
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 7c8d2e422401..077c306c1cae 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -10,6 +10,7 @@
10#include "../util/unwind.h" 10#include "../util/unwind.h"
11#include "perf_regs.h" 11#include "perf_regs.h"
12#include "map.h" 12#include "map.h"
13#include "symbol.h"
13#include "thread.h" 14#include "thread.h"
14#include "callchain.h" 15#include "callchain.h"
15 16
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 5cbba70bcdd0..ea7acf403727 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -43,7 +43,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
43 return -1; 43 return -1;
44 } 44 }
45 45
46 if (perf_evsel__test_field(evsel, "prev_comm", 16, true)) 46 if (perf_evsel__test_field(evsel, "prev_comm", 16, false))
47 ret = -1; 47 ret = -1;
48 48
49 if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) 49 if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
@@ -55,7 +55,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
55 if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true)) 55 if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true))
56 ret = -1; 56 ret = -1;
57 57
58 if (perf_evsel__test_field(evsel, "next_comm", 16, true)) 58 if (perf_evsel__test_field(evsel, "next_comm", 16, false))
59 ret = -1; 59 ret = -1;
60 60
61 if (perf_evsel__test_field(evsel, "next_pid", 4, true)) 61 if (perf_evsel__test_field(evsel, "next_pid", 4, true))
@@ -73,7 +73,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
73 return -1; 73 return -1;
74 } 74 }
75 75
76 if (perf_evsel__test_field(evsel, "comm", 16, true)) 76 if (perf_evsel__test_field(evsel, "comm", 16, false))
77 ret = -1; 77 ret = -1;
78 78
79 if (perf_evsel__test_field(evsel, "pid", 4, true)) 79 if (perf_evsel__test_field(evsel, "pid", 4, true))
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index b889a28fd80b..469958cd7fe0 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -2,6 +2,7 @@
2#include <inttypes.h> 2#include <inttypes.h>
3#include "perf.h" 3#include "perf.h"
4#include "util/debug.h" 4#include "util/debug.h"
5#include "util/map.h"
5#include "util/symbol.h" 6#include "util/symbol.h"
6#include "util/sort.h" 7#include "util/sort.h"
7#include "util/evsel.h" 8#include "util/evsel.h"
@@ -161,7 +162,7 @@ out:
161void print_hists_in(struct hists *hists) 162void print_hists_in(struct hists *hists)
162{ 163{
163 int i = 0; 164 int i = 0;
164 struct rb_root *root; 165 struct rb_root_cached *root;
165 struct rb_node *node; 166 struct rb_node *node;
166 167
167 if (hists__has(hists, need_collapse)) 168 if (hists__has(hists, need_collapse))
@@ -170,7 +171,7 @@ void print_hists_in(struct hists *hists)
170 root = hists->entries_in; 171 root = hists->entries_in;
171 172
172 pr_info("----- %s --------\n", __func__); 173 pr_info("----- %s --------\n", __func__);
173 node = rb_first(root); 174 node = rb_first_cached(root);
174 while (node) { 175 while (node) {
175 struct hist_entry *he; 176 struct hist_entry *he;
176 177
@@ -191,13 +192,13 @@ void print_hists_in(struct hists *hists)
191void print_hists_out(struct hists *hists) 192void print_hists_out(struct hists *hists)
192{ 193{
193 int i = 0; 194 int i = 0;
194 struct rb_root *root; 195 struct rb_root_cached *root;
195 struct rb_node *node; 196 struct rb_node *node;
196 197
197 root = &hists->entries; 198 root = &hists->entries;
198 199
199 pr_info("----- %s --------\n", __func__); 200 pr_info("----- %s --------\n", __func__);
200 node = rb_first(root); 201 node = rb_first_cached(root);
201 while (node) { 202 while (node) {
202 struct hist_entry *he; 203 struct hist_entry *he;
203 204
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 65fe02bebbee..7a2eed6c783e 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -2,6 +2,7 @@
2#include "perf.h" 2#include "perf.h"
3#include "util/debug.h" 3#include "util/debug.h"
4#include "util/event.h" 4#include "util/event.h"
5#include "util/map.h"
5#include "util/symbol.h" 6#include "util/symbol.h"
6#include "util/sort.h" 7#include "util/sort.h"
7#include "util/evsel.h" 8#include "util/evsel.h"
@@ -125,8 +126,8 @@ out:
125static void del_hist_entries(struct hists *hists) 126static void del_hist_entries(struct hists *hists)
126{ 127{
127 struct hist_entry *he; 128 struct hist_entry *he;
128 struct rb_root *root_in; 129 struct rb_root_cached *root_in;
129 struct rb_root *root_out; 130 struct rb_root_cached *root_out;
130 struct rb_node *node; 131 struct rb_node *node;
131 132
132 if (hists__has(hists, need_collapse)) 133 if (hists__has(hists, need_collapse))
@@ -136,12 +137,12 @@ static void del_hist_entries(struct hists *hists)
136 137
137 root_out = &hists->entries; 138 root_out = &hists->entries;
138 139
139 while (!RB_EMPTY_ROOT(root_out)) { 140 while (!RB_EMPTY_ROOT(&root_out->rb_root)) {
140 node = rb_first(root_out); 141 node = rb_first_cached(root_out);
141 142
142 he = rb_entry(node, struct hist_entry, rb_node); 143 he = rb_entry(node, struct hist_entry, rb_node);
143 rb_erase(node, root_out); 144 rb_erase_cached(node, root_out);
144 rb_erase(&he->rb_node_in, root_in); 145 rb_erase_cached(&he->rb_node_in, root_in);
145 hist_entry__delete(he); 146 hist_entry__delete(he);
146 } 147 }
147} 148}
@@ -198,7 +199,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec
198 print_hists_out(hists); 199 print_hists_out(hists);
199 } 200 }
200 201
201 root = &hists->entries; 202 root = &hists->entries.rb_root;
202 for (node = rb_first(root), i = 0; 203 for (node = rb_first(root), i = 0;
203 node && (he = rb_entry(node, struct hist_entry, rb_node)); 204 node && (he = rb_entry(node, struct hist_entry, rb_node));
204 node = rb_next(node), i++) { 205 node = rb_next(node), i++) {
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 1c5bedab3c2c..975844807fe2 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "perf.h" 2#include "perf.h"
3#include "util/debug.h" 3#include "util/debug.h"
4#include "util/map.h"
4#include "util/symbol.h" 5#include "util/symbol.h"
5#include "util/sort.h" 6#include "util/sort.h"
6#include "util/evsel.h" 7#include "util/evsel.h"
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 9a9d06cb0222..af633db63f4d 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -142,7 +142,7 @@ static int find_sample(struct sample *samples, size_t nr_samples,
142static int __validate_match(struct hists *hists) 142static int __validate_match(struct hists *hists)
143{ 143{
144 size_t count = 0; 144 size_t count = 0;
145 struct rb_root *root; 145 struct rb_root_cached *root;
146 struct rb_node *node; 146 struct rb_node *node;
147 147
148 /* 148 /*
@@ -153,7 +153,7 @@ static int __validate_match(struct hists *hists)
153 else 153 else
154 root = hists->entries_in; 154 root = hists->entries_in;
155 155
156 node = rb_first(root); 156 node = rb_first_cached(root);
157 while (node) { 157 while (node) {
158 struct hist_entry *he; 158 struct hist_entry *he;
159 159
@@ -192,7 +192,7 @@ static int __validate_link(struct hists *hists, int idx)
192 size_t count = 0; 192 size_t count = 0;
193 size_t count_pair = 0; 193 size_t count_pair = 0;
194 size_t count_dummy = 0; 194 size_t count_dummy = 0;
195 struct rb_root *root; 195 struct rb_root_cached *root;
196 struct rb_node *node; 196 struct rb_node *node;
197 197
198 /* 198 /*
@@ -205,7 +205,7 @@ static int __validate_link(struct hists *hists, int idx)
205 else 205 else
206 root = hists->entries_in; 206 root = hists->entries_in;
207 207
208 node = rb_first(root); 208 node = rb_first_cached(root);
209 while (node) { 209 while (node) {
210 struct hist_entry *he; 210 struct hist_entry *he;
211 211
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index faacb4f41460..0a510c524a5d 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -2,6 +2,7 @@
2#include "perf.h" 2#include "perf.h"
3#include "util/debug.h" 3#include "util/debug.h"
4#include "util/event.h" 4#include "util/event.h"
5#include "util/map.h"
5#include "util/symbol.h" 6#include "util/symbol.h"
6#include "util/sort.h" 7#include "util/sort.h"
7#include "util/evsel.h" 8#include "util/evsel.h"
@@ -91,8 +92,8 @@ out:
91static void del_hist_entries(struct hists *hists) 92static void del_hist_entries(struct hists *hists)
92{ 93{
93 struct hist_entry *he; 94 struct hist_entry *he;
94 struct rb_root *root_in; 95 struct rb_root_cached *root_in;
95 struct rb_root *root_out; 96 struct rb_root_cached *root_out;
96 struct rb_node *node; 97 struct rb_node *node;
97 98
98 if (hists__has(hists, need_collapse)) 99 if (hists__has(hists, need_collapse))
@@ -102,12 +103,12 @@ static void del_hist_entries(struct hists *hists)
102 103
103 root_out = &hists->entries; 104 root_out = &hists->entries;
104 105
105 while (!RB_EMPTY_ROOT(root_out)) { 106 while (!RB_EMPTY_ROOT(&root_out->rb_root)) {
106 node = rb_first(root_out); 107 node = rb_first_cached(root_out);
107 108
108 he = rb_entry(node, struct hist_entry, rb_node); 109 he = rb_entry(node, struct hist_entry, rb_node);
109 rb_erase(node, root_out); 110 rb_erase_cached(node, root_out);
110 rb_erase(&he->rb_node_in, root_in); 111 rb_erase_cached(&he->rb_node_in, root_in);
111 hist_entry__delete(he); 112 hist_entry__delete(he);
112 } 113 }
113} 114}
@@ -126,7 +127,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
126 int err; 127 int err;
127 struct hists *hists = evsel__hists(evsel); 128 struct hists *hists = evsel__hists(evsel);
128 struct hist_entry *he; 129 struct hist_entry *he;
129 struct rb_root *root; 130 struct rb_root_cached *root;
130 struct rb_node *node; 131 struct rb_node *node;
131 132
132 field_order = NULL; 133 field_order = NULL;
@@ -162,7 +163,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine)
162 } 163 }
163 164
164 root = &hists->entries; 165 root = &hists->entries;
165 node = rb_first(root); 166 node = rb_first_cached(root);
166 he = rb_entry(node, struct hist_entry, rb_node); 167 he = rb_entry(node, struct hist_entry, rb_node);
167 TEST_ASSERT_VAL("Invalid hist entry", 168 TEST_ASSERT_VAL("Invalid hist entry",
168 !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") && 169 !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
@@ -228,7 +229,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
228 int err; 229 int err;
229 struct hists *hists = evsel__hists(evsel); 230 struct hists *hists = evsel__hists(evsel);
230 struct hist_entry *he; 231 struct hist_entry *he;
231 struct rb_root *root; 232 struct rb_root_cached *root;
232 struct rb_node *node; 233 struct rb_node *node;
233 234
234 field_order = "overhead,cpu"; 235 field_order = "overhead,cpu";
@@ -262,7 +263,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine)
262 } 263 }
263 264
264 root = &hists->entries; 265 root = &hists->entries;
265 node = rb_first(root); 266 node = rb_first_cached(root);
266 he = rb_entry(node, struct hist_entry, rb_node); 267 he = rb_entry(node, struct hist_entry, rb_node);
267 TEST_ASSERT_VAL("Invalid hist entry", 268 TEST_ASSERT_VAL("Invalid hist entry",
268 CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300); 269 CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300);
@@ -284,7 +285,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
284 int err; 285 int err;
285 struct hists *hists = evsel__hists(evsel); 286 struct hists *hists = evsel__hists(evsel);
286 struct hist_entry *he; 287 struct hist_entry *he;
287 struct rb_root *root; 288 struct rb_root_cached *root;
288 struct rb_node *node; 289 struct rb_node *node;
289 290
290 field_order = "comm,overhead,dso"; 291 field_order = "comm,overhead,dso";
@@ -316,7 +317,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine)
316 } 317 }
317 318
318 root = &hists->entries; 319 root = &hists->entries;
319 node = rb_first(root); 320 node = rb_first_cached(root);
320 he = rb_entry(node, struct hist_entry, rb_node); 321 he = rb_entry(node, struct hist_entry, rb_node);
321 TEST_ASSERT_VAL("Invalid hist entry", 322 TEST_ASSERT_VAL("Invalid hist entry",
322 !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") && 323 !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
@@ -358,7 +359,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
358 int err; 359 int err;
359 struct hists *hists = evsel__hists(evsel); 360 struct hists *hists = evsel__hists(evsel);
360 struct hist_entry *he; 361 struct hist_entry *he;
361 struct rb_root *root; 362 struct rb_root_cached *root;
362 struct rb_node *node; 363 struct rb_node *node;
363 364
364 field_order = "dso,sym,comm,overhead,dso"; 365 field_order = "dso,sym,comm,overhead,dso";
@@ -394,7 +395,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine)
394 } 395 }
395 396
396 root = &hists->entries; 397 root = &hists->entries;
397 node = rb_first(root); 398 node = rb_first_cached(root);
398 he = rb_entry(node, struct hist_entry, rb_node); 399 he = rb_entry(node, struct hist_entry, rb_node);
399 TEST_ASSERT_VAL("Invalid hist entry", 400 TEST_ASSERT_VAL("Invalid hist entry",
400 !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") && 401 !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") &&
@@ -460,7 +461,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
460 int err; 461 int err;
461 struct hists *hists = evsel__hists(evsel); 462 struct hists *hists = evsel__hists(evsel);
462 struct hist_entry *he; 463 struct hist_entry *he;
463 struct rb_root *root; 464 struct rb_root_cached *root;
464 struct rb_node *node; 465 struct rb_node *node;
465 466
466 field_order = "cpu,pid,comm,dso,sym"; 467 field_order = "cpu,pid,comm,dso,sym";
@@ -497,7 +498,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine)
497 } 498 }
498 499
499 root = &hists->entries; 500 root = &hists->entries;
500 node = rb_first(root); 501 node = rb_first_cached(root);
501 he = rb_entry(node, struct hist_entry, rb_node); 502 he = rb_entry(node, struct hist_entry, rb_node);
502 503
503 TEST_ASSERT_VAL("Invalid hist entry", 504 TEST_ASSERT_VAL("Invalid hist entry",
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 5ede9b561d32..ba87e6e8d18c 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -11,6 +11,7 @@
11#include "tests.h" 11#include "tests.h"
12#include "machine.h" 12#include "machine.h"
13#include "thread_map.h" 13#include "thread_map.h"
14#include "map.h"
14#include "symbol.h" 15#include "symbol.h"
15#include "thread.h" 16#include "thread.h"
16#include "util.h" 17#include "util.h"
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 3b97ac018d5a..4a69c07f4101 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist)
1330 return 0; 1330 return 0;
1331} 1331}
1332 1332
1333static int test__sym_event_slash(struct perf_evlist *evlist)
1334{
1335 struct perf_evsel *evsel = perf_evlist__first(evlist);
1336
1337 TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE);
1338 TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES);
1339 TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
1340 return 0;
1341}
1342
1343static int test__sym_event_dc(struct perf_evlist *evlist)
1344{
1345 struct perf_evsel *evsel = perf_evlist__first(evlist);
1346
1347 TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE);
1348 TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES);
1349 TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
1350 return 0;
1351}
1352
1333static int count_tracepoints(void) 1353static int count_tracepoints(void)
1334{ 1354{
1335 struct dirent *events_ent; 1355 struct dirent *events_ent;
@@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = {
1670 .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", 1690 .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk",
1671 .check = test__checkevent_complex_name, 1691 .check = test__checkevent_complex_name,
1672 .id = 53 1692 .id = 53
1693 },
1694 {
1695 .name = "cycles//u",
1696 .check = test__sym_event_slash,
1697 .id = 54,
1698 },
1699 {
1700 .name = "cycles:k",
1701 .check = test__sym_event_dc,
1702 .id = 55,
1673 } 1703 }
1674}; 1704};
1675 1705
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 7bedf8608fdd..14a78898d79e 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -4,7 +4,9 @@
4#include "util.h" 4#include "util.h"
5#include "tests.h" 5#include "tests.h"
6#include <errno.h> 6#include <errno.h>
7#include <stdio.h>
7#include <linux/kernel.h> 8#include <linux/kernel.h>
9#include <linux/limits.h>
8 10
9/* Simulated format definitions. */ 11/* Simulated format definitions. */
10static struct test_format { 12static struct test_format {
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 0e2d00d69e6e..236ce0d6c826 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -1,9 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <stdbool.h> 2#include <stdbool.h>
3#include <inttypes.h> 3#include <inttypes.h>
4#include <linux/bitops.h>
4#include <linux/kernel.h> 5#include <linux/kernel.h>
5#include <linux/types.h> 6#include <linux/types.h>
6 7
8#include "branch.h"
7#include "util.h" 9#include "util.h"
8#include "event.h" 10#include "event.h"
9#include "evsel.h" 11#include "evsel.h"
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
index 5059452d27dd..8bfaa630389c 100644
--- a/tools/perf/tests/sdt.c
+++ b/tools/perf/tests/sdt.c
@@ -3,6 +3,7 @@
3#include <stdio.h> 3#include <stdio.h>
4#include <sys/epoll.h> 4#include <sys/epoll.h>
5#include <util/evlist.h> 5#include <util/evlist.h>
6#include <util/symbol.h>
6#include <linux/filter.h> 7#include <linux/filter.h>
7#include "tests.h" 8#include "tests.h"
8#include "debug.h" 9#include "debug.h"
diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh
index 6293cc660947..e37787be672b 100644
--- a/tools/perf/tests/shell/lib/probe.sh
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -4,3 +4,8 @@ skip_if_no_perf_probe() {
4 perf probe 2>&1 | grep -q 'is not a perf-command' && return 2 4 perf probe 2>&1 | grep -q 'is not a perf-command' && return 2
5 return 0 5 return 0
6} 6}
7
8skip_if_no_perf_trace() {
9 perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace command not available' && return 2
10 return 0
11}
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 50109f27ca07..147efeb6b195 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -12,6 +12,7 @@
12. $(dirname $0)/lib/probe.sh 12. $(dirname $0)/lib/probe.sh
13 13
14skip_if_no_perf_probe || exit 2 14skip_if_no_perf_probe || exit 2
15skip_if_no_perf_trace || exit 2
15 16
16. $(dirname $0)/lib/probe_vfs_getname.sh 17. $(dirname $0)/lib/probe_vfs_getname.sh
17 18
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 637365099b7d..85f328ddf897 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -1,15 +1,15 @@
1libperf-y += clone.o 1perf-y += clone.o
2libperf-y += fcntl.o 2perf-y += fcntl.o
3libperf-y += flock.o 3perf-y += flock.o
4ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) 4ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
5libperf-y += ioctl.o 5perf-y += ioctl.o
6endif 6endif
7libperf-y += kcmp.o 7perf-y += kcmp.o
8libperf-y += mount_flags.o 8perf-y += mount_flags.o
9libperf-y += pkey_alloc.o 9perf-y += pkey_alloc.o
10libperf-y += arch_prctl.o 10perf-y += arch_prctl.o
11libperf-y += prctl.o 11perf-y += prctl.o
12libperf-y += renameat.o 12perf-y += renameat.o
13libperf-y += sockaddr.o 13perf-y += sockaddr.o
14libperf-y += socket.o 14perf-y += socket.o
15libperf-y += statx.o 15perf-y += statx.o
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
index 620350d41209..52242fa4072b 100644
--- a/tools/perf/trace/beauty/ioctl.c
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo
175size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) 175size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
176{ 176{
177 unsigned long cmd = arg->val; 177 unsigned long cmd = arg->val;
178 unsigned int fd = syscall_arg__val(arg, 0); 178 int fd = syscall_arg__val(arg, 0);
179 struct file *file = thread__files_entry(arg->thread, fd); 179 struct file *file = thread__files_entry(arg->thread, fd);
180 180
181 if (file != NULL) { 181 if (file != NULL) {
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
index 6897fab40dcc..d4d10b33ba0e 100644
--- a/tools/perf/trace/beauty/waitid_options.c
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
11 11
12#define P_OPTION(n) \ 12#define P_OPTION(n) \
13 if (options & W##n) { \ 13 if (options & W##n) { \
14 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : #n); \ 14 printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \
15 options &= ~W##n; \ 15 options &= ~W##n; \
16 } 16 }
17 17
diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build
index 0a73538c0441..3aff83c3275f 100644
--- a/tools/perf/ui/Build
+++ b/tools/perf/ui/Build
@@ -1,14 +1,14 @@
1libperf-y += setup.o 1perf-y += setup.o
2libperf-y += helpline.o 2perf-y += helpline.o
3libperf-y += progress.o 3perf-y += progress.o
4libperf-y += util.o 4perf-y += util.o
5libperf-y += hist.o 5perf-y += hist.o
6libperf-y += stdio/hist.o 6perf-y += stdio/hist.o
7 7
8CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" 8CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
9 9
10libperf-$(CONFIG_SLANG) += browser.o 10perf-$(CONFIG_SLANG) += browser.o
11libperf-$(CONFIG_SLANG) += browsers/ 11perf-$(CONFIG_SLANG) += browsers/
12libperf-$(CONFIG_SLANG) += tui/ 12perf-$(CONFIG_SLANG) += tui/
13 13
14CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST 14CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build
index de223f5bed58..8fee56b46502 100644
--- a/tools/perf/ui/browsers/Build
+++ b/tools/perf/ui/browsers/Build
@@ -1,8 +1,8 @@
1libperf-y += annotate.o 1perf-y += annotate.o
2libperf-y += hists.o 2perf-y += hists.o
3libperf-y += map.o 3perf-y += map.o
4libperf-y += scripts.o 4perf-y += scripts.o
5libperf-y += header.o 5perf-y += header.o
6 6
7CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST 7CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
8CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST 8CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 82e16bf84466..35bdfd8b1e71 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -7,6 +7,7 @@
7#include "../../util/annotate.h" 7#include "../../util/annotate.h"
8#include "../../util/hist.h" 8#include "../../util/hist.h"
9#include "../../util/sort.h" 9#include "../../util/sort.h"
10#include "../../util/map.h"
10#include "../../util/symbol.h" 11#include "../../util/symbol.h"
11#include "../../util/evsel.h" 12#include "../../util/evsel.h"
12#include "../../util/evlist.h" 13#include "../../util/evlist.h"
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c
index d75492189acb..5aeb663dd184 100644
--- a/tools/perf/ui/browsers/header.c
+++ b/tools/perf/ui/browsers/header.c
@@ -35,7 +35,7 @@ static int list_menu__run(struct ui_browser *menu)
35{ 35{
36 int key; 36 int key;
37 unsigned long offset; 37 unsigned long offset;
38 const char help[] = 38 static const char help[] =
39 "h/?/F1 Show this window\n" 39 "h/?/F1 Show this window\n"
40 "UP/DOWN/PGUP\n" 40 "UP/DOWN/PGUP\n"
41 "PGDN/SPACE\n" 41 "PGDN/SPACE\n"
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index ffac1d54a3d4..aef800d97ea1 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -8,9 +8,12 @@
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9#include <sys/ttydefaults.h> 9#include <sys/ttydefaults.h>
10 10
11#include "../../util/callchain.h"
11#include "../../util/evsel.h" 12#include "../../util/evsel.h"
12#include "../../util/evlist.h" 13#include "../../util/evlist.h"
13#include "../../util/hist.h" 14#include "../../util/hist.h"
15#include "../../util/map.h"
16#include "../../util/symbol.h"
14#include "../../util/pstack.h" 17#include "../../util/pstack.h"
15#include "../../util/sort.h" 18#include "../../util/sort.h"
16#include "../../util/util.h" 19#include "../../util/util.h"
@@ -49,7 +52,7 @@ static int hist_browser__get_folding(struct hist_browser *browser)
49 struct hists *hists = browser->hists; 52 struct hists *hists = browser->hists;
50 int unfolded_rows = 0; 53 int unfolded_rows = 0;
51 54
52 for (nd = rb_first(&hists->entries); 55 for (nd = rb_first_cached(&hists->entries);
53 (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; 56 (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
54 nd = rb_hierarchy_next(nd)) { 57 nd = rb_hierarchy_next(nd)) {
55 struct hist_entry *he = 58 struct hist_entry *he =
@@ -267,7 +270,7 @@ static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
267 if (he->has_no_entry) 270 if (he->has_no_entry)
268 return 1; 271 return 1;
269 272
270 node = rb_first(&he->hroot_out); 273 node = rb_first_cached(&he->hroot_out);
271 while (node) { 274 while (node) {
272 float percent; 275 float percent;
273 276
@@ -372,7 +375,7 @@ static void hist_entry__init_have_children(struct hist_entry *he)
372 he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); 375 he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
373 callchain__init_have_children(&he->sorted_chain); 376 callchain__init_have_children(&he->sorted_chain);
374 } else { 377 } else {
375 he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); 378 he->has_children = !RB_EMPTY_ROOT(&he->hroot_out.rb_root);
376 } 379 }
377 380
378 he->init_have_children = true; 381 he->init_have_children = true;
@@ -508,7 +511,7 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
508 struct hist_entry *child; 511 struct hist_entry *child;
509 int n = 0; 512 int n = 0;
510 513
511 for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { 514 for (nd = rb_first_cached(&he->hroot_out); nd; nd = rb_next(nd)) {
512 child = rb_entry(nd, struct hist_entry, rb_node); 515 child = rb_entry(nd, struct hist_entry, rb_node);
513 percent = hist_entry__get_percent_limit(child); 516 percent = hist_entry__get_percent_limit(child);
514 if (!child->filtered && percent >= hb->min_pcnt) 517 if (!child->filtered && percent >= hb->min_pcnt)
@@ -566,7 +569,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
566 struct rb_node *nd; 569 struct rb_node *nd;
567 struct hist_entry *he; 570 struct hist_entry *he;
568 571
569 nd = rb_first(&browser->hists->entries); 572 nd = rb_first_cached(&browser->hists->entries);
570 while (nd) { 573 while (nd) {
571 he = rb_entry(nd, struct hist_entry, rb_node); 574 he = rb_entry(nd, struct hist_entry, rb_node);
572 575
@@ -1738,7 +1741,7 @@ static void ui_browser__hists_init_top(struct ui_browser *browser)
1738 struct hist_browser *hb; 1741 struct hist_browser *hb;
1739 1742
1740 hb = container_of(browser, struct hist_browser, b); 1743 hb = container_of(browser, struct hist_browser, b);
1741 browser->top = rb_first(&hb->hists->entries); 1744 browser->top = rb_first_cached(&hb->hists->entries);
1742 } 1745 }
1743} 1746}
1744 1747
@@ -2649,7 +2652,7 @@ add_socket_opt(struct hist_browser *browser, struct popup_action *act,
2649static void hist_browser__update_nr_entries(struct hist_browser *hb) 2652static void hist_browser__update_nr_entries(struct hist_browser *hb)
2650{ 2653{
2651 u64 nr_entries = 0; 2654 u64 nr_entries = 0;
2652 struct rb_node *nd = rb_first(&hb->hists->entries); 2655 struct rb_node *nd = rb_first_cached(&hb->hists->entries);
2653 2656
2654 if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { 2657 if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
2655 hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; 2658 hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
@@ -2669,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb,
2669 double percent) 2672 double percent)
2670{ 2673{
2671 struct hist_entry *he; 2674 struct hist_entry *he;
2672 struct rb_node *nd = rb_first(&hb->hists->entries); 2675 struct rb_node *nd = rb_first_cached(&hb->hists->entries);
2673 u64 total = hists__total_period(hb->hists); 2676 u64 total = hists__total_period(hb->hists);
2674 u64 min_callchain_hits = total * (percent / 100); 2677 u64 min_callchain_hits = total * (percent / 100);
2675 2678
@@ -2748,7 +2751,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
2748 "S Zoom into current Processor Socket\n" \ 2751 "S Zoom into current Processor Socket\n" \
2749 2752
2750 /* help messages are sorted by lexical order of the hotkey */ 2753 /* help messages are sorted by lexical order of the hotkey */
2751 const char report_help[] = HIST_BROWSER_HELP_COMMON 2754 static const char report_help[] = HIST_BROWSER_HELP_COMMON
2752 "i Show header information\n" 2755 "i Show header information\n"
2753 "P Print histograms to perf.hist.N\n" 2756 "P Print histograms to perf.hist.N\n"
2754 "r Run available scripts\n" 2757 "r Run available scripts\n"
@@ -2756,7 +2759,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
2756 "t Zoom into current Thread\n" 2759 "t Zoom into current Thread\n"
2757 "V Verbose (DSO names in callchains, etc)\n" 2760 "V Verbose (DSO names in callchains, etc)\n"
2758 "/ Filter symbol by name"; 2761 "/ Filter symbol by name";
2759 const char top_help[] = HIST_BROWSER_HELP_COMMON 2762 static const char top_help[] = HIST_BROWSER_HELP_COMMON
2760 "P Print histograms to perf.hist.N\n" 2763 "P Print histograms to perf.hist.N\n"
2761 "t Zoom into current Thread\n" 2764 "t Zoom into current Thread\n"
2762 "V Verbose (DSO names in callchains, etc)\n" 2765 "V Verbose (DSO names in callchains, etc)\n"
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index 5b8b8c637686..c70d9337405b 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -6,6 +6,7 @@
6#include <linux/bitops.h> 6#include <linux/bitops.h>
7#include "../../util/util.h" 7#include "../../util/util.h"
8#include "../../util/debug.h" 8#include "../../util/debug.h"
9#include "../../util/map.h"
9#include "../../util/symbol.h" 10#include "../../util/symbol.h"
10#include "../browser.h" 11#include "../browser.h"
11#include "../helpline.h" 12#include "../helpline.h"
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 48428c9acd89..df49c9ba1785 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -1,8 +1,11 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "gtk.h" 2#include "gtk.h"
3#include "util/sort.h"
3#include "util/debug.h" 4#include "util/debug.h"
4#include "util/annotate.h" 5#include "util/annotate.h"
5#include "util/evsel.h" 6#include "util/evsel.h"
7#include "util/map.h"
8#include "util/symbol.h"
6#include "ui/helpline.h" 9#include "ui/helpline.h"
7#include <inttypes.h> 10#include <inttypes.h>
8#include <signal.h> 11#include <signal.h>
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 4ab663ec3e5e..0c08890f006a 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "../evlist.h" 2#include "../evlist.h"
3#include "../cache.h" 3#include "../cache.h"
4#include "../callchain.h"
4#include "../evsel.h" 5#include "../evsel.h"
5#include "../sort.h" 6#include "../sort.h"
6#include "../hist.h" 7#include "../hist.h"
@@ -353,7 +354,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
353 354
354 g_object_unref(GTK_TREE_MODEL(store)); 355 g_object_unref(GTK_TREE_MODEL(store));
355 356
356 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { 357 for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) {
357 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 358 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
358 GtkTreeIter iter; 359 GtkTreeIter iter;
359 u64 total = hists__total_period(h->hists); 360 u64 total = hists__total_period(h->hists);
@@ -401,7 +402,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
401} 402}
402 403
403static void perf_gtk__add_hierarchy_entries(struct hists *hists, 404static void perf_gtk__add_hierarchy_entries(struct hists *hists,
404 struct rb_root *root, 405 struct rb_root_cached *root,
405 GtkTreeStore *store, 406 GtkTreeStore *store,
406 GtkTreeIter *parent, 407 GtkTreeIter *parent,
407 struct perf_hpp *hpp, 408 struct perf_hpp *hpp,
@@ -415,7 +416,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists,
415 u64 total = hists__total_period(hists); 416 u64 total = hists__total_period(hists);
416 int size; 417 int size;
417 418
418 for (node = rb_first(root); node; node = rb_next(node)) { 419 for (node = rb_first_cached(root); node; node = rb_next(node)) {
419 GtkTreeIter iter; 420 GtkTreeIter iter;
420 float percent; 421 float percent;
421 char *bf; 422 char *bf;
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index fe3dfaa64a91..412d6f1626e3 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -3,6 +3,7 @@
3#include <math.h> 3#include <math.h>
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5 5
6#include "../util/callchain.h"
6#include "../util/hist.h" 7#include "../util/hist.h"
7#include "../util/util.h" 8#include "../util/util.h"
8#include "../util/sort.h" 9#include "../util/sort.h"
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index 74c4ae1f0a05..a60f2993d390 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -2,8 +2,12 @@
2#include <stdio.h> 2#include <stdio.h>
3#include <linux/string.h> 3#include <linux/string.h>
4 4
5#include "../../util/callchain.h"
5#include "../../util/util.h" 6#include "../../util/util.h"
6#include "../../util/hist.h" 7#include "../../util/hist.h"
8#include "../../util/map.h"
9#include "../../util/map_groups.h"
10#include "../../util/symbol.h"
7#include "../../util/sort.h" 11#include "../../util/sort.h"
8#include "../../util/evsel.h" 12#include "../../util/evsel.h"
9#include "../../util/srcline.h" 13#include "../../util/srcline.h"
@@ -788,7 +792,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
788 792
789 indent = hists__overhead_width(hists) + 4; 793 indent = hists__overhead_width(hists) + 4;
790 794
791 for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { 795 for (nd = rb_first_cached(&hists->entries); nd;
796 nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
792 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 797 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
793 float percent; 798 float percent;
794 799
diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build
index 9e4c6ca41a9f..f916df33a1a7 100644
--- a/tools/perf/ui/tui/Build
+++ b/tools/perf/ui/tui/Build
@@ -1,4 +1,4 @@
1libperf-y += setup.o 1perf-y += setup.o
2libperf-y += util.o 2perf-y += util.o
3libperf-y += helpline.o 3perf-y += helpline.o
4libperf-y += progress.o 4perf-y += progress.o
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index af72be7f5b3b..8dd3102301ea 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,158 +1,164 @@
1libperf-y += annotate.o 1perf-y += annotate.o
2libperf-y += block-range.o 2perf-y += block-range.o
3libperf-y += build-id.o 3perf-y += build-id.o
4libperf-y += config.o 4perf-y += config.o
5libperf-y += ctype.o 5perf-y += ctype.o
6libperf-y += db-export.o 6perf-y += db-export.o
7libperf-y += env.o 7perf-y += env.o
8libperf-y += event.o 8perf-y += event.o
9libperf-y += evlist.o 9perf-y += evlist.o
10libperf-y += evsel.o 10perf-y += evsel.o
11libperf-y += evsel_fprintf.o 11perf-y += evsel_fprintf.o
12libperf-y += find_bit.o 12perf-y += find_bit.o
13libperf-y += get_current_dir_name.o 13perf-y += get_current_dir_name.o
14libperf-y += kallsyms.o 14perf-y += kallsyms.o
15libperf-y += levenshtein.o 15perf-y += levenshtein.o
16libperf-y += llvm-utils.o 16perf-y += llvm-utils.o
17libperf-y += mmap.o 17perf-y += mmap.o
18libperf-y += memswap.o 18perf-y += memswap.o
19libperf-y += parse-events.o 19perf-y += parse-events.o
20libperf-y += perf_regs.o 20perf-y += perf_regs.o
21libperf-y += path.o 21perf-y += path.o
22libperf-y += print_binary.o 22perf-y += print_binary.o
23libperf-y += rbtree.o 23perf-y += rbtree.o
24libperf-y += libstring.o 24perf-y += libstring.o
25libperf-y += bitmap.o 25perf-y += bitmap.o
26libperf-y += hweight.o 26perf-y += hweight.o
27libperf-y += smt.o 27perf-y += smt.o
28libperf-y += strbuf.o 28perf-y += strbuf.o
29libperf-y += string.o 29perf-y += string.o
30libperf-y += strlist.o 30perf-y += strlist.o
31libperf-y += strfilter.o 31perf-y += strfilter.o
32libperf-y += top.o 32perf-y += top.o
33libperf-y += usage.o 33perf-y += usage.o
34libperf-y += dso.o 34perf-y += dso.o
35libperf-y += symbol.o 35perf-y += symbol.o
36libperf-y += symbol_fprintf.o 36perf-y += symbol_fprintf.o
37libperf-y += color.o 37perf-y += color.o
38libperf-y += metricgroup.o 38perf-y += color_config.o
39libperf-y += header.o 39perf-y += metricgroup.o
40libperf-y += callchain.o 40perf-y += header.o
41libperf-y += values.o 41perf-y += callchain.o
42libperf-y += debug.o 42perf-y += values.o
43libperf-y += machine.o 43perf-y += debug.o
44libperf-y += map.o 44perf-y += machine.o
45libperf-y += pstack.o 45perf-y += map.o
46libperf-y += session.o 46perf-y += pstack.o
47libperf-$(CONFIG_TRACE) += syscalltbl.o 47perf-y += session.o
48libperf-y += ordered-events.o 48perf-y += sample-raw.o
49libperf-y += namespaces.o 49perf-y += s390-sample-raw.o
50libperf-y += comm.o 50perf-$(CONFIG_TRACE) += syscalltbl.o
51libperf-y += thread.o 51perf-y += ordered-events.o
52libperf-y += thread_map.o 52perf-y += namespaces.o
53libperf-y += trace-event-parse.o 53perf-y += comm.o
54libperf-y += parse-events-flex.o 54perf-y += thread.o
55libperf-y += parse-events-bison.o 55perf-y += thread_map.o
56libperf-y += pmu.o 56perf-y += trace-event-parse.o
57libperf-y += pmu-flex.o 57perf-y += parse-events-flex.o
58libperf-y += pmu-bison.o 58perf-y += parse-events-bison.o
59libperf-y += trace-event-read.o 59perf-y += pmu.o
60libperf-y += trace-event-info.o 60perf-y += pmu-flex.o
61libperf-y += trace-event-scripting.o 61perf-y += pmu-bison.o
62libperf-y += trace-event.o 62perf-y += trace-event-read.o
63libperf-y += svghelper.o 63perf-y += trace-event-info.o
64libperf-y += sort.o 64perf-y += trace-event-scripting.o
65libperf-y += hist.o 65perf-y += trace-event.o
66libperf-y += util.o 66perf-y += svghelper.o
67libperf-y += xyarray.o 67perf-y += sort.o
68libperf-y += cpumap.o 68perf-y += hist.o
69libperf-y += cgroup.o 69perf-y += util.o
70libperf-y += target.o 70perf-y += xyarray.o
71libperf-y += rblist.o 71perf-y += cpumap.o
72libperf-y += intlist.o 72perf-y += cputopo.o
73libperf-y += vdso.o 73perf-y += cgroup.o
74libperf-y += counts.o 74perf-y += target.o
75libperf-y += stat.o 75perf-y += rblist.o
76libperf-y += stat-shadow.o 76perf-y += intlist.o
77libperf-y += stat-display.o 77perf-y += vdso.o
78libperf-y += record.o 78perf-y += counts.o
79libperf-y += srcline.o 79perf-y += stat.o
80libperf-y += srccode.o 80perf-y += stat-shadow.o
81libperf-y += data.o 81perf-y += stat-display.o
82libperf-y += tsc.o 82perf-y += record.o
83libperf-y += cloexec.o 83perf-y += srcline.o
84libperf-y += call-path.o 84perf-y += srccode.o
85libperf-y += rwsem.o 85perf-y += data.o
86libperf-y += thread-stack.o 86perf-y += tsc.o
87libperf-$(CONFIG_AUXTRACE) += auxtrace.o 87perf-y += cloexec.o
88libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ 88perf-y += call-path.o
89libperf-$(CONFIG_AUXTRACE) += intel-pt.o 89perf-y += rwsem.o
90libperf-$(CONFIG_AUXTRACE) += intel-bts.o 90perf-y += thread-stack.o
91libperf-$(CONFIG_AUXTRACE) += arm-spe.o 91perf-$(CONFIG_AUXTRACE) += auxtrace.o
92libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o 92perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
93libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o 93perf-$(CONFIG_AUXTRACE) += intel-pt.o
94perf-$(CONFIG_AUXTRACE) += intel-bts.o
95perf-$(CONFIG_AUXTRACE) += arm-spe.o
96perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
97perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
94 98
95ifdef CONFIG_LIBOPENCSD 99ifdef CONFIG_LIBOPENCSD
96libperf-$(CONFIG_AUXTRACE) += cs-etm.o 100perf-$(CONFIG_AUXTRACE) += cs-etm.o
97libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ 101perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
98endif 102endif
99 103
100libperf-y += parse-branch-options.o 104perf-y += parse-branch-options.o
101libperf-y += dump-insn.o 105perf-y += dump-insn.o
102libperf-y += parse-regs-options.o 106perf-y += parse-regs-options.o
103libperf-y += term.o 107perf-y += term.o
104libperf-y += help-unknown-cmd.o 108perf-y += help-unknown-cmd.o
105libperf-y += mem-events.o 109perf-y += mem-events.o
106libperf-y += vsprintf.o 110perf-y += vsprintf.o
107libperf-y += drv_configs.o 111perf-y += units.o
108libperf-y += units.o 112perf-y += time-utils.o
109libperf-y += time-utils.o 113perf-y += expr-bison.o
110libperf-y += expr-bison.o 114perf-y += branch.o
111libperf-y += branch.o 115perf-y += mem2node.o
112libperf-y += mem2node.o 116
113 117perf-$(CONFIG_LIBBPF) += bpf-loader.o
114libperf-$(CONFIG_LIBBPF) += bpf-loader.o 118perf-$(CONFIG_LIBBPF) += bpf_map.o
115libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o 119perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
116libperf-$(CONFIG_LIBELF) += symbol-elf.o 120perf-$(CONFIG_LIBELF) += symbol-elf.o
117libperf-$(CONFIG_LIBELF) += probe-file.o 121perf-$(CONFIG_LIBELF) += probe-file.o
118libperf-$(CONFIG_LIBELF) += probe-event.o 122perf-$(CONFIG_LIBELF) += probe-event.o
119 123
120ifndef CONFIG_LIBELF 124ifndef CONFIG_LIBELF
121libperf-y += symbol-minimal.o 125perf-y += symbol-minimal.o
122endif 126endif
123 127
124ifndef CONFIG_SETNS 128ifndef CONFIG_SETNS
125libperf-y += setns.o 129perf-y += setns.o
126endif 130endif
127 131
128libperf-$(CONFIG_DWARF) += probe-finder.o 132perf-$(CONFIG_DWARF) += probe-finder.o
129libperf-$(CONFIG_DWARF) += dwarf-aux.o 133perf-$(CONFIG_DWARF) += dwarf-aux.o
130libperf-$(CONFIG_DWARF) += dwarf-regs.o 134perf-$(CONFIG_DWARF) += dwarf-regs.o
131 135
132libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o 136perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
133libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o 137perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
134libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o 138perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
135libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o 139perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
136libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o 140perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
137 141
138libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o 142perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
139 143
140libperf-y += scripting-engines/ 144perf-y += scripting-engines/
141 145
142libperf-$(CONFIG_ZLIB) += zlib.o 146perf-$(CONFIG_ZLIB) += zlib.o
143libperf-$(CONFIG_LZMA) += lzma.o 147perf-$(CONFIG_LZMA) += lzma.o
144libperf-y += demangle-java.o 148perf-y += demangle-java.o
145libperf-y += demangle-rust.o 149perf-y += demangle-rust.o
146 150
147ifdef CONFIG_JITDUMP 151ifdef CONFIG_JITDUMP
148libperf-$(CONFIG_LIBELF) += jitdump.o 152perf-$(CONFIG_LIBELF) += jitdump.o
149libperf-$(CONFIG_LIBELF) += genelf.o 153perf-$(CONFIG_LIBELF) += genelf.o
150libperf-$(CONFIG_DWARF) += genelf_debug.o 154perf-$(CONFIG_DWARF) += genelf_debug.o
151endif 155endif
152 156
153libperf-y += perf-hooks.o 157perf-y += perf-hooks.o
154 158
155libperf-$(CONFIG_CXX) += c++/ 159perf-$(CONFIG_LIBBPF) += bpf-event.o
160
161perf-$(CONFIG_CXX) += c++/
156 162
157CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" 163CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
158CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" 164CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 70de8f6b3aee..11a8a447a3af 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -9,6 +9,7 @@
9 9
10#include <errno.h> 10#include <errno.h>
11#include <inttypes.h> 11#include <inttypes.h>
12#include <libgen.h>
12#include "util.h" 13#include "util.h"
13#include "ui/ui.h" 14#include "ui/ui.h"
14#include "sort.h" 15#include "sort.h"
@@ -16,6 +17,7 @@
16#include "color.h" 17#include "color.h"
17#include "config.h" 18#include "config.h"
18#include "cache.h" 19#include "cache.h"
20#include "map.h"
19#include "symbol.h" 21#include "symbol.h"
20#include "units.h" 22#include "units.h"
21#include "debug.h" 23#include "debug.h"
@@ -1889,6 +1891,7 @@ int symbol__annotate(struct symbol *sym, struct map *map,
1889 struct annotation_options *options, 1891 struct annotation_options *options,
1890 struct arch **parch) 1892 struct arch **parch)
1891{ 1893{
1894 struct annotation *notes = symbol__annotation(sym);
1892 struct annotate_args args = { 1895 struct annotate_args args = {
1893 .privsize = privsize, 1896 .privsize = privsize,
1894 .evsel = evsel, 1897 .evsel = evsel,
@@ -1919,6 +1922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map,
1919 1922
1920 args.ms.map = map; 1923 args.ms.map = map;
1921 args.ms.sym = sym; 1924 args.ms.sym = sym;
1925 notes->start = map__rip_2objdump(map, sym->start);
1922 1926
1923 return symbol__disassemble(sym, &args); 1927 return symbol__disassemble(sym, &args);
1924} 1928}
@@ -2794,8 +2798,6 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev
2794 2798
2795 symbol__calc_percent(sym, evsel); 2799 symbol__calc_percent(sym, evsel);
2796 2800
2797 notes->start = map__rip_2objdump(map, sym->start);
2798
2799 annotation__set_offsets(notes, size); 2801 annotation__set_offsets(notes, size);
2800 annotation__mark_jump_targets(notes, sym); 2802 annotation__mark_jump_targets(notes, sym);
2801 annotation__compute_ipc(notes, size); 2803 annotation__compute_ipc(notes, size);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index fb6463730ba4..95053cab41fe 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -4,16 +4,24 @@
4 4
5#include <stdbool.h> 5#include <stdbool.h>
6#include <stdint.h> 6#include <stdint.h>
7#include <stdio.h>
7#include <linux/types.h> 8#include <linux/types.h>
8#include "symbol.h"
9#include "hist.h"
10#include "sort.h"
11#include <linux/list.h> 9#include <linux/list.h>
12#include <linux/rbtree.h> 10#include <linux/rbtree.h>
13#include <pthread.h> 11#include <pthread.h>
14#include <asm/bug.h> 12#include <asm/bug.h>
13#include "symbol_conf.h"
15 14
15struct hist_browser_timer;
16struct hist_entry;
16struct ins_ops; 17struct ins_ops;
18struct map;
19struct map_symbol;
20struct addr_map_symbol;
21struct option;
22struct perf_sample;
23struct perf_evsel;
24struct symbol;
17 25
18struct ins { 26struct ins {
19 const char *name; 27 const char *name;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index f69961c4a4f3..267e54df511b 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -27,6 +27,7 @@
27#include <linux/bitops.h> 27#include <linux/bitops.h>
28#include <linux/log2.h> 28#include <linux/log2.h>
29#include <linux/string.h> 29#include <linux/string.h>
30#include <linux/time64.h>
30 31
31#include <sys/param.h> 32#include <sys/param.h>
32#include <stdlib.h> 33#include <stdlib.h>
@@ -41,6 +42,7 @@
41#include "pmu.h" 42#include "pmu.h"
42#include "evsel.h" 43#include "evsel.h"
43#include "cpumap.h" 44#include "cpumap.h"
45#include "symbol.h"
44#include "thread_map.h" 46#include "thread_map.h"
45#include "asm/bug.h" 47#include "asm/bug.h"
46#include "auxtrace.h" 48#include "auxtrace.h"
@@ -857,7 +859,7 @@ void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
857 859
858void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, 860void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
859 int code, int cpu, pid_t pid, pid_t tid, u64 ip, 861 int code, int cpu, pid_t pid, pid_t tid, u64 ip,
860 const char *msg) 862 const char *msg, u64 timestamp)
861{ 863{
862 size_t size; 864 size_t size;
863 865
@@ -869,7 +871,9 @@ void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
869 auxtrace_error->cpu = cpu; 871 auxtrace_error->cpu = cpu;
870 auxtrace_error->pid = pid; 872 auxtrace_error->pid = pid;
871 auxtrace_error->tid = tid; 873 auxtrace_error->tid = tid;
874 auxtrace_error->fmt = 1;
872 auxtrace_error->ip = ip; 875 auxtrace_error->ip = ip;
876 auxtrace_error->time = timestamp;
873 strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); 877 strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
874 878
875 size = (void *)auxtrace_error->msg - (void *)auxtrace_error + 879 size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
@@ -1159,12 +1163,27 @@ static const char *auxtrace_error_name(int type)
1159size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) 1163size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
1160{ 1164{
1161 struct auxtrace_error_event *e = &event->auxtrace_error; 1165 struct auxtrace_error_event *e = &event->auxtrace_error;
1166 unsigned long long nsecs = e->time;
1167 const char *msg = e->msg;
1162 int ret; 1168 int ret;
1163 1169
1164 ret = fprintf(fp, " %s error type %u", 1170 ret = fprintf(fp, " %s error type %u",
1165 auxtrace_error_name(e->type), e->type); 1171 auxtrace_error_name(e->type), e->type);
1172
1173 if (e->fmt && nsecs) {
1174 unsigned long secs = nsecs / NSEC_PER_SEC;
1175
1176 nsecs -= secs * NSEC_PER_SEC;
1177 ret += fprintf(fp, " time %lu.%09llu", secs, nsecs);
1178 } else {
1179 ret += fprintf(fp, " time 0");
1180 }
1181
1182 if (!e->fmt)
1183 msg = (const char *)&e->time;
1184
1166 ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", 1185 ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
1167 e->cpu, e->pid, e->tid, e->ip, e->code, e->msg); 1186 e->cpu, e->pid, e->tid, e->ip, e->code, msg);
1168 return ret; 1187 return ret;
1169} 1188}
1170 1189
@@ -1278,9 +1297,9 @@ static int __auxtrace_mmap__read(struct perf_mmap *map,
1278 } 1297 }
1279 1298
1280 /* padding must be written by fn() e.g. record__process_auxtrace() */ 1299 /* padding must be written by fn() e.g. record__process_auxtrace() */
1281 padding = size & 7; 1300 padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1);
1282 if (padding) 1301 if (padding)
1283 padding = 8 - padding; 1302 padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding;
1284 1303
1285 memset(&ev, 0, sizeof(ev)); 1304 memset(&ev, 0, sizeof(ev));
1286 ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; 1305 ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 8e50f96d4b23..c69bcd9a3091 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -40,6 +40,9 @@ struct record_opts;
40struct auxtrace_info_event; 40struct auxtrace_info_event;
41struct events_stats; 41struct events_stats;
42 42
43/* Auxtrace records must have the same alignment as perf event records */
44#define PERF_AUXTRACE_RECORD_ALIGNMENT 8
45
43enum auxtrace_type { 46enum auxtrace_type {
44 PERF_AUXTRACE_UNKNOWN, 47 PERF_AUXTRACE_UNKNOWN,
45 PERF_AUXTRACE_INTEL_PT, 48 PERF_AUXTRACE_INTEL_PT,
@@ -516,7 +519,7 @@ void auxtrace_index__free(struct list_head *head);
516 519
517void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, 520void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
518 int code, int cpu, pid_t pid, pid_t tid, u64 ip, 521 int code, int cpu, pid_t pid, pid_t tid, u64 ip,
519 const char *msg); 522 const char *msg, u64 timestamp);
520 523
521int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, 524int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
522 struct perf_tool *tool, 525 struct perf_tool *tool,
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
index f1451c987eec..1be432657501 100644
--- a/tools/perf/util/block-range.c
+++ b/tools/perf/util/block-range.c
@@ -1,6 +1,8 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "block-range.h" 2#include "block-range.h"
3#include "annotate.h" 3#include "annotate.h"
4#include <assert.h>
5#include <stdlib.h>
4 6
5struct { 7struct {
6 struct rb_root root; 8 struct rb_root root;
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
index a5ba719d69fb..ec0fb534bf56 100644
--- a/tools/perf/util/block-range.h
+++ b/tools/perf/util/block-range.h
@@ -2,7 +2,11 @@
2#ifndef __PERF_BLOCK_RANGE_H 2#ifndef __PERF_BLOCK_RANGE_H
3#define __PERF_BLOCK_RANGE_H 3#define __PERF_BLOCK_RANGE_H
4 4
5#include "symbol.h" 5#include <stdbool.h>
6#include <linux/rbtree.h>
7#include <linux/types.h>
8
9struct symbol;
6 10
7/* 11/*
8 * struct block_range - non-overlapping parts of basic blocks 12 * struct block_range - non-overlapping parts of basic blocks
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
new file mode 100644
index 000000000000..028c8ec1f62a
--- /dev/null
+++ b/tools/perf/util/bpf-event.c
@@ -0,0 +1,263 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <errno.h>
3#include <stdlib.h>
4#include <bpf/bpf.h>
5#include <bpf/btf.h>
6#include <linux/btf.h>
7#include "bpf-event.h"
8#include "debug.h"
9#include "symbol.h"
10#include "machine.h"
11
12#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
13
14static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
15{
16 int ret = 0;
17 size_t i;
18
19 for (i = 0; i < len; i++)
20 ret += snprintf(buf + ret, size - ret, "%02x", data[i]);
21 return ret;
22}
23
24int machine__process_bpf_event(struct machine *machine __maybe_unused,
25 union perf_event *event,
26 struct perf_sample *sample __maybe_unused)
27{
28 if (dump_trace)
29 perf_event__fprintf_bpf_event(event, stdout);
30 return 0;
31}
32
33/*
34 * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf
35 * program. One PERF_RECORD_BPF_EVENT is generated for the program. And
36 * one PERF_RECORD_KSYMBOL is generated for each sub program.
37 *
38 * Returns:
39 * 0 for success;
40 * -1 for failures;
41 * -2 for lack of kernel support.
42 */
43static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool,
44 perf_event__handler_t process,
45 struct machine *machine,
46 int fd,
47 union perf_event *event,
48 struct record_opts *opts)
49{
50 struct ksymbol_event *ksymbol_event = &event->ksymbol_event;
51 struct bpf_event *bpf_event = &event->bpf_event;
52 u32 sub_prog_cnt, i, func_info_rec_size = 0;
53 u8 (*prog_tags)[BPF_TAG_SIZE] = NULL;
54 struct bpf_prog_info info = { .type = 0, };
55 u32 info_len = sizeof(info);
56 void *func_infos = NULL;
57 u64 *prog_addrs = NULL;
58 struct btf *btf = NULL;
59 u32 *prog_lens = NULL;
60 bool has_btf = false;
61 char errbuf[512];
62 int err = 0;
63
64 /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */
65 err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
66
67 if (err) {
68 pr_debug("%s: failed to get BPF program info: %s, aborting\n",
69 __func__, str_error_r(errno, errbuf, sizeof(errbuf)));
70 return -1;
71 }
72 if (info_len < offsetof(struct bpf_prog_info, prog_tags)) {
73 pr_debug("%s: the kernel is too old, aborting\n", __func__);
74 return -2;
75 }
76
77 /* number of ksyms, func_lengths, and tags should match */
78 sub_prog_cnt = info.nr_jited_ksyms;
79 if (sub_prog_cnt != info.nr_prog_tags ||
80 sub_prog_cnt != info.nr_jited_func_lens)
81 return -1;
82
83 /* check BTF func info support */
84 if (info.btf_id && info.nr_func_info && info.func_info_rec_size) {
85 /* btf func info number should be same as sub_prog_cnt */
86 if (sub_prog_cnt != info.nr_func_info) {
87 pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
88 return -1;
89 }
90 if (btf__get_from_id(info.btf_id, &btf)) {
91 pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id);
92 return -1;
93 }
94 func_info_rec_size = info.func_info_rec_size;
95 func_infos = calloc(sub_prog_cnt, func_info_rec_size);
96 if (!func_infos) {
97 pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__);
98 return -1;
99 }
100 has_btf = true;
101 }
102
103 /*
104 * We need address, length, and tag for each sub program.
105 * Allocate memory and call bpf_obj_get_info_by_fd() again
106 */
107 prog_addrs = calloc(sub_prog_cnt, sizeof(u64));
108 if (!prog_addrs) {
109 pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__);
110 goto out;
111 }
112 prog_lens = calloc(sub_prog_cnt, sizeof(u32));
113 if (!prog_lens) {
114 pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__);
115 goto out;
116 }
117 prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE);
118 if (!prog_tags) {
119 pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__);
120 goto out;
121 }
122
123 memset(&info, 0, sizeof(info));
124 info.nr_jited_ksyms = sub_prog_cnt;
125 info.nr_jited_func_lens = sub_prog_cnt;
126 info.nr_prog_tags = sub_prog_cnt;
127 info.jited_ksyms = ptr_to_u64(prog_addrs);
128 info.jited_func_lens = ptr_to_u64(prog_lens);
129 info.prog_tags = ptr_to_u64(prog_tags);
130 info_len = sizeof(info);
131 if (has_btf) {
132 info.nr_func_info = sub_prog_cnt;
133 info.func_info_rec_size = func_info_rec_size;
134 info.func_info = ptr_to_u64(func_infos);
135 }
136
137 err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
138 if (err) {
139 pr_debug("%s: failed to get BPF program info, aborting\n", __func__);
140 goto out;
141 }
142
143 /* Synthesize PERF_RECORD_KSYMBOL */
144 for (i = 0; i < sub_prog_cnt; i++) {
145 const struct bpf_func_info *finfo;
146 const char *short_name = NULL;
147 const struct btf_type *t;
148 int name_len;
149
150 *ksymbol_event = (struct ksymbol_event){
151 .header = {
152 .type = PERF_RECORD_KSYMBOL,
153 .size = offsetof(struct ksymbol_event, name),
154 },
155 .addr = prog_addrs[i],
156 .len = prog_lens[i],
157 .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
158 .flags = 0,
159 };
160 name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN,
161 "bpf_prog_");
162 name_len += snprintf_hex(ksymbol_event->name + name_len,
163 KSYM_NAME_LEN - name_len,
164 prog_tags[i], BPF_TAG_SIZE);
165 if (has_btf) {
166 finfo = func_infos + i * info.func_info_rec_size;
167 t = btf__type_by_id(btf, finfo->type_id);
168 short_name = btf__name_by_offset(btf, t->name_off);
169 } else if (i == 0 && sub_prog_cnt == 1) {
170 /* no subprog */
171 if (info.name[0])
172 short_name = info.name;
173 } else
174 short_name = "F";
175 if (short_name)
176 name_len += snprintf(ksymbol_event->name + name_len,
177 KSYM_NAME_LEN - name_len,
178 "_%s", short_name);
179
180 ksymbol_event->header.size += PERF_ALIGN(name_len + 1,
181 sizeof(u64));
182
183 memset((void *)event + event->header.size, 0, machine->id_hdr_size);
184 event->header.size += machine->id_hdr_size;
185 err = perf_tool__process_synth_event(tool, event,
186 machine, process);
187 }
188
189 /* Synthesize PERF_RECORD_BPF_EVENT */
190 if (opts->bpf_event) {
191 *bpf_event = (struct bpf_event){
192 .header = {
193 .type = PERF_RECORD_BPF_EVENT,
194 .size = sizeof(struct bpf_event),
195 },
196 .type = PERF_BPF_EVENT_PROG_LOAD,
197 .flags = 0,
198 .id = info.id,
199 };
200 memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE);
201 memset((void *)event + event->header.size, 0, machine->id_hdr_size);
202 event->header.size += machine->id_hdr_size;
203 err = perf_tool__process_synth_event(tool, event,
204 machine, process);
205 }
206
207out:
208 free(prog_tags);
209 free(prog_lens);
210 free(prog_addrs);
211 free(func_infos);
212 free(btf);
213 return err ? -1 : 0;
214}
215
216int perf_event__synthesize_bpf_events(struct perf_tool *tool,
217 perf_event__handler_t process,
218 struct machine *machine,
219 struct record_opts *opts)
220{
221 union perf_event *event;
222 __u32 id = 0;
223 int err;
224 int fd;
225
226 event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN + machine->id_hdr_size);
227 if (!event)
228 return -1;
229 while (true) {
230 err = bpf_prog_get_next_id(id, &id);
231 if (err) {
232 if (errno == ENOENT) {
233 err = 0;
234 break;
235 }
236 pr_debug("%s: can't get next program: %s%s\n",
237 __func__, strerror(errno),
238 errno == EINVAL ? " -- kernel too old?" : "");
239 /* don't report error on old kernel or EPERM */
240 err = (errno == EINVAL || errno == EPERM) ? 0 : -1;
241 break;
242 }
243 fd = bpf_prog_get_fd_by_id(id);
244 if (fd < 0) {
245 pr_debug("%s: failed to get fd for prog_id %u\n",
246 __func__, id);
247 continue;
248 }
249
250 err = perf_event__synthesize_one_bpf_prog(tool, process,
251 machine, fd,
252 event, opts);
253 close(fd);
254 if (err) {
255 /* do not return error for old kernel */
256 if (err == -2)
257 err = 0;
258 break;
259 }
260 }
261 free(event);
262 return err;
263}
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
new file mode 100644
index 000000000000..7890067e1a37
--- /dev/null
+++ b/tools/perf/util/bpf-event.h
@@ -0,0 +1,38 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __PERF_BPF_EVENT_H
3#define __PERF_BPF_EVENT_H
4
5#include <linux/compiler.h>
6#include "event.h"
7
8struct machine;
9union perf_event;
10struct perf_sample;
11struct perf_tool;
12struct record_opts;
13
14#ifdef HAVE_LIBBPF_SUPPORT
15int machine__process_bpf_event(struct machine *machine, union perf_event *event,
16 struct perf_sample *sample);
17
18int perf_event__synthesize_bpf_events(struct perf_tool *tool,
19 perf_event__handler_t process,
20 struct machine *machine,
21 struct record_opts *opts);
22#else
23static inline int machine__process_bpf_event(struct machine *machine __maybe_unused,
24 union perf_event *event __maybe_unused,
25 struct perf_sample *sample __maybe_unused)
26{
27 return 0;
28}
29
30static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused,
31 perf_event__handler_t process __maybe_unused,
32 struct machine *machine __maybe_unused,
33 struct record_opts *opts __maybe_unused)
34{
35 return 0;
36}
37#endif // HAVE_LIBBPF_SUPPORT
38#endif
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 31b7e5a1453b..251d9ea6252f 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -15,6 +15,7 @@
15#include <errno.h> 15#include <errno.h>
16#include "perf.h" 16#include "perf.h"
17#include "debug.h" 17#include "debug.h"
18#include "evlist.h"
18#include "bpf-loader.h" 19#include "bpf-loader.h"
19#include "bpf-prologue.h" 20#include "bpf-prologue.h"
20#include "probe-event.h" 21#include "probe-event.h"
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
index 62d245a90e1d..3f46856e3330 100644
--- a/tools/perf/util/bpf-loader.h
+++ b/tools/perf/util/bpf-loader.h
@@ -8,11 +8,7 @@
8 8
9#include <linux/compiler.h> 9#include <linux/compiler.h>
10#include <linux/err.h> 10#include <linux/err.h>
11#include <string.h>
12#include <bpf/libbpf.h> 11#include <bpf/libbpf.h>
13#include "probe-event.h"
14#include "evlist.h"
15#include "debug.h"
16 12
17enum bpf_loader_errno { 13enum bpf_loader_errno {
18 __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, 14 __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100,
@@ -44,6 +40,7 @@ enum bpf_loader_errno {
44}; 40};
45 41
46struct perf_evsel; 42struct perf_evsel;
43struct perf_evlist;
47struct bpf_object; 44struct bpf_object;
48struct parse_events_term; 45struct parse_events_term;
49#define PERF_BPF_PROBE_GROUP "perf_bpf_probe" 46#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
@@ -87,6 +84,8 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha
87int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); 84int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size);
88#else 85#else
89#include <errno.h> 86#include <errno.h>
87#include <string.h>
88#include "debug.h"
90 89
91static inline struct bpf_object * 90static inline struct bpf_object *
92bpf__prepare_load(const char *filename __maybe_unused, 91bpf__prepare_load(const char *filename __maybe_unused,
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c
new file mode 100644
index 000000000000..eb853ca67cf4
--- /dev/null
+++ b/tools/perf/util/bpf_map.c
@@ -0,0 +1,72 @@
1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3#include "util/bpf_map.h"
4#include <bpf/bpf.h>
5#include <bpf/libbpf.h>
6#include <linux/err.h>
7#include <linux/kernel.h>
8#include <stdbool.h>
9#include <stdlib.h>
10#include <unistd.h>
11
12static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def)
13{
14 return def->type == BPF_MAP_TYPE_PERCPU_HASH ||
15 def->type == BPF_MAP_TYPE_PERCPU_ARRAY ||
16 def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
17 def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
18}
19
20static void *bpf_map_def__alloc_value(const struct bpf_map_def *def)
21{
22 if (bpf_map_def__is_per_cpu(def))
23 return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF));
24
25 return malloc(def->value_size);
26}
27
28int bpf_map__fprintf(struct bpf_map *map, FILE *fp)
29{
30 const struct bpf_map_def *def = bpf_map__def(map);
31 void *prev_key = NULL, *key, *value;
32 int fd = bpf_map__fd(map), err;
33 int printed = 0;
34
35 if (fd < 0)
36 return fd;
37
38 if (IS_ERR(def))
39 return PTR_ERR(def);
40
41 err = -ENOMEM;
42 key = malloc(def->key_size);
43 if (key == NULL)
44 goto out;
45
46 value = bpf_map_def__alloc_value(def);
47 if (value == NULL)
48 goto out_free_key;
49
50 while ((err = bpf_map_get_next_key(fd, prev_key, key) == 0)) {
51 int intkey = *(int *)key;
52
53 if (!bpf_map_lookup_elem(fd, key, value)) {
54 bool boolval = *(bool *)value;
55 if (boolval)
56 printed += fprintf(fp, "[%d] = %d,\n", intkey, boolval);
57 } else {
58 printed += fprintf(fp, "[%d] = ERROR,\n", intkey);
59 }
60
61 prev_key = key;
62 }
63
64 if (err == ENOENT)
65 err = printed;
66
67 free(value);
68out_free_key:
69 free(key);
70out:
71 return err;
72}
diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h
new file mode 100644
index 000000000000..d6abd5e47af8
--- /dev/null
+++ b/tools/perf/util/bpf_map.h
@@ -0,0 +1,22 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
#ifndef __PERF_BPF_MAP_H
#define __PERF_BPF_MAP_H 1

#include <stdio.h>
#include <linux/compiler.h>
struct bpf_map;

#ifdef HAVE_LIBBPF_SUPPORT

/*
 * Dump the contents of a BPF map to fp.  Returns a negative error
 * code on failure, >= 0 on success.
 */
int bpf_map__fprintf(struct bpf_map *map, FILE *fp);

#else

/* Stub used when perf is built without libbpf: prints nothing. */
static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused)
{
	return 0;
}

#endif // HAVE_LIBBPF_SUPPORT

#endif // __PERF_BPF_MAP_H
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 1e3c7c5cdc63..64f96b79f1d7 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -1,8 +1,31 @@
1#ifndef _PERF_BRANCH_H 1#ifndef _PERF_BRANCH_H
2#define _PERF_BRANCH_H 1 2#define _PERF_BRANCH_H 1
3 3
4#include <stdio.h>
4#include <stdint.h> 5#include <stdint.h>
5#include "../perf.h" 6#include <linux/perf_event.h>
7#include <linux/types.h>
8
9struct branch_flags {
10 u64 mispred:1;
11 u64 predicted:1;
12 u64 in_tx:1;
13 u64 abort:1;
14 u64 cycles:16;
15 u64 type:4;
16 u64 reserved:40;
17};
18
19struct branch_entry {
20 u64 from;
21 u64 to;
22 struct branch_flags flags;
23};
24
25struct branch_stack {
26 u64 nr;
27 struct branch_entry entries[0];
28};
6 29
7struct branch_type_stat { 30struct branch_type_stat {
8 bool branch_to; 31 bool branch_to;
@@ -13,8 +36,6 @@ struct branch_type_stat {
13 u64 cross_2m; 36 u64 cross_2m;
14}; 37};
15 38
16struct branch_flags;
17
18void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, 39void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
19 u64 from, u64 to); 40 u64 from, u64 to);
20 41
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 04b1d53e4bf9..bff0d17920ed 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -15,6 +15,8 @@
15#include <sys/types.h> 15#include <sys/types.h>
16#include "build-id.h" 16#include "build-id.h"
17#include "event.h" 17#include "event.h"
18#include "namespaces.h"
19#include "map.h"
18#include "symbol.h" 20#include "symbol.h"
19#include "thread.h" 21#include "thread.h"
20#include <linux/kernel.h> 22#include <linux/kernel.h>
@@ -363,7 +365,8 @@ int perf_session__write_buildid_table(struct perf_session *session,
363 if (err) 365 if (err)
364 return err; 366 return err;
365 367
366 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { 368 for (nd = rb_first_cached(&session->machines.guests); nd;
369 nd = rb_next(nd)) {
367 struct machine *pos = rb_entry(nd, struct machine, rb_node); 370 struct machine *pos = rb_entry(nd, struct machine, rb_node);
368 err = machine__write_buildid_table(pos, fd); 371 err = machine__write_buildid_table(pos, fd);
369 if (err) 372 if (err)
@@ -396,7 +399,8 @@ int dsos__hit_all(struct perf_session *session)
396 if (err) 399 if (err)
397 return err; 400 return err;
398 401
399 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { 402 for (nd = rb_first_cached(&session->machines.guests); nd;
403 nd = rb_next(nd)) {
400 struct machine *pos = rb_entry(nd, struct machine, rb_node); 404 struct machine *pos = rb_entry(nd, struct machine, rb_node);
401 405
402 err = machine__hit_all_dsos(pos); 406 err = machine__hit_all_dsos(pos);
@@ -849,7 +853,8 @@ int perf_session__cache_build_ids(struct perf_session *session)
849 853
850 ret = machine__cache_build_ids(&session->machines.host); 854 ret = machine__cache_build_ids(&session->machines.host);
851 855
852 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { 856 for (nd = rb_first_cached(&session->machines.guests); nd;
857 nd = rb_next(nd)) {
853 struct machine *pos = rb_entry(nd, struct machine, rb_node); 858 struct machine *pos = rb_entry(nd, struct machine, rb_node);
854 ret |= machine__cache_build_ids(pos); 859 ret |= machine__cache_build_ids(pos);
855 } 860 }
@@ -866,7 +871,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
866 struct rb_node *nd; 871 struct rb_node *nd;
867 bool ret = machine__read_build_ids(&session->machines.host, with_hits); 872 bool ret = machine__read_build_ids(&session->machines.host, with_hits);
868 873
869 for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { 874 for (nd = rb_first_cached(&session->machines.guests); nd;
875 nd = rb_next(nd)) {
870 struct machine *pos = rb_entry(nd, struct machine, rb_node); 876 struct machine *pos = rb_entry(nd, struct machine, rb_node);
871 ret |= machine__read_build_ids(pos, with_hits); 877 ret |= machine__read_build_ids(pos, with_hits);
872 } 878 }
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
index f0c565164a97..93668f38f1ed 100644
--- a/tools/perf/util/build-id.h
+++ b/tools/perf/util/build-id.h
@@ -6,9 +6,10 @@
6#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) 6#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
7 7
8#include "tool.h" 8#include "tool.h"
9#include "namespaces.h"
10#include <linux/types.h> 9#include <linux/types.h>
11 10
11struct nsinfo;
12
12extern struct perf_tool build_id__mark_dso_hit_ops; 13extern struct perf_tool build_id__mark_dso_hit_ops;
13struct dso; 14struct dso;
14struct feat_fd; 15struct feat_fd;
diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build
index 988fef1b11d7..613ecfd76527 100644
--- a/tools/perf/util/c++/Build
+++ b/tools/perf/util/c++/Build
@@ -1,2 +1,2 @@
1libperf-$(CONFIG_CLANGLLVM) += clang.o 1perf-$(CONFIG_CLANGLLVM) += clang.o
2libperf-$(CONFIG_CLANGLLVM) += clang-test.o 2perf-$(CONFIG_CLANGLLVM) += clang-test.o
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index dc2212e12184..abb608b09269 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -23,8 +23,10 @@
23#include "util.h" 23#include "util.h"
24#include "sort.h" 24#include "sort.h"
25#include "machine.h" 25#include "machine.h"
26#include "map.h"
26#include "callchain.h" 27#include "callchain.h"
27#include "branch.h" 28#include "branch.h"
29#include "symbol.h"
28 30
29#define CALLCHAIN_PARAM_DEFAULT \ 31#define CALLCHAIN_PARAM_DEFAULT \
30 .mode = CHAIN_GRAPH_ABS, \ 32 .mode = CHAIN_GRAPH_ABS, \
@@ -1577,3 +1579,18 @@ int callchain_cursor__copy(struct callchain_cursor *dst,
1577 1579
1578 return rc; 1580 return rc;
1579} 1581}
1582
/*
 * Initialize a cursor before adding entries inside, but keep
 * the previously allocated entries as a cache: nr is rewound to 0
 * and the node list is retained for reuse.
 */
void callchain_cursor_reset(struct callchain_cursor *cursor)
{
	struct callchain_cursor_node *node;

	cursor->nr = 0;
	cursor->last = &cursor->first;

	/* Drop the map reference held by each cached node. */
	for (node = cursor->first; node != NULL; node = node->next)
		map__zput(node->map);
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 99d38ac019b8..80e056a3d882 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -2,14 +2,14 @@
2#ifndef __PERF_CALLCHAIN_H 2#ifndef __PERF_CALLCHAIN_H
3#define __PERF_CALLCHAIN_H 3#define __PERF_CALLCHAIN_H
4 4
5#include "../perf.h"
6#include <linux/list.h> 5#include <linux/list.h>
7#include <linux/rbtree.h> 6#include <linux/rbtree.h>
8#include "event.h" 7#include "event.h"
9#include "map.h" 8#include "map_symbol.h"
10#include "symbol.h"
11#include "branch.h" 9#include "branch.h"
12 10
11struct map;
12
13#define HELP_PAD "\t\t\t\t" 13#define HELP_PAD "\t\t\t\t"
14 14
15#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" 15#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n"
@@ -188,20 +188,7 @@ int callchain_append(struct callchain_root *root,
188int callchain_merge(struct callchain_cursor *cursor, 188int callchain_merge(struct callchain_cursor *cursor,
189 struct callchain_root *dst, struct callchain_root *src); 189 struct callchain_root *dst, struct callchain_root *src);
190 190
191/* 191void callchain_cursor_reset(struct callchain_cursor *cursor);
192 * Initialize a cursor before adding entries inside, but keep
193 * the previously allocated entries as a cache.
194 */
195static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
196{
197 struct callchain_cursor_node *node;
198
199 cursor->nr = 0;
200 cursor->last = &cursor->first;
201
202 for (node = cursor->first; node != NULL; node = node->next)
203 map__zput(node->map);
204}
205 192
206int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, 193int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
207 struct map *map, struct symbol *sym, 194 struct map *map, struct symbol *sym,
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
index 39e628b8938e..39b8c4ec4e2e 100644
--- a/tools/perf/util/color.c
+++ b/tools/perf/util/color.c
@@ -1,7 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include "cache.h" 3#include "cache.h"
4#include "config.h"
5#include <stdlib.h> 4#include <stdlib.h>
6#include <stdio.h> 5#include <stdio.h>
7#include "color.h" 6#include "color.h"
@@ -10,44 +9,6 @@
10 9
11int perf_use_color_default = -1; 10int perf_use_color_default = -1;
12 11
13int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
14{
15 if (value) {
16 if (!strcasecmp(value, "never"))
17 return 0;
18 if (!strcasecmp(value, "always"))
19 return 1;
20 if (!strcasecmp(value, "auto"))
21 goto auto_color;
22 }
23
24 /* Missing or explicit false to turn off colorization */
25 if (!perf_config_bool(var, value))
26 return 0;
27
28 /* any normal truth value defaults to 'auto' */
29 auto_color:
30 if (stdout_is_tty < 0)
31 stdout_is_tty = isatty(1);
32 if (stdout_is_tty || pager_in_use()) {
33 char *term = getenv("TERM");
34 if (term && strcmp(term, "dumb"))
35 return 1;
36 }
37 return 0;
38}
39
40int perf_color_default_config(const char *var, const char *value,
41 void *cb __maybe_unused)
42{
43 if (!strcmp(var, "color.ui")) {
44 perf_use_color_default = perf_config_colorbool(var, value, -1);
45 return 0;
46 }
47
48 return 0;
49}
50
51static int __color_vsnprintf(char *bf, size_t size, const char *color, 12static int __color_vsnprintf(char *bf, size_t size, const char *color,
52 const char *fmt, va_list args, const char *trail) 13 const char *fmt, va_list args, const char *trail)
53{ 14{
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
index 22777b1812ee..01f7bed21c9b 100644
--- a/tools/perf/util/color.h
+++ b/tools/perf/util/color.h
@@ -3,6 +3,7 @@
3#define __PERF_COLOR_H 3#define __PERF_COLOR_H
4 4
5#include <stdio.h> 5#include <stdio.h>
6#include <stdarg.h>
6 7
7/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ 8/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
8#define COLOR_MAXLEN 24 9#define COLOR_MAXLEN 24
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c
new file mode 100644
index 000000000000..817dc56e7e95
--- /dev/null
+++ b/tools/perf/util/color_config.c
@@ -0,0 +1,47 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h>
3#include "cache.h"
4#include "config.h"
5#include <stdlib.h>
6#include <stdio.h>
7#include "color.h"
8#include <math.h>
9#include <unistd.h>
10
/*
 * Resolve a color config variable to 0 (off) or 1 (on).
 *
 * "never" forces off and "always" forces on.  "auto" - or any other
 * value that perf_config_bool() considers true - enables color only
 * when output goes to a tty (or through the pager) whose $TERM is
 * not "dumb".  stdout_is_tty < 0 means "ask isatty(1)".
 */
int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
{
	int is_auto = 0;

	if (value) {
		if (strcasecmp(value, "never") == 0)
			return 0;
		if (strcasecmp(value, "always") == 0)
			return 1;
		if (strcasecmp(value, "auto") == 0)
			is_auto = 1;
	}

	/* Missing or explicitly false values turn colorization off. */
	if (!is_auto && !perf_config_bool(var, value))
		return 0;

	/* Any normal truth value defaults to 'auto'. */
	if (stdout_is_tty < 0)
		stdout_is_tty = isatty(1);

	if (stdout_is_tty || pager_in_use()) {
		const char *term = getenv("TERM");

		if (term && strcmp(term, "dumb") != 0)
			return 1;
	}

	return 0;
}
37
38int perf_color_default_config(const char *var, const char *value,
39 void *cb __maybe_unused)
40{
41 if (!strcmp(var, "color.ui")) {
42 perf_use_color_default = perf_config_colorbool(var, value, -1);
43 return 0;
44 }
45
46 return 0;
47}
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index 31279a7bd919..1066de92af12 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -6,6 +6,7 @@
6#include <stdio.h> 6#include <stdio.h>
7#include <string.h> 7#include <string.h>
8#include <linux/refcount.h> 8#include <linux/refcount.h>
9#include <linux/rbtree.h>
9#include "rwsem.h" 10#include "rwsem.h"
10 11
11struct comm_str { 12struct comm_str {
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
index 3e5c438fe85e..f35d8fbfa2dd 100644
--- a/tools/perf/util/comm.h
+++ b/tools/perf/util/comm.h
@@ -2,9 +2,9 @@
2#ifndef __PERF_COMM_H 2#ifndef __PERF_COMM_H
3#define __PERF_COMM_H 3#define __PERF_COMM_H
4 4
5#include "../perf.h"
6#include <linux/rbtree.h>
7#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/types.h>
7#include <stdbool.h>
8 8
9struct comm_str; 9struct comm_str;
10 10
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 1ea8f898f1a1..fa092511c52b 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -13,6 +13,7 @@
13#include <sys/param.h> 13#include <sys/param.h>
14#include "util.h" 14#include "util.h"
15#include "cache.h" 15#include "cache.h"
16#include "callchain.h"
16#include <subcmd/exec-cmd.h> 17#include <subcmd/exec-cmd.h>
17#include "util/event.h" /* proc_map_timeout */ 18#include "util/event.h" /* proc_map_timeout */
18#include "util/hist.h" /* perf_hist_config */ 19#include "util/hist.h" /* perf_hist_config */
diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h
new file mode 100644
index 000000000000..8cf4e40d322a
--- /dev/null
+++ b/tools/perf/util/cpu-set-sched.h
@@ -0,0 +1,50 @@
// SPDX-License-Identifier: LGPL-2.1
// Definitions taken from glibc for use with older systems, same licensing.
#ifndef _CPU_SET_SCHED_PERF_H
#define _CPU_SET_SCHED_PERF_H

#include <features.h>
#include <sched.h>

/*
 * Fallback CPU_EQUAL(): compare two cpu_set_t masks for equality.
 * Only compiled when <sched.h> does not already provide it.
 */
#ifndef CPU_EQUAL
#ifndef __CPU_EQUAL_S
#if __GNUC_PREREQ (2, 91)
# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
  (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0)
#else
# define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \
  (__extension__							      \
   ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits;			      \
      const __cpu_mask *__arr2 = (cpusetp2)->__bits;			      \
      size_t __imax = (setsize) / sizeof (__cpu_mask);			      \
      size_t __i;							      \
      for (__i = 0; __i < __imax; ++__i)				      \
	if (__arr1[__i] != __arr2[__i])					      \
	  break;							      \
      __i == __imax; }))
#endif
#endif // __CPU_EQUAL_S

#define CPU_EQUAL(cpusetp1, cpusetp2) \
  __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2)
#endif // CPU_EQUAL

/*
 * Fallback CPU_OR(): destset = srcset1 | srcset2, computed one
 * __cpu_mask word at a time.  Only compiled when <sched.h> does not
 * already provide it.
 */
#ifndef CPU_OR
#ifndef __CPU_OP_S
#define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \
  (__extension__							      \
   ({ cpu_set_t *__dest = (destset);					      \
      const __cpu_mask *__arr1 = (srcset1)->__bits;			      \
      const __cpu_mask *__arr2 = (srcset2)->__bits;			      \
      size_t __imax = (setsize) / sizeof (__cpu_mask);			      \
      size_t __i;							      \
      for (__i = 0; __i < __imax; ++__i)				      \
	((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i];    \
      __dest; }))
#endif // __CPU_OP_S

#define CPU_OR(destset, srcset1, srcset2) \
  __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |)
#endif // CPU_OR

#endif // _CPU_SET_SCHED_PERF_H
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 383674f448fc..0b599229bc7e 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -681,7 +681,7 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size)
681 681
682#undef COMMA 682#undef COMMA
683 683
684 pr_debug("cpumask list: %s\n", buf); 684 pr_debug2("cpumask list: %s\n", buf);
685 return ret; 685 return ret;
686} 686}
687 687
@@ -730,3 +730,13 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size)
730 buf[size - 1] = '\0'; 730 buf[size - 1] = '\0';
731 return ptr - buf; 731 return ptr - buf;
732} 732}
733
734const struct cpu_map *cpu_map__online(void) /* thread unsafe */
735{
736 static const struct cpu_map *online = NULL;
737
738 if (!online)
739 online = cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */
740
741 return online;
742}
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index ed8999d1a640..f00ce624b9f7 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -29,6 +29,7 @@ int cpu_map__get_core_id(int cpu);
29int cpu_map__get_core(struct cpu_map *map, int idx, void *data); 29int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
30int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 30int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
31int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); 31int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
32const struct cpu_map *cpu_map__online(void); /* thread unsafe */
32 33
33struct cpu_map *cpu_map__get(struct cpu_map *map); 34struct cpu_map *cpu_map__get(struct cpu_map *map);
34void cpu_map__put(struct cpu_map *map); 35void cpu_map__put(struct cpu_map *map);
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
new file mode 100644
index 000000000000..ece0710249d4
--- /dev/null
+++ b/tools/perf/util/cputopo.c
@@ -0,0 +1,277 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <sys/param.h>
3#include <inttypes.h>
4#include <api/fs/fs.h>
5
6#include "cputopo.h"
7#include "cpumap.h"
8#include "util.h"
9#include "env.h"
10
11
12#define CORE_SIB_FMT \
13 "%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
14#define THRD_SIB_FMT \
15 "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
16#define NODE_ONLINE_FMT \
17 "%s/devices/system/node/online"
18#define NODE_MEMINFO_FMT \
19 "%s/devices/system/node/node%d/meminfo"
20#define NODE_CPULIST_FMT \
21 "%s/devices/system/node/node%d/cpulist"
22
/*
 * Read the core- and thread-sibling lists for one CPU from sysfs and
 * record each string in tp, deduplicated: a sibling list already seen
 * for another CPU is not stored again.  On insertion the getline()
 * buffer's ownership moves into tp (buf is reset to NULL so a fresh
 * buffer is allocated next time).
 *
 * Returns 0 if at least one of the two lists was read, -1 otherwise.
 * Note that a failure in the thread-siblings section after a
 * successful core-siblings read still ends with ret == 0.
 */
static int build_cpu_topology(struct cpu_topology *tp, int cpu)
{
	FILE *fp;
	char filename[MAXPATHLEN];
	char *buf = NULL, *p;
	size_t len = 0;
	ssize_t sret;
	u32 i = 0;
	int ret = -1;

	scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT,
		  sysfs__mountpoint(), cpu);
	fp = fopen(filename, "r");
	if (!fp)
		goto try_threads;

	sret = getline(&buf, &len, fp);
	fclose(fp);
	if (sret <= 0)
		goto try_threads;

	/* Strip the trailing newline before comparing/storing. */
	p = strchr(buf, '\n');
	if (p)
		*p = '\0';

	for (i = 0; i < tp->core_sib; i++) {
		if (!strcmp(buf, tp->core_siblings[i]))
			break;
	}
	if (i == tp->core_sib) {
		/* Not seen before: hand the buffer over to tp. */
		tp->core_siblings[i] = buf;
		tp->core_sib++;
		buf = NULL;
		len = 0;
	}
	ret = 0;

try_threads:
	scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
		  sysfs__mountpoint(), cpu);
	fp = fopen(filename, "r");
	if (!fp)
		goto done;

	if (getline(&buf, &len, fp) <= 0)
		goto done;

	p = strchr(buf, '\n');
	if (p)
		*p = '\0';

	for (i = 0; i < tp->thread_sib; i++) {
		if (!strcmp(buf, tp->thread_siblings[i]))
			break;
	}
	if (i == tp->thread_sib) {
		tp->thread_siblings[i] = buf;
		tp->thread_sib++;
		buf = NULL;
	}
	ret = 0;
done:
	/* fp is NULL here whenever no stream is left open. */
	if (fp)
		fclose(fp);
	free(buf);
	return ret;
}
90
91void cpu_topology__delete(struct cpu_topology *tp)
92{
93 u32 i;
94
95 if (!tp)
96 return;
97
98 for (i = 0 ; i < tp->core_sib; i++)
99 zfree(&tp->core_siblings[i]);
100
101 for (i = 0 ; i < tp->thread_sib; i++)
102 zfree(&tp->thread_siblings[i]);
103
104 free(tp);
105}
106
/*
 * Build the CPU topology by reading sysfs sibling lists for every
 * online CPU.  The struct and its two pointer arrays are carved out
 * of one calloc() allocation (struct, then core_siblings[nr], then
 * thread_siblings[nr]).
 *
 * Returns NULL on failure - including when no online CPU was
 * successfully processed, since ret then stays -1.
 */
struct cpu_topology *cpu_topology__new(void)
{
	struct cpu_topology *tp = NULL;
	void *addr;
	u32 nr, i;
	size_t sz;
	long ncpus;
	int ret = -1;
	struct cpu_map *map;

	ncpus = cpu__max_present_cpu();

	/* build online CPU map */
	map = cpu_map__new(NULL);
	if (map == NULL) {
		pr_debug("failed to get system cpumap\n");
		return NULL;
	}

	nr = (u32)(ncpus & UINT_MAX);

	sz = nr * sizeof(char *);
	addr = calloc(1, sizeof(*tp) + 2 * sz);
	if (!addr)
		goto out_free;

	/* Lay the two sibling arrays out right after the struct.
	 * (void-pointer arithmetic: GNU extension, fine for tools/.) */
	tp = addr;
	addr += sizeof(*tp);
	tp->core_siblings = addr;
	addr += sz;
	tp->thread_siblings = addr;

	for (i = 0; i < nr; i++) {
		if (!cpu_map__has(map, i))
			continue;

		ret = build_cpu_topology(tp, i);
		if (ret < 0)
			break;
	}

out_free:
	cpu_map__put(map);
	if (ret) {
		cpu_topology__delete(tp);
		tp = NULL;
	}
	return tp;
}
156
157static int load_numa_node(struct numa_topology_node *node, int nr)
158{
159 char str[MAXPATHLEN];
160 char field[32];
161 char *buf = NULL, *p;
162 size_t len = 0;
163 int ret = -1;
164 FILE *fp;
165 u64 mem;
166
167 node->node = (u32) nr;
168
169 scnprintf(str, MAXPATHLEN, NODE_MEMINFO_FMT,
170 sysfs__mountpoint(), nr);
171 fp = fopen(str, "r");
172 if (!fp)
173 return -1;
174
175 while (getline(&buf, &len, fp) > 0) {
176 /* skip over invalid lines */
177 if (!strchr(buf, ':'))
178 continue;
179 if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
180 goto err;
181 if (!strcmp(field, "MemTotal:"))
182 node->mem_total = mem;
183 if (!strcmp(field, "MemFree:"))
184 node->mem_free = mem;
185 if (node->mem_total && node->mem_free)
186 break;
187 }
188
189 fclose(fp);
190 fp = NULL;
191
192 scnprintf(str, MAXPATHLEN, NODE_CPULIST_FMT,
193 sysfs__mountpoint(), nr);
194
195 fp = fopen(str, "r");
196 if (!fp)
197 return -1;
198
199 if (getline(&buf, &len, fp) <= 0)
200 goto err;
201
202 p = strchr(buf, '\n');
203 if (p)
204 *p = '\0';
205
206 node->cpus = buf;
207 fclose(fp);
208 return 0;
209
210err:
211 free(buf);
212 if (fp)
213 fclose(fp);
214 return ret;
215}
216
/*
 * Build the NUMA topology from sysfs: parse the online-node list into
 * a cpu_map, then load per-node memory and cpulist info.  The struct
 * and its nodes[] flexible array come from one zalloc().
 *
 * Returns NULL on any failure (partial results are torn down via
 * numa_topology__delete()).
 */
struct numa_topology *numa_topology__new(void)
{
	struct cpu_map *node_map = NULL;
	struct numa_topology *tp = NULL;
	char path[MAXPATHLEN];
	char *buf = NULL;
	size_t len = 0;
	u32 nr, i;
	FILE *fp;
	char *c;

	scnprintf(path, MAXPATHLEN, NODE_ONLINE_FMT,
		  sysfs__mountpoint());

	fp = fopen(path, "r");
	if (!fp)
		return NULL;

	if (getline(&buf, &len, fp) <= 0)
		goto out;

	/* Strip the newline so cpu_map__new() can parse the list. */
	c = strchr(buf, '\n');
	if (c)
		*c = '\0';

	node_map = cpu_map__new(buf);
	if (!node_map)
		goto out;

	nr = (u32) node_map->nr;

	tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
	if (!tp)
		goto out;

	tp->nr = nr;

	for (i = 0; i < nr; i++) {
		if (load_numa_node(&tp->nodes[i], node_map->map[i])) {
			numa_topology__delete(tp);
			tp = NULL;
			break;
		}
	}

out:
	/* fp is always open here; node_map may be NULL (put handles it). */
	free(buf);
	fclose(fp);
	cpu_map__put(node_map);
	return tp;
}
268
269void numa_topology__delete(struct numa_topology *tp)
270{
271 u32 i;
272
273 for (i = 0; i < tp->nr; i++)
274 free(tp->nodes[i].cpus);
275
276 free(tp);
277}
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
new file mode 100644
index 000000000000..47a97e71acdf
--- /dev/null
+++ b/tools/perf/util/cputopo.h
@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __PERF_CPUTOPO_H
#define __PERF_CPUTOPO_H

#include <linux/types.h>
#include "env.h"

/*
 * Deduplicated sysfs topology strings: each entry is a unique
 * "*_siblings_list" line read from
 * /sys/devices/system/cpu/cpuN/topology/.
 */
struct cpu_topology {
	u32 core_sib;		/* valid entries in core_siblings */
	u32 thread_sib;		/* valid entries in thread_siblings */
	char **core_siblings;
	char **thread_siblings;
};

/* Per-node data read from /sys/devices/system/node/nodeN/. */
struct numa_topology_node {
	char *cpus;		/* cpulist string, malloc'ed */
	u32 node;		/* node number */
	u64 mem_total;		/* MemTotal: value from meminfo */
	u64 mem_free;		/* MemFree: value from meminfo */
};

struct numa_topology {
	u32 nr;			/* number of entries in nodes[] */
	struct numa_topology_node nodes[0];
};

struct cpu_topology *cpu_topology__new(void);
void cpu_topology__delete(struct cpu_topology *tp);

struct numa_topology *numa_topology__new(void);
void numa_topology__delete(struct numa_topology *tp);

#endif /* __PERF_CPUTOPO_H */
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
index bc22c39c727f..216cb17a3322 100644
--- a/tools/perf/util/cs-etm-decoder/Build
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -1 +1 @@
libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 8c155575c6c5..ba4c623cd8de 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -290,6 +290,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
290 decoder->packet_buffer[i].instr_count = 0; 290 decoder->packet_buffer[i].instr_count = 0;
291 decoder->packet_buffer[i].last_instr_taken_branch = false; 291 decoder->packet_buffer[i].last_instr_taken_branch = false;
292 decoder->packet_buffer[i].last_instr_size = 0; 292 decoder->packet_buffer[i].last_instr_size = 0;
293 decoder->packet_buffer[i].last_instr_type = 0;
294 decoder->packet_buffer[i].last_instr_subtype = 0;
295 decoder->packet_buffer[i].last_instr_cond = 0;
296 decoder->packet_buffer[i].flags = 0;
297 decoder->packet_buffer[i].exception_number = UINT32_MAX;
298 decoder->packet_buffer[i].trace_chan_id = UINT8_MAX;
293 decoder->packet_buffer[i].cpu = INT_MIN; 299 decoder->packet_buffer[i].cpu = INT_MIN;
294 } 300 }
295} 301}
@@ -300,14 +306,12 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
300 enum cs_etm_sample_type sample_type) 306 enum cs_etm_sample_type sample_type)
301{ 307{
302 u32 et = 0; 308 u32 et = 0;
303 struct int_node *inode = NULL; 309 int cpu;
304 310
305 if (decoder->packet_count >= MAX_BUFFER - 1) 311 if (decoder->packet_count >= MAX_BUFFER - 1)
306 return OCSD_RESP_FATAL_SYS_ERR; 312 return OCSD_RESP_FATAL_SYS_ERR;
307 313
308 /* Search the RB tree for the cpu associated with this traceID */ 314 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
309 inode = intlist__find(traceid_list, trace_chan_id);
310 if (!inode)
311 return OCSD_RESP_FATAL_SYS_ERR; 315 return OCSD_RESP_FATAL_SYS_ERR;
312 316
313 et = decoder->tail; 317 et = decoder->tail;
@@ -317,12 +321,18 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
317 321
318 decoder->packet_buffer[et].sample_type = sample_type; 322 decoder->packet_buffer[et].sample_type = sample_type;
319 decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; 323 decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
320 decoder->packet_buffer[et].cpu = *((int *)inode->priv); 324 decoder->packet_buffer[et].cpu = cpu;
321 decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; 325 decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
322 decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; 326 decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
323 decoder->packet_buffer[et].instr_count = 0; 327 decoder->packet_buffer[et].instr_count = 0;
324 decoder->packet_buffer[et].last_instr_taken_branch = false; 328 decoder->packet_buffer[et].last_instr_taken_branch = false;
325 decoder->packet_buffer[et].last_instr_size = 0; 329 decoder->packet_buffer[et].last_instr_size = 0;
330 decoder->packet_buffer[et].last_instr_type = 0;
331 decoder->packet_buffer[et].last_instr_subtype = 0;
332 decoder->packet_buffer[et].last_instr_cond = 0;
333 decoder->packet_buffer[et].flags = 0;
334 decoder->packet_buffer[et].exception_number = UINT32_MAX;
335 decoder->packet_buffer[et].trace_chan_id = trace_chan_id;
326 336
327 if (decoder->packet_count == MAX_BUFFER - 1) 337 if (decoder->packet_count == MAX_BUFFER - 1)
328 return OCSD_RESP_WAIT; 338 return OCSD_RESP_WAIT;
@@ -366,6 +376,9 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
366 packet->start_addr = elem->st_addr; 376 packet->start_addr = elem->st_addr;
367 packet->end_addr = elem->en_addr; 377 packet->end_addr = elem->en_addr;
368 packet->instr_count = elem->num_instr_range; 378 packet->instr_count = elem->num_instr_range;
379 packet->last_instr_type = elem->last_i_type;
380 packet->last_instr_subtype = elem->last_i_subtype;
381 packet->last_instr_cond = elem->last_instr_cond;
369 382
370 switch (elem->last_i_type) { 383 switch (elem->last_i_type) {
371 case OCSD_INSTR_BR: 384 case OCSD_INSTR_BR:
@@ -395,10 +408,20 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder,
395 408
396static ocsd_datapath_resp_t 409static ocsd_datapath_resp_t
397cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, 410cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder,
411 const ocsd_generic_trace_elem *elem,
398 const uint8_t trace_chan_id) 412 const uint8_t trace_chan_id)
399{ 413{ int ret = 0;
400 return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 414 struct cs_etm_packet *packet;
401 CS_ETM_EXCEPTION); 415
416 ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
417 CS_ETM_EXCEPTION);
418 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
419 return ret;
420
421 packet = &decoder->packet_buffer[decoder->tail];
422 packet->exception_number = elem->exception_number;
423
424 return ret;
402} 425}
403 426
404static ocsd_datapath_resp_t 427static ocsd_datapath_resp_t
@@ -432,7 +455,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
432 trace_chan_id); 455 trace_chan_id);
433 break; 456 break;
434 case OCSD_GEN_TRC_ELEM_EXCEPTION: 457 case OCSD_GEN_TRC_ELEM_EXCEPTION:
435 resp = cs_etm_decoder__buffer_exception(decoder, 458 resp = cs_etm_decoder__buffer_exception(decoder, elem,
436 trace_chan_id); 459 trace_chan_id);
437 break; 460 break;
438 case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: 461 case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index a6407d41598f..3ab11dfa92ae 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -15,13 +15,6 @@
15 15
16struct cs_etm_decoder; 16struct cs_etm_decoder;
17 17
18struct cs_etm_buffer {
19 const unsigned char *buf;
20 size_t len;
21 u64 offset;
22 u64 ref_timestamp;
23};
24
25enum cs_etm_sample_type { 18enum cs_etm_sample_type {
26 CS_ETM_EMPTY, 19 CS_ETM_EMPTY,
27 CS_ETM_RANGE, 20 CS_ETM_RANGE,
@@ -43,8 +36,14 @@ struct cs_etm_packet {
43 u64 start_addr; 36 u64 start_addr;
44 u64 end_addr; 37 u64 end_addr;
45 u32 instr_count; 38 u32 instr_count;
39 u32 last_instr_type;
40 u32 last_instr_subtype;
41 u32 flags;
42 u32 exception_number;
43 u8 last_instr_cond;
46 u8 last_instr_taken_branch; 44 u8 last_instr_taken_branch;
47 u8 last_instr_size; 45 u8 last_instr_size;
46 u8 trace_chan_id;
48 int cpu; 47 int cpu;
49}; 48};
50 49
@@ -99,9 +98,10 @@ enum {
99 CS_ETM_PROTO_PTM, 98 CS_ETM_PROTO_PTM,
100}; 99};
101 100
102enum { 101enum cs_etm_decoder_operation {
103 CS_ETM_OPERATION_PRINT = 1, 102 CS_ETM_OPERATION_PRINT = 1,
104 CS_ETM_OPERATION_DECODE, 103 CS_ETM_OPERATION_DECODE,
104 CS_ETM_OPERATION_MAX,
105}; 105};
106 106
107int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, 107int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 27a374ddf661..110804936fc3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -12,6 +12,7 @@
12#include <linux/log2.h> 12#include <linux/log2.h>
13#include <linux/types.h> 13#include <linux/types.h>
14 14
15#include <opencsd/ocsd_if_types.h>
15#include <stdlib.h> 16#include <stdlib.h>
16 17
17#include "auxtrace.h" 18#include "auxtrace.h"
@@ -24,6 +25,7 @@
24#include "machine.h" 25#include "machine.h"
25#include "map.h" 26#include "map.h"
26#include "perf.h" 27#include "perf.h"
28#include "symbol.h"
27#include "thread.h" 29#include "thread.h"
28#include "thread_map.h" 30#include "thread_map.h"
29#include "thread-stack.h" 31#include "thread-stack.h"
@@ -63,13 +65,10 @@ struct cs_etm_queue {
63 struct thread *thread; 65 struct thread *thread;
64 struct cs_etm_decoder *decoder; 66 struct cs_etm_decoder *decoder;
65 struct auxtrace_buffer *buffer; 67 struct auxtrace_buffer *buffer;
66 const struct cs_etm_state *state;
67 union perf_event *event_buf; 68 union perf_event *event_buf;
68 unsigned int queue_nr; 69 unsigned int queue_nr;
69 pid_t pid, tid; 70 pid_t pid, tid;
70 int cpu; 71 int cpu;
71 u64 time;
72 u64 timestamp;
73 u64 offset; 72 u64 offset;
74 u64 period_instructions; 73 u64 period_instructions;
75 struct branch_stack *last_branch; 74 struct branch_stack *last_branch;
@@ -77,11 +76,13 @@ struct cs_etm_queue {
77 size_t last_branch_pos; 76 size_t last_branch_pos;
78 struct cs_etm_packet *prev_packet; 77 struct cs_etm_packet *prev_packet;
79 struct cs_etm_packet *packet; 78 struct cs_etm_packet *packet;
79 const unsigned char *buf;
80 size_t buf_len, buf_used;
80}; 81};
81 82
82static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 83static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
83static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 84static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
84 pid_t tid, u64 time_); 85 pid_t tid);
85 86
86/* PTMs ETMIDR [11:8] set to b0011 */ 87/* PTMs ETMIDR [11:8] set to b0011 */
87#define ETMIDR_PTM_VERSION 0x00000300 88#define ETMIDR_PTM_VERSION 0x00000300
@@ -96,6 +97,34 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
96 return CS_ETM_PROTO_ETMV3; 97 return CS_ETM_PROTO_ETMV3;
97} 98}
98 99
100static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
101{
102 struct int_node *inode;
103 u64 *metadata;
104
105 inode = intlist__find(traceid_list, trace_chan_id);
106 if (!inode)
107 return -EINVAL;
108
109 metadata = inode->priv;
110 *magic = metadata[CS_ETM_MAGIC];
111 return 0;
112}
113
114int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
115{
116 struct int_node *inode;
117 u64 *metadata;
118
119 inode = intlist__find(traceid_list, trace_chan_id);
120 if (!inode)
121 return -EINVAL;
122
123 metadata = inode->priv;
124 *cpu = (int)metadata[CS_ETM_CPU];
125 return 0;
126}
127
99static void cs_etm__packet_dump(const char *pkt_string) 128static void cs_etm__packet_dump(const char *pkt_string)
100{ 129{
101 const char *color = PERF_COLOR_BLUE; 130 const char *color = PERF_COLOR_BLUE;
@@ -109,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string)
109 fflush(stdout); 138 fflush(stdout);
110} 139}
111 140
141static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
142 struct cs_etm_auxtrace *etm, int idx,
143 u32 etmidr)
144{
145 u64 **metadata = etm->metadata;
146
147 t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
148 t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR];
149 t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR];
150}
151
152static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
153 struct cs_etm_auxtrace *etm, int idx)
154{
155 u64 **metadata = etm->metadata;
156
157 t_params[idx].protocol = CS_ETM_PROTO_ETMV4i;
158 t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0];
159 t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1];
160 t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2];
161 t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8];
162 t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR];
163 t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR];
164}
165
166static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
167 struct cs_etm_auxtrace *etm)
168{
169 int i;
170 u32 etmidr;
171 u64 architecture;
172
173 for (i = 0; i < etm->num_cpu; i++) {
174 architecture = etm->metadata[i][CS_ETM_MAGIC];
175
176 switch (architecture) {
177 case __perf_cs_etmv3_magic:
178 etmidr = etm->metadata[i][CS_ETM_ETMIDR];
179 cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr);
180 break;
181 case __perf_cs_etmv4_magic:
182 cs_etm__set_trace_param_etmv4(t_params, etm, i);
183 break;
184 default:
185 return -EINVAL;
186 }
187 }
188
189 return 0;
190}
191
192static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
193 struct cs_etm_queue *etmq,
194 enum cs_etm_decoder_operation mode)
195{
196 int ret = -EINVAL;
197
198 if (!(mode < CS_ETM_OPERATION_MAX))
199 goto out;
200
201 d_params->packet_printer = cs_etm__packet_dump;
202 d_params->operation = mode;
203 d_params->data = etmq;
204 d_params->formatted = true;
205 d_params->fsyncs = false;
206 d_params->hsyncs = false;
207 d_params->frame_aligned = true;
208
209 ret = 0;
210out:
211 return ret;
212}
213
112static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, 214static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
113 struct auxtrace_buffer *buffer) 215 struct auxtrace_buffer *buffer)
114{ 216{
115 int i, ret; 217 int ret;
116 const char *color = PERF_COLOR_BLUE; 218 const char *color = PERF_COLOR_BLUE;
117 struct cs_etm_decoder_params d_params; 219 struct cs_etm_decoder_params d_params;
118 struct cs_etm_trace_params *t_params; 220 struct cs_etm_trace_params *t_params;
@@ -126,48 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
126 228
127 /* Use metadata to fill in trace parameters for trace decoder */ 229 /* Use metadata to fill in trace parameters for trace decoder */
128 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 230 t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
129 for (i = 0; i < etm->num_cpu; i++) { 231
130 if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 232 if (!t_params)
131 u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; 233 return;
132 234
133 t_params[i].protocol = 235 if (cs_etm__init_trace_params(t_params, etm))
134 cs_etm__get_v7_protocol_version(etmidr); 236 goto out_free;
135 t_params[i].etmv3.reg_ctrl =
136 etm->metadata[i][CS_ETM_ETMCR];
137 t_params[i].etmv3.reg_trc_id =
138 etm->metadata[i][CS_ETM_ETMTRACEIDR];
139 } else if (etm->metadata[i][CS_ETM_MAGIC] ==
140 __perf_cs_etmv4_magic) {
141 t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
142 t_params[i].etmv4.reg_idr0 =
143 etm->metadata[i][CS_ETMV4_TRCIDR0];
144 t_params[i].etmv4.reg_idr1 =
145 etm->metadata[i][CS_ETMV4_TRCIDR1];
146 t_params[i].etmv4.reg_idr2 =
147 etm->metadata[i][CS_ETMV4_TRCIDR2];
148 t_params[i].etmv4.reg_idr8 =
149 etm->metadata[i][CS_ETMV4_TRCIDR8];
150 t_params[i].etmv4.reg_configr =
151 etm->metadata[i][CS_ETMV4_TRCCONFIGR];
152 t_params[i].etmv4.reg_traceidr =
153 etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
154 }
155 }
156 237
157 /* Set decoder parameters to simply print the trace packets */ 238 /* Set decoder parameters to simply print the trace packets */
158 d_params.packet_printer = cs_etm__packet_dump; 239 if (cs_etm__init_decoder_params(&d_params, NULL,
159 d_params.operation = CS_ETM_OPERATION_PRINT; 240 CS_ETM_OPERATION_PRINT))
160 d_params.formatted = true; 241 goto out_free;
161 d_params.fsyncs = false;
162 d_params.hsyncs = false;
163 d_params.frame_aligned = true;
164 242
165 decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 243 decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
166 244
167 zfree(&t_params);
168
169 if (!decoder) 245 if (!decoder)
170 return; 246 goto out_free;
171 do { 247 do {
172 size_t consumed; 248 size_t consumed;
173 249
@@ -182,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
182 } while (buffer_used < buffer->size); 258 } while (buffer_used < buffer->size);
183 259
184 cs_etm_decoder__free(decoder); 260 cs_etm_decoder__free(decoder);
261
262out_free:
263 zfree(&t_params);
185} 264}
186 265
187static int cs_etm__flush_events(struct perf_session *session, 266static int cs_etm__flush_events(struct perf_session *session,
@@ -205,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session,
205 if (ret < 0) 284 if (ret < 0)
206 return ret; 285 return ret;
207 286
208 return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); 287 return cs_etm__process_timeless_queues(etm, -1);
209} 288}
210 289
211static void cs_etm__free_queue(void *priv) 290static void cs_etm__free_queue(void *priv)
@@ -251,7 +330,7 @@ static void cs_etm__free(struct perf_session *session)
251 cs_etm__free_events(session); 330 cs_etm__free_events(session);
252 session->auxtrace = NULL; 331 session->auxtrace = NULL;
253 332
254 /* First remove all traceID/CPU# nodes for the RB tree */ 333 /* First remove all traceID/metadata nodes for the RB tree */
255 intlist__for_each_entry_safe(inode, tmp, traceid_list) 334 intlist__for_each_entry_safe(inode, tmp, traceid_list)
256 intlist__remove(traceid_list, inode); 335 intlist__remove(traceid_list, inode);
257 /* Then the RB tree itself */ 336 /* Then the RB tree itself */
@@ -297,7 +376,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
297 struct addr_location al; 376 struct addr_location al;
298 377
299 if (!etmq) 378 if (!etmq)
300 return -1; 379 return 0;
301 380
302 machine = etmq->etm->machine; 381 machine = etmq->etm->machine;
303 cpumode = cs_etm__cpu_mode(etmq, address); 382 cpumode = cs_etm__cpu_mode(etmq, address);
@@ -305,7 +384,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
305 thread = etmq->thread; 384 thread = etmq->thread;
306 if (!thread) { 385 if (!thread) {
307 if (cpumode != PERF_RECORD_MISC_KERNEL) 386 if (cpumode != PERF_RECORD_MISC_KERNEL)
308 return -EINVAL; 387 return 0;
309 thread = etmq->etm->unknown_thread; 388 thread = etmq->etm->unknown_thread;
310 } 389 }
311 390
@@ -328,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
328 return len; 407 return len;
329} 408}
330 409
331static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, 410static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
332 unsigned int queue_nr)
333{ 411{
334 int i;
335 struct cs_etm_decoder_params d_params; 412 struct cs_etm_decoder_params d_params;
336 struct cs_etm_trace_params *t_params; 413 struct cs_etm_trace_params *t_params = NULL;
337 struct cs_etm_queue *etmq; 414 struct cs_etm_queue *etmq;
338 size_t szp = sizeof(struct cs_etm_packet); 415 size_t szp = sizeof(struct cs_etm_packet);
339 416
@@ -368,59 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
368 if (!etmq->event_buf) 445 if (!etmq->event_buf)
369 goto out_free; 446 goto out_free;
370 447
371 etmq->etm = etm;
372 etmq->queue_nr = queue_nr;
373 etmq->pid = -1;
374 etmq->tid = -1;
375 etmq->cpu = -1;
376
377 /* Use metadata to fill in trace parameters for trace decoder */ 448 /* Use metadata to fill in trace parameters for trace decoder */
378 t_params = zalloc(sizeof(*t_params) * etm->num_cpu); 449 t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
379 450
380 if (!t_params) 451 if (!t_params)
381 goto out_free; 452 goto out_free;
382 453
383 for (i = 0; i < etm->num_cpu; i++) { 454 if (cs_etm__init_trace_params(t_params, etm))
384 if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { 455 goto out_free;
385 u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR];
386
387 t_params[i].protocol =
388 cs_etm__get_v7_protocol_version(etmidr);
389 t_params[i].etmv3.reg_ctrl =
390 etm->metadata[i][CS_ETM_ETMCR];
391 t_params[i].etmv3.reg_trc_id =
392 etm->metadata[i][CS_ETM_ETMTRACEIDR];
393 } else if (etm->metadata[i][CS_ETM_MAGIC] ==
394 __perf_cs_etmv4_magic) {
395 t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
396 t_params[i].etmv4.reg_idr0 =
397 etm->metadata[i][CS_ETMV4_TRCIDR0];
398 t_params[i].etmv4.reg_idr1 =
399 etm->metadata[i][CS_ETMV4_TRCIDR1];
400 t_params[i].etmv4.reg_idr2 =
401 etm->metadata[i][CS_ETMV4_TRCIDR2];
402 t_params[i].etmv4.reg_idr8 =
403 etm->metadata[i][CS_ETMV4_TRCIDR8];
404 t_params[i].etmv4.reg_configr =
405 etm->metadata[i][CS_ETMV4_TRCCONFIGR];
406 t_params[i].etmv4.reg_traceidr =
407 etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
408 }
409 }
410 456
411 /* Set decoder parameters to simply print the trace packets */ 457 /* Set decoder parameters to decode trace packets */
412 d_params.packet_printer = cs_etm__packet_dump; 458 if (cs_etm__init_decoder_params(&d_params, etmq,
413 d_params.operation = CS_ETM_OPERATION_DECODE; 459 CS_ETM_OPERATION_DECODE))
414 d_params.formatted = true; 460 goto out_free;
415 d_params.fsyncs = false;
416 d_params.hsyncs = false;
417 d_params.frame_aligned = true;
418 d_params.data = etmq;
419 461
420 etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); 462 etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
421 463
422 zfree(&t_params);
423
424 if (!etmq->decoder) 464 if (!etmq->decoder)
425 goto out_free; 465 goto out_free;
426 466
@@ -433,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
433 cs_etm__mem_access)) 473 cs_etm__mem_access))
434 goto out_free_decoder; 474 goto out_free_decoder;
435 475
436 etmq->offset = 0; 476 zfree(&t_params);
437 etmq->period_instructions = 0;
438
439 return etmq; 477 return etmq;
440 478
441out_free_decoder: 479out_free_decoder:
442 cs_etm_decoder__free(etmq->decoder); 480 cs_etm_decoder__free(etmq->decoder);
443out_free: 481out_free:
482 zfree(&t_params);
444 zfree(&etmq->event_buf); 483 zfree(&etmq->event_buf);
445 zfree(&etmq->last_branch); 484 zfree(&etmq->last_branch);
446 zfree(&etmq->last_branch_rb); 485 zfree(&etmq->last_branch_rb);
@@ -455,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
455 struct auxtrace_queue *queue, 494 struct auxtrace_queue *queue,
456 unsigned int queue_nr) 495 unsigned int queue_nr)
457{ 496{
497 int ret = 0;
458 struct cs_etm_queue *etmq = queue->priv; 498 struct cs_etm_queue *etmq = queue->priv;
459 499
460 if (list_empty(&queue->head) || etmq) 500 if (list_empty(&queue->head) || etmq)
461 return 0; 501 goto out;
462 502
463 etmq = cs_etm__alloc_queue(etm, queue_nr); 503 etmq = cs_etm__alloc_queue(etm);
464 504
465 if (!etmq) 505 if (!etmq) {
466 return -ENOMEM; 506 ret = -ENOMEM;
507 goto out;
508 }
467 509
468 queue->priv = etmq; 510 queue->priv = etmq;
469 511 etmq->etm = etm;
470 if (queue->cpu != -1) 512 etmq->queue_nr = queue_nr;
471 etmq->cpu = queue->cpu; 513 etmq->cpu = queue->cpu;
472
473 etmq->tid = queue->tid; 514 etmq->tid = queue->tid;
515 etmq->pid = -1;
516 etmq->offset = 0;
517 etmq->period_instructions = 0;
474 518
475 return 0; 519out:
520 return ret;
476} 521}
477 522
478static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) 523static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
@@ -480,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
480 unsigned int i; 525 unsigned int i;
481 int ret; 526 int ret;
482 527
528 if (!etm->kernel_start)
529 etm->kernel_start = machine__kernel_start(etm->machine);
530
483 for (i = 0; i < etm->queues.nr_queues; i++) { 531 for (i = 0; i < etm->queues.nr_queues; i++) {
484 ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); 532 ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
485 if (ret) 533 if (ret)
@@ -637,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event,
637 685
638 686
639static int 687static int
640cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) 688cs_etm__get_trace(struct cs_etm_queue *etmq)
641{ 689{
642 struct auxtrace_buffer *aux_buffer = etmq->buffer; 690 struct auxtrace_buffer *aux_buffer = etmq->buffer;
643 struct auxtrace_buffer *old_buffer = aux_buffer; 691 struct auxtrace_buffer *old_buffer = aux_buffer;
@@ -651,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
651 if (!aux_buffer) { 699 if (!aux_buffer) {
652 if (old_buffer) 700 if (old_buffer)
653 auxtrace_buffer__drop_data(old_buffer); 701 auxtrace_buffer__drop_data(old_buffer);
654 buff->len = 0; 702 etmq->buf_len = 0;
655 return 0; 703 return 0;
656 } 704 }
657 705
@@ -671,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
671 if (old_buffer) 719 if (old_buffer)
672 auxtrace_buffer__drop_data(old_buffer); 720 auxtrace_buffer__drop_data(old_buffer);
673 721
674 buff->offset = aux_buffer->offset; 722 etmq->buf_used = 0;
675 buff->len = aux_buffer->size; 723 etmq->buf_len = aux_buffer->size;
676 buff->buf = aux_buffer->data; 724 etmq->buf = aux_buffer->data;
677
678 buff->ref_timestamp = aux_buffer->reference;
679 725
680 return buff->len; 726 return etmq->buf_len;
681} 727}
682 728
683static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 729static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
@@ -719,7 +765,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
719 sample.stream_id = etmq->etm->instructions_id; 765 sample.stream_id = etmq->etm->instructions_id;
720 sample.period = period; 766 sample.period = period;
721 sample.cpu = etmq->packet->cpu; 767 sample.cpu = etmq->packet->cpu;
722 sample.flags = 0; 768 sample.flags = etmq->prev_packet->flags;
723 sample.insn_len = 1; 769 sample.insn_len = 1;
724 sample.cpumode = event->sample.header.misc; 770 sample.cpumode = event->sample.header.misc;
725 771
@@ -778,7 +824,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
778 sample.stream_id = etmq->etm->branches_id; 824 sample.stream_id = etmq->etm->branches_id;
779 sample.period = 1; 825 sample.period = 1;
780 sample.cpu = etmq->packet->cpu; 826 sample.cpu = etmq->packet->cpu;
781 sample.flags = 0; 827 sample.flags = etmq->prev_packet->flags;
782 sample.cpumode = event->sample.header.misc; 828 sample.cpumode = event->sample.header.misc;
783 829
784 /* 830 /*
@@ -1106,95 +1152,489 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq)
1106 1152
1107 return 0; 1153 return 0;
1108} 1154}
1155/*
1156 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
1157 * if need be.
1158 * Returns: < 0 if error
1159 * = 0 if no more auxtrace_buffer to read
1160 * > 0 if the current buffer isn't empty yet
1161 */
1162static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1163{
1164 int ret;
1165
1166 if (!etmq->buf_len) {
1167 ret = cs_etm__get_trace(etmq);
1168 if (ret <= 0)
1169 return ret;
1170 /*
1171 * We cannot assume consecutive blocks in the data file
1172 * are contiguous, reset the decoder to force re-sync.
1173 */
1174 ret = cs_etm_decoder__reset(etmq->decoder);
1175 if (ret)
1176 return ret;
1177 }
1178
1179 return etmq->buf_len;
1180}
1181
/*
 * Decide whether the instruction immediately preceding @end_addr in
 * @packet's ISA is an SVC (supervisor call).  Used to tell system calls
 * apart from other exception types when only the return address is known.
 *
 * NOTE(review): the return value of cs_etm__mem_access() is ignored; if
 * the read fails the stack-allocated instr16/instr32 is compared
 * uninitialised and the result is unreliable — confirm whether a failed
 * read should force a "false" return.
 */
static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
				 struct cs_etm_packet *packet,
				 u64 end_addr)
{
	u16 instr16;
	u32 instr32;
	u64 addr;

	switch (packet->isa) {
	case CS_ETM_ISA_T32:
		/*
		 * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 * b'15               b'8
		 * +-----------------+--------+
		 * | 1 1 0 1 1 1 1 1 |  imm8  |
		 * +-----------------+--------+
		 *
		 * According to the specification, SVC is only defined for T32
		 * as a 16-bit instruction (there is no 32-bit encoding), so
		 * only read 2 bytes as the instruction size for T32.
		 */
		addr = end_addr - 2;
		cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16);
		if ((instr16 & 0xFF00) == 0xDF00)
			return true;

		break;
	case CS_ETM_ISA_A32:
		/*
		 * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247:
		 *
		 * b'31 b'28 b'27 b'24
		 * +---------+---------+-------------------------+
		 * |  !1111  | 1 1 1 1 |          imm24          |
		 * +---------+---------+-------------------------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
		if ((instr32 & 0x0F000000) == 0x0F000000 &&
		    (instr32 & 0xF0000000) != 0xF0000000)
			return true;

		break;
	case CS_ETM_ISA_A64:
		/*
		 * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294:
		 *
		 * b'31                 b'21           b'4     b'0
		 * +-----------------------+---------+-----------+
		 * | 1 1 0 1 0 1 0 0 0 0 0 |  imm16  | 0 0 0 0 1 |
		 * +-----------------------+---------+-----------+
		 */
		addr = end_addr - 4;
		cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32);
		if ((instr32 & 0xFFE0001F) == 0xd4000001)
			return true;

		break;
	case CS_ETM_ISA_UNKNOWN:
	default:
		break;
	}

	return false;
}
1248
1249static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
1250{
1251 struct cs_etm_packet *packet = etmq->packet;
1252 struct cs_etm_packet *prev_packet = etmq->prev_packet;
1253
1254 if (magic == __perf_cs_etmv3_magic)
1255 if (packet->exception_number == CS_ETMV3_EXC_SVC)
1256 return true;
1257
1258 /*
1259 * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and
1260 * HVC cases; need to check if it's SVC instruction based on
1261 * packet address.
1262 */
1263 if (magic == __perf_cs_etmv4_magic) {
1264 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
1265 cs_etm__is_svc_instr(etmq, prev_packet,
1266 prev_packet->end_addr))
1267 return true;
1268 }
1269
1270 return false;
1271}
1272
/*
 * Classify the current exception packet: true if it was triggered by an
 * external/asynchronous event (interrupt, reset, debug halt, async abort)
 * rather than by an instruction, for the protocol identified by @magic.
 */
static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
{
	struct cs_etm_packet *packet = etmq->packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_PE_RESET ||
		    packet->exception_number == CS_ETMV3_EXC_IRQ ||
		    packet->exception_number == CS_ETMV3_EXC_FIQ)
			return true;

	if (magic == __perf_cs_etmv4_magic)
		if (packet->exception_number == CS_ETMV4_EXC_RESET ||
		    packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT ||
		    packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR ||
		    packet->exception_number == CS_ETMV4_EXC_INST_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG ||
		    packet->exception_number == CS_ETMV4_EXC_IRQ ||
		    packet->exception_number == CS_ETMV4_EXC_FIQ)
			return true;

	return false;
}
1297
/*
 * Classify the current exception packet: true if it was raised
 * synchronously by an instruction (trap, fault, alignment error, SMC/HVC)
 * for the protocol identified by @magic.  Assumes the syscall case has
 * already been filtered out by cs_etm__is_syscall().
 */
static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
{
	struct cs_etm_packet *packet = etmq->packet;
	struct cs_etm_packet *prev_packet = etmq->prev_packet;

	if (magic == __perf_cs_etmv3_magic)
		if (packet->exception_number == CS_ETMV3_EXC_SMC ||
		    packet->exception_number == CS_ETMV3_EXC_HYP ||
		    packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE ||
		    packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR ||
		    packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT ||
		    packet->exception_number == CS_ETMV3_EXC_DATA_FAULT ||
		    packet->exception_number == CS_ETMV3_EXC_GENERIC)
			return true;

	if (magic == __perf_cs_etmv4_magic) {
		if (packet->exception_number == CS_ETMV4_EXC_TRAP ||
		    packet->exception_number == CS_ETMV4_EXC_ALIGNMENT ||
		    packet->exception_number == CS_ETMV4_EXC_INST_FAULT ||
		    packet->exception_number == CS_ETMV4_EXC_DATA_FAULT)
			return true;

		/*
		 * For CS_ETMV4_EXC_CALL, except SVC other instructions
		 * (SMC, HVC) are taken as sync exceptions.
		 */
		if (packet->exception_number == CS_ETMV4_EXC_CALL &&
		    !cs_etm__is_svc_instr(etmq, prev_packet,
					  prev_packet->end_addr))
			return true;

		/*
		 * ETMv4 has 5 bits for exception number; if the numbers
		 * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ]
		 * they are implementation defined exceptions.
		 *
		 * For this case, simply take it as sync exception.
		 */
		if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
		    packet->exception_number <= CS_ETMV4_EXC_END)
			return true;
	}

	return false;
}
1343
/*
 * Translate the current decoder packet into PERF_IP_FLAG_* sample flags,
 * storing them on the packet (and fixing up the previous packet's flags
 * where the current packet reveals what the previous one really was).
 *
 * Must be called before the packet/prev_packet swap in the caller, since
 * it relies on both packets' address information.
 *
 * Returns 0 on success, a negative error code if the packet's trace
 * channel ID has no registered metadata.
 */
static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
{
	struct cs_etm_packet *packet = etmq->packet;
	struct cs_etm_packet *prev_packet = etmq->prev_packet;
	u64 magic;
	int ret;

	switch (packet->sample_type) {
	case CS_ETM_RANGE:
		/*
		 * Immediate branch instruction with neither link nor
		 * return flag: a normal branch instruction within
		 * the function.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
			packet->flags = PERF_IP_FLAG_BRANCH;

			if (packet->last_instr_cond)
				packet->flags |= PERF_IP_FLAG_CONDITIONAL;
		}

		/*
		 * Immediate branch instruction with link (e.g. BL), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with link (e.g. BLR), this is
		 * branch instruction for function call.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL;

		/*
		 * Indirect branch instruction with subtype of
		 * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for
		 * function return for A32/T32.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Indirect branch instruction without link (e.g. BR), usually
		 * this is used for function return, especially for functions
		 * within dynamic link lib.
		 */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_NONE)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/* Return instruction for function return. */
		if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
		    packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_RETURN;

		/*
		 * Decoder might insert a discontinuity in the middle of
		 * instruction packets, fixup prev_packet with flag
		 * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
		 */
		if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_BEGIN;

		/*
		 * If the previous packet is an exception return packet
		 * and the return address just follows an SVC instruction,
		 * recalibrate the previous packet's sample flags to
		 * PERF_IP_FLAG_SYSCALLRET.
		 */
		if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
					   PERF_IP_FLAG_RETURN |
					   PERF_IP_FLAG_INTERRUPT) &&
		    cs_etm__is_svc_instr(etmq, packet, packet->start_addr))
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_SYSCALLRET;
		break;
	case CS_ETM_DISCONTINUITY:
		/*
		 * The trace is discontinuous, if the previous packet is
		 * instruction packet, set flag PERF_IP_FLAG_TRACE_END
		 * for previous packet.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags |= PERF_IP_FLAG_BRANCH |
					      PERF_IP_FLAG_TRACE_END;
		break;
	case CS_ETM_EXCEPTION:
		ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
		if (ret)
			return ret;

		/* The exception is for system call. */
		if (cs_etm__is_syscall(etmq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_SYSCALLRET;
		/*
		 * The exceptions are triggered by external signals from bus,
		 * interrupt controller, debug module, PE reset or halt.
		 */
		else if (cs_etm__is_async_exception(etmq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_ASYNC |
					PERF_IP_FLAG_INTERRUPT;
		/*
		 * Otherwise, exception is caused by trap, instruction &
		 * data fault, or alignment errors.
		 */
		else if (cs_etm__is_sync_exception(etmq, magic))
			packet->flags = PERF_IP_FLAG_BRANCH |
					PERF_IP_FLAG_CALL |
					PERF_IP_FLAG_INTERRUPT;

		/*
		 * When the exception packet is inserted, since the exception
		 * packet is not used standalone for generating samples and is
		 * affiliated with the previous instruction range packet, set
		 * the previous range packet's flags to tell perf it is an
		 * exception taken branch.
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = packet->flags;
		break;
	case CS_ETM_EXCEPTION_RET:
		/*
		 * When the exception return packet is inserted, since the
		 * exception return packet is not used standalone for
		 * generating samples and is affiliated with the previous
		 * instruction range packet, set the previous range packet's
		 * flags to tell perf it is an exception return branch.
		 *
		 * The exception return can be for either system call or
		 * other exception types; unfortunately the packet doesn't
		 * contain exception type related info so we cannot decide
		 * the exception type purely based on exception return packet.
		 * Recording the exception number from the exception packet
		 * and reusing it for the exception return packet would not be
		 * reliable either, because the trace can be discontinuous or
		 * the interrupt can be nested.
		 *
		 * For exception return packet, we only need to distinguish
		 * whether the packet is for a system call or for other types.
		 * Thus the decision can be deferred until the next packet,
		 * which contains the return address; based on that address we
		 * can read out the previous instruction, check whether it is
		 * a system call instruction and then calibrate the sample
		 * flag as needed (see the CS_ETM_RANGE case above).
		 */
		if (prev_packet->sample_type == CS_ETM_RANGE)
			prev_packet->flags = PERF_IP_FLAG_BRANCH |
					     PERF_IP_FLAG_RETURN |
					     PERF_IP_FLAG_INTERRUPT;
		break;
	case CS_ETM_EMPTY:
	default:
		break;
	}

	return 0;
}
1519
1520static int cs_etm__decode_data_block(struct cs_etm_queue *etmq)
1521{
1522 int ret = 0;
1523 size_t processed = 0;
1524
1525 /*
1526 * Packets are decoded and added to the decoder's packet queue
1527 * until the decoder packet processing callback has requested that
1528 * processing stops or there is nothing left in the buffer. Normal
1529 * operations that stop processing are a timestamp packet or a full
1530 * decoder buffer queue.
1531 */
1532 ret = cs_etm_decoder__process_data_block(etmq->decoder,
1533 etmq->offset,
1534 &etmq->buf[etmq->buf_used],
1535 etmq->buf_len,
1536 &processed);
1537 if (ret)
1538 goto out;
1539
1540 etmq->offset += processed;
1541 etmq->buf_used += processed;
1542 etmq->buf_len -= processed;
1543
1544out:
1545 return ret;
1546}
1547
/*
 * Drain the decoder's packet queue, turning each packet into perf samples
 * or state updates as appropriate for its type.
 *
 * Returns the last cs_etm_decoder__get_packet() result (<= 0: end of data
 * or error), an error from flag setting, or -EINVAL on an empty packet.
 */
static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq)
{
	int ret;

	/* Process each packet in this chunk */
	while (1) {
		ret = cs_etm_decoder__get_packet(etmq->decoder,
						 etmq->packet);
		if (ret <= 0)
			/*
			 * Stop processing this chunk on
			 * end of data or error
			 */
			break;

		/*
		 * Since packet addresses are swapped in packet
		 * handling within below switch() statements,
		 * thus setting sample flags must be called
		 * prior to switch() statement to use address
		 * information before packets swapping.
		 */
		ret = cs_etm__set_sample_flags(etmq);
		if (ret < 0)
			break;

		switch (etmq->packet->sample_type) {
		case CS_ETM_RANGE:
			/*
			 * If the packet contains an instruction
			 * range, generate instruction sequence
			 * events.
			 */
			cs_etm__sample(etmq);
			break;
		case CS_ETM_EXCEPTION:
		case CS_ETM_EXCEPTION_RET:
			/*
			 * If the exception packet is coming,
			 * make sure the previous instruction
			 * range packet to be handled properly.
			 */
			cs_etm__exception(etmq);
			break;
		case CS_ETM_DISCONTINUITY:
			/*
			 * Discontinuity in trace, flush
			 * previous branch stack
			 */
			cs_etm__flush(etmq);
			break;
		case CS_ETM_EMPTY:
			/*
			 * Should not receive empty packet,
			 * report error.
			 */
			pr_err("CS ETM Trace: empty packet\n");
			return -EINVAL;
		default:
			break;
		}
	}

	return ret;
}
1109 1613
1110static int cs_etm__run_decoder(struct cs_etm_queue *etmq) 1614static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
1111{ 1615{
1112 struct cs_etm_auxtrace *etm = etmq->etm;
1113 struct cs_etm_buffer buffer;
1114 size_t buffer_used, processed;
1115 int err = 0; 1616 int err = 0;
1116 1617
1117 if (!etm->kernel_start)
1118 etm->kernel_start = machine__kernel_start(etm->machine);
1119
1120 /* Go through each buffer in the queue and decode them one by one */ 1618 /* Go through each buffer in the queue and decode them one by one */
1121 while (1) { 1619 while (1) {
1122 buffer_used = 0; 1620 err = cs_etm__get_data_block(etmq);
1123 memset(&buffer, 0, sizeof(buffer));
1124 err = cs_etm__get_trace(&buffer, etmq);
1125 if (err <= 0) 1621 if (err <= 0)
1126 return err; 1622 return err;
1127 /*
1128 * We cannot assume consecutive blocks in the data file are
1129 * contiguous, reset the decoder to force re-sync.
1130 */
1131 err = cs_etm_decoder__reset(etmq->decoder);
1132 if (err != 0)
1133 return err;
1134 1623
1135 /* Run trace decoder until buffer consumed or end of trace */ 1624 /* Run trace decoder until buffer consumed or end of trace */
1136 do { 1625 do {
1137 processed = 0; 1626 err = cs_etm__decode_data_block(etmq);
1138 err = cs_etm_decoder__process_data_block(
1139 etmq->decoder,
1140 etmq->offset,
1141 &buffer.buf[buffer_used],
1142 buffer.len - buffer_used,
1143 &processed);
1144 if (err) 1627 if (err)
1145 return err; 1628 return err;
1146 1629
1147 etmq->offset += processed; 1630 /*
1148 buffer_used += processed; 1631 * Process each packet in this chunk, nothing to do if
1149 1632 * an error occurs other than hoping the next one will
1150 /* Process each packet in this chunk */ 1633 * be better.
1151 while (1) { 1634 */
1152 err = cs_etm_decoder__get_packet(etmq->decoder, 1635 err = cs_etm__process_decoder_queue(etmq);
1153 etmq->packet); 1636
1154 if (err <= 0) 1637 } while (etmq->buf_len);
1155 /*
1156 * Stop processing this chunk on
1157 * end of data or error
1158 */
1159 break;
1160
1161 switch (etmq->packet->sample_type) {
1162 case CS_ETM_RANGE:
1163 /*
1164 * If the packet contains an instruction
1165 * range, generate instruction sequence
1166 * events.
1167 */
1168 cs_etm__sample(etmq);
1169 break;
1170 case CS_ETM_EXCEPTION:
1171 case CS_ETM_EXCEPTION_RET:
1172 /*
1173 * If the exception packet is coming,
1174 * make sure the previous instruction
1175 * range packet to be handled properly.
1176 */
1177 cs_etm__exception(etmq);
1178 break;
1179 case CS_ETM_DISCONTINUITY:
1180 /*
1181 * Discontinuity in trace, flush
1182 * previous branch stack
1183 */
1184 cs_etm__flush(etmq);
1185 break;
1186 case CS_ETM_EMPTY:
1187 /*
1188 * Should not receive empty packet,
1189 * report error.
1190 */
1191 pr_err("CS ETM Trace: empty packet\n");
1192 return -EINVAL;
1193 default:
1194 break;
1195 }
1196 }
1197 } while (buffer.len > buffer_used);
1198 1638
1199 if (err == 0) 1639 if (err == 0)
1200 /* Flush any remaining branch stack entries */ 1640 /* Flush any remaining branch stack entries */
@@ -1205,7 +1645,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
1205} 1645}
1206 1646
1207static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 1647static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
1208 pid_t tid, u64 time_) 1648 pid_t tid)
1209{ 1649{
1210 unsigned int i; 1650 unsigned int i;
1211 struct auxtrace_queues *queues = &etm->queues; 1651 struct auxtrace_queues *queues = &etm->queues;
@@ -1215,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
1215 struct cs_etm_queue *etmq = queue->priv; 1655 struct cs_etm_queue *etmq = queue->priv;
1216 1656
1217 if (etmq && ((tid == -1) || (etmq->tid == tid))) { 1657 if (etmq && ((tid == -1) || (etmq->tid == tid))) {
1218 etmq->time = time_;
1219 cs_etm__set_pid_tid_cpu(etm, queue); 1658 cs_etm__set_pid_tid_cpu(etm, queue);
1220 cs_etm__run_decoder(etmq); 1659 cs_etm__run_decoder(etmq);
1221 } 1660 }
@@ -1259,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session,
1259 1698
1260 if (event->header.type == PERF_RECORD_EXIT) 1699 if (event->header.type == PERF_RECORD_EXIT)
1261 return cs_etm__process_timeless_queues(etm, 1700 return cs_etm__process_timeless_queues(etm,
1262 event->fork.tid, 1701 event->fork.tid);
1263 sample->time);
1264 1702
1265 return 0; 1703 return 0;
1266} 1704}
@@ -1414,9 +1852,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
1414 0xffffffff); 1852 0xffffffff);
1415 1853
1416 /* 1854 /*
1417 * Create an RB tree for traceID-CPU# tuple. Since the conversion has 1855 * Create an RB tree for traceID-metadata tuple. Since the conversion
1418 * to be made for each packet that gets decoded, optimizing access in 1856 * has to be made for each packet that gets decoded, optimizing access
1419 * anything other than a sequential array is worth doing. 1857 * in anything other than a sequential array is worth doing.
1420 */ 1858 */
1421 traceid_list = intlist__new(NULL); 1859 traceid_list = intlist__new(NULL);
1422 if (!traceid_list) { 1860 if (!traceid_list) {
@@ -1482,8 +1920,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
1482 err = -EINVAL; 1920 err = -EINVAL;
1483 goto err_free_metadata; 1921 goto err_free_metadata;
1484 } 1922 }
1485 /* All good, associate the traceID with the CPU# */ 1923 /* All good, associate the traceID with the metadata pointer */
1486 inode->priv = &metadata[j][CS_ETM_CPU]; 1924 inode->priv = metadata[j];
1487 } 1925 }
1488 1926
1489 /* 1927 /*
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 37f8d48179ca..0e97c196147a 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -53,7 +53,51 @@ enum {
53 CS_ETMV4_PRIV_MAX, 53 CS_ETMV4_PRIV_MAX,
54}; 54};
55 55
56/* RB tree for quick conversion between traceID and CPUs */ 56/*
57 * ETMv3 exception encoding number:
58 * See Embedded Trace Macrocell spcification (ARM IHI 0014Q)
59 * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors.
60 */
61enum {
62 CS_ETMV3_EXC_NONE = 0,
63 CS_ETMV3_EXC_DEBUG_HALT = 1,
64 CS_ETMV3_EXC_SMC = 2,
65 CS_ETMV3_EXC_HYP = 3,
66 CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4,
67 CS_ETMV3_EXC_JAZELLE_THUMBEE = 5,
68 CS_ETMV3_EXC_PE_RESET = 8,
69 CS_ETMV3_EXC_UNDEFINED_INSTR = 9,
70 CS_ETMV3_EXC_SVC = 10,
71 CS_ETMV3_EXC_PREFETCH_ABORT = 11,
72 CS_ETMV3_EXC_DATA_FAULT = 12,
73 CS_ETMV3_EXC_GENERIC = 13,
74 CS_ETMV3_EXC_IRQ = 14,
75 CS_ETMV3_EXC_FIQ = 15,
76};
77
78/*
79 * ETMv4 exception encoding number:
80 * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D)
81 * table 6-12 Possible values for the TYPE field in an Exception instruction
82 * trace packet, for ARMv7-A/R and ARMv8-A/R PEs.
83 */
84enum {
85 CS_ETMV4_EXC_RESET = 0,
86 CS_ETMV4_EXC_DEBUG_HALT = 1,
87 CS_ETMV4_EXC_CALL = 2,
88 CS_ETMV4_EXC_TRAP = 3,
89 CS_ETMV4_EXC_SYSTEM_ERROR = 4,
90 CS_ETMV4_EXC_INST_DEBUG = 6,
91 CS_ETMV4_EXC_DATA_DEBUG = 7,
92 CS_ETMV4_EXC_ALIGNMENT = 10,
93 CS_ETMV4_EXC_INST_FAULT = 11,
94 CS_ETMV4_EXC_DATA_FAULT = 12,
95 CS_ETMV4_EXC_IRQ = 14,
96 CS_ETMV4_EXC_FIQ = 15,
97 CS_ETMV4_EXC_END = 31,
98};
99
100/* RB tree for quick conversion between traceID and metadata pointers */
57struct intlist *traceid_list; 101struct intlist *traceid_list;
58 102
59#define KiB(x) ((x) * 1024) 103#define KiB(x) ((x) * 1024)
@@ -61,14 +105,15 @@ struct intlist *traceid_list;
61 105
62#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) 106#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
63 107
64static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; 108#define __perf_cs_etmv3_magic 0x3030303030303030ULL
65static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; 109#define __perf_cs_etmv4_magic 0x4040404040404040ULL
66#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) 110#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
67#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) 111#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
68 112
69#ifdef HAVE_CSTRACE_SUPPORT 113#ifdef HAVE_CSTRACE_SUPPORT
70int cs_etm__process_auxtrace_info(union perf_event *event, 114int cs_etm__process_auxtrace_info(union perf_event *event,
71 struct perf_session *session); 115 struct perf_session *session);
116int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
72#else 117#else
73static inline int 118static inline int
74cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, 119cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
@@ -76,6 +121,12 @@ cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
76{ 121{
77 return -1; 122 return -1;
78} 123}
124
125static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
126 int *cpu __maybe_unused)
127{
128 return -1;
129}
79#endif 130#endif
80 131
81#endif 132#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 2a36fab76994..26af43ad9ddd 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
1578{ 1578{
1579 struct perf_session *session; 1579 struct perf_session *session;
1580 struct perf_data data = { 1580 struct perf_data data = {
1581 .file = { .path = input, .fd = -1 }, 1581 .path = input,
1582 .mode = PERF_DATA_MODE_READ, 1582 .mode = PERF_DATA_MODE_READ,
1583 .force = opts->force, 1583 .force = opts->force,
1584 }; 1584 };
@@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path,
1650 1650
1651 fprintf(stderr, 1651 fprintf(stderr,
1652 "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", 1652 "[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
1653 data.file.path, path); 1653 data.path, path);
1654 1654
1655 fprintf(stderr, 1655 fprintf(stderr,
1656 "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", 1656 "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index d8cfc19ddb10..7bd5ddeb7a41 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -7,11 +7,117 @@
7#include <fcntl.h> 7#include <fcntl.h>
8#include <unistd.h> 8#include <unistd.h>
9#include <string.h> 9#include <string.h>
10#include <asm/bug.h>
11#include <sys/types.h>
12#include <dirent.h>
10 13
11#include "data.h" 14#include "data.h"
12#include "util.h" 15#include "util.h"
13#include "debug.h" 16#include "debug.h"
14 17
18static void close_dir(struct perf_data_file *files, int nr)
19{
20 while (--nr >= 1) {
21 close(files[nr].fd);
22 free(files[nr].path);
23 }
24 free(files);
25}
26
27void perf_data__close_dir(struct perf_data *data)
28{
29 close_dir(data->dir.files, data->dir.nr);
30}
31
32int perf_data__create_dir(struct perf_data *data, int nr)
33{
34 struct perf_data_file *files = NULL;
35 int i, ret = -1;
36
37 files = zalloc(nr * sizeof(*files));
38 if (!files)
39 return -ENOMEM;
40
41 data->dir.files = files;
42 data->dir.nr = nr;
43
44 for (i = 0; i < nr; i++) {
45 struct perf_data_file *file = &files[i];
46
47 if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0)
48 goto out_err;
49
50 ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
51 if (ret < 0)
52 goto out_err;
53
54 file->fd = ret;
55 }
56
57 return 0;
58
59out_err:
60 close_dir(files, i);
61 return ret;
62}
63
64int perf_data__open_dir(struct perf_data *data)
65{
66 struct perf_data_file *files = NULL;
67 struct dirent *dent;
68 int ret = -1;
69 DIR *dir;
70 int nr = 0;
71
72 dir = opendir(data->path);
73 if (!dir)
74 return -EINVAL;
75
76 while ((dent = readdir(dir)) != NULL) {
77 struct perf_data_file *file;
78 char path[PATH_MAX];
79 struct stat st;
80
81 snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name);
82 if (stat(path, &st))
83 continue;
84
85 if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data", 4))
86 continue;
87
88 ret = -ENOMEM;
89
90 file = realloc(files, (nr + 1) * sizeof(*files));
91 if (!file)
92 goto out_err;
93
94 files = file;
95 file = &files[nr++];
96
97 file->path = strdup(path);
98 if (!file->path)
99 goto out_err;
100
101 ret = open(file->path, O_RDONLY);
102 if (ret < 0)
103 goto out_err;
104
105 file->fd = ret;
106 file->size = st.st_size;
107 }
108
109 if (!files)
110 return -EINVAL;
111
112 data->dir.files = files;
113 data->dir.nr = nr;
114 return 0;
115
116out_err:
117 close_dir(files, nr);
118 return ret;
119}
120
15static bool check_pipe(struct perf_data *data) 121static bool check_pipe(struct perf_data *data)
16{ 122{
17 struct stat st; 123 struct stat st;
@@ -19,11 +125,11 @@ static bool check_pipe(struct perf_data *data)
19 int fd = perf_data__is_read(data) ? 125 int fd = perf_data__is_read(data) ?
20 STDIN_FILENO : STDOUT_FILENO; 126 STDIN_FILENO : STDOUT_FILENO;
21 127
22 if (!data->file.path) { 128 if (!data->path) {
23 if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) 129 if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
24 is_pipe = true; 130 is_pipe = true;
25 } else { 131 } else {
26 if (!strcmp(data->file.path, "-")) 132 if (!strcmp(data->path, "-"))
27 is_pipe = true; 133 is_pipe = true;
28 } 134 }
29 135
@@ -37,13 +143,31 @@ static int check_backup(struct perf_data *data)
37{ 143{
38 struct stat st; 144 struct stat st;
39 145
40 if (!stat(data->file.path, &st) && st.st_size) { 146 if (perf_data__is_read(data))
41 /* TODO check errors properly */ 147 return 0;
148
149 if (!stat(data->path, &st) && st.st_size) {
42 char oldname[PATH_MAX]; 150 char oldname[PATH_MAX];
151 int ret;
152
43 snprintf(oldname, sizeof(oldname), "%s.old", 153 snprintf(oldname, sizeof(oldname), "%s.old",
44 data->file.path); 154 data->path);
45 unlink(oldname); 155
46 rename(data->file.path, oldname); 156 ret = rm_rf_perf_data(oldname);
157 if (ret) {
158 pr_err("Can't remove old data: %s (%s)\n",
159 ret == -2 ?
160 "Unknown file found" : strerror(errno),
161 oldname);
162 return -1;
163 }
164
165 if (rename(data->path, oldname)) {
166 pr_err("Can't move data: %s (%s to %s)\n",
167 strerror(errno),
168 data->path, oldname);
169 return -1;
170 }
47 } 171 }
48 172
49 return 0; 173 return 0;
@@ -82,7 +206,7 @@ static int open_file_read(struct perf_data *data)
82 goto out_close; 206 goto out_close;
83 } 207 }
84 208
85 data->size = st.st_size; 209 data->file.size = st.st_size;
86 return fd; 210 return fd;
87 211
88 out_close: 212 out_close:
@@ -95,9 +219,6 @@ static int open_file_write(struct perf_data *data)
95 int fd; 219 int fd;
96 char sbuf[STRERR_BUFSIZE]; 220 char sbuf[STRERR_BUFSIZE];
97 221
98 if (check_backup(data))
99 return -1;
100
101 fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, 222 fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
102 S_IRUSR|S_IWUSR); 223 S_IRUSR|S_IWUSR);
103 224
@@ -115,8 +236,22 @@ static int open_file(struct perf_data *data)
115 fd = perf_data__is_read(data) ? 236 fd = perf_data__is_read(data) ?
116 open_file_read(data) : open_file_write(data); 237 open_file_read(data) : open_file_write(data);
117 238
239 if (fd < 0) {
240 free(data->file.path);
241 return -1;
242 }
243
118 data->file.fd = fd; 244 data->file.fd = fd;
119 return fd < 0 ? -1 : 0; 245 return 0;
246}
247
248static int open_file_dup(struct perf_data *data)
249{
250 data->file.path = strdup(data->path);
251 if (!data->file.path)
252 return -ENOMEM;
253
254 return open_file(data);
120} 255}
121 256
122int perf_data__open(struct perf_data *data) 257int perf_data__open(struct perf_data *data)
@@ -124,14 +259,18 @@ int perf_data__open(struct perf_data *data)
124 if (check_pipe(data)) 259 if (check_pipe(data))
125 return 0; 260 return 0;
126 261
127 if (!data->file.path) 262 if (!data->path)
128 data->file.path = "perf.data"; 263 data->path = "perf.data";
129 264
130 return open_file(data); 265 if (check_backup(data))
266 return -1;
267
268 return open_file_dup(data);
131} 269}
132 270
133void perf_data__close(struct perf_data *data) 271void perf_data__close(struct perf_data *data)
134{ 272{
273 free(data->file.path);
135 close(data->file.fd); 274 close(data->file.fd);
136} 275}
137 276
@@ -159,15 +298,15 @@ int perf_data__switch(struct perf_data *data,
159 if (perf_data__is_read(data)) 298 if (perf_data__is_read(data))
160 return -EINVAL; 299 return -EINVAL;
161 300
162 if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0) 301 if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0)
163 return -ENOMEM; 302 return -ENOMEM;
164 303
165 /* 304 /*
166 * Only fire a warning, don't return error, continue fill 305 * Only fire a warning, don't return error, continue fill
167 * original file. 306 * original file.
168 */ 307 */
169 if (rename(data->file.path, new_filepath)) 308 if (rename(data->path, new_filepath))
170 pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath); 309 pr_warning("Failed to rename %s to %s\n", data->path, new_filepath);
171 310
172 if (!at_exit) { 311 if (!at_exit) {
173 close(data->file.fd); 312 close(data->file.fd);
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
index 4828f7feea89..14b47be2bd69 100644
--- a/tools/perf/util/data.h
+++ b/tools/perf/util/data.h
@@ -10,16 +10,22 @@ enum perf_data_mode {
10}; 10};
11 11
12struct perf_data_file { 12struct perf_data_file {
13 const char *path; 13 char *path;
14 int fd; 14 int fd;
15 unsigned long size;
15}; 16};
16 17
17struct perf_data { 18struct perf_data {
19 const char *path;
18 struct perf_data_file file; 20 struct perf_data_file file;
19 bool is_pipe; 21 bool is_pipe;
20 bool force; 22 bool force;
21 unsigned long size;
22 enum perf_data_mode mode; 23 enum perf_data_mode mode;
24
25 struct {
26 struct perf_data_file *files;
27 int nr;
28 } dir;
23}; 29};
24 30
25static inline bool perf_data__is_read(struct perf_data *data) 31static inline bool perf_data__is_read(struct perf_data *data)
@@ -44,7 +50,7 @@ static inline int perf_data__fd(struct perf_data *data)
44 50
45static inline unsigned long perf_data__size(struct perf_data *data) 51static inline unsigned long perf_data__size(struct perf_data *data)
46{ 52{
47 return data->size; 53 return data->file.size;
48} 54}
49 55
50int perf_data__open(struct perf_data *data); 56int perf_data__open(struct perf_data *data);
@@ -63,4 +69,8 @@ ssize_t perf_data_file__write(struct perf_data_file *file,
63int perf_data__switch(struct perf_data *data, 69int perf_data__switch(struct perf_data *data,
64 const char *postfix, 70 const char *postfix,
65 size_t pos, bool at_exit); 71 size_t pos, bool at_exit);
72
73int perf_data__create_dir(struct perf_data *data, int nr);
74int perf_data__open_dir(struct perf_data *data);
75void perf_data__close_dir(struct perf_data *data);
66#endif /* __PERF_DATA_H */ 76#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 69fbb0a72d0c..de9b4769d06c 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -20,6 +20,7 @@
20#include "thread.h" 20#include "thread.h"
21#include "comm.h" 21#include "comm.h"
22#include "symbol.h" 22#include "symbol.h"
23#include "map.h"
23#include "event.h" 24#include "event.h"
24#include "util.h" 25#include "util.h"
25#include "thread-stack.h" 26#include "thread-stack.h"
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c
deleted file mode 100644
index eec754243f4d..000000000000
--- a/tools/perf/util/drv_configs.c
+++ /dev/null
@@ -1,78 +0,0 @@
1/*
2 * drv_configs.h: Interface to apply PMU specific configuration
3 * Copyright (c) 2016-2018, Linaro Ltd.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#include "drv_configs.h"
17#include "evlist.h"
18#include "evsel.h"
19#include "pmu.h"
20#include <errno.h>
21
22static int
23perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
24 struct perf_evsel_config_term **err_term)
25{
26 bool found = false;
27 int err = 0;
28 struct perf_evsel_config_term *term;
29 struct perf_pmu *pmu = NULL;
30
31 while ((pmu = perf_pmu__scan(pmu)) != NULL)
32 if (pmu->type == evsel->attr.type) {
33 found = true;
34 break;
35 }
36
37 list_for_each_entry(term, &evsel->config_terms, list) {
38 if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
39 continue;
40
41 /*
42 * We have a configuration term, report an error if we
43 * can't find the PMU or if the PMU driver doesn't support
44 * cmd line driver configuration.
45 */
46 if (!found || !pmu->set_drv_config) {
47 err = -EINVAL;
48 *err_term = term;
49 break;
50 }
51
52 err = pmu->set_drv_config(term);
53 if (err) {
54 *err_term = term;
55 break;
56 }
57 }
58
59 return err;
60}
61
62int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
63 struct perf_evsel **err_evsel,
64 struct perf_evsel_config_term **err_term)
65{
66 struct perf_evsel *evsel;
67 int err = 0;
68
69 evlist__for_each_entry(evlist, evsel) {
70 err = perf_evsel__apply_drv_configs(evsel, err_term);
71 if (err) {
72 *err_evsel = evsel;
73 break;
74 }
75 }
76
77 return err;
78}
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h
deleted file mode 100644
index 32bc9babc2e0..000000000000
--- a/tools/perf/util/drv_configs.h
+++ /dev/null
@@ -1,26 +0,0 @@
1/*
2 * drv_configs.h: Interface to apply PMU specific configuration
3 * Copyright (c) 2016-2018, Linaro Ltd.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 */
15
16#ifndef __PERF_DRV_CONFIGS_H
17#define __PERF_DRV_CONFIGS_H
18
19#include "drv_configs.h"
20#include "evlist.h"
21#include "evsel.h"
22
23int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
24 struct perf_evsel **err_evsel,
25 struct perf_evsel_config_term **term);
26#endif
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 62c8cf622607..ba58ba603b69 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -8,8 +8,11 @@
8#include <unistd.h> 8#include <unistd.h>
9#include <errno.h> 9#include <errno.h>
10#include <fcntl.h> 10#include <fcntl.h>
11#include <libgen.h>
11#include "compress.h" 12#include "compress.h"
13#include "namespaces.h"
12#include "path.h" 14#include "path.h"
15#include "map.h"
13#include "symbol.h" 16#include "symbol.h"
14#include "srcline.h" 17#include "srcline.h"
15#include "dso.h" 18#include "dso.h"
@@ -1195,10 +1198,10 @@ struct dso *dso__new(const char *name)
1195 strcpy(dso->name, name); 1198 strcpy(dso->name, name);
1196 dso__set_long_name(dso, dso->name, false); 1199 dso__set_long_name(dso, dso->name, false);
1197 dso__set_short_name(dso, dso->name, false); 1200 dso__set_short_name(dso, dso->name, false);
1198 dso->symbols = dso->symbol_names = RB_ROOT; 1201 dso->symbols = dso->symbol_names = RB_ROOT_CACHED;
1199 dso->data.cache = RB_ROOT; 1202 dso->data.cache = RB_ROOT;
1200 dso->inlined_nodes = RB_ROOT; 1203 dso->inlined_nodes = RB_ROOT_CACHED;
1201 dso->srclines = RB_ROOT; 1204 dso->srclines = RB_ROOT_CACHED;
1202 dso->data.fd = -1; 1205 dso->data.fd = -1;
1203 dso->data.status = DSO_DATA_STATUS_UNKNOWN; 1206 dso->data.status = DSO_DATA_STATUS_UNKNOWN;
1204 dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; 1207 dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
@@ -1467,7 +1470,7 @@ size_t dso__fprintf(struct dso *dso, FILE *fp)
1467 ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT "); 1470 ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
1468 ret += dso__fprintf_buildid(dso, fp); 1471 ret += dso__fprintf_buildid(dso, fp);
1469 ret += fprintf(fp, ")\n"); 1472 ret += fprintf(fp, ")\n");
1470 for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) { 1473 for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) {
1471 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 1474 struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
1472 ret += symbol__fprintf(pos, fp); 1475 ret += symbol__fprintf(pos, fp);
1473 } 1476 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 8c8a7abe809d..bb417c54c25a 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -7,13 +7,14 @@
7#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include <sys/types.h> 8#include <sys/types.h>
9#include <stdbool.h> 9#include <stdbool.h>
10#include <stdio.h>
10#include "rwsem.h" 11#include "rwsem.h"
11#include <linux/types.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include "map.h"
14#include "namespaces.h"
15#include "build-id.h" 13#include "build-id.h"
16 14
15struct machine;
16struct map;
17
17enum dso_binary_type { 18enum dso_binary_type {
18 DSO_BINARY_TYPE__KALLSYMS = 0, 19 DSO_BINARY_TYPE__KALLSYMS = 0,
19 DSO_BINARY_TYPE__GUEST_KALLSYMS, 20 DSO_BINARY_TYPE__GUEST_KALLSYMS,
@@ -140,10 +141,10 @@ struct dso {
140 struct list_head node; 141 struct list_head node;
141 struct rb_node rb_node; /* rbtree node sorted by long name */ 142 struct rb_node rb_node; /* rbtree node sorted by long name */
142 struct rb_root *root; /* root of rbtree that rb_node is in */ 143 struct rb_root *root; /* root of rbtree that rb_node is in */
143 struct rb_root symbols; 144 struct rb_root_cached symbols;
144 struct rb_root symbol_names; 145 struct rb_root_cached symbol_names;
145 struct rb_root inlined_nodes; 146 struct rb_root_cached inlined_nodes;
146 struct rb_root srclines; 147 struct rb_root_cached srclines;
147 struct { 148 struct {
148 u64 addr; 149 u64 addr;
149 struct symbol *symbol; 150 struct symbol *symbol;
@@ -235,7 +236,7 @@ bool dso__loaded(const struct dso *dso);
235 236
236static inline bool dso__has_symbols(const struct dso *dso) 237static inline bool dso__has_symbols(const struct dso *dso)
237{ 238{
238 return !RB_EMPTY_ROOT(&dso->symbols); 239 return !RB_EMPTY_ROOT(&dso->symbols.rb_root);
239} 240}
240 241
241bool dso__sorted_by_name(const struct dso *dso); 242bool dso__sorted_by_name(const struct dso *dso);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 937a5a4f71cc..ba7be74fad6e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -21,9 +21,13 @@
21#include "thread.h" 21#include "thread.h"
22#include "thread_map.h" 22#include "thread_map.h"
23#include "sane_ctype.h" 23#include "sane_ctype.h"
24#include "map.h"
25#include "symbol.h"
24#include "symbol/kallsyms.h" 26#include "symbol/kallsyms.h"
25#include "asm/bug.h" 27#include "asm/bug.h"
26#include "stat.h" 28#include "stat.h"
29#include "session.h"
30#include "bpf-event.h"
27 31
28#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 32#define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500
29 33
@@ -45,6 +49,8 @@ static const char *perf_event__names[] = {
45 [PERF_RECORD_SWITCH] = "SWITCH", 49 [PERF_RECORD_SWITCH] = "SWITCH",
46 [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", 50 [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
47 [PERF_RECORD_NAMESPACES] = "NAMESPACES", 51 [PERF_RECORD_NAMESPACES] = "NAMESPACES",
52 [PERF_RECORD_KSYMBOL] = "KSYMBOL",
53 [PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
48 [PERF_RECORD_HEADER_ATTR] = "ATTR", 54 [PERF_RECORD_HEADER_ATTR] = "ATTR",
49 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", 55 [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
50 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", 56 [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -1329,6 +1335,22 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
1329 return machine__process_switch_event(machine, event); 1335 return machine__process_switch_event(machine, event);
1330} 1336}
1331 1337
1338int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused,
1339 union perf_event *event,
1340 struct perf_sample *sample __maybe_unused,
1341 struct machine *machine)
1342{
1343 return machine__process_ksymbol(machine, event, sample);
1344}
1345
1346int perf_event__process_bpf_event(struct perf_tool *tool __maybe_unused,
1347 union perf_event *event,
1348 struct perf_sample *sample __maybe_unused,
1349 struct machine *machine)
1350{
1351 return machine__process_bpf_event(machine, event, sample);
1352}
1353
1332size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) 1354size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
1333{ 1355{
1334 return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", 1356 return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -1461,6 +1483,21 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
1461 return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); 1483 return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
1462} 1484}
1463 1485
1486size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
1487{
1488 return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
1489 event->ksymbol_event.addr, event->ksymbol_event.len,
1490 event->ksymbol_event.ksym_type,
1491 event->ksymbol_event.flags, event->ksymbol_event.name);
1492}
1493
1494size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp)
1495{
1496 return fprintf(fp, " bpf event with type %u, flags %u, id %u\n",
1497 event->bpf_event.type, event->bpf_event.flags,
1498 event->bpf_event.id);
1499}
1500
1464size_t perf_event__fprintf(union perf_event *event, FILE *fp) 1501size_t perf_event__fprintf(union perf_event *event, FILE *fp)
1465{ 1502{
1466 size_t ret = fprintf(fp, "PERF_RECORD_%s", 1503 size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -1496,6 +1533,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
1496 case PERF_RECORD_LOST: 1533 case PERF_RECORD_LOST:
1497 ret += perf_event__fprintf_lost(event, fp); 1534 ret += perf_event__fprintf_lost(event, fp);
1498 break; 1535 break;
1536 case PERF_RECORD_KSYMBOL:
1537 ret += perf_event__fprintf_ksymbol(event, fp);
1538 break;
1539 case PERF_RECORD_BPF_EVENT:
1540 ret += perf_event__fprintf_bpf_event(event, fp);
1541 break;
1499 default: 1542 default:
1500 ret += fprintf(fp, "\n"); 1543 ret += fprintf(fp, "\n");
1501 } 1544 }
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index eb95f3384958..36ae7e92dab1 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -5,6 +5,7 @@
5#include <limits.h> 5#include <limits.h>
6#include <stdio.h> 6#include <stdio.h>
7#include <linux/kernel.h> 7#include <linux/kernel.h>
8#include <linux/bpf.h>
8 9
9#include "../perf.h" 10#include "../perf.h"
10#include "build-id.h" 11#include "build-id.h"
@@ -84,6 +85,29 @@ struct throttle_event {
84 u64 stream_id; 85 u64 stream_id;
85}; 86};
86 87
88#ifndef KSYM_NAME_LEN
89#define KSYM_NAME_LEN 256
90#endif
91
92struct ksymbol_event {
93 struct perf_event_header header;
94 u64 addr;
95 u32 len;
96 u16 ksym_type;
97 u16 flags;
98 char name[KSYM_NAME_LEN];
99};
100
101struct bpf_event {
102 struct perf_event_header header;
103 u16 type;
104 u16 flags;
105 u32 id;
106
107 /* for bpf_prog types */
108 u8 tag[BPF_TAG_SIZE]; // prog tag
109};
110
87#define PERF_SAMPLE_MASK \ 111#define PERF_SAMPLE_MASK \
88 (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ 112 (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
89 PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ 113 PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
@@ -137,26 +161,7 @@ struct ip_callchain {
137 u64 ips[0]; 161 u64 ips[0];
138}; 162};
139 163
140struct branch_flags { 164struct branch_stack;
141 u64 mispred:1;
142 u64 predicted:1;
143 u64 in_tx:1;
144 u64 abort:1;
145 u64 cycles:16;
146 u64 type:4;
147 u64 reserved:40;
148};
149
150struct branch_entry {
151 u64 from;
152 u64 to;
153 struct branch_flags flags;
154};
155
156struct branch_stack {
157 u64 nr;
158 struct branch_entry entries[0];
159};
160 165
161enum { 166enum {
162 PERF_IP_FLAG_BRANCH = 1ULL << 0, 167 PERF_IP_FLAG_BRANCH = 1ULL << 0,
@@ -527,8 +532,9 @@ struct auxtrace_error_event {
527 u32 cpu; 532 u32 cpu;
528 u32 pid; 533 u32 pid;
529 u32 tid; 534 u32 tid;
530 u32 reserved__; /* For alignment */ 535 u32 fmt;
531 u64 ip; 536 u64 ip;
537 u64 time;
532 char msg[MAX_AUXTRACE_ERROR_MSG]; 538 char msg[MAX_AUXTRACE_ERROR_MSG];
533}; 539};
534 540
@@ -651,6 +657,8 @@ union perf_event {
651 struct stat_round_event stat_round; 657 struct stat_round_event stat_round;
652 struct time_conv_event time_conv; 658 struct time_conv_event time_conv;
653 struct feature_event feat; 659 struct feature_event feat;
660 struct ksymbol_event ksymbol_event;
661 struct bpf_event bpf_event;
654}; 662};
655 663
656void perf_event__print_totals(void); 664void perf_event__print_totals(void);
@@ -748,6 +756,14 @@ int perf_event__process_exit(struct perf_tool *tool,
748 union perf_event *event, 756 union perf_event *event,
749 struct perf_sample *sample, 757 struct perf_sample *sample,
750 struct machine *machine); 758 struct machine *machine);
759int perf_event__process_ksymbol(struct perf_tool *tool,
760 union perf_event *event,
761 struct perf_sample *sample,
762 struct machine *machine);
763int perf_event__process_bpf_event(struct perf_tool *tool,
764 union perf_event *event,
765 struct perf_sample *sample,
766 struct machine *machine);
751int perf_tool__process_synth_event(struct perf_tool *tool, 767int perf_tool__process_synth_event(struct perf_tool *tool,
752 union perf_event *event, 768 union perf_event *event,
753 struct machine *machine, 769 struct machine *machine,
@@ -811,6 +827,8 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
811size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); 827size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
812size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); 828size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
813size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); 829size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
830size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
831size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp);
814size_t perf_event__fprintf(union perf_event *event, FILE *fp); 832size_t perf_event__fprintf(union perf_event *event, FILE *fp);
815 833
816int kallsyms__get_function_start(const char *kallsyms_filename, 834int kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 8c902276d4b4..08cedb643ea6 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1022 */ 1022 */
1023int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1023int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1024 unsigned int auxtrace_pages, 1024 unsigned int auxtrace_pages,
1025 bool auxtrace_overwrite, int nr_cblocks) 1025 bool auxtrace_overwrite, int nr_cblocks, int affinity)
1026{ 1026{
1027 struct perf_evsel *evsel; 1027 struct perf_evsel *evsel;
1028 const struct cpu_map *cpus = evlist->cpus; 1028 const struct cpu_map *cpus = evlist->cpus;
@@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1032 * Its value is decided by evsel's write_backward. 1032 * Its value is decided by evsel's write_backward.
1033 * So &mp should not be passed through const pointer. 1033 * So &mp should not be passed through const pointer.
1034 */ 1034 */
1035 struct mmap_params mp = { .nr_cblocks = nr_cblocks }; 1035 struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity };
1036 1036
1037 if (!evlist->mmap) 1037 if (!evlist->mmap)
1038 evlist->mmap = perf_evlist__alloc_mmap(evlist, false); 1038 evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1064 1064
1065int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) 1065int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
1066{ 1066{
1067 return perf_evlist__mmap_ex(evlist, pages, 0, false, 0); 1067 return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS);
1068} 1068}
1069 1069
1070int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1070int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 868294491194..744906dd4887 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -49,6 +49,9 @@ struct perf_evlist {
49 struct perf_evsel *selected; 49 struct perf_evsel *selected;
50 struct events_stats stats; 50 struct events_stats stats;
51 struct perf_env *env; 51 struct perf_env *env;
52 void (*trace_event_sample_raw)(struct perf_evlist *evlist,
53 union perf_event *event,
54 struct perf_sample *sample);
52 u64 first_sample_time; 55 u64 first_sample_time;
53 u64 last_sample_time; 56 u64 last_sample_time;
54}; 57};
@@ -162,7 +165,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
162 165
163int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 166int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
164 unsigned int auxtrace_pages, 167 unsigned int auxtrace_pages,
165 bool auxtrace_overwrite, int nr_cblocks); 168 bool auxtrace_overwrite, int nr_cblocks, int affinity);
166int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); 169int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
167void perf_evlist__munmap(struct perf_evlist *evlist); 170void perf_evlist__munmap(struct perf_evlist *evlist);
168 171
@@ -314,5 +317,4 @@ void perf_evlist__force_leader(struct perf_evlist *evlist);
314 317
315struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, 318struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist,
316 struct perf_evsel *evsel); 319 struct perf_evsel *evsel);
317
318#endif /* __PERF_EVLIST_H */ 320#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index dbc0466db368..dfe2958e6287 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -956,6 +956,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
956 attr->sample_freq = 0; 956 attr->sample_freq = 0;
957 attr->sample_period = 0; 957 attr->sample_period = 0;
958 attr->write_backward = 0; 958 attr->write_backward = 0;
959
960 /*
961 * We don't get sample for slave events, we make them
962 * when delivering group leader sample. Set the slave
963 * event to follow the master sample_type to ease up
964 * report.
965 */
966 attr->sample_type = leader->attr.sample_type;
959 } 967 }
960 968
961 if (opts->no_samples) 969 if (opts->no_samples)
@@ -1035,6 +1043,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
1035 attr->mmap = track; 1043 attr->mmap = track;
1036 attr->mmap2 = track && !perf_missing_features.mmap2; 1044 attr->mmap2 = track && !perf_missing_features.mmap2;
1037 attr->comm = track; 1045 attr->comm = track;
1046 attr->ksymbol = track && !perf_missing_features.ksymbol;
1047 attr->bpf_event = track && opts->bpf_event &&
1048 !perf_missing_features.bpf_event;
1038 1049
1039 if (opts->record_namespaces) 1050 if (opts->record_namespaces)
1040 attr->namespaces = track; 1051 attr->namespaces = track;
@@ -1652,6 +1663,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
1652 PRINT_ATTRf(context_switch, p_unsigned); 1663 PRINT_ATTRf(context_switch, p_unsigned);
1653 PRINT_ATTRf(write_backward, p_unsigned); 1664 PRINT_ATTRf(write_backward, p_unsigned);
1654 PRINT_ATTRf(namespaces, p_unsigned); 1665 PRINT_ATTRf(namespaces, p_unsigned);
1666 PRINT_ATTRf(ksymbol, p_unsigned);
1667 PRINT_ATTRf(bpf_event, p_unsigned);
1655 1668
1656 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); 1669 PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
1657 PRINT_ATTRf(bp_type, p_unsigned); 1670 PRINT_ATTRf(bp_type, p_unsigned);
@@ -1811,6 +1824,10 @@ fallback_missing_features:
1811 PERF_SAMPLE_BRANCH_NO_CYCLES); 1824 PERF_SAMPLE_BRANCH_NO_CYCLES);
1812 if (perf_missing_features.group_read && evsel->attr.inherit) 1825 if (perf_missing_features.group_read && evsel->attr.inherit)
1813 evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); 1826 evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
1827 if (perf_missing_features.ksymbol)
1828 evsel->attr.ksymbol = 0;
1829 if (perf_missing_features.bpf_event)
1830 evsel->attr.bpf_event = 0;
1814retry_sample_id: 1831retry_sample_id:
1815 if (perf_missing_features.sample_id_all) 1832 if (perf_missing_features.sample_id_all)
1816 evsel->attr.sample_id_all = 0; 1833 evsel->attr.sample_id_all = 0;
@@ -1930,7 +1947,15 @@ try_fallback:
1930 * Must probe features in the order they were added to the 1947 * Must probe features in the order they were added to the
1931 * perf_event_attr interface. 1948 * perf_event_attr interface.
1932 */ 1949 */
1933 if (!perf_missing_features.write_backward && evsel->attr.write_backward) { 1950 if (!perf_missing_features.bpf_event && evsel->attr.bpf_event) {
1951 perf_missing_features.bpf_event = true;
1952 pr_debug2("switching off bpf_event\n");
1953 goto fallback_missing_features;
1954 } else if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) {
1955 perf_missing_features.ksymbol = true;
1956 pr_debug2("switching off ksymbol\n");
1957 goto fallback_missing_features;
1958 } else if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
1934 perf_missing_features.write_backward = true; 1959 perf_missing_features.write_backward = true;
1935 pr_debug2("switching off write_backward\n"); 1960 pr_debug2("switching off write_backward\n");
1936 goto out_close; 1961 goto out_close;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 82a289ce8b0c..cc578e02e08f 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -8,7 +8,7 @@
8#include <linux/perf_event.h> 8#include <linux/perf_event.h>
9#include <linux/types.h> 9#include <linux/types.h>
10#include "xyarray.h" 10#include "xyarray.h"
11#include "symbol.h" 11#include "symbol_conf.h"
12#include "cpumap.h" 12#include "cpumap.h"
13#include "counts.h" 13#include "counts.h"
14 14
@@ -168,6 +168,8 @@ struct perf_missing_features {
168 bool lbr_flags; 168 bool lbr_flags;
169 bool write_backward; 169 bool write_backward;
170 bool group_read; 170 bool group_read;
171 bool ksymbol;
172 bool bpf_event;
171}; 173};
172 174
173extern struct perf_missing_features perf_missing_features; 175extern struct perf_missing_features perf_missing_features;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index dec6d218c31c..01b324c275b9 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -39,6 +39,7 @@
39#include "tool.h" 39#include "tool.h"
40#include "time-utils.h" 40#include "time-utils.h"
41#include "units.h" 41#include "units.h"
42#include "cputopo.h"
42 43
43#include "sane_ctype.h" 44#include "sane_ctype.h"
44 45
@@ -526,17 +527,11 @@ static int write_event_desc(struct feat_fd *ff,
526static int write_cmdline(struct feat_fd *ff, 527static int write_cmdline(struct feat_fd *ff,
527 struct perf_evlist *evlist __maybe_unused) 528 struct perf_evlist *evlist __maybe_unused)
528{ 529{
529 char buf[MAXPATHLEN]; 530 char pbuf[MAXPATHLEN], *buf;
530 u32 n; 531 int i, ret, n;
531 int i, ret;
532 532
533 /* actual path to perf binary */ 533 /* actual path to perf binary */
534 ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); 534 buf = perf_exe(pbuf, MAXPATHLEN);
535 if (ret <= 0)
536 return -1;
537
538 /* readlink() does not add null termination */
539 buf[ret] = '\0';
540 535
541 /* account for binary path */ 536 /* account for binary path */
542 n = perf_env.nr_cmdline + 1; 537 n = perf_env.nr_cmdline + 1;
@@ -557,160 +552,15 @@ static int write_cmdline(struct feat_fd *ff,
557 return 0; 552 return 0;
558} 553}
559 554
560#define CORE_SIB_FMT \
561 "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list"
562#define THRD_SIB_FMT \
563 "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
564
565struct cpu_topo {
566 u32 cpu_nr;
567 u32 core_sib;
568 u32 thread_sib;
569 char **core_siblings;
570 char **thread_siblings;
571};
572
573static int build_cpu_topo(struct cpu_topo *tp, int cpu)
574{
575 FILE *fp;
576 char filename[MAXPATHLEN];
577 char *buf = NULL, *p;
578 size_t len = 0;
579 ssize_t sret;
580 u32 i = 0;
581 int ret = -1;
582
583 sprintf(filename, CORE_SIB_FMT, cpu);
584 fp = fopen(filename, "r");
585 if (!fp)
586 goto try_threads;
587
588 sret = getline(&buf, &len, fp);
589 fclose(fp);
590 if (sret <= 0)
591 goto try_threads;
592
593 p = strchr(buf, '\n');
594 if (p)
595 *p = '\0';
596
597 for (i = 0; i < tp->core_sib; i++) {
598 if (!strcmp(buf, tp->core_siblings[i]))
599 break;
600 }
601 if (i == tp->core_sib) {
602 tp->core_siblings[i] = buf;
603 tp->core_sib++;
604 buf = NULL;
605 len = 0;
606 }
607 ret = 0;
608
609try_threads:
610 sprintf(filename, THRD_SIB_FMT, cpu);
611 fp = fopen(filename, "r");
612 if (!fp)
613 goto done;
614
615 if (getline(&buf, &len, fp) <= 0)
616 goto done;
617
618 p = strchr(buf, '\n');
619 if (p)
620 *p = '\0';
621
622 for (i = 0; i < tp->thread_sib; i++) {
623 if (!strcmp(buf, tp->thread_siblings[i]))
624 break;
625 }
626 if (i == tp->thread_sib) {
627 tp->thread_siblings[i] = buf;
628 tp->thread_sib++;
629 buf = NULL;
630 }
631 ret = 0;
632done:
633 if(fp)
634 fclose(fp);
635 free(buf);
636 return ret;
637}
638
639static void free_cpu_topo(struct cpu_topo *tp)
640{
641 u32 i;
642
643 if (!tp)
644 return;
645
646 for (i = 0 ; i < tp->core_sib; i++)
647 zfree(&tp->core_siblings[i]);
648
649 for (i = 0 ; i < tp->thread_sib; i++)
650 zfree(&tp->thread_siblings[i]);
651
652 free(tp);
653}
654
655static struct cpu_topo *build_cpu_topology(void)
656{
657 struct cpu_topo *tp = NULL;
658 void *addr;
659 u32 nr, i;
660 size_t sz;
661 long ncpus;
662 int ret = -1;
663 struct cpu_map *map;
664
665 ncpus = cpu__max_present_cpu();
666
667 /* build online CPU map */
668 map = cpu_map__new(NULL);
669 if (map == NULL) {
670 pr_debug("failed to get system cpumap\n");
671 return NULL;
672 }
673
674 nr = (u32)(ncpus & UINT_MAX);
675
676 sz = nr * sizeof(char *);
677 addr = calloc(1, sizeof(*tp) + 2 * sz);
678 if (!addr)
679 goto out_free;
680
681 tp = addr;
682 tp->cpu_nr = nr;
683 addr += sizeof(*tp);
684 tp->core_siblings = addr;
685 addr += sz;
686 tp->thread_siblings = addr;
687
688 for (i = 0; i < nr; i++) {
689 if (!cpu_map__has(map, i))
690 continue;
691
692 ret = build_cpu_topo(tp, i);
693 if (ret < 0)
694 break;
695 }
696
697out_free:
698 cpu_map__put(map);
699 if (ret) {
700 free_cpu_topo(tp);
701 tp = NULL;
702 }
703 return tp;
704}
705 555
706static int write_cpu_topology(struct feat_fd *ff, 556static int write_cpu_topology(struct feat_fd *ff,
707 struct perf_evlist *evlist __maybe_unused) 557 struct perf_evlist *evlist __maybe_unused)
708{ 558{
709 struct cpu_topo *tp; 559 struct cpu_topology *tp;
710 u32 i; 560 u32 i;
711 int ret, j; 561 int ret, j;
712 562
713 tp = build_cpu_topology(); 563 tp = cpu_topology__new();
714 if (!tp) 564 if (!tp)
715 return -1; 565 return -1;
716 566
@@ -748,7 +598,7 @@ static int write_cpu_topology(struct feat_fd *ff,
748 return ret; 598 return ret;
749 } 599 }
750done: 600done:
751 free_cpu_topo(tp); 601 cpu_topology__delete(tp);
752 return ret; 602 return ret;
753} 603}
754 604
@@ -783,112 +633,45 @@ static int write_total_mem(struct feat_fd *ff,
783 return ret; 633 return ret;
784} 634}
785 635
786static int write_topo_node(struct feat_fd *ff, int node)
787{
788 char str[MAXPATHLEN];
789 char field[32];
790 char *buf = NULL, *p;
791 size_t len = 0;
792 FILE *fp;
793 u64 mem_total, mem_free, mem;
794 int ret = -1;
795
796 sprintf(str, "/sys/devices/system/node/node%d/meminfo", node);
797 fp = fopen(str, "r");
798 if (!fp)
799 return -1;
800
801 while (getline(&buf, &len, fp) > 0) {
802 /* skip over invalid lines */
803 if (!strchr(buf, ':'))
804 continue;
805 if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
806 goto done;
807 if (!strcmp(field, "MemTotal:"))
808 mem_total = mem;
809 if (!strcmp(field, "MemFree:"))
810 mem_free = mem;
811 }
812
813 fclose(fp);
814 fp = NULL;
815
816 ret = do_write(ff, &mem_total, sizeof(u64));
817 if (ret)
818 goto done;
819
820 ret = do_write(ff, &mem_free, sizeof(u64));
821 if (ret)
822 goto done;
823
824 ret = -1;
825 sprintf(str, "/sys/devices/system/node/node%d/cpulist", node);
826
827 fp = fopen(str, "r");
828 if (!fp)
829 goto done;
830
831 if (getline(&buf, &len, fp) <= 0)
832 goto done;
833
834 p = strchr(buf, '\n');
835 if (p)
836 *p = '\0';
837
838 ret = do_write_string(ff, buf);
839done:
840 free(buf);
841 if (fp)
842 fclose(fp);
843 return ret;
844}
845
846static int write_numa_topology(struct feat_fd *ff, 636static int write_numa_topology(struct feat_fd *ff,
847 struct perf_evlist *evlist __maybe_unused) 637 struct perf_evlist *evlist __maybe_unused)
848{ 638{
849 char *buf = NULL; 639 struct numa_topology *tp;
850 size_t len = 0;
851 FILE *fp;
852 struct cpu_map *node_map = NULL;
853 char *c;
854 u32 nr, i, j;
855 int ret = -1; 640 int ret = -1;
641 u32 i;
856 642
857 fp = fopen("/sys/devices/system/node/online", "r"); 643 tp = numa_topology__new();
858 if (!fp) 644 if (!tp)
859 return -1; 645 return -ENOMEM;
860
861 if (getline(&buf, &len, fp) <= 0)
862 goto done;
863 646
864 c = strchr(buf, '\n'); 647 ret = do_write(ff, &tp->nr, sizeof(u32));
865 if (c) 648 if (ret < 0)
866 *c = '\0'; 649 goto err;
867 650
868 node_map = cpu_map__new(buf); 651 for (i = 0; i < tp->nr; i++) {
869 if (!node_map) 652 struct numa_topology_node *n = &tp->nodes[i];
870 goto done;
871 653
872 nr = (u32)node_map->nr; 654 ret = do_write(ff, &n->node, sizeof(u32));
655 if (ret < 0)
656 goto err;
873 657
874 ret = do_write(ff, &nr, sizeof(nr)); 658 ret = do_write(ff, &n->mem_total, sizeof(u64));
875 if (ret < 0) 659 if (ret)
876 goto done; 660 goto err;
877 661
878 for (i = 0; i < nr; i++) { 662 ret = do_write(ff, &n->mem_free, sizeof(u64));
879 j = (u32)node_map->map[i]; 663 if (ret)
880 ret = do_write(ff, &j, sizeof(j)); 664 goto err;
881 if (ret < 0)
882 break;
883 665
884 ret = write_topo_node(ff, i); 666 ret = do_write_string(ff, n->cpus);
885 if (ret < 0) 667 if (ret < 0)
886 break; 668 goto err;
887 } 669 }
888done: 670
889 free(buf); 671 ret = 0;
890 fclose(fp); 672
891 cpu_map__put(node_map); 673err:
674 numa_topology__delete(tp);
892 return ret; 675 return ret;
893} 676}
894 677
@@ -1042,11 +825,9 @@ static int write_cpuid(struct feat_fd *ff,
1042 int ret; 825 int ret;
1043 826
1044 ret = get_cpuid(buffer, sizeof(buffer)); 827 ret = get_cpuid(buffer, sizeof(buffer));
1045 if (!ret) 828 if (ret)
1046 goto write_it; 829 return -1;
1047 830
1048 return -1;
1049write_it:
1050 return do_write_string(ff, buffer); 831 return do_write_string(ff, buffer);
1051} 832}
1052 833
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 8aad8330e392..669f961316f0 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1,4 +1,5 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "callchain.h"
2#include "util.h" 3#include "util.h"
3#include "build-id.h" 4#include "build-id.h"
4#include "hist.h" 5#include "hist.h"
@@ -11,6 +12,7 @@
11#include "evsel.h" 12#include "evsel.h"
12#include "annotate.h" 13#include "annotate.h"
13#include "srcline.h" 14#include "srcline.h"
15#include "symbol.h"
14#include "thread.h" 16#include "thread.h"
15#include "ui/progress.h" 17#include "ui/progress.h"
16#include <errno.h> 18#include <errno.h>
@@ -209,7 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
209 211
210void hists__output_recalc_col_len(struct hists *hists, int max_rows) 212void hists__output_recalc_col_len(struct hists *hists, int max_rows)
211{ 213{
212 struct rb_node *next = rb_first(&hists->entries); 214 struct rb_node *next = rb_first_cached(&hists->entries);
213 struct hist_entry *n; 215 struct hist_entry *n;
214 int row = 0; 216 int row = 0;
215 217
@@ -296,7 +298,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
296 298
297 if (!he->leaf) { 299 if (!he->leaf) {
298 struct hist_entry *child; 300 struct hist_entry *child;
299 struct rb_node *node = rb_first(&he->hroot_out); 301 struct rb_node *node = rb_first_cached(&he->hroot_out);
300 while (node) { 302 while (node) {
301 child = rb_entry(node, struct hist_entry, rb_node); 303 child = rb_entry(node, struct hist_entry, rb_node);
302 node = rb_next(node); 304 node = rb_next(node);
@@ -311,8 +313,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
311 313
312static void hists__delete_entry(struct hists *hists, struct hist_entry *he) 314static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
313{ 315{
314 struct rb_root *root_in; 316 struct rb_root_cached *root_in;
315 struct rb_root *root_out; 317 struct rb_root_cached *root_out;
316 318
317 if (he->parent_he) { 319 if (he->parent_he) {
318 root_in = &he->parent_he->hroot_in; 320 root_in = &he->parent_he->hroot_in;
@@ -325,8 +327,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
325 root_out = &hists->entries; 327 root_out = &hists->entries;
326 } 328 }
327 329
328 rb_erase(&he->rb_node_in, root_in); 330 rb_erase_cached(&he->rb_node_in, root_in);
329 rb_erase(&he->rb_node, root_out); 331 rb_erase_cached(&he->rb_node, root_out);
330 332
331 --hists->nr_entries; 333 --hists->nr_entries;
332 if (!he->filtered) 334 if (!he->filtered)
@@ -337,7 +339,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
337 339
338void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) 340void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
339{ 341{
340 struct rb_node *next = rb_first(&hists->entries); 342 struct rb_node *next = rb_first_cached(&hists->entries);
341 struct hist_entry *n; 343 struct hist_entry *n;
342 344
343 while (next) { 345 while (next) {
@@ -353,7 +355,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
353 355
354void hists__delete_entries(struct hists *hists) 356void hists__delete_entries(struct hists *hists)
355{ 357{
356 struct rb_node *next = rb_first(&hists->entries); 358 struct rb_node *next = rb_first_cached(&hists->entries);
357 struct hist_entry *n; 359 struct hist_entry *n;
358 360
359 while (next) { 361 while (next) {
@@ -435,8 +437,8 @@ static int hist_entry__init(struct hist_entry *he,
435 } 437 }
436 INIT_LIST_HEAD(&he->pairs.node); 438 INIT_LIST_HEAD(&he->pairs.node);
437 thread__get(he->thread); 439 thread__get(he->thread);
438 he->hroot_in = RB_ROOT; 440 he->hroot_in = RB_ROOT_CACHED;
439 he->hroot_out = RB_ROOT; 441 he->hroot_out = RB_ROOT_CACHED;
440 442
441 if (!symbol_conf.report_hierarchy) 443 if (!symbol_conf.report_hierarchy)
442 he->leaf = true; 444 he->leaf = true;
@@ -513,8 +515,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
513 int64_t cmp; 515 int64_t cmp;
514 u64 period = entry->stat.period; 516 u64 period = entry->stat.period;
515 u64 weight = entry->stat.weight; 517 u64 weight = entry->stat.weight;
518 bool leftmost = true;
516 519
517 p = &hists->entries_in->rb_node; 520 p = &hists->entries_in->rb_root.rb_node;
518 521
519 while (*p != NULL) { 522 while (*p != NULL) {
520 parent = *p; 523 parent = *p;
@@ -557,8 +560,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
557 560
558 if (cmp < 0) 561 if (cmp < 0)
559 p = &(*p)->rb_left; 562 p = &(*p)->rb_left;
560 else 563 else {
561 p = &(*p)->rb_right; 564 p = &(*p)->rb_right;
565 leftmost = false;
566 }
562 } 567 }
563 568
564 he = hist_entry__new(entry, sample_self); 569 he = hist_entry__new(entry, sample_self);
@@ -570,7 +575,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
570 hists->nr_entries++; 575 hists->nr_entries++;
571 576
572 rb_link_node(&he->rb_node_in, parent, p); 577 rb_link_node(&he->rb_node_in, parent, p);
573 rb_insert_color(&he->rb_node_in, hists->entries_in); 578 rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost);
574out: 579out:
575 if (sample_self) 580 if (sample_self)
576 he_stat__add_cpumode_period(&he->stat, al->cpumode, period); 581 he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
@@ -1279,16 +1284,17 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
1279} 1284}
1280 1285
1281static struct hist_entry *hierarchy_insert_entry(struct hists *hists, 1286static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
1282 struct rb_root *root, 1287 struct rb_root_cached *root,
1283 struct hist_entry *he, 1288 struct hist_entry *he,
1284 struct hist_entry *parent_he, 1289 struct hist_entry *parent_he,
1285 struct perf_hpp_list *hpp_list) 1290 struct perf_hpp_list *hpp_list)
1286{ 1291{
1287 struct rb_node **p = &root->rb_node; 1292 struct rb_node **p = &root->rb_root.rb_node;
1288 struct rb_node *parent = NULL; 1293 struct rb_node *parent = NULL;
1289 struct hist_entry *iter, *new; 1294 struct hist_entry *iter, *new;
1290 struct perf_hpp_fmt *fmt; 1295 struct perf_hpp_fmt *fmt;
1291 int64_t cmp; 1296 int64_t cmp;
1297 bool leftmost = true;
1292 1298
1293 while (*p != NULL) { 1299 while (*p != NULL) {
1294 parent = *p; 1300 parent = *p;
@@ -1308,8 +1314,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
1308 1314
1309 if (cmp < 0) 1315 if (cmp < 0)
1310 p = &parent->rb_left; 1316 p = &parent->rb_left;
1311 else 1317 else {
1312 p = &parent->rb_right; 1318 p = &parent->rb_right;
1319 leftmost = false;
1320 }
1313 } 1321 }
1314 1322
1315 new = hist_entry__new(he, true); 1323 new = hist_entry__new(he, true);
@@ -1343,12 +1351,12 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
1343 } 1351 }
1344 1352
1345 rb_link_node(&new->rb_node_in, parent, p); 1353 rb_link_node(&new->rb_node_in, parent, p);
1346 rb_insert_color(&new->rb_node_in, root); 1354 rb_insert_color_cached(&new->rb_node_in, root, leftmost);
1347 return new; 1355 return new;
1348} 1356}
1349 1357
1350static int hists__hierarchy_insert_entry(struct hists *hists, 1358static int hists__hierarchy_insert_entry(struct hists *hists,
1351 struct rb_root *root, 1359 struct rb_root_cached *root,
1352 struct hist_entry *he) 1360 struct hist_entry *he)
1353{ 1361{
1354 struct perf_hpp_list_node *node; 1362 struct perf_hpp_list_node *node;
@@ -1395,13 +1403,14 @@ static int hists__hierarchy_insert_entry(struct hists *hists,
1395} 1403}
1396 1404
1397static int hists__collapse_insert_entry(struct hists *hists, 1405static int hists__collapse_insert_entry(struct hists *hists,
1398 struct rb_root *root, 1406 struct rb_root_cached *root,
1399 struct hist_entry *he) 1407 struct hist_entry *he)
1400{ 1408{
1401 struct rb_node **p = &root->rb_node; 1409 struct rb_node **p = &root->rb_root.rb_node;
1402 struct rb_node *parent = NULL; 1410 struct rb_node *parent = NULL;
1403 struct hist_entry *iter; 1411 struct hist_entry *iter;
1404 int64_t cmp; 1412 int64_t cmp;
1413 bool leftmost = true;
1405 1414
1406 if (symbol_conf.report_hierarchy) 1415 if (symbol_conf.report_hierarchy)
1407 return hists__hierarchy_insert_entry(hists, root, he); 1416 return hists__hierarchy_insert_entry(hists, root, he);
@@ -1432,19 +1441,21 @@ static int hists__collapse_insert_entry(struct hists *hists,
1432 1441
1433 if (cmp < 0) 1442 if (cmp < 0)
1434 p = &(*p)->rb_left; 1443 p = &(*p)->rb_left;
1435 else 1444 else {
1436 p = &(*p)->rb_right; 1445 p = &(*p)->rb_right;
1446 leftmost = false;
1447 }
1437 } 1448 }
1438 hists->nr_entries++; 1449 hists->nr_entries++;
1439 1450
1440 rb_link_node(&he->rb_node_in, parent, p); 1451 rb_link_node(&he->rb_node_in, parent, p);
1441 rb_insert_color(&he->rb_node_in, root); 1452 rb_insert_color_cached(&he->rb_node_in, root, leftmost);
1442 return 1; 1453 return 1;
1443} 1454}
1444 1455
1445struct rb_root *hists__get_rotate_entries_in(struct hists *hists) 1456struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists)
1446{ 1457{
1447 struct rb_root *root; 1458 struct rb_root_cached *root;
1448 1459
1449 pthread_mutex_lock(&hists->lock); 1460 pthread_mutex_lock(&hists->lock);
1450 1461
@@ -1467,7 +1478,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
1467 1478
1468int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) 1479int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
1469{ 1480{
1470 struct rb_root *root; 1481 struct rb_root_cached *root;
1471 struct rb_node *next; 1482 struct rb_node *next;
1472 struct hist_entry *n; 1483 struct hist_entry *n;
1473 int ret; 1484 int ret;
@@ -1479,7 +1490,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
1479 1490
1480 root = hists__get_rotate_entries_in(hists); 1491 root = hists__get_rotate_entries_in(hists);
1481 1492
1482 next = rb_first(root); 1493 next = rb_first_cached(root);
1483 1494
1484 while (next) { 1495 while (next) {
1485 if (session_done()) 1496 if (session_done())
@@ -1487,7 +1498,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
1487 n = rb_entry(next, struct hist_entry, rb_node_in); 1498 n = rb_entry(next, struct hist_entry, rb_node_in);
1488 next = rb_next(&n->rb_node_in); 1499 next = rb_next(&n->rb_node_in);
1489 1500
1490 rb_erase(&n->rb_node_in, root); 1501 rb_erase_cached(&n->rb_node_in, root);
1491 ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); 1502 ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
1492 if (ret < 0) 1503 if (ret < 0)
1493 return -1; 1504 return -1;
@@ -1558,7 +1569,7 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
1558 struct rb_node *node; 1569 struct rb_node *node;
1559 struct hist_entry *he; 1570 struct hist_entry *he;
1560 1571
1561 node = rb_first(&hists->entries); 1572 node = rb_first_cached(&hists->entries);
1562 1573
1563 hists->stats.total_period = 0; 1574 hists->stats.total_period = 0;
1564 hists->stats.total_non_filtered_period = 0; 1575 hists->stats.total_non_filtered_period = 0;
@@ -1578,13 +1589,14 @@ static void hierarchy_recalc_total_periods(struct hists *hists)
1578 } 1589 }
1579} 1590}
1580 1591
1581static void hierarchy_insert_output_entry(struct rb_root *root, 1592static void hierarchy_insert_output_entry(struct rb_root_cached *root,
1582 struct hist_entry *he) 1593 struct hist_entry *he)
1583{ 1594{
1584 struct rb_node **p = &root->rb_node; 1595 struct rb_node **p = &root->rb_root.rb_node;
1585 struct rb_node *parent = NULL; 1596 struct rb_node *parent = NULL;
1586 struct hist_entry *iter; 1597 struct hist_entry *iter;
1587 struct perf_hpp_fmt *fmt; 1598 struct perf_hpp_fmt *fmt;
1599 bool leftmost = true;
1588 1600
1589 while (*p != NULL) { 1601 while (*p != NULL) {
1590 parent = *p; 1602 parent = *p;
@@ -1592,12 +1604,14 @@ static void hierarchy_insert_output_entry(struct rb_root *root,
1592 1604
1593 if (hist_entry__sort(he, iter) > 0) 1605 if (hist_entry__sort(he, iter) > 0)
1594 p = &parent->rb_left; 1606 p = &parent->rb_left;
1595 else 1607 else {
1596 p = &parent->rb_right; 1608 p = &parent->rb_right;
1609 leftmost = false;
1610 }
1597 } 1611 }
1598 1612
1599 rb_link_node(&he->rb_node, parent, p); 1613 rb_link_node(&he->rb_node, parent, p);
1600 rb_insert_color(&he->rb_node, root); 1614 rb_insert_color_cached(&he->rb_node, root, leftmost);
1601 1615
1602 /* update column width of dynamic entry */ 1616 /* update column width of dynamic entry */
1603 perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { 1617 perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
@@ -1608,16 +1622,16 @@ static void hierarchy_insert_output_entry(struct rb_root *root,
1608 1622
1609static void hists__hierarchy_output_resort(struct hists *hists, 1623static void hists__hierarchy_output_resort(struct hists *hists,
1610 struct ui_progress *prog, 1624 struct ui_progress *prog,
1611 struct rb_root *root_in, 1625 struct rb_root_cached *root_in,
1612 struct rb_root *root_out, 1626 struct rb_root_cached *root_out,
1613 u64 min_callchain_hits, 1627 u64 min_callchain_hits,
1614 bool use_callchain) 1628 bool use_callchain)
1615{ 1629{
1616 struct rb_node *node; 1630 struct rb_node *node;
1617 struct hist_entry *he; 1631 struct hist_entry *he;
1618 1632
1619 *root_out = RB_ROOT; 1633 *root_out = RB_ROOT_CACHED;
1620 node = rb_first(root_in); 1634 node = rb_first_cached(root_in);
1621 1635
1622 while (node) { 1636 while (node) {
1623 he = rb_entry(node, struct hist_entry, rb_node_in); 1637 he = rb_entry(node, struct hist_entry, rb_node_in);
@@ -1660,15 +1674,16 @@ static void hists__hierarchy_output_resort(struct hists *hists,
1660 } 1674 }
1661} 1675}
1662 1676
1663static void __hists__insert_output_entry(struct rb_root *entries, 1677static void __hists__insert_output_entry(struct rb_root_cached *entries,
1664 struct hist_entry *he, 1678 struct hist_entry *he,
1665 u64 min_callchain_hits, 1679 u64 min_callchain_hits,
1666 bool use_callchain) 1680 bool use_callchain)
1667{ 1681{
1668 struct rb_node **p = &entries->rb_node; 1682 struct rb_node **p = &entries->rb_root.rb_node;
1669 struct rb_node *parent = NULL; 1683 struct rb_node *parent = NULL;
1670 struct hist_entry *iter; 1684 struct hist_entry *iter;
1671 struct perf_hpp_fmt *fmt; 1685 struct perf_hpp_fmt *fmt;
1686 bool leftmost = true;
1672 1687
1673 if (use_callchain) { 1688 if (use_callchain) {
1674 if (callchain_param.mode == CHAIN_GRAPH_REL) { 1689 if (callchain_param.mode == CHAIN_GRAPH_REL) {
@@ -1689,12 +1704,14 @@ static void __hists__insert_output_entry(struct rb_root *entries,
1689 1704
1690 if (hist_entry__sort(he, iter) > 0) 1705 if (hist_entry__sort(he, iter) > 0)
1691 p = &(*p)->rb_left; 1706 p = &(*p)->rb_left;
1692 else 1707 else {
1693 p = &(*p)->rb_right; 1708 p = &(*p)->rb_right;
1709 leftmost = false;
1710 }
1694 } 1711 }
1695 1712
1696 rb_link_node(&he->rb_node, parent, p); 1713 rb_link_node(&he->rb_node, parent, p);
1697 rb_insert_color(&he->rb_node, entries); 1714 rb_insert_color_cached(&he->rb_node, entries, leftmost);
1698 1715
1699 perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { 1716 perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
1700 if (perf_hpp__is_dynamic_entry(fmt) && 1717 if (perf_hpp__is_dynamic_entry(fmt) &&
@@ -1704,9 +1721,10 @@ static void __hists__insert_output_entry(struct rb_root *entries,
1704} 1721}
1705 1722
1706static void output_resort(struct hists *hists, struct ui_progress *prog, 1723static void output_resort(struct hists *hists, struct ui_progress *prog,
1707 bool use_callchain, hists__resort_cb_t cb) 1724 bool use_callchain, hists__resort_cb_t cb,
1725 void *cb_arg)
1708{ 1726{
1709 struct rb_root *root; 1727 struct rb_root_cached *root;
1710 struct rb_node *next; 1728 struct rb_node *next;
1711 struct hist_entry *n; 1729 struct hist_entry *n;
1712 u64 callchain_total; 1730 u64 callchain_total;
@@ -1736,14 +1754,14 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
1736 else 1754 else
1737 root = hists->entries_in; 1755 root = hists->entries_in;
1738 1756
1739 next = rb_first(root); 1757 next = rb_first_cached(root);
1740 hists->entries = RB_ROOT; 1758 hists->entries = RB_ROOT_CACHED;
1741 1759
1742 while (next) { 1760 while (next) {
1743 n = rb_entry(next, struct hist_entry, rb_node_in); 1761 n = rb_entry(next, struct hist_entry, rb_node_in);
1744 next = rb_next(&n->rb_node_in); 1762 next = rb_next(&n->rb_node_in);
1745 1763
1746 if (cb && cb(n)) 1764 if (cb && cb(n, cb_arg))
1747 continue; 1765 continue;
1748 1766
1749 __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); 1767 __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
@@ -1757,7 +1775,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
1757 } 1775 }
1758} 1776}
1759 1777
1760void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) 1778void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog,
1779 hists__resort_cb_t cb, void *cb_arg)
1761{ 1780{
1762 bool use_callchain; 1781 bool use_callchain;
1763 1782
@@ -1768,18 +1787,23 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro
1768 1787
1769 use_callchain |= symbol_conf.show_branchflag_count; 1788 use_callchain |= symbol_conf.show_branchflag_count;
1770 1789
1771 output_resort(evsel__hists(evsel), prog, use_callchain, NULL); 1790 output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
1791}
1792
1793void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
1794{
1795 return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL);
1772} 1796}
1773 1797
1774void hists__output_resort(struct hists *hists, struct ui_progress *prog) 1798void hists__output_resort(struct hists *hists, struct ui_progress *prog)
1775{ 1799{
1776 output_resort(hists, prog, symbol_conf.use_callchain, NULL); 1800 output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL);
1777} 1801}
1778 1802
1779void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, 1803void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
1780 hists__resort_cb_t cb) 1804 hists__resort_cb_t cb)
1781{ 1805{
1782 output_resort(hists, prog, symbol_conf.use_callchain, cb); 1806 output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL);
1783} 1807}
1784 1808
1785static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) 1809static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
@@ -1798,7 +1822,7 @@ struct rb_node *rb_hierarchy_last(struct rb_node *node)
1798 struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); 1822 struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
1799 1823
1800 while (can_goto_child(he, HMD_NORMAL)) { 1824 while (can_goto_child(he, HMD_NORMAL)) {
1801 node = rb_last(&he->hroot_out); 1825 node = rb_last(&he->hroot_out.rb_root);
1802 he = rb_entry(node, struct hist_entry, rb_node); 1826 he = rb_entry(node, struct hist_entry, rb_node);
1803 } 1827 }
1804 return node; 1828 return node;
@@ -1809,7 +1833,7 @@ struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_di
1809 struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); 1833 struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
1810 1834
1811 if (can_goto_child(he, hmd)) 1835 if (can_goto_child(he, hmd))
1812 node = rb_first(&he->hroot_out); 1836 node = rb_first_cached(&he->hroot_out);
1813 else 1837 else
1814 node = rb_next(node); 1838 node = rb_next(node);
1815 1839
@@ -1847,7 +1871,7 @@ bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
1847 if (he->leaf) 1871 if (he->leaf)
1848 return false; 1872 return false;
1849 1873
1850 node = rb_first(&he->hroot_out); 1874 node = rb_first_cached(&he->hroot_out);
1851 child = rb_entry(node, struct hist_entry, rb_node); 1875 child = rb_entry(node, struct hist_entry, rb_node);
1852 1876
1853 while (node && child->filtered) { 1877 while (node && child->filtered) {
@@ -1965,7 +1989,7 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil
1965 hists__reset_filter_stats(hists); 1989 hists__reset_filter_stats(hists);
1966 hists__reset_col_len(hists); 1990 hists__reset_col_len(hists);
1967 1991
1968 for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { 1992 for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) {
1969 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 1993 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
1970 1994
1971 if (filter(hists, h)) 1995 if (filter(hists, h))
@@ -1975,13 +1999,15 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil
1975 } 1999 }
1976} 2000}
1977 2001
1978static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) 2002static void resort_filtered_entry(struct rb_root_cached *root,
2003 struct hist_entry *he)
1979{ 2004{
1980 struct rb_node **p = &root->rb_node; 2005 struct rb_node **p = &root->rb_root.rb_node;
1981 struct rb_node *parent = NULL; 2006 struct rb_node *parent = NULL;
1982 struct hist_entry *iter; 2007 struct hist_entry *iter;
1983 struct rb_root new_root = RB_ROOT; 2008 struct rb_root_cached new_root = RB_ROOT_CACHED;
1984 struct rb_node *nd; 2009 struct rb_node *nd;
2010 bool leftmost = true;
1985 2011
1986 while (*p != NULL) { 2012 while (*p != NULL) {
1987 parent = *p; 2013 parent = *p;
@@ -1989,22 +2015,24 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
1989 2015
1990 if (hist_entry__sort(he, iter) > 0) 2016 if (hist_entry__sort(he, iter) > 0)
1991 p = &(*p)->rb_left; 2017 p = &(*p)->rb_left;
1992 else 2018 else {
1993 p = &(*p)->rb_right; 2019 p = &(*p)->rb_right;
2020 leftmost = false;
2021 }
1994 } 2022 }
1995 2023
1996 rb_link_node(&he->rb_node, parent, p); 2024 rb_link_node(&he->rb_node, parent, p);
1997 rb_insert_color(&he->rb_node, root); 2025 rb_insert_color_cached(&he->rb_node, root, leftmost);
1998 2026
1999 if (he->leaf || he->filtered) 2027 if (he->leaf || he->filtered)
2000 return; 2028 return;
2001 2029
2002 nd = rb_first(&he->hroot_out); 2030 nd = rb_first_cached(&he->hroot_out);
2003 while (nd) { 2031 while (nd) {
2004 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 2032 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
2005 2033
2006 nd = rb_next(nd); 2034 nd = rb_next(nd);
2007 rb_erase(&h->rb_node, &he->hroot_out); 2035 rb_erase_cached(&h->rb_node, &he->hroot_out);
2008 2036
2009 resort_filtered_entry(&new_root, h); 2037 resort_filtered_entry(&new_root, h);
2010 } 2038 }
@@ -2015,14 +2043,14 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
2015static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) 2043static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
2016{ 2044{
2017 struct rb_node *nd; 2045 struct rb_node *nd;
2018 struct rb_root new_root = RB_ROOT; 2046 struct rb_root_cached new_root = RB_ROOT_CACHED;
2019 2047
2020 hists->stats.nr_non_filtered_samples = 0; 2048 hists->stats.nr_non_filtered_samples = 0;
2021 2049
2022 hists__reset_filter_stats(hists); 2050 hists__reset_filter_stats(hists);
2023 hists__reset_col_len(hists); 2051 hists__reset_col_len(hists);
2024 2052
2025 nd = rb_first(&hists->entries); 2053 nd = rb_first_cached(&hists->entries);
2026 while (nd) { 2054 while (nd) {
2027 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 2055 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
2028 int ret; 2056 int ret;
@@ -2066,12 +2094,12 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a
2066 * resort output after applying a new filter since filter in a lower 2094 * resort output after applying a new filter since filter in a lower
2067 * hierarchy can change periods in a upper hierarchy. 2095 * hierarchy can change periods in a upper hierarchy.
2068 */ 2096 */
2069 nd = rb_first(&hists->entries); 2097 nd = rb_first_cached(&hists->entries);
2070 while (nd) { 2098 while (nd) {
2071 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); 2099 struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
2072 2100
2073 nd = rb_next(nd); 2101 nd = rb_next(nd);
2074 rb_erase(&h->rb_node, &hists->entries); 2102 rb_erase_cached(&h->rb_node, &hists->entries);
2075 2103
2076 resort_filtered_entry(&new_root, h); 2104 resort_filtered_entry(&new_root, h);
2077 } 2105 }
@@ -2140,18 +2168,19 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered)
2140static struct hist_entry *hists__add_dummy_entry(struct hists *hists, 2168static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
2141 struct hist_entry *pair) 2169 struct hist_entry *pair)
2142{ 2170{
2143 struct rb_root *root; 2171 struct rb_root_cached *root;
2144 struct rb_node **p; 2172 struct rb_node **p;
2145 struct rb_node *parent = NULL; 2173 struct rb_node *parent = NULL;
2146 struct hist_entry *he; 2174 struct hist_entry *he;
2147 int64_t cmp; 2175 int64_t cmp;
2176 bool leftmost = true;
2148 2177
2149 if (hists__has(hists, need_collapse)) 2178 if (hists__has(hists, need_collapse))
2150 root = &hists->entries_collapsed; 2179 root = &hists->entries_collapsed;
2151 else 2180 else
2152 root = hists->entries_in; 2181 root = hists->entries_in;
2153 2182
2154 p = &root->rb_node; 2183 p = &root->rb_root.rb_node;
2155 2184
2156 while (*p != NULL) { 2185 while (*p != NULL) {
2157 parent = *p; 2186 parent = *p;
@@ -2164,8 +2193,10 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
2164 2193
2165 if (cmp < 0) 2194 if (cmp < 0)
2166 p = &(*p)->rb_left; 2195 p = &(*p)->rb_left;
2167 else 2196 else {
2168 p = &(*p)->rb_right; 2197 p = &(*p)->rb_right;
2198 leftmost = false;
2199 }
2169 } 2200 }
2170 2201
2171 he = hist_entry__new(pair, true); 2202 he = hist_entry__new(pair, true);
@@ -2175,7 +2206,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
2175 if (symbol_conf.cumulate_callchain) 2206 if (symbol_conf.cumulate_callchain)
2176 memset(he->stat_acc, 0, sizeof(he->stat)); 2207 memset(he->stat_acc, 0, sizeof(he->stat));
2177 rb_link_node(&he->rb_node_in, parent, p); 2208 rb_link_node(&he->rb_node_in, parent, p);
2178 rb_insert_color(&he->rb_node_in, root); 2209 rb_insert_color_cached(&he->rb_node_in, root, leftmost);
2179 hists__inc_stats(hists, he); 2210 hists__inc_stats(hists, he);
2180 he->dummy = true; 2211 he->dummy = true;
2181 } 2212 }
@@ -2184,15 +2215,16 @@ out:
2184} 2215}
2185 2216
2186static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, 2217static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
2187 struct rb_root *root, 2218 struct rb_root_cached *root,
2188 struct hist_entry *pair) 2219 struct hist_entry *pair)
2189{ 2220{
2190 struct rb_node **p; 2221 struct rb_node **p;
2191 struct rb_node *parent = NULL; 2222 struct rb_node *parent = NULL;
2192 struct hist_entry *he; 2223 struct hist_entry *he;
2193 struct perf_hpp_fmt *fmt; 2224 struct perf_hpp_fmt *fmt;
2225 bool leftmost = true;
2194 2226
2195 p = &root->rb_node; 2227 p = &root->rb_root.rb_node;
2196 while (*p != NULL) { 2228 while (*p != NULL) {
2197 int64_t cmp = 0; 2229 int64_t cmp = 0;
2198 2230
@@ -2209,14 +2241,16 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
2209 2241
2210 if (cmp < 0) 2242 if (cmp < 0)
2211 p = &parent->rb_left; 2243 p = &parent->rb_left;
2212 else 2244 else {
2213 p = &parent->rb_right; 2245 p = &parent->rb_right;
2246 leftmost = false;
2247 }
2214 } 2248 }
2215 2249
2216 he = hist_entry__new(pair, true); 2250 he = hist_entry__new(pair, true);
2217 if (he) { 2251 if (he) {
2218 rb_link_node(&he->rb_node_in, parent, p); 2252 rb_link_node(&he->rb_node_in, parent, p);
2219 rb_insert_color(&he->rb_node_in, root); 2253 rb_insert_color_cached(&he->rb_node_in, root, leftmost);
2220 2254
2221 he->dummy = true; 2255 he->dummy = true;
2222 he->hists = hists; 2256 he->hists = hists;
@@ -2233,9 +2267,9 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
2233 struct rb_node *n; 2267 struct rb_node *n;
2234 2268
2235 if (hists__has(hists, need_collapse)) 2269 if (hists__has(hists, need_collapse))
2236 n = hists->entries_collapsed.rb_node; 2270 n = hists->entries_collapsed.rb_root.rb_node;
2237 else 2271 else
2238 n = hists->entries_in->rb_node; 2272 n = hists->entries_in->rb_root.rb_node;
2239 2273
2240 while (n) { 2274 while (n) {
2241 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in); 2275 struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
@@ -2252,10 +2286,10 @@ static struct hist_entry *hists__find_entry(struct hists *hists,
2252 return NULL; 2286 return NULL;
2253} 2287}
2254 2288
2255static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, 2289static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root,
2256 struct hist_entry *he) 2290 struct hist_entry *he)
2257{ 2291{
2258 struct rb_node *n = root->rb_node; 2292 struct rb_node *n = root->rb_root.rb_node;
2259 2293
2260 while (n) { 2294 while (n) {
2261 struct hist_entry *iter; 2295 struct hist_entry *iter;
@@ -2280,13 +2314,13 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
2280 return NULL; 2314 return NULL;
2281} 2315}
2282 2316
2283static void hists__match_hierarchy(struct rb_root *leader_root, 2317static void hists__match_hierarchy(struct rb_root_cached *leader_root,
2284 struct rb_root *other_root) 2318 struct rb_root_cached *other_root)
2285{ 2319{
2286 struct rb_node *nd; 2320 struct rb_node *nd;
2287 struct hist_entry *pos, *pair; 2321 struct hist_entry *pos, *pair;
2288 2322
2289 for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) { 2323 for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) {
2290 pos = rb_entry(nd, struct hist_entry, rb_node_in); 2324 pos = rb_entry(nd, struct hist_entry, rb_node_in);
2291 pair = hists__find_hierarchy_entry(other_root, pos); 2325 pair = hists__find_hierarchy_entry(other_root, pos);
2292 2326
@@ -2302,7 +2336,7 @@ static void hists__match_hierarchy(struct rb_root *leader_root,
2302 */ 2336 */
2303void hists__match(struct hists *leader, struct hists *other) 2337void hists__match(struct hists *leader, struct hists *other)
2304{ 2338{
2305 struct rb_root *root; 2339 struct rb_root_cached *root;
2306 struct rb_node *nd; 2340 struct rb_node *nd;
2307 struct hist_entry *pos, *pair; 2341 struct hist_entry *pos, *pair;
2308 2342
@@ -2317,7 +2351,7 @@ void hists__match(struct hists *leader, struct hists *other)
2317 else 2351 else
2318 root = leader->entries_in; 2352 root = leader->entries_in;
2319 2353
2320 for (nd = rb_first(root); nd; nd = rb_next(nd)) { 2354 for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
2321 pos = rb_entry(nd, struct hist_entry, rb_node_in); 2355 pos = rb_entry(nd, struct hist_entry, rb_node_in);
2322 pair = hists__find_entry(other, pos); 2356 pair = hists__find_entry(other, pos);
2323 2357
@@ -2328,13 +2362,13 @@ void hists__match(struct hists *leader, struct hists *other)
2328 2362
2329static int hists__link_hierarchy(struct hists *leader_hists, 2363static int hists__link_hierarchy(struct hists *leader_hists,
2330 struct hist_entry *parent, 2364 struct hist_entry *parent,
2331 struct rb_root *leader_root, 2365 struct rb_root_cached *leader_root,
2332 struct rb_root *other_root) 2366 struct rb_root_cached *other_root)
2333{ 2367{
2334 struct rb_node *nd; 2368 struct rb_node *nd;
2335 struct hist_entry *pos, *leader; 2369 struct hist_entry *pos, *leader;
2336 2370
2337 for (nd = rb_first(other_root); nd; nd = rb_next(nd)) { 2371 for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) {
2338 pos = rb_entry(nd, struct hist_entry, rb_node_in); 2372 pos = rb_entry(nd, struct hist_entry, rb_node_in);
2339 2373
2340 if (hist_entry__has_pairs(pos)) { 2374 if (hist_entry__has_pairs(pos)) {
@@ -2377,7 +2411,7 @@ static int hists__link_hierarchy(struct hists *leader_hists,
2377 */ 2411 */
2378int hists__link(struct hists *leader, struct hists *other) 2412int hists__link(struct hists *leader, struct hists *other)
2379{ 2413{
2380 struct rb_root *root; 2414 struct rb_root_cached *root;
2381 struct rb_node *nd; 2415 struct rb_node *nd;
2382 struct hist_entry *pos, *pair; 2416 struct hist_entry *pos, *pair;
2383 2417
@@ -2393,7 +2427,7 @@ int hists__link(struct hists *leader, struct hists *other)
2393 else 2427 else
2394 root = other->entries_in; 2428 root = other->entries_in;
2395 2429
2396 for (nd = rb_first(root); nd; nd = rb_next(nd)) { 2430 for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) {
2397 pos = rb_entry(nd, struct hist_entry, rb_node_in); 2431 pos = rb_entry(nd, struct hist_entry, rb_node_in);
2398 2432
2399 if (!hist_entry__has_pairs(pos)) { 2433 if (!hist_entry__has_pairs(pos)) {
@@ -2566,10 +2600,10 @@ int perf_hist_config(const char *var, const char *value)
2566int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) 2600int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
2567{ 2601{
2568 memset(hists, 0, sizeof(*hists)); 2602 memset(hists, 0, sizeof(*hists));
2569 hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; 2603 hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED;
2570 hists->entries_in = &hists->entries_in_array[0]; 2604 hists->entries_in = &hists->entries_in_array[0];
2571 hists->entries_collapsed = RB_ROOT; 2605 hists->entries_collapsed = RB_ROOT_CACHED;
2572 hists->entries = RB_ROOT; 2606 hists->entries = RB_ROOT_CACHED;
2573 pthread_mutex_init(&hists->lock, NULL); 2607 pthread_mutex_init(&hists->lock, NULL);
2574 hists->socket_filter = -1; 2608 hists->socket_filter = -1;
2575 hists->hpp_list = hpp_list; 2609 hists->hpp_list = hpp_list;
@@ -2577,14 +2611,14 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
2577 return 0; 2611 return 0;
2578} 2612}
2579 2613
2580static void hists__delete_remaining_entries(struct rb_root *root) 2614static void hists__delete_remaining_entries(struct rb_root_cached *root)
2581{ 2615{
2582 struct rb_node *node; 2616 struct rb_node *node;
2583 struct hist_entry *he; 2617 struct hist_entry *he;
2584 2618
2585 while (!RB_EMPTY_ROOT(root)) { 2619 while (!RB_EMPTY_ROOT(&root->rb_root)) {
2586 node = rb_first(root); 2620 node = rb_first_cached(root);
2587 rb_erase(node, root); 2621 rb_erase_cached(node, root);
2588 2622
2589 he = rb_entry(node, struct hist_entry, rb_node_in); 2623 he = rb_entry(node, struct hist_entry, rb_node_in);
2590 hist_entry__delete(he); 2624 hist_entry__delete(he);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 664b5eda8d51..4af27fbab24f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -2,9 +2,9 @@
2#ifndef __PERF_HIST_H 2#ifndef __PERF_HIST_H
3#define __PERF_HIST_H 3#define __PERF_HIST_H
4 4
5#include <linux/rbtree.h>
5#include <linux/types.h> 6#include <linux/types.h>
6#include <pthread.h> 7#include <pthread.h>
7#include "callchain.h"
8#include "evsel.h" 8#include "evsel.h"
9#include "header.h" 9#include "header.h"
10#include "color.h" 10#include "color.h"
@@ -13,6 +13,9 @@
13struct hist_entry; 13struct hist_entry;
14struct hist_entry_ops; 14struct hist_entry_ops;
15struct addr_location; 15struct addr_location;
16struct map_symbol;
17struct mem_info;
18struct branch_info;
16struct symbol; 19struct symbol;
17 20
18enum hist_filter { 21enum hist_filter {
@@ -70,10 +73,10 @@ struct thread;
70struct dso; 73struct dso;
71 74
72struct hists { 75struct hists {
73 struct rb_root entries_in_array[2]; 76 struct rb_root_cached entries_in_array[2];
74 struct rb_root *entries_in; 77 struct rb_root_cached *entries_in;
75 struct rb_root entries; 78 struct rb_root_cached entries;
76 struct rb_root entries_collapsed; 79 struct rb_root_cached entries_collapsed;
77 u64 nr_entries; 80 u64 nr_entries;
78 u64 nr_non_filtered_entries; 81 u64 nr_non_filtered_entries;
79 u64 callchain_period; 82 u64 callchain_period;
@@ -160,8 +163,10 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
160 struct perf_hpp_fmt *fmt, int printed); 163 struct perf_hpp_fmt *fmt, int printed);
161void hist_entry__delete(struct hist_entry *he); 164void hist_entry__delete(struct hist_entry *he);
162 165
163typedef int (*hists__resort_cb_t)(struct hist_entry *he); 166typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
164 167
168void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog,
169 hists__resort_cb_t cb, void *cb_arg);
165void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); 170void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
166void hists__output_resort(struct hists *hists, struct ui_progress *prog); 171void hists__output_resort(struct hists *hists, struct ui_progress *prog);
167void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, 172void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
@@ -230,7 +235,7 @@ static __pure inline bool hists__has_callchains(struct hists *hists)
230int hists__init(void); 235int hists__init(void);
231int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); 236int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
232 237
233struct rb_root *hists__get_rotate_entries_in(struct hists *hists); 238struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists);
234 239
235struct perf_hpp { 240struct perf_hpp {
236 char *buf; 241 char *buf;
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index ee6ca65f81f4..0c0180c67574 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -27,6 +27,8 @@
27#include "evsel.h" 27#include "evsel.h"
28#include "evlist.h" 28#include "evlist.h"
29#include "machine.h" 29#include "machine.h"
30#include "map.h"
31#include "symbol.h"
30#include "session.h" 32#include "session.h"
31#include "util.h" 33#include "util.h"
32#include "thread.h" 34#include "thread.h"
@@ -142,7 +144,7 @@ static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
142 144
143 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 145 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
144 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, 146 INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
145 sample->tid, 0, "Lost trace data"); 147 sample->tid, 0, "Lost trace data", sample->time);
146 148
147 err = perf_session__deliver_synth_event(bts->session, &event, NULL); 149 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
148 if (err) 150 if (err)
@@ -372,7 +374,7 @@ static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
372 374
373 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 375 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
374 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, 376 INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
375 "Failed to get instruction"); 377 "Failed to get instruction", 0);
376 378
377 err = perf_session__deliver_synth_event(bts->session, &event, NULL); 379 err = perf_session__deliver_synth_event(bts->session, &event, NULL);
378 if (err) 380 if (err)
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
index 1b704fbea9de..23bf788f84b9 100644
--- a/tools/perf/util/intel-pt-decoder/Build
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -1,4 +1,4 @@
1libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o 1perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
2 2
3inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk 3inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
4inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt 4inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 4503f3ca45ab..6e03db142091 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -26,6 +26,7 @@
26 26
27#include "../cache.h" 27#include "../cache.h"
28#include "../util.h" 28#include "../util.h"
29#include "../auxtrace.h"
29 30
30#include "intel-pt-insn-decoder.h" 31#include "intel-pt-insn-decoder.h"
31#include "intel-pt-pkt-decoder.h" 32#include "intel-pt-pkt-decoder.h"
@@ -867,7 +868,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
867 868
868 ret = intel_pt_get_packet(decoder->buf, decoder->len, 869 ret = intel_pt_get_packet(decoder->buf, decoder->len,
869 &decoder->packet); 870 &decoder->packet);
870 if (ret == INTEL_PT_NEED_MORE_BYTES && 871 if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
871 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { 872 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
872 ret = intel_pt_get_split_packet(decoder); 873 ret = intel_pt_get_split_packet(decoder);
873 if (ret < 0) 874 if (ret < 0)
@@ -1394,7 +1395,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1394{ 1395{
1395 intel_pt_log("ERROR: Buffer overflow\n"); 1396 intel_pt_log("ERROR: Buffer overflow\n");
1396 intel_pt_clear_tx_flags(decoder); 1397 intel_pt_clear_tx_flags(decoder);
1397 decoder->cbr = 0;
1398 decoder->timestamp_insn_cnt = 0; 1398 decoder->timestamp_insn_cnt = 0;
1399 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; 1399 decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
1400 decoder->overflow = true; 1400 decoder->overflow = true;
@@ -2575,6 +2575,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
2575 } 2575 }
2576} 2576}
2577 2577
2578#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
2579
2580/**
2581 * adj_for_padding - adjust overlap to account for padding.
2582 * @buf_b: second buffer
2583 * @buf_a: first buffer
2584 * @len_a: size of first buffer
2585 *
2586 * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
2587 * accordingly.
2588 *
2589 * Return: A pointer into @buf_b from where non-overlapped data starts
2590 */
2591static unsigned char *adj_for_padding(unsigned char *buf_b,
2592 unsigned char *buf_a, size_t len_a)
2593{
2594 unsigned char *p = buf_b - MAX_PADDING;
2595 unsigned char *q = buf_a + len_a - MAX_PADDING;
2596 int i;
2597
2598 for (i = MAX_PADDING; i; i--, p++, q++) {
2599 if (*p != *q)
2600 break;
2601 }
2602
2603 return p;
2604}
2605
2578/** 2606/**
2579 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data 2607 * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
2580 * using TSC. 2608 * using TSC.
@@ -2625,8 +2653,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
2625 2653
2626 /* Same TSC, so buffers are consecutive */ 2654 /* Same TSC, so buffers are consecutive */
2627 if (!cmp && rem_b >= rem_a) { 2655 if (!cmp && rem_b >= rem_a) {
2656 unsigned char *start;
2657
2628 *consecutive = true; 2658 *consecutive = true;
2629 return buf_b + len_b - (rem_b - rem_a); 2659 start = buf_b + len_b - (rem_b - rem_a);
2660 return adj_for_padding(start, buf_a, len_a);
2630 } 2661 }
2631 if (cmp < 0) 2662 if (cmp < 0)
2632 return buf_b; /* tsc_a < tsc_b => no overlap */ 2663 return buf_b; /* tsc_a < tsc_b => no overlap */
@@ -2689,7 +2720,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
2689 found = memmem(buf_a, len_a, buf_b, len_a); 2720 found = memmem(buf_a, len_a, buf_b, len_a);
2690 if (found) { 2721 if (found) {
2691 *consecutive = true; 2722 *consecutive = true;
2692 return buf_b + len_a; 2723 return adj_for_padding(buf_b + len_a, buf_a, len_a);
2693 } 2724 }
2694 2725
2695 /* Try again at next PSB in buffer 'a' */ 2726 /* Try again at next PSB in buffer 'a' */
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 2e72373ec6df..3b497bab4324 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1411,7 +1411,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1411} 1411}
1412 1412
1413static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1413static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1414 pid_t pid, pid_t tid, u64 ip) 1414 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
1415{ 1415{
1416 union perf_event event; 1416 union perf_event event;
1417 char msg[MAX_AUXTRACE_ERROR_MSG]; 1417 char msg[MAX_AUXTRACE_ERROR_MSG];
@@ -1420,7 +1420,7 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1420 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); 1420 intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
1421 1421
1422 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 1422 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
1423 code, cpu, pid, tid, ip, msg); 1423 code, cpu, pid, tid, ip, msg, timestamp);
1424 1424
1425 err = perf_session__deliver_synth_event(pt->session, &event, NULL); 1425 err = perf_session__deliver_synth_event(pt->session, &event, NULL);
1426 if (err) 1426 if (err)
@@ -1430,6 +1430,18 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1430 return err; 1430 return err;
1431} 1431}
1432 1432
1433static int intel_ptq_synth_error(struct intel_pt_queue *ptq,
1434 const struct intel_pt_state *state)
1435{
1436 struct intel_pt *pt = ptq->pt;
1437 u64 tm = ptq->timestamp;
1438
1439 tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc);
1440
1441 return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid,
1442 ptq->tid, state->from_ip, tm);
1443}
1444
1433static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) 1445static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
1434{ 1446{
1435 struct auxtrace_queue *queue; 1447 struct auxtrace_queue *queue;
@@ -1676,10 +1688,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1676 intel_pt_next_tid(pt, ptq); 1688 intel_pt_next_tid(pt, ptq);
1677 } 1689 }
1678 if (pt->synth_opts.errors) { 1690 if (pt->synth_opts.errors) {
1679 err = intel_pt_synth_error(pt, state->err, 1691 err = intel_ptq_synth_error(ptq, state);
1680 ptq->cpu, ptq->pid,
1681 ptq->tid,
1682 state->from_ip);
1683 if (err) 1692 if (err)
1684 return err; 1693 return err;
1685 } 1694 }
@@ -1804,7 +1813,7 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
1804static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) 1813static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
1805{ 1814{
1806 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, 1815 return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
1807 sample->pid, sample->tid, 0); 1816 sample->pid, sample->tid, 0, sample->time);
1808} 1817}
1809 1818
1810static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) 1819static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
index 85bab8735fa9..5c19ee001299 100644
--- a/tools/perf/util/intlist.h
+++ b/tools/perf/util/intlist.h
@@ -45,7 +45,7 @@ static inline unsigned int intlist__nr_entries(const struct intlist *ilist)
45/* For intlist iteration */ 45/* For intlist iteration */
46static inline struct int_node *intlist__first(struct intlist *ilist) 46static inline struct int_node *intlist__first(struct intlist *ilist)
47{ 47{
48 struct rb_node *rn = rb_first(&ilist->rblist.entries); 48 struct rb_node *rn = rb_first_cached(&ilist->rblist.entries);
49 return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; 49 return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
50} 50}
51static inline struct int_node *intlist__next(struct int_node *in) 51static inline struct int_node *intlist__next(struct int_node *in)
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index bf249552a9b0..eda28d3570bc 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -2,6 +2,7 @@
2#include <sys/sysmacros.h> 2#include <sys/sysmacros.h>
3#include <sys/types.h> 3#include <sys/types.h>
4#include <errno.h> 4#include <errno.h>
5#include <libgen.h>
5#include <stdio.h> 6#include <stdio.h>
6#include <stdlib.h> 7#include <stdlib.h>
7#include <string.h> 8#include <string.h>
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
index 7b1f06567521..1403dec189b4 100644
--- a/tools/perf/util/kvm-stat.h
+++ b/tools/perf/util/kvm-stat.h
@@ -3,12 +3,13 @@
3#define __PERF_KVM_STAT_H 3#define __PERF_KVM_STAT_H
4 4
5#include "../perf.h" 5#include "../perf.h"
6#include "evsel.h"
7#include "evlist.h"
8#include "session.h"
9#include "tool.h" 6#include "tool.h"
10#include "stat.h" 7#include "stat.h"
11 8
9struct perf_evsel;
10struct perf_evlist;
11struct perf_session;
12
12struct event_key { 13struct event_key {
13 #define INVALID_KEY (~0ULL) 14 #define INVALID_KEY (~0ULL)
14 u64 key; 15 u64 key;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 143f7057d581..61959aba7e27 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -10,6 +10,7 @@
10#include "hist.h" 10#include "hist.h"
11#include "machine.h" 11#include "machine.h"
12#include "map.h" 12#include "map.h"
13#include "symbol.h"
13#include "sort.h" 14#include "sort.h"
14#include "strlist.h" 15#include "strlist.h"
15#include "thread.h" 16#include "thread.h"
@@ -21,6 +22,7 @@
21#include "unwind.h" 22#include "unwind.h"
22#include "linux/hash.h" 23#include "linux/hash.h"
23#include "asm/bug.h" 24#include "asm/bug.h"
25#include "bpf-event.h"
24 26
25#include "sane_ctype.h" 27#include "sane_ctype.h"
26#include <symbol/kallsyms.h> 28#include <symbol/kallsyms.h>
@@ -41,7 +43,7 @@ static void machine__threads_init(struct machine *machine)
41 43
42 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 44 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
43 struct threads *threads = &machine->threads[i]; 45 struct threads *threads = &machine->threads[i];
44 threads->entries = RB_ROOT; 46 threads->entries = RB_ROOT_CACHED;
45 init_rwsem(&threads->lock); 47 init_rwsem(&threads->lock);
46 threads->nr = 0; 48 threads->nr = 0;
47 INIT_LIST_HEAD(&threads->dead); 49 INIT_LIST_HEAD(&threads->dead);
@@ -179,7 +181,7 @@ void machine__delete_threads(struct machine *machine)
179 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 181 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
180 struct threads *threads = &machine->threads[i]; 182 struct threads *threads = &machine->threads[i];
181 down_write(&threads->lock); 183 down_write(&threads->lock);
182 nd = rb_first(&threads->entries); 184 nd = rb_first_cached(&threads->entries);
183 while (nd) { 185 while (nd) {
184 struct thread *t = rb_entry(nd, struct thread, rb_node); 186 struct thread *t = rb_entry(nd, struct thread, rb_node);
185 187
@@ -222,7 +224,7 @@ void machine__delete(struct machine *machine)
222void machines__init(struct machines *machines) 224void machines__init(struct machines *machines)
223{ 225{
224 machine__init(&machines->host, "", HOST_KERNEL_ID); 226 machine__init(&machines->host, "", HOST_KERNEL_ID);
225 machines->guests = RB_ROOT; 227 machines->guests = RB_ROOT_CACHED;
226} 228}
227 229
228void machines__exit(struct machines *machines) 230void machines__exit(struct machines *machines)
@@ -234,9 +236,10 @@ void machines__exit(struct machines *machines)
234struct machine *machines__add(struct machines *machines, pid_t pid, 236struct machine *machines__add(struct machines *machines, pid_t pid,
235 const char *root_dir) 237 const char *root_dir)
236{ 238{
237 struct rb_node **p = &machines->guests.rb_node; 239 struct rb_node **p = &machines->guests.rb_root.rb_node;
238 struct rb_node *parent = NULL; 240 struct rb_node *parent = NULL;
239 struct machine *pos, *machine = malloc(sizeof(*machine)); 241 struct machine *pos, *machine = malloc(sizeof(*machine));
242 bool leftmost = true;
240 243
241 if (machine == NULL) 244 if (machine == NULL)
242 return NULL; 245 return NULL;
@@ -251,12 +254,14 @@ struct machine *machines__add(struct machines *machines, pid_t pid,
251 pos = rb_entry(parent, struct machine, rb_node); 254 pos = rb_entry(parent, struct machine, rb_node);
252 if (pid < pos->pid) 255 if (pid < pos->pid)
253 p = &(*p)->rb_left; 256 p = &(*p)->rb_left;
254 else 257 else {
255 p = &(*p)->rb_right; 258 p = &(*p)->rb_right;
259 leftmost = false;
260 }
256 } 261 }
257 262
258 rb_link_node(&machine->rb_node, parent, p); 263 rb_link_node(&machine->rb_node, parent, p);
259 rb_insert_color(&machine->rb_node, &machines->guests); 264 rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost);
260 265
261 return machine; 266 return machine;
262} 267}
@@ -267,7 +272,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec)
267 272
268 machines->host.comm_exec = comm_exec; 273 machines->host.comm_exec = comm_exec;
269 274
270 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 275 for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
271 struct machine *machine = rb_entry(nd, struct machine, rb_node); 276 struct machine *machine = rb_entry(nd, struct machine, rb_node);
272 277
273 machine->comm_exec = comm_exec; 278 machine->comm_exec = comm_exec;
@@ -276,7 +281,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec)
276 281
277struct machine *machines__find(struct machines *machines, pid_t pid) 282struct machine *machines__find(struct machines *machines, pid_t pid)
278{ 283{
279 struct rb_node **p = &machines->guests.rb_node; 284 struct rb_node **p = &machines->guests.rb_root.rb_node;
280 struct rb_node *parent = NULL; 285 struct rb_node *parent = NULL;
281 struct machine *machine; 286 struct machine *machine;
282 struct machine *default_machine = NULL; 287 struct machine *default_machine = NULL;
@@ -339,7 +344,7 @@ void machines__process_guests(struct machines *machines,
339{ 344{
340 struct rb_node *nd; 345 struct rb_node *nd;
341 346
342 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 347 for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
343 struct machine *pos = rb_entry(nd, struct machine, rb_node); 348 struct machine *pos = rb_entry(nd, struct machine, rb_node);
344 process(pos, data); 349 process(pos, data);
345 } 350 }
@@ -352,7 +357,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
352 357
353 machines->host.id_hdr_size = id_hdr_size; 358 machines->host.id_hdr_size = id_hdr_size;
354 359
355 for (node = rb_first(&machines->guests); node; node = rb_next(node)) { 360 for (node = rb_first_cached(&machines->guests); node;
361 node = rb_next(node)) {
356 machine = rb_entry(node, struct machine, rb_node); 362 machine = rb_entry(node, struct machine, rb_node);
357 machine->id_hdr_size = id_hdr_size; 363 machine->id_hdr_size = id_hdr_size;
358 } 364 }
@@ -465,9 +471,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
465 pid_t pid, pid_t tid, 471 pid_t pid, pid_t tid,
466 bool create) 472 bool create)
467{ 473{
468 struct rb_node **p = &threads->entries.rb_node; 474 struct rb_node **p = &threads->entries.rb_root.rb_node;
469 struct rb_node *parent = NULL; 475 struct rb_node *parent = NULL;
470 struct thread *th; 476 struct thread *th;
477 bool leftmost = true;
471 478
472 th = threads__get_last_match(threads, machine, pid, tid); 479 th = threads__get_last_match(threads, machine, pid, tid);
473 if (th) 480 if (th)
@@ -485,8 +492,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
485 492
486 if (tid < th->tid) 493 if (tid < th->tid)
487 p = &(*p)->rb_left; 494 p = &(*p)->rb_left;
488 else 495 else {
489 p = &(*p)->rb_right; 496 p = &(*p)->rb_right;
497 leftmost = false;
498 }
490 } 499 }
491 500
492 if (!create) 501 if (!create)
@@ -495,7 +504,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
495 th = thread__new(pid, tid); 504 th = thread__new(pid, tid);
496 if (th != NULL) { 505 if (th != NULL) {
497 rb_link_node(&th->rb_node, parent, p); 506 rb_link_node(&th->rb_node, parent, p);
498 rb_insert_color(&th->rb_node, &threads->entries); 507 rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost);
499 508
500 /* 509 /*
501 * We have to initialize map_groups separately 510 * We have to initialize map_groups separately
@@ -506,7 +515,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
506 * leader and that would screwed the rb tree. 515 * leader and that would screwed the rb tree.
507 */ 516 */
508 if (thread__init_map_groups(th, machine)) { 517 if (thread__init_map_groups(th, machine)) {
509 rb_erase_init(&th->rb_node, &threads->entries); 518 rb_erase_cached(&th->rb_node, &threads->entries);
510 RB_CLEAR_NODE(&th->rb_node); 519 RB_CLEAR_NODE(&th->rb_node);
511 thread__put(th); 520 thread__put(th);
512 return NULL; 521 return NULL;
@@ -681,6 +690,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
681 return 0; 690 return 0;
682} 691}
683 692
693static int machine__process_ksymbol_register(struct machine *machine,
694 union perf_event *event,
695 struct perf_sample *sample __maybe_unused)
696{
697 struct symbol *sym;
698 struct map *map;
699
700 map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
701 if (!map) {
702 map = dso__new_map(event->ksymbol_event.name);
703 if (!map)
704 return -ENOMEM;
705
706 map->start = event->ksymbol_event.addr;
707 map->pgoff = map->start;
708 map->end = map->start + event->ksymbol_event.len;
709 map_groups__insert(&machine->kmaps, map);
710 }
711
712 sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len,
713 0, 0, event->ksymbol_event.name);
714 if (!sym)
715 return -ENOMEM;
716 dso__insert_symbol(map->dso, sym);
717 return 0;
718}
719
720static int machine__process_ksymbol_unregister(struct machine *machine,
721 union perf_event *event,
722 struct perf_sample *sample __maybe_unused)
723{
724 struct map *map;
725
726 map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr);
727 if (map)
728 map_groups__remove(&machine->kmaps, map);
729
730 return 0;
731}
732
733int machine__process_ksymbol(struct machine *machine __maybe_unused,
734 union perf_event *event,
735 struct perf_sample *sample)
736{
737 if (dump_trace)
738 perf_event__fprintf_ksymbol(event, stdout);
739
740 if (event->ksymbol_event.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER)
741 return machine__process_ksymbol_unregister(machine, event,
742 sample);
743 return machine__process_ksymbol_register(machine, event, sample);
744}
745
684static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename) 746static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename)
685{ 747{
686 const char *dup_filename; 748 const char *dup_filename;
@@ -744,7 +806,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
744 struct rb_node *nd; 806 struct rb_node *nd;
745 size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); 807 size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
746 808
747 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 809 for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
748 struct machine *pos = rb_entry(nd, struct machine, rb_node); 810 struct machine *pos = rb_entry(nd, struct machine, rb_node);
749 ret += __dsos__fprintf(&pos->dsos.head, fp); 811 ret += __dsos__fprintf(&pos->dsos.head, fp);
750 } 812 }
@@ -764,7 +826,7 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
764 struct rb_node *nd; 826 struct rb_node *nd;
765 size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm); 827 size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
766 828
767 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 829 for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
768 struct machine *pos = rb_entry(nd, struct machine, rb_node); 830 struct machine *pos = rb_entry(nd, struct machine, rb_node);
769 ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm); 831 ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
770 } 832 }
@@ -804,7 +866,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
804 866
805 ret = fprintf(fp, "Threads: %u\n", threads->nr); 867 ret = fprintf(fp, "Threads: %u\n", threads->nr);
806 868
807 for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { 869 for (nd = rb_first_cached(&threads->entries); nd;
870 nd = rb_next(nd)) {
808 struct thread *pos = rb_entry(nd, struct thread, rb_node); 871 struct thread *pos = rb_entry(nd, struct thread, rb_node);
809 872
810 ret += thread__fprintf(pos, fp); 873 ret += thread__fprintf(pos, fp);
@@ -1107,7 +1170,7 @@ failure:
1107 1170
1108void machines__destroy_kernel_maps(struct machines *machines) 1171void machines__destroy_kernel_maps(struct machines *machines)
1109{ 1172{
1110 struct rb_node *next = rb_first(&machines->guests); 1173 struct rb_node *next = rb_first_cached(&machines->guests);
1111 1174
1112 machine__destroy_kernel_maps(&machines->host); 1175 machine__destroy_kernel_maps(&machines->host);
1113 1176
@@ -1115,7 +1178,7 @@ void machines__destroy_kernel_maps(struct machines *machines)
1115 struct machine *pos = rb_entry(next, struct machine, rb_node); 1178 struct machine *pos = rb_entry(next, struct machine, rb_node);
1116 1179
1117 next = rb_next(&pos->rb_node); 1180 next = rb_next(&pos->rb_node);
1118 rb_erase(&pos->rb_node, &machines->guests); 1181 rb_erase_cached(&pos->rb_node, &machines->guests);
1119 machine__delete(pos); 1182 machine__delete(pos);
1120 } 1183 }
1121} 1184}
@@ -1680,7 +1743,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
1680 BUG_ON(refcount_read(&th->refcnt) == 0); 1743 BUG_ON(refcount_read(&th->refcnt) == 0);
1681 if (lock) 1744 if (lock)
1682 down_write(&threads->lock); 1745 down_write(&threads->lock);
1683 rb_erase_init(&th->rb_node, &threads->entries); 1746 rb_erase_cached(&th->rb_node, &threads->entries);
1684 RB_CLEAR_NODE(&th->rb_node); 1747 RB_CLEAR_NODE(&th->rb_node);
1685 --threads->nr; 1748 --threads->nr;
1686 /* 1749 /*
@@ -1812,6 +1875,10 @@ int machine__process_event(struct machine *machine, union perf_event *event,
1812 case PERF_RECORD_SWITCH: 1875 case PERF_RECORD_SWITCH:
1813 case PERF_RECORD_SWITCH_CPU_WIDE: 1876 case PERF_RECORD_SWITCH_CPU_WIDE:
1814 ret = machine__process_switch_event(machine, event); break; 1877 ret = machine__process_switch_event(machine, event); break;
1878 case PERF_RECORD_KSYMBOL:
1879 ret = machine__process_ksymbol(machine, event, sample); break;
1880 case PERF_RECORD_BPF_EVENT:
1881 ret = machine__process_bpf_event(machine, event, sample); break;
1815 default: 1882 default:
1816 ret = -1; 1883 ret = -1;
1817 break; 1884 break;
@@ -2453,7 +2520,8 @@ int machine__for_each_thread(struct machine *machine,
2453 2520
2454 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 2521 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
2455 threads = &machine->threads[i]; 2522 threads = &machine->threads[i];
2456 for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { 2523 for (nd = rb_first_cached(&threads->entries); nd;
2524 nd = rb_next(nd)) {
2457 thread = rb_entry(nd, struct thread, rb_node); 2525 thread = rb_entry(nd, struct thread, rb_node);
2458 rc = fn(thread, priv); 2526 rc = fn(thread, priv);
2459 if (rc != 0) 2527 if (rc != 0)
@@ -2480,7 +2548,7 @@ int machines__for_each_thread(struct machines *machines,
2480 if (rc != 0) 2548 if (rc != 0)
2481 return rc; 2549 return rc;
2482 2550
2483 for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { 2551 for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) {
2484 struct machine *machine = rb_entry(nd, struct machine, rb_node); 2552 struct machine *machine = rb_entry(nd, struct machine, rb_node);
2485 2553
2486 rc = machine__for_each_thread(machine, fn, priv); 2554 rc = machine__for_each_thread(machine, fn, priv);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index a5d1da60f751..f70ab98a7bde 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -4,7 +4,7 @@
4 4
5#include <sys/types.h> 5#include <sys/types.h>
6#include <linux/rbtree.h> 6#include <linux/rbtree.h>
7#include "map.h" 7#include "map_groups.h"
8#include "dso.h" 8#include "dso.h"
9#include "event.h" 9#include "event.h"
10#include "rwsem.h" 10#include "rwsem.h"
@@ -29,11 +29,11 @@ struct vdso_info;
29#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) 29#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
30 30
31struct threads { 31struct threads {
32 struct rb_root entries; 32 struct rb_root_cached entries;
33 struct rw_semaphore lock; 33 struct rw_semaphore lock;
34 unsigned int nr; 34 unsigned int nr;
35 struct list_head dead; 35 struct list_head dead;
36 struct thread *last_match; 36 struct thread *last_match;
37}; 37};
38 38
39struct machine { 39struct machine {
@@ -130,6 +130,9 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
130 struct perf_sample *sample); 130 struct perf_sample *sample);
131int machine__process_mmap2_event(struct machine *machine, union perf_event *event, 131int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
132 struct perf_sample *sample); 132 struct perf_sample *sample);
133int machine__process_ksymbol(struct machine *machine,
134 union perf_event *event,
135 struct perf_sample *sample);
133int machine__process_event(struct machine *machine, union perf_event *event, 136int machine__process_event(struct machine *machine, union perf_event *event,
134 struct perf_sample *sample); 137 struct perf_sample *sample);
135 138
@@ -137,7 +140,7 @@ typedef void (*machine__process_t)(struct machine *machine, void *data);
137 140
138struct machines { 141struct machines {
139 struct machine host; 142 struct machine host;
140 struct rb_root guests; 143 struct rb_root_cached guests;
141}; 144};
142 145
143void machines__init(struct machines *machines); 146void machines__init(struct machines *machines);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 6751301a755c..fbeb0c6efaa6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -286,8 +286,8 @@ void map__put(struct map *map)
286 286
287void map__fixup_start(struct map *map) 287void map__fixup_start(struct map *map)
288{ 288{
289 struct rb_root *symbols = &map->dso->symbols; 289 struct rb_root_cached *symbols = &map->dso->symbols;
290 struct rb_node *nd = rb_first(symbols); 290 struct rb_node *nd = rb_first_cached(symbols);
291 if (nd != NULL) { 291 if (nd != NULL) {
292 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 292 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
293 map->start = sym->start; 293 map->start = sym->start;
@@ -296,8 +296,8 @@ void map__fixup_start(struct map *map)
296 296
297void map__fixup_end(struct map *map) 297void map__fixup_end(struct map *map)
298{ 298{
299 struct rb_root *symbols = &map->dso->symbols; 299 struct rb_root_cached *symbols = &map->dso->symbols;
300 struct rb_node *nd = rb_last(symbols); 300 struct rb_node *nd = rb_last(&symbols->rb_root);
301 if (nd != NULL) { 301 if (nd != NULL) {
302 struct symbol *sym = rb_entry(nd, struct symbol, rb_node); 302 struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
303 map->end = sym->end; 303 map->end = sym->end;
@@ -557,6 +557,12 @@ void map_groups__init(struct map_groups *mg, struct machine *machine)
557 refcount_set(&mg->refcnt, 1); 557 refcount_set(&mg->refcnt, 1);
558} 558}
559 559
560void map_groups__insert(struct map_groups *mg, struct map *map)
561{
562 maps__insert(&mg->maps, map);
563 map->groups = mg;
564}
565
560static void __maps__purge(struct maps *maps) 566static void __maps__purge(struct maps *maps)
561{ 567{
562 struct rb_root *root = &maps->entries; 568 struct rb_root *root = &maps->entries;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 09282aa45c80..0e20749f2c55 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -6,12 +6,10 @@
6#include <linux/compiler.h> 6#include <linux/compiler.h>
7#include <linux/list.h> 7#include <linux/list.h>
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9#include <pthread.h>
10#include <stdio.h> 9#include <stdio.h>
11#include <string.h> 10#include <string.h>
12#include <stdbool.h> 11#include <stdbool.h>
13#include <linux/types.h> 12#include <linux/types.h>
14#include "rwsem.h"
15 13
16struct dso; 14struct dso;
17struct ip_callchain; 15struct ip_callchain;
@@ -48,38 +46,7 @@ struct map {
48 refcount_t refcnt; 46 refcount_t refcnt;
49}; 47};
50 48
51#define KMAP_NAME_LEN 256 49struct kmap;
52
53struct kmap {
54 struct ref_reloc_sym *ref_reloc_sym;
55 struct map_groups *kmaps;
56 char name[KMAP_NAME_LEN];
57};
58
59struct maps {
60 struct rb_root entries;
61 struct rb_root names;
62 struct rw_semaphore lock;
63};
64
65struct map_groups {
66 struct maps maps;
67 struct machine *machine;
68 refcount_t refcnt;
69};
70
71struct map_groups *map_groups__new(struct machine *machine);
72void map_groups__delete(struct map_groups *mg);
73bool map_groups__empty(struct map_groups *mg);
74
75static inline struct map_groups *map_groups__get(struct map_groups *mg)
76{
77 if (mg)
78 refcount_inc(&mg->refcnt);
79 return mg;
80}
81
82void map_groups__put(struct map_groups *mg);
83 50
84struct kmap *__map__kmap(struct map *map); 51struct kmap *__map__kmap(struct map *map);
85struct kmap *map__kmap(struct map *map); 52struct kmap *map__kmap(struct map *map);
@@ -174,18 +141,7 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
174int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, 141int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
175 FILE *fp); 142 FILE *fp);
176 143
177struct srccode_state { 144struct srccode_state;
178 char *srcfile;
179 unsigned line;
180};
181
182static inline void srccode_state_init(struct srccode_state *state)
183{
184 state->srcfile = NULL;
185 state->line = 0;
186}
187
188void srccode_state_free(struct srccode_state *state);
189 145
190int map__fprintf_srccode(struct map *map, u64 addr, 146int map__fprintf_srccode(struct map *map, u64 addr,
191 FILE *fp, struct srccode_state *state); 147 FILE *fp, struct srccode_state *state);
@@ -198,61 +154,9 @@ void map__fixup_end(struct map *map);
198 154
199void map__reloc_vmlinux(struct map *map); 155void map__reloc_vmlinux(struct map *map);
200 156
201void maps__insert(struct maps *maps, struct map *map);
202void maps__remove(struct maps *maps, struct map *map);
203struct map *maps__find(struct maps *maps, u64 addr);
204struct map *maps__first(struct maps *maps);
205struct map *map__next(struct map *map);
206struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
207 struct map **mapp);
208void map_groups__init(struct map_groups *mg, struct machine *machine);
209void map_groups__exit(struct map_groups *mg);
210int map_groups__clone(struct thread *thread,
211 struct map_groups *parent);
212size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
213
214int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, 157int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
215 u64 addr); 158 u64 addr);
216 159
217static inline void map_groups__insert(struct map_groups *mg, struct map *map)
218{
219 maps__insert(&mg->maps, map);
220 map->groups = mg;
221}
222
223static inline void map_groups__remove(struct map_groups *mg, struct map *map)
224{
225 maps__remove(&mg->maps, map);
226}
227
228static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
229{
230 return maps__find(&mg->maps, addr);
231}
232
233struct map *map_groups__first(struct map_groups *mg);
234
235static inline struct map *map_groups__next(struct map *map)
236{
237 return map__next(map);
238}
239
240struct symbol *map_groups__find_symbol(struct map_groups *mg,
241 u64 addr, struct map **mapp);
242
243struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
244 const char *name,
245 struct map **mapp);
246
247struct addr_map_symbol;
248
249int map_groups__find_ams(struct addr_map_symbol *ams);
250
251int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
252 FILE *fp);
253
254struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
255
256bool __map__is_kernel(const struct map *map); 160bool __map__is_kernel(const struct map *map);
257bool __map__is_extra_kernel_map(const struct map *map); 161bool __map__is_extra_kernel_map(const struct map *map);
258 162
diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h
new file mode 100644
index 000000000000..4dcda33e0fdf
--- /dev/null
+++ b/tools/perf/util/map_groups.h
@@ -0,0 +1,91 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __PERF_MAP_GROUPS_H
3#define __PERF_MAP_GROUPS_H
4
5#include <linux/refcount.h>
6#include <linux/rbtree.h>
7#include <stdio.h>
8#include <stdbool.h>
9#include <linux/types.h>
10#include "rwsem.h"
11
12struct ref_reloc_sym;
13struct machine;
14struct map;
15struct thread;
16
17struct maps {
18 struct rb_root entries;
19 struct rb_root names;
20 struct rw_semaphore lock;
21};
22
23void maps__insert(struct maps *maps, struct map *map);
24void maps__remove(struct maps *maps, struct map *map);
25struct map *maps__find(struct maps *maps, u64 addr);
26struct map *maps__first(struct maps *maps);
27struct map *map__next(struct map *map);
28struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp);
29
30struct map_groups {
31 struct maps maps;
32 struct machine *machine;
33 refcount_t refcnt;
34};
35
36#define KMAP_NAME_LEN 256
37
38struct kmap {
39 struct ref_reloc_sym *ref_reloc_sym;
40 struct map_groups *kmaps;
41 char name[KMAP_NAME_LEN];
42};
43
44struct map_groups *map_groups__new(struct machine *machine);
45void map_groups__delete(struct map_groups *mg);
46bool map_groups__empty(struct map_groups *mg);
47
48static inline struct map_groups *map_groups__get(struct map_groups *mg)
49{
50 if (mg)
51 refcount_inc(&mg->refcnt);
52 return mg;
53}
54
55void map_groups__put(struct map_groups *mg);
56void map_groups__init(struct map_groups *mg, struct machine *machine);
57void map_groups__exit(struct map_groups *mg);
58int map_groups__clone(struct thread *thread, struct map_groups *parent);
59size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
60
61void map_groups__insert(struct map_groups *mg, struct map *map);
62
63static inline void map_groups__remove(struct map_groups *mg, struct map *map)
64{
65 maps__remove(&mg->maps, map);
66}
67
68static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
69{
70 return maps__find(&mg->maps, addr);
71}
72
73struct map *map_groups__first(struct map_groups *mg);
74
75static inline struct map *map_groups__next(struct map *map)
76{
77 return map__next(map);
78}
79
80struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp);
81struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp);
82
83struct addr_map_symbol;
84
85int map_groups__find_ams(struct addr_map_symbol *ams);
86
87int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp);
88
89struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
90
91#endif // __PERF_MAP_GROUPS_H
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h
new file mode 100644
index 000000000000..5a1aed9f6bb4
--- /dev/null
+++ b/tools/perf/util/map_symbol.h
@@ -0,0 +1,22 @@
1// SPDX-License-Identifier: GPL-2.0
2#ifndef __PERF_MAP_SYMBOL
3#define __PERF_MAP_SYMBOL 1
4
5#include <linux/types.h>
6
7struct map;
8struct symbol;
9
10struct map_symbol {
11 struct map *map;
12 struct symbol *sym;
13};
14
15struct addr_map_symbol {
16 struct map *map;
17 struct symbol *sym;
18 u64 addr;
19 u64 al_addr;
20 u64 phys_addr;
21};
22#endif // __PERF_MAP_SYMBOL
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index a28f9b5cc4ff..b8d864ed4afe 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
270} 270}
271 271
272void metricgroup__print(bool metrics, bool metricgroups, char *filter, 272void metricgroup__print(bool metrics, bool metricgroups, char *filter,
273 bool raw) 273 bool raw, bool details)
274{ 274{
275 struct pmu_events_map *map = perf_pmu__find_map(NULL); 275 struct pmu_events_map *map = perf_pmu__find_map(NULL);
276 struct pmu_event *pe; 276 struct pmu_event *pe;
@@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
329 if (asprintf(&s, "%s\n%*s%s]", 329 if (asprintf(&s, "%s\n%*s%s]",
330 pe->metric_name, 8, "[", pe->desc) < 0) 330 pe->metric_name, 8, "[", pe->desc) < 0)
331 return; 331 return;
332
333 if (details) {
334 if (asprintf(&s, "%s\n%*s%s]",
335 s, 8, "[", pe->metric_expr) < 0)
336 return;
337 }
332 } 338 }
333 339
334 if (!s) 340 if (!s)
@@ -352,7 +358,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
352 else if (metrics && !raw) 358 else if (metrics && !raw)
353 printf("\nMetrics:\n\n"); 359 printf("\nMetrics:\n\n");
354 360
355 for (node = rb_first(&groups.entries); node; node = next) { 361 for (node = rb_first_cached(&groups.entries); node; node = next) {
356 struct mep *me = container_of(node, struct mep, nd); 362 struct mep *me = container_of(node, struct mep, nd);
357 363
358 if (metricgroups) 364 if (metricgroups)
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 8a155dba0581..5c52097a5c63 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt,
27 const char *str, 27 const char *str,
28 struct rblist *metric_events); 28 struct rblist *metric_events);
29 29
30void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); 30void metricgroup__print(bool metrics, bool groups, char *filter,
31 bool raw, bool details);
31bool metricgroup__has_metric(const char *metric); 32bool metricgroup__has_metric(const char *metric);
32#endif 33#endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 8fc39311a30d..cdc7740fc181 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -10,6 +10,9 @@
10#include <sys/mman.h> 10#include <sys/mman.h>
11#include <inttypes.h> 11#include <inttypes.h>
12#include <asm/bug.h> 12#include <asm/bug.h>
13#ifdef HAVE_LIBNUMA_SUPPORT
14#include <numaif.h>
15#endif
13#include "debug.h" 16#include "debug.h"
14#include "event.h" 17#include "event.h"
15#include "mmap.h" 18#include "mmap.h"
@@ -154,9 +157,72 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
154} 157}
155 158
156#ifdef HAVE_AIO_SUPPORT 159#ifdef HAVE_AIO_SUPPORT
160
161#ifdef HAVE_LIBNUMA_SUPPORT
162static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
163{
164 map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
165 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
166 if (map->aio.data[idx] == MAP_FAILED) {
167 map->aio.data[idx] = NULL;
168 return -1;
169 }
170
171 return 0;
172}
173
174static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
175{
176 if (map->aio.data[idx]) {
177 munmap(map->aio.data[idx], perf_mmap__mmap_len(map));
178 map->aio.data[idx] = NULL;
179 }
180}
181
182static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity)
183{
184 void *data;
185 size_t mmap_len;
186 unsigned long node_mask;
187
188 if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
189 data = map->aio.data[idx];
190 mmap_len = perf_mmap__mmap_len(map);
191 node_mask = 1UL << cpu__get_node(cpu);
192 if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
193 pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
194 data, data + mmap_len, cpu__get_node(cpu));
195 return -1;
196 }
197 }
198
199 return 0;
200}
201#else
202static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
203{
204 map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
205 if (map->aio.data[idx] == NULL)
206 return -1;
207
208 return 0;
209}
210
211static void perf_mmap__aio_free(struct perf_mmap *map, int idx)
212{
213 zfree(&(map->aio.data[idx]));
214}
215
216static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused,
217 int cpu __maybe_unused, int affinity __maybe_unused)
218{
219 return 0;
220}
221#endif
222
157static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) 223static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
158{ 224{
159 int delta_max, i, prio; 225 int delta_max, i, prio, ret;
160 226
161 map->aio.nr_cblocks = mp->nr_cblocks; 227 map->aio.nr_cblocks = mp->nr_cblocks;
162 if (map->aio.nr_cblocks) { 228 if (map->aio.nr_cblocks) {
@@ -177,11 +243,14 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
177 } 243 }
178 delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); 244 delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX);
179 for (i = 0; i < map->aio.nr_cblocks; ++i) { 245 for (i = 0; i < map->aio.nr_cblocks; ++i) {
180 map->aio.data[i] = malloc(perf_mmap__mmap_len(map)); 246 ret = perf_mmap__aio_alloc(map, i);
181 if (!map->aio.data[i]) { 247 if (ret == -1) {
182 pr_debug2("failed to allocate data buffer area, error %m"); 248 pr_debug2("failed to allocate data buffer area, error %m");
183 return -1; 249 return -1;
184 } 250 }
251 ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity);
252 if (ret == -1)
253 return -1;
185 /* 254 /*
186 * Use cblock.aio_fildes value different from -1 255 * Use cblock.aio_fildes value different from -1
187 * to denote started aio write operation on the 256 * to denote started aio write operation on the
@@ -210,7 +279,7 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
210 int i; 279 int i;
211 280
212 for (i = 0; i < map->aio.nr_cblocks; ++i) 281 for (i = 0; i < map->aio.nr_cblocks; ++i)
213 zfree(&map->aio.data[i]); 282 perf_mmap__aio_free(map, i);
214 if (map->aio.data) 283 if (map->aio.data)
215 zfree(&map->aio.data); 284 zfree(&map->aio.data);
216 zfree(&map->aio.cblocks); 285 zfree(&map->aio.cblocks);
@@ -314,6 +383,32 @@ void perf_mmap__munmap(struct perf_mmap *map)
314 auxtrace_mmap__munmap(&map->auxtrace_mmap); 383 auxtrace_mmap__munmap(&map->auxtrace_mmap);
315} 384}
316 385
386static void build_node_mask(int node, cpu_set_t *mask)
387{
388 int c, cpu, nr_cpus;
389 const struct cpu_map *cpu_map = NULL;
390
391 cpu_map = cpu_map__online();
392 if (!cpu_map)
393 return;
394
395 nr_cpus = cpu_map__nr(cpu_map);
396 for (c = 0; c < nr_cpus; c++) {
397 cpu = cpu_map->map[c]; /* map c index to online cpu index */
398 if (cpu__get_node(cpu) == node)
399 CPU_SET(cpu, mask);
400 }
401}
402
403static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp)
404{
405 CPU_ZERO(&map->affinity_mask);
406 if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
407 build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask);
408 else if (mp->affinity == PERF_AFFINITY_CPU)
409 CPU_SET(map->cpu, &map->affinity_mask);
410}
411
317int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) 412int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
318{ 413{
319 /* 414 /*
@@ -343,6 +438,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
343 map->fd = fd; 438 map->fd = fd;
344 map->cpu = cpu; 439 map->cpu = cpu;
345 440
441 perf_mmap__setup_affinity_mask(map, mp);
442
346 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 443 if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
347 &mp->auxtrace_mp, map->base, fd)) 444 &mp->auxtrace_mp, map->base, fd))
348 return -1; 445 return -1;
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index aeb6942fdb00..e566c19b242b 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -38,6 +38,7 @@ struct perf_mmap {
38 int nr_cblocks; 38 int nr_cblocks;
39 } aio; 39 } aio;
40#endif 40#endif
41 cpu_set_t affinity_mask;
41}; 42};
42 43
43/* 44/*
@@ -69,7 +70,7 @@ enum bkw_mmap_state {
69}; 70};
70 71
71struct mmap_params { 72struct mmap_params {
72 int prot, mask, nr_cblocks; 73 int prot, mask, nr_cblocks, affinity;
73 struct auxtrace_mmap_params auxtrace_mp; 74 struct auxtrace_mmap_params auxtrace_mp;
74}; 75};
75 76
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 920e1e6551dd..4dcc01b2532c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
2540 2540
2541 print_sdt_events(NULL, NULL, name_only); 2541 print_sdt_events(NULL, NULL, name_only);
2542 2542
2543 metricgroup__print(true, true, NULL, name_only); 2543 metricgroup__print(true, true, NULL, name_only, details_flag);
2544} 2544}
2545 2545
2546int parse_events__is_hardcoded_term(struct parse_events_term *term) 2546int parse_events__is_hardcoded_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index da8fe57691b8..44819bdb037d 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -311,7 +311,7 @@ value_sym '/' event_config '/'
311 $$ = list; 311 $$ = list;
312} 312}
313| 313|
314value_sym sep_slash_dc 314value_sym sep_slash_slash_dc
315{ 315{
316 struct list_head *list; 316 struct list_head *list;
317 int type = $1 >> 16; 317 int type = $1 >> 16;
@@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE
702 702
703sep_dc: ':' | 703sep_dc: ':' |
704 704
705sep_slash_dc: '/' | ':' | 705sep_slash_slash_dc: '/' '/' | ':' |
706 706
707%% 707%%
708 708
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 11a234740632..51d437f55d18 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -29,8 +29,6 @@ struct perf_pmu_format {
29 struct list_head list; 29 struct list_head list;
30}; 30};
31 31
32#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
33
34int perf_pmu_parse(struct list_head *list, char *name); 32int perf_pmu_parse(struct list_head *list, char *name);
35extern FILE *perf_pmu_in; 33extern FILE *perf_pmu_in;
36 34
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 76fecec7b3f9..47253c3daf55 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -6,9 +6,10 @@
6#include <linux/compiler.h> 6#include <linux/compiler.h>
7#include <linux/perf_event.h> 7#include <linux/perf_event.h>
8#include <stdbool.h> 8#include <stdbool.h>
9#include "evsel.h"
10#include "parse-events.h" 9#include "parse-events.h"
11 10
11struct perf_evsel_config_term;
12
12enum { 13enum {
13 PERF_PMU_FORMAT_VALUE_CONFIG, 14 PERF_PMU_FORMAT_VALUE_CONFIG,
14 PERF_PMU_FORMAT_VALUE_CONFIG1, 15 PERF_PMU_FORMAT_VALUE_CONFIG1,
@@ -16,6 +17,7 @@ enum {
16}; 17};
17 18
18#define PERF_PMU_FORMAT_BITS 64 19#define PERF_PMU_FORMAT_BITS 64
20#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
19 21
20struct perf_event_attr; 22struct perf_event_attr;
21 23
@@ -29,7 +31,6 @@ struct perf_pmu {
29 struct list_head format; /* HEAD struct perf_pmu_format -> list */ 31 struct list_head format; /* HEAD struct perf_pmu_format -> list */
30 struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ 32 struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
31 struct list_head list; /* ELEM */ 33 struct list_head list; /* ELEM */
32 int (*set_drv_config) (struct perf_evsel_config_term *term);
33}; 34};
34 35
35struct perf_pmu_info { 36struct perf_pmu_info {
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 18a59fba97ff..0030f9b9bf7e 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -35,11 +35,14 @@
35 35
36#include "util.h" 36#include "util.h"
37#include "event.h" 37#include "event.h"
38#include "namespaces.h"
38#include "strlist.h" 39#include "strlist.h"
39#include "strfilter.h" 40#include "strfilter.h"
40#include "debug.h" 41#include "debug.h"
41#include "cache.h" 42#include "cache.h"
42#include "color.h" 43#include "color.h"
44#include "map.h"
45#include "map_groups.h"
43#include "symbol.h" 46#include "symbol.h"
44#include "thread.h" 47#include "thread.h"
45#include <api/fs/fs.h> 48#include <api/fs/fs.h>
@@ -3528,7 +3531,8 @@ int show_available_funcs(const char *target, struct nsinfo *nsi,
3528 /* Show all (filtered) symbols */ 3531 /* Show all (filtered) symbols */
3529 setup_pager(); 3532 setup_pager();
3530 3533
3531 for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) { 3534 for (nd = rb_first_cached(&map->dso->symbol_names); nd;
3535 nd = rb_next(nd)) {
3532 struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); 3536 struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
3533 3537
3534 if (strfilter__compare(_filter, pos->sym.name)) 3538 if (strfilter__compare(_filter, pos->sym.name))
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 15a98c3a2a2f..05c8d571a901 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -4,8 +4,9 @@
4 4
5#include <linux/compiler.h> 5#include <linux/compiler.h>
6#include <stdbool.h> 6#include <stdbool.h>
7#include "intlist.h" 7
8#include "namespaces.h" 8struct intlist;
9struct nsinfo;
9 10
10/* Probe related configurations */ 11/* Probe related configurations */
11struct probe_conf { 12struct probe_conf {
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
index 0b1195cad0e5..4062bc4412a9 100644
--- a/tools/perf/util/probe-file.c
+++ b/tools/perf/util/probe-file.c
@@ -20,6 +20,7 @@
20#include <sys/types.h> 20#include <sys/types.h>
21#include <sys/uio.h> 21#include <sys/uio.h>
22#include <unistd.h> 22#include <unistd.h>
23#include "namespaces.h"
23#include "util.h" 24#include "util.h"
24#include "event.h" 25#include "event.h"
25#include "strlist.h" 26#include "strlist.h"
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
index a920f702a74d..376e86cb4c3c 100644
--- a/tools/perf/util/rb_resort.h
+++ b/tools/perf/util/rb_resort.h
@@ -140,12 +140,12 @@ struct __name##_sorted *__name = __name##_sorted__new
140 140
141/* For 'struct intlist' */ 141/* For 'struct intlist' */
142#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ 142#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
143 DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ 143 DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \
144 __ilist->rblist.nr_entries) 144 __ilist->rblist.nr_entries)
145 145
146/* For 'struct machine->threads' */ 146/* For 'struct machine->threads' */
147#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ 147#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
148 DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ 148 DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \
149 __machine->threads[hash_bucket].nr) 149 __machine->threads[hash_bucket].nr)
150 150
151#endif /* _PERF_RESORT_RB_H_ */ 151#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
index 0efc3258c648..11e07fab20dc 100644
--- a/tools/perf/util/rblist.c
+++ b/tools/perf/util/rblist.c
@@ -13,8 +13,9 @@
13 13
14int rblist__add_node(struct rblist *rblist, const void *new_entry) 14int rblist__add_node(struct rblist *rblist, const void *new_entry)
15{ 15{
16 struct rb_node **p = &rblist->entries.rb_node; 16 struct rb_node **p = &rblist->entries.rb_root.rb_node;
17 struct rb_node *parent = NULL, *new_node; 17 struct rb_node *parent = NULL, *new_node;
18 bool leftmost = true;
18 19
19 while (*p != NULL) { 20 while (*p != NULL) {
20 int rc; 21 int rc;
@@ -24,8 +25,10 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
24 rc = rblist->node_cmp(parent, new_entry); 25 rc = rblist->node_cmp(parent, new_entry);
25 if (rc > 0) 26 if (rc > 0)
26 p = &(*p)->rb_left; 27 p = &(*p)->rb_left;
27 else if (rc < 0) 28 else if (rc < 0) {
28 p = &(*p)->rb_right; 29 p = &(*p)->rb_right;
30 leftmost = false;
31 }
29 else 32 else
30 return -EEXIST; 33 return -EEXIST;
31 } 34 }
@@ -35,7 +38,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
35 return -ENOMEM; 38 return -ENOMEM;
36 39
37 rb_link_node(new_node, parent, p); 40 rb_link_node(new_node, parent, p);
38 rb_insert_color(new_node, &rblist->entries); 41 rb_insert_color_cached(new_node, &rblist->entries, leftmost);
39 ++rblist->nr_entries; 42 ++rblist->nr_entries;
40 43
41 return 0; 44 return 0;
@@ -43,7 +46,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry)
43 46
44void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) 47void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
45{ 48{
46 rb_erase(rb_node, &rblist->entries); 49 rb_erase_cached(rb_node, &rblist->entries);
47 --rblist->nr_entries; 50 --rblist->nr_entries;
48 rblist->node_delete(rblist, rb_node); 51 rblist->node_delete(rblist, rb_node);
49} 52}
@@ -52,8 +55,9 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist,
52 const void *entry, 55 const void *entry,
53 bool create) 56 bool create)
54{ 57{
55 struct rb_node **p = &rblist->entries.rb_node; 58 struct rb_node **p = &rblist->entries.rb_root.rb_node;
56 struct rb_node *parent = NULL, *new_node = NULL; 59 struct rb_node *parent = NULL, *new_node = NULL;
60 bool leftmost = true;
57 61
58 while (*p != NULL) { 62 while (*p != NULL) {
59 int rc; 63 int rc;
@@ -63,8 +67,10 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist,
63 rc = rblist->node_cmp(parent, entry); 67 rc = rblist->node_cmp(parent, entry);
64 if (rc > 0) 68 if (rc > 0)
65 p = &(*p)->rb_left; 69 p = &(*p)->rb_left;
66 else if (rc < 0) 70 else if (rc < 0) {
67 p = &(*p)->rb_right; 71 p = &(*p)->rb_right;
72 leftmost = false;
73 }
68 else 74 else
69 return parent; 75 return parent;
70 } 76 }
@@ -73,7 +79,8 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist,
73 new_node = rblist->node_new(rblist, entry); 79 new_node = rblist->node_new(rblist, entry);
74 if (new_node) { 80 if (new_node) {
75 rb_link_node(new_node, parent, p); 81 rb_link_node(new_node, parent, p);
76 rb_insert_color(new_node, &rblist->entries); 82 rb_insert_color_cached(new_node,
83 &rblist->entries, leftmost);
77 ++rblist->nr_entries; 84 ++rblist->nr_entries;
78 } 85 }
79 } 86 }
@@ -94,7 +101,7 @@ struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
94void rblist__init(struct rblist *rblist) 101void rblist__init(struct rblist *rblist)
95{ 102{
96 if (rblist != NULL) { 103 if (rblist != NULL) {
97 rblist->entries = RB_ROOT; 104 rblist->entries = RB_ROOT_CACHED;
98 rblist->nr_entries = 0; 105 rblist->nr_entries = 0;
99 } 106 }
100 107
@@ -103,7 +110,7 @@ void rblist__init(struct rblist *rblist)
103 110
104void rblist__exit(struct rblist *rblist) 111void rblist__exit(struct rblist *rblist)
105{ 112{
106 struct rb_node *pos, *next = rb_first(&rblist->entries); 113 struct rb_node *pos, *next = rb_first_cached(&rblist->entries);
107 114
108 while (next) { 115 while (next) {
109 pos = next; 116 pos = next;
@@ -124,7 +131,8 @@ struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx)
124{ 131{
125 struct rb_node *node; 132 struct rb_node *node;
126 133
127 for (node = rb_first(&rblist->entries); node; node = rb_next(node)) { 134 for (node = rb_first_cached(&rblist->entries); node;
135 node = rb_next(node)) {
128 if (!idx--) 136 if (!idx--)
129 return node; 137 return node;
130 } 138 }
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
index 76df15c27f5f..14b232a4d0b6 100644
--- a/tools/perf/util/rblist.h
+++ b/tools/perf/util/rblist.h
@@ -20,7 +20,7 @@
20 */ 20 */
21 21
22struct rblist { 22struct rblist {
23 struct rb_root entries; 23 struct rb_root_cached entries;
24 unsigned int nr_entries; 24 unsigned int nr_entries;
25 25
26 int (*node_cmp)(struct rb_node *rbn, const void *entry); 26 int (*node_cmp)(struct rb_node *rbn, const void *entry);
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
new file mode 100644
index 000000000000..d4356030b504
--- /dev/null
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -0,0 +1,62 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * Support for s390 CPU measurement counter set diagnostic facility
4 *
5 * Copyright IBM Corp. 2019
6 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
7 * Thomas Richter <tmricht@linux.ibm.com>
8 */
9#ifndef S390_CPUMCF_KERNEL_H
10#define S390_CPUMCF_KERNEL_H
11
12#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
13#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
14
15struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
16 unsigned int def:16; /* 0-15 Data Entry Format */
17 unsigned int set:16; /* 16-31 Counter set identifier */
18 unsigned int ctr:16; /* 32-47 Number of stored counters */
19 unsigned int res1:16; /* 48-63 Reserved */
20};
21
22struct cf_trailer_entry { /* CPU-M CF trailer for raw traces (64 byte) */
23 /* 0 - 7 */
24 union {
25 struct {
26 unsigned int clock_base:1; /* TOD clock base */
27 unsigned int speed:1; /* CPU speed */
28 /* Measurement alerts */
29 unsigned int mtda:1; /* Loss of MT ctr. data alert */
30 unsigned int caca:1; /* Counter auth. change alert */
31 unsigned int lcda:1; /* Loss of counter data alert */
32 };
33 unsigned long flags; /* 0-63 All indicators */
34 };
35 /* 8 - 15 */
36 unsigned int cfvn:16; /* 64-79 Ctr First Version */
37 unsigned int csvn:16; /* 80-95 Ctr Second Version */
38 unsigned int cpu_speed:32; /* 96-127 CPU speed */
39 /* 16 - 23 */
40 unsigned long timestamp; /* 128-191 Timestamp (TOD) */
41 /* 24 - 55 */
42 union {
43 struct {
44 unsigned long progusage1;
45 unsigned long progusage2;
46 unsigned long progusage3;
47 unsigned long tod_base;
48 };
49 unsigned long progusage[4];
50 };
51 /* 56 - 63 */
52 unsigned int mach_type:16; /* Machine type */
53 unsigned int res1:16; /* Reserved */
54 unsigned int res2:32; /* Reserved */
55};
56
57#define CPUMF_CTR_SET_BASIC 0 /* Basic Counter Set */
58#define CPUMF_CTR_SET_USER 1 /* Problem-State Counter Set */
59#define CPUMF_CTR_SET_CRYPTO 2 /* Crypto-Activity Counter Set */
60#define CPUMF_CTR_SET_EXT 3 /* Extended Counter Set */
61#define CPUMF_CTR_SET_MT_DIAG 4 /* MT-diagnostic Counter Set */
62#endif
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 68b2570304ec..c215704931dc 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -162,6 +162,7 @@
162#include "auxtrace.h" 162#include "auxtrace.h"
163#include "s390-cpumsf.h" 163#include "s390-cpumsf.h"
164#include "s390-cpumsf-kernel.h" 164#include "s390-cpumsf-kernel.h"
165#include "s390-cpumcf-kernel.h"
165#include "config.h" 166#include "config.h"
166 167
167struct s390_cpumsf { 168struct s390_cpumsf {
@@ -184,8 +185,58 @@ struct s390_cpumsf_queue {
184 struct auxtrace_buffer *buffer; 185 struct auxtrace_buffer *buffer;
185 int cpu; 186 int cpu;
186 FILE *logfile; 187 FILE *logfile;
188 FILE *logfile_ctr;
187}; 189};
188 190
191/* Check if the raw data should be dumped to file. If this is the case and
192 * the file to dump to has not been opened for writing, do so.
193 *
194 * Return 0 on success and greater than zero on error so processing continues.
195 */
196static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
197 struct perf_sample *sample)
198{
199 struct s390_cpumsf_queue *sfq;
200 struct auxtrace_queue *q;
201 int rc = 0;
202
203 if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu)
204 return rc;
205
206 q = &sf->queues.queue_array[sample->cpu];
207 sfq = q->priv;
208 if (!sfq) /* Queue not yet allocated */
209 return rc;
210
211 if (!sfq->logfile_ctr) {
212 char *name;
213
214 rc = (sf->logdir)
215 ? asprintf(&name, "%s/aux.ctr.%02x",
216 sf->logdir, sample->cpu)
217 : asprintf(&name, "aux.ctr.%02x", sample->cpu);
218 if (rc > 0)
219 sfq->logfile_ctr = fopen(name, "w");
220 if (sfq->logfile_ctr == NULL) {
221 pr_err("Failed to open counter set log file %s, "
222 "continue...\n", name);
223 rc = 1;
224 }
225 free(name);
226 }
227
228 if (sfq->logfile_ctr) {
229 /* See comment above for -4 */
230 size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1,
231 sfq->logfile_ctr);
232 if (n != 1) {
233 pr_err("Failed to write counter set data\n");
234 rc = 1;
235 }
236 }
237 return rc;
238}
239
189/* Display s390 CPU measurement facility basic-sampling data entry */ 240/* Display s390 CPU measurement facility basic-sampling data entry */
190static bool s390_cpumsf_basic_show(const char *color, size_t pos, 241static bool s390_cpumsf_basic_show(const char *color, size_t pos,
191 struct hws_basic_entry *basic) 242 struct hws_basic_entry *basic)
@@ -301,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type,
301 *dsdes = 85; 352 *dsdes = 85;
302 *bsdes = 32; 353 *bsdes = 32;
303 break; 354 break;
355 case 2964:
356 case 2965:
357 *dsdes = 112;
358 *bsdes = 32;
359 break;
304 default: 360 default:
305 /* Illegal trailer entry */ 361 /* Illegal trailer entry */
306 return false; 362 return false;
@@ -768,7 +824,7 @@ static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
768} 824}
769 825
770static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, 826static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
771 pid_t pid, pid_t tid, u64 ip) 827 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
772{ 828{
773 char msg[MAX_AUXTRACE_ERROR_MSG]; 829 char msg[MAX_AUXTRACE_ERROR_MSG];
774 union perf_event event; 830 union perf_event event;
@@ -776,7 +832,7 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
776 832
777 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); 833 strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
778 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, 834 auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
779 code, cpu, pid, tid, ip, msg); 835 code, cpu, pid, tid, ip, msg, timestamp);
780 836
781 err = perf_session__deliver_synth_event(sf->session, &event, NULL); 837 err = perf_session__deliver_synth_event(sf->session, &event, NULL);
782 if (err) 838 if (err)
@@ -788,11 +844,12 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
788static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) 844static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
789{ 845{
790 return s390_cpumsf_synth_error(sf, 1, sample->cpu, 846 return s390_cpumsf_synth_error(sf, 1, sample->cpu,
791 sample->pid, sample->tid, 0); 847 sample->pid, sample->tid, 0,
848 sample->time);
792} 849}
793 850
794static int 851static int
795s390_cpumsf_process_event(struct perf_session *session __maybe_unused, 852s390_cpumsf_process_event(struct perf_session *session,
796 union perf_event *event, 853 union perf_event *event,
797 struct perf_sample *sample, 854 struct perf_sample *sample,
798 struct perf_tool *tool) 855 struct perf_tool *tool)
@@ -801,6 +858,8 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
801 struct s390_cpumsf, 858 struct s390_cpumsf,
802 auxtrace); 859 auxtrace);
803 u64 timestamp = sample->time; 860 u64 timestamp = sample->time;
861 struct perf_evsel *ev_bc000;
862
804 int err = 0; 863 int err = 0;
805 864
806 if (dump_trace) 865 if (dump_trace)
@@ -811,6 +870,16 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused,
811 return -EINVAL; 870 return -EINVAL;
812 } 871 }
813 872
873 if (event->header.type == PERF_RECORD_SAMPLE &&
874 sample->raw_size) {
875 /* Handle event with raw data */
876 ev_bc000 = perf_evlist__event2evsel(session->evlist, event);
877 if (ev_bc000 &&
878 ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG)
879 err = s390_cpumcf_dumpctr(sf, sample);
880 return err;
881 }
882
814 if (event->header.type == PERF_RECORD_AUX && 883 if (event->header.type == PERF_RECORD_AUX &&
815 event->aux.flags & PERF_AUX_FLAG_TRUNCATED) 884 event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
816 return s390_cpumsf_lost(sf, sample); 885 return s390_cpumsf_lost(sf, sample);
@@ -891,9 +960,15 @@ static void s390_cpumsf_free_queues(struct perf_session *session)
891 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) 960 struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *)
892 queues->queue_array[i].priv; 961 queues->queue_array[i].priv;
893 962
894 if (sfq != NULL && sfq->logfile) { 963 if (sfq != NULL) {
895 fclose(sfq->logfile); 964 if (sfq->logfile) {
896 sfq->logfile = NULL; 965 fclose(sfq->logfile);
966 sfq->logfile = NULL;
967 }
968 if (sfq->logfile_ctr) {
969 fclose(sfq->logfile_ctr);
970 sfq->logfile_ctr = NULL;
971 }
897 } 972 }
898 zfree(&queues->queue_array[i].priv); 973 zfree(&queues->queue_array[i].priv);
899 } 974 }
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
new file mode 100644
index 000000000000..6650f599ed9c
--- /dev/null
+++ b/tools/perf/util/s390-sample-raw.c
@@ -0,0 +1,222 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright IBM Corp. 2019
4 * Author(s): Thomas Richter <tmricht@linux.ibm.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License (version 2 only)
8 * as published by the Free Software Foundation.
9 *
10 * Architecture specific trace_event function. Save event's bc000 raw data
11 * to file. File name is aux.ctr.## where ## stands for the CPU number the
12 * sample was taken from.
13 */
14
15#include <unistd.h>
16#include <stdio.h>
17#include <string.h>
18#include <inttypes.h>
19
20#include <sys/stat.h>
21#include <linux/compiler.h>
22#include <asm/byteorder.h>
23
24#include "debug.h"
25#include "util.h"
26#include "auxtrace.h"
27#include "session.h"
28#include "evlist.h"
29#include "config.h"
30#include "color.h"
31#include "sample-raw.h"
32#include "s390-cpumcf-kernel.h"
33#include "pmu-events/pmu-events.h"
34
35static size_t ctrset_size(struct cf_ctrset_entry *set)
36{
37 return sizeof(*set) + set->ctr * sizeof(u64);
38}
39
40static bool ctrset_valid(struct cf_ctrset_entry *set)
41{
42 return set->def == S390_CPUMCF_DIAG_DEF;
43}
44
45/* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte
46 * aligned and might have trailing padding bytes.
47 * Validate that the raw data is a well-formed sequence of counter set entries.
48 */
49static bool s390_cpumcfdg_testctr(struct perf_sample *sample)
50{
51 size_t len = sample->raw_size, offset = 0;
52 unsigned char *buf = sample->raw_data;
53 struct cf_trailer_entry *te;
54 struct cf_ctrset_entry *cep, ce;
55
56 if (!len)
57 return false;
58 while (offset < len) {
59 cep = (struct cf_ctrset_entry *)(buf + offset);
60 ce.def = be16_to_cpu(cep->def);
61 ce.set = be16_to_cpu(cep->set);
62 ce.ctr = be16_to_cpu(cep->ctr);
63 ce.res1 = be16_to_cpu(cep->res1);
64
65 if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) {
66 /* Raw data for counter sets are always multiple of 8
67 * bytes. Prepending a 4 bytes size field to the
68 * raw data block in the sample causes the perf tool
69 * to append 4 padding bytes to make the raw data part
70 * of the sample a multiple of eight bytes again.
71 *
72 * If the last entry (trailer) is 4 bytes off the raw
73 * area data end, all is good.
74 */
75 if (len - offset - sizeof(*te) == 4)
76 break;
77 pr_err("Invalid counter set entry at %zd\n", offset);
78 return false;
79 }
80 offset += ctrset_size(&ce);
81 }
82 return true;
83}
84
85/* Dump the trailer entry of event bc000 on screen; data already tested for correctness. */
86static void s390_cpumcfdg_dumptrail(const char *color, size_t offset,
87 struct cf_trailer_entry *tep)
88{
89 struct cf_trailer_entry te;
90
91 te.flags = be64_to_cpu(tep->flags);
92 te.cfvn = be16_to_cpu(tep->cfvn);
93 te.csvn = be16_to_cpu(tep->csvn);
94 te.cpu_speed = be32_to_cpu(tep->cpu_speed);
95 te.timestamp = be64_to_cpu(tep->timestamp);
96 te.progusage1 = be64_to_cpu(tep->progusage1);
97 te.progusage2 = be64_to_cpu(tep->progusage2);
98 te.progusage3 = be64_to_cpu(tep->progusage3);
99 te.tod_base = be64_to_cpu(tep->tod_base);
100 te.mach_type = be16_to_cpu(tep->mach_type);
101 te.res1 = be16_to_cpu(tep->res1);
102 te.res2 = be32_to_cpu(tep->res2);
103
104 color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c"
105 " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n",
106 offset, te.clock_base ? 'T' : ' ',
107 te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ',
108 te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ',
109 te.cfvn, te.csvn, te.cpu_speed, te.timestamp);
110 color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx"
111 " Type:%x\n\n",
112 te.progusage1, te.progusage2, te.progusage3,
113 te.tod_base, te.mach_type);
114}
115
116/* Return starting number of a counter set */
117static int get_counterset_start(int setnr)
118{
119 switch (setnr) {
120 case CPUMF_CTR_SET_BASIC: /* Basic counter set */
121 return 0;
122 case CPUMF_CTR_SET_USER: /* Problem state counter set */
123 return 32;
124 case CPUMF_CTR_SET_CRYPTO: /* Crypto counter set */
125 return 64;
126 case CPUMF_CTR_SET_EXT: /* Extended counter set */
127 return 128;
128 case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */
129 return 448;
130 default:
131 return -1;
132 }
133}
134
135/* Scan the PMU table and extract the logical name of a counter from the
136 * PMU events table. Input is the counter set and counter number within the
137 * set. Construct the event number and use this as key. If they match return
138 * the name of this counter.
139 * If no match is found a NULL pointer is returned.
140 */
141static const char *get_counter_name(int set, int nr, struct pmu_events_map *map)
142{
143 int rc, event_nr, wanted = get_counterset_start(set) + nr;
144
145 if (map) {
146 struct pmu_event *evp = map->table;
147
148 for (; evp->name || evp->event || evp->desc; ++evp) {
149 if (evp->name == NULL || evp->event == NULL)
150 continue;
151 rc = sscanf(evp->event, "event=%x", &event_nr);
152 if (rc == 1 && event_nr == wanted)
153 return evp->name;
154 }
155 }
156 return NULL;
157}
158
159static void s390_cpumcfdg_dump(struct perf_sample *sample)
160{
161 size_t i, len = sample->raw_size, offset = 0;
162 unsigned char *buf = sample->raw_data;
163 const char *color = PERF_COLOR_BLUE;
164 struct cf_ctrset_entry *cep, ce;
165 struct pmu_events_map *map;
166 struct perf_pmu pmu;
167 u64 *p;
168
169 memset(&pmu, 0, sizeof(pmu));
170 map = perf_pmu__find_map(&pmu);
171 while (offset < len) {
172 cep = (struct cf_ctrset_entry *)(buf + offset);
173
174 ce.def = be16_to_cpu(cep->def);
175 ce.set = be16_to_cpu(cep->set);
176 ce.ctr = be16_to_cpu(cep->ctr);
177 ce.res1 = be16_to_cpu(cep->res1);
178
179 if (!ctrset_valid(&ce)) { /* Print trailer */
180 s390_cpumcfdg_dumptrail(color, offset,
181 (struct cf_trailer_entry *)cep);
182 return;
183 }
184
185 color_fprintf(stdout, color, " [%#08zx] Counterset:%d"
186 " Counters:%d\n", offset, ce.set, ce.ctr);
187 for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) {
188 const char *ev_name = get_counter_name(ce.set, i, map);
189
190 color_fprintf(stdout, color,
191 "\tCounter:%03d %s Value:%#018lx\n", i,
192 ev_name ?: "<unknown>", be64_to_cpu(*p));
193 }
194 offset += ctrset_size(&ce);
195 }
196}
197
198/* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events
199 * and if the event was triggered by a counter set diagnostic event display
200 * its raw data.
201 * The function is only invoked when the dump flag -D is set.
202 */
203void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, union perf_event *event,
204 struct perf_sample *sample)
205{
206 struct perf_evsel *ev_bc000;
207
208 if (event->header.type != PERF_RECORD_SAMPLE)
209 return;
210
211 ev_bc000 = perf_evlist__event2evsel(evlist, event);
212 if (ev_bc000 == NULL ||
213 ev_bc000->attr.config != PERF_EVENT_CPUM_CF_DIAG)
214 return;
215
216 /* Display raw data on screen */
217 if (!s390_cpumcfdg_testctr(sample)) {
218 pr_err("Invalid counter set data encountered\n");
219 return;
220 }
221 s390_cpumcfdg_dump(sample);
222}
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c
new file mode 100644
index 000000000000..c21e1311fb0f
--- /dev/null
+++ b/tools/perf/util/sample-raw.c
@@ -0,0 +1,18 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2
3#include <string.h>
4#include "evlist.h"
5#include "env.h"
6#include "sample-raw.h"
7
8/*
9 * Check platform the perf data file was created on and perform platform
10 * specific interpretation.
11 */
12void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist)
13{
14 const char *arch_pf = perf_env__arch(evlist->env);
15
16 if (arch_pf && !strcmp("s390", arch_pf))
17 evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw;
18}
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h
new file mode 100644
index 000000000000..95d445c87e93
--- /dev/null
+++ b/tools/perf/util/sample-raw.h
@@ -0,0 +1,14 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __SAMPLE_RAW_H
3#define __SAMPLE_RAW_H 1
4
5struct perf_evlist;
6union perf_event;
7struct perf_sample;
8
9void perf_evlist__s390_sample_raw(struct perf_evlist *evlist,
10 union perf_event *event,
11 struct perf_sample *sample);
12
13void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist);
14#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
index 82d28c67e0f3..7b342ce38d99 100644
--- a/tools/perf/util/scripting-engines/Build
+++ b/tools/perf/util/scripting-engines/Build
@@ -1,5 +1,5 @@
1libperf-$(CONFIG_LIBPERL) += trace-event-perl.o 1perf-$(CONFIG_LIBPERL) += trace-event-perl.o
2libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o 2perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
3 3
4CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default 4CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
5 5
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index b93f36b887b5..5f06378a482b 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -37,6 +37,8 @@
37#include "../../perf.h" 37#include "../../perf.h"
38#include "../callchain.h" 38#include "../callchain.h"
39#include "../machine.h" 39#include "../machine.h"
40#include "../map.h"
41#include "../symbol.h"
40#include "../thread.h" 42#include "../thread.h"
41#include "../event.h" 43#include "../event.h"
42#include "../trace-event.h" 44#include "../trace-event.h"
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 87ef16a1b17e..0e17db41b49b 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -44,6 +44,8 @@
44#include "../thread-stack.h" 44#include "../thread-stack.h"
45#include "../trace-event.h" 45#include "../trace-event.h"
46#include "../call-path.h" 46#include "../call-path.h"
47#include "map.h"
48#include "symbol.h"
47#include "thread_map.h" 49#include "thread_map.h"
48#include "cpumap.h" 50#include "cpumap.h"
49#include "print_binary.h" 51#include "print_binary.h"
@@ -733,8 +735,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
733 Py_FatalError("couldn't create Python dictionary"); 735 Py_FatalError("couldn't create Python dictionary");
734 736
735 pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); 737 pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
736 pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( 738 pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->attr, sizeof(evsel->attr)));
737 (const char *)&evsel->attr, sizeof(evsel->attr)));
738 739
739 pydict_set_item_string_decref(dict_sample, "pid", 740 pydict_set_item_string_decref(dict_sample, "pid",
740 _PyLong_FromLong(sample->pid)); 741 _PyLong_FromLong(sample->pid));
@@ -1494,34 +1495,40 @@ static void _free_command_line(wchar_t **command_line, int num)
1494static int python_start_script(const char *script, int argc, const char **argv) 1495static int python_start_script(const char *script, int argc, const char **argv)
1495{ 1496{
1496 struct tables *tables = &tables_global; 1497 struct tables *tables = &tables_global;
1498 PyMODINIT_FUNC (*initfunc)(void);
1497#if PY_MAJOR_VERSION < 3 1499#if PY_MAJOR_VERSION < 3
1498 const char **command_line; 1500 const char **command_line;
1499#else 1501#else
1500 wchar_t **command_line; 1502 wchar_t **command_line;
1501#endif 1503#endif
1502 char buf[PATH_MAX]; 1504 /*
1505 * Use a non-const name variable to cope with python 2.6's
1506 * PyImport_AppendInittab prototype
1507 */
1508 char buf[PATH_MAX], name[19] = "perf_trace_context";
1503 int i, err = 0; 1509 int i, err = 0;
1504 FILE *fp; 1510 FILE *fp;
1505 1511
1506#if PY_MAJOR_VERSION < 3 1512#if PY_MAJOR_VERSION < 3
1513 initfunc = initperf_trace_context;
1507 command_line = malloc((argc + 1) * sizeof(const char *)); 1514 command_line = malloc((argc + 1) * sizeof(const char *));
1508 command_line[0] = script; 1515 command_line[0] = script;
1509 for (i = 1; i < argc + 1; i++) 1516 for (i = 1; i < argc + 1; i++)
1510 command_line[i] = argv[i - 1]; 1517 command_line[i] = argv[i - 1];
1511#else 1518#else
1519 initfunc = PyInit_perf_trace_context;
1512 command_line = malloc((argc + 1) * sizeof(wchar_t *)); 1520 command_line = malloc((argc + 1) * sizeof(wchar_t *));
1513 command_line[0] = Py_DecodeLocale(script, NULL); 1521 command_line[0] = Py_DecodeLocale(script, NULL);
1514 for (i = 1; i < argc + 1; i++) 1522 for (i = 1; i < argc + 1; i++)
1515 command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); 1523 command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
1516#endif 1524#endif
1517 1525
1526 PyImport_AppendInittab(name, initfunc);
1518 Py_Initialize(); 1527 Py_Initialize();
1519 1528
1520#if PY_MAJOR_VERSION < 3 1529#if PY_MAJOR_VERSION < 3
1521 initperf_trace_context();
1522 PySys_SetArgv(argc + 1, (char **)command_line); 1530 PySys_SetArgv(argc + 1, (char **)command_line);
1523#else 1531#else
1524 PyInit_perf_trace_context();
1525 PySys_SetArgv(argc + 1, command_line); 1532 PySys_SetArgv(argc + 1, command_line);
1526#endif 1533#endif
1527 1534
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 5456c84c7dd1..c764bbc91009 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -13,6 +13,8 @@
13#include "evlist.h" 13#include "evlist.h"
14#include "evsel.h" 14#include "evsel.h"
15#include "memswap.h" 15#include "memswap.h"
16#include "map.h"
17#include "symbol.h"
16#include "session.h" 18#include "session.h"
17#include "tool.h" 19#include "tool.h"
18#include "sort.h" 20#include "sort.h"
@@ -23,6 +25,7 @@
23#include "auxtrace.h" 25#include "auxtrace.h"
24#include "thread.h" 26#include "thread.h"
25#include "thread-stack.h" 27#include "thread-stack.h"
28#include "sample-raw.h"
26#include "stat.h" 29#include "stat.h"
27#include "arch/common.h" 30#include "arch/common.h"
28 31
@@ -147,6 +150,8 @@ struct perf_session *perf_session__new(struct perf_data *data,
147 perf_session__set_id_hdr_size(session); 150 perf_session__set_id_hdr_size(session);
148 perf_session__set_comm_exec(session); 151 perf_session__set_comm_exec(session);
149 } 152 }
153
154 perf_evlist__init_trace_event_sample_raw(session->evlist);
150 } 155 }
151 } else { 156 } else {
152 session->machines.host.env = &perf_env; 157 session->machines.host.env = &perf_env;
@@ -376,6 +381,10 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
376 tool->itrace_start = perf_event__process_itrace_start; 381 tool->itrace_start = perf_event__process_itrace_start;
377 if (tool->context_switch == NULL) 382 if (tool->context_switch == NULL)
378 tool->context_switch = perf_event__process_switch; 383 tool->context_switch = perf_event__process_switch;
384 if (tool->ksymbol == NULL)
385 tool->ksymbol = perf_event__process_ksymbol;
386 if (tool->bpf_event == NULL)
387 tool->bpf_event = perf_event__process_bpf_event;
379 if (tool->read == NULL) 388 if (tool->read == NULL)
380 tool->read = process_event_sample_stub; 389 tool->read = process_event_sample_stub;
381 if (tool->throttle == NULL) 390 if (tool->throttle == NULL)
@@ -694,7 +703,10 @@ static void perf_event__auxtrace_error_swap(union perf_event *event,
694 event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); 703 event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu);
695 event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); 704 event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid);
696 event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); 705 event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid);
706 event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt);
697 event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); 707 event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
708 if (event->auxtrace_error.fmt)
709 event->auxtrace_error.time = bswap_64(event->auxtrace_error.time);
698} 710}
699 711
700static void perf_event__thread_map_swap(union perf_event *event, 712static void perf_event__thread_map_swap(union perf_event *event,
@@ -1065,6 +1077,8 @@ static void dump_event(struct perf_evlist *evlist, union perf_event *event,
1065 file_offset, event->header.size, event->header.type); 1077 file_offset, event->header.size, event->header.type);
1066 1078
1067 trace_event(event); 1079 trace_event(event);
1080 if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw)
1081 evlist->trace_event_sample_raw(evlist, event, sample);
1068 1082
1069 if (sample) 1083 if (sample)
1070 perf_evlist__print_tstamp(evlist, event, sample); 1084 perf_evlist__print_tstamp(evlist, event, sample);
@@ -1188,6 +1202,13 @@ static int deliver_sample_value(struct perf_evlist *evlist,
1188 return 0; 1202 return 0;
1189 } 1203 }
1190 1204
1205 /*
1206 * There's no reason to deliver sample
1207 * for zero period, bail out.
1208 */
1209 if (!sample->period)
1210 return 0;
1211
1191 return tool->sample(tool, event, sample, sid->evsel, machine); 1212 return tool->sample(tool, event, sample, sid->evsel, machine);
1192} 1213}
1193 1214
@@ -1305,6 +1326,10 @@ static int machines__deliver_event(struct machines *machines,
1305 case PERF_RECORD_SWITCH: 1326 case PERF_RECORD_SWITCH:
1306 case PERF_RECORD_SWITCH_CPU_WIDE: 1327 case PERF_RECORD_SWITCH_CPU_WIDE:
1307 return tool->context_switch(tool, event, sample, machine); 1328 return tool->context_switch(tool, event, sample, machine);
1329 case PERF_RECORD_KSYMBOL:
1330 return tool->ksymbol(tool, event, sample, machine);
1331 case PERF_RECORD_BPF_EVENT:
1332 return tool->bpf_event(tool, event, sample, machine);
1308 default: 1333 default:
1309 ++evlist->stats.nr_unknown_events; 1334 ++evlist->stats.nr_unknown_events;
1310 return -1; 1335 return -1;
@@ -1820,38 +1845,35 @@ fetch_mmaped_event(struct perf_session *session,
1820#define NUM_MMAPS 128 1845#define NUM_MMAPS 128
1821#endif 1846#endif
1822 1847
1823static int __perf_session__process_events(struct perf_session *session, 1848struct reader {
1824 u64 data_offset, u64 data_size, 1849 int fd;
1825 u64 file_size) 1850 u64 data_size;
1851 u64 data_offset;
1852};
1853
1854static int
1855reader__process_events(struct reader *rd, struct perf_session *session,
1856 struct ui_progress *prog)
1826{ 1857{
1827 struct ordered_events *oe = &session->ordered_events; 1858 u64 data_size = rd->data_size;
1828 struct perf_tool *tool = session->tool;
1829 int fd = perf_data__fd(session->data);
1830 u64 head, page_offset, file_offset, file_pos, size; 1859 u64 head, page_offset, file_offset, file_pos, size;
1831 int err, mmap_prot, mmap_flags, map_idx = 0; 1860 int err = 0, mmap_prot, mmap_flags, map_idx = 0;
1832 size_t mmap_size; 1861 size_t mmap_size;
1833 char *buf, *mmaps[NUM_MMAPS]; 1862 char *buf, *mmaps[NUM_MMAPS];
1834 union perf_event *event; 1863 union perf_event *event;
1835 struct ui_progress prog;
1836 s64 skip; 1864 s64 skip;
1837 1865
1838 perf_tool__fill_defaults(tool); 1866 page_offset = page_size * (rd->data_offset / page_size);
1839
1840 page_offset = page_size * (data_offset / page_size);
1841 file_offset = page_offset; 1867 file_offset = page_offset;
1842 head = data_offset - page_offset; 1868 head = rd->data_offset - page_offset;
1843
1844 if (data_size == 0)
1845 goto out;
1846 1869
1847 if (data_offset + data_size < file_size) 1870 ui_progress__init_size(prog, data_size, "Processing events...");
1848 file_size = data_offset + data_size;
1849 1871
1850 ui_progress__init_size(&prog, file_size, "Processing events..."); 1872 data_size += rd->data_offset;
1851 1873
1852 mmap_size = MMAP_SIZE; 1874 mmap_size = MMAP_SIZE;
1853 if (mmap_size > file_size) { 1875 if (mmap_size > data_size) {
1854 mmap_size = file_size; 1876 mmap_size = data_size;
1855 session->one_mmap = true; 1877 session->one_mmap = true;
1856 } 1878 }
1857 1879
@@ -1865,12 +1887,12 @@ static int __perf_session__process_events(struct perf_session *session,
1865 mmap_flags = MAP_PRIVATE; 1887 mmap_flags = MAP_PRIVATE;
1866 } 1888 }
1867remap: 1889remap:
1868 buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, 1890 buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd,
1869 file_offset); 1891 file_offset);
1870 if (buf == MAP_FAILED) { 1892 if (buf == MAP_FAILED) {
1871 pr_err("failed to mmap file\n"); 1893 pr_err("failed to mmap file\n");
1872 err = -errno; 1894 err = -errno;
1873 goto out_err; 1895 goto out;
1874 } 1896 }
1875 mmaps[map_idx] = buf; 1897 mmaps[map_idx] = buf;
1876 map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); 1898 map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
@@ -1902,7 +1924,7 @@ more:
1902 file_offset + head, event->header.size, 1924 file_offset + head, event->header.size,
1903 event->header.type); 1925 event->header.type);
1904 err = -EINVAL; 1926 err = -EINVAL;
1905 goto out_err; 1927 goto out;
1906 } 1928 }
1907 1929
1908 if (skip) 1930 if (skip)
@@ -1911,15 +1933,40 @@ more:
1911 head += size; 1933 head += size;
1912 file_pos += size; 1934 file_pos += size;
1913 1935
1914 ui_progress__update(&prog, size); 1936 ui_progress__update(prog, size);
1915 1937
1916 if (session_done()) 1938 if (session_done())
1917 goto out; 1939 goto out;
1918 1940
1919 if (file_pos < file_size) 1941 if (file_pos < data_size)
1920 goto more; 1942 goto more;
1921 1943
1922out: 1944out:
1945 return err;
1946}
1947
1948static int __perf_session__process_events(struct perf_session *session)
1949{
1950 struct reader rd = {
1951 .fd = perf_data__fd(session->data),
1952 .data_size = session->header.data_size,
1953 .data_offset = session->header.data_offset,
1954 };
1955 struct ordered_events *oe = &session->ordered_events;
1956 struct perf_tool *tool = session->tool;
1957 struct ui_progress prog;
1958 int err;
1959
1960 perf_tool__fill_defaults(tool);
1961
1962 if (rd.data_size == 0)
1963 return -1;
1964
1965 ui_progress__init_size(&prog, rd.data_size, "Processing events...");
1966
1967 err = reader__process_events(&rd, session, &prog);
1968 if (err)
1969 goto out_err;
1923 /* do the final flush for ordered samples */ 1970 /* do the final flush for ordered samples */
1924 err = ordered_events__flush(oe, OE_FLUSH__FINAL); 1971 err = ordered_events__flush(oe, OE_FLUSH__FINAL);
1925 if (err) 1972 if (err)
@@ -1944,20 +1991,13 @@ out_err:
1944 1991
1945int perf_session__process_events(struct perf_session *session) 1992int perf_session__process_events(struct perf_session *session)
1946{ 1993{
1947 u64 size = perf_data__size(session->data);
1948 int err;
1949
1950 if (perf_session__register_idle_thread(session) < 0) 1994 if (perf_session__register_idle_thread(session) < 0)
1951 return -ENOMEM; 1995 return -ENOMEM;
1952 1996
1953 if (!perf_data__is_pipe(session->data)) 1997 if (perf_data__is_pipe(session->data))
1954 err = __perf_session__process_events(session, 1998 return __perf_session__process_pipe_events(session);
1955 session->header.data_offset,
1956 session->header.data_size, size);
1957 else
1958 err = __perf_session__process_pipe_events(session);
1959 1999
1960 return err; 2000 return __perf_session__process_events(session);
1961} 2001}
1962 2002
1963bool perf_session__has_traces(struct perf_session *session, const char *msg) 2003bool perf_session__has_traces(struct perf_session *session, const char *msg)
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 64d1f36dee99..5b5a167b43ce 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -1,5 +1,3 @@
1#!/usr/bin/python
2
3from os import getenv 1from os import getenv
4from subprocess import Popen, PIPE 2from subprocess import Popen, PIPE
5from re import sub 3from re import sub
@@ -55,9 +53,14 @@ ext_sources = [f.strip() for f in open('util/python-ext-sources')
55# use full paths with source files 53# use full paths with source files
56ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) 54ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
57 55
56extra_libraries = []
57if '-DHAVE_LIBNUMA_SUPPORT' in cflags:
58 extra_libraries = [ 'numa' ]
59
58perf = Extension('perf', 60perf = Extension('perf',
59 sources = ext_sources, 61 sources = ext_sources,
60 include_dirs = ['util/include'], 62 include_dirs = ['util/include'],
63 libraries = extra_libraries,
61 extra_compile_args = cflags, 64 extra_compile_args = cflags,
62 extra_objects = [libtraceevent, libapikfs], 65 extra_objects = [libtraceevent, libapikfs],
63 ) 66 )
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 6c1a83768eb0..d2299e912e59 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -6,6 +6,7 @@
6#include "sort.h" 6#include "sort.h"
7#include "hist.h" 7#include "hist.h"
8#include "comm.h" 8#include "comm.h"
9#include "map.h"
9#include "symbol.h" 10#include "symbol.h"
10#include "thread.h" 11#include "thread.h"
11#include "evsel.h" 12#include "evsel.h"
@@ -230,8 +231,14 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
230 if (sym_l == sym_r) 231 if (sym_l == sym_r)
231 return 0; 232 return 0;
232 233
233 if (sym_l->inlined || sym_r->inlined) 234 if (sym_l->inlined || sym_r->inlined) {
234 return strcmp(sym_l->name, sym_r->name); 235 int ret = strcmp(sym_l->name, sym_r->name);
236
237 if (ret)
238 return ret;
239 if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start))
240 return 0;
241 }
235 242
236 if (sym_l->start != sym_r->start) 243 if (sym_l->start != sym_r->start)
237 return (int64_t)(sym_r->start - sym_l->start); 244 return (int64_t)(sym_r->start - sym_l->start);
@@ -428,8 +435,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf,
428{ 435{
429 436
430 struct symbol *sym = he->ms.sym; 437 struct symbol *sym = he->ms.sym;
431 struct map *map = he->ms.map;
432 struct perf_evsel *evsel = hists_to_evsel(he->hists);
433 struct annotation *notes; 438 struct annotation *notes;
434 double ipc = 0.0, coverage = 0.0; 439 double ipc = 0.0, coverage = 0.0;
435 char tmp[64]; 440 char tmp[64];
@@ -437,11 +442,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf,
437 if (!sym) 442 if (!sym)
438 return repsep_snprintf(bf, size, "%-*s", width, "-"); 443 return repsep_snprintf(bf, size, "%-*s", width, "-");
439 444
440 if (!sym->annotate2 && symbol__annotate2(sym, map, evsel,
441 &annotation__default_options, NULL) < 0) {
442 return 0;
443 }
444
445 notes = symbol__annotation(sym); 445 notes = symbol__annotation(sym);
446 446
447 if (notes->hit_cycles) 447 if (notes->hit_cycles)
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 130fe37fe2df..2fbee0b1011c 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -9,7 +9,8 @@
9#include <linux/list.h> 9#include <linux/list.h>
10#include "cache.h" 10#include "cache.h"
11#include <linux/rbtree.h> 11#include <linux/rbtree.h>
12#include "symbol.h" 12#include "map_symbol.h"
13#include "symbol_conf.h"
13#include "string.h" 14#include "string.h"
14#include "callchain.h" 15#include "callchain.h"
15#include "values.h" 16#include "values.h"
@@ -145,8 +146,8 @@ struct hist_entry {
145 union { 146 union {
146 /* this is for hierarchical entry structure */ 147 /* this is for hierarchical entry structure */
147 struct { 148 struct {
148 struct rb_root hroot_in; 149 struct rb_root_cached hroot_in;
149 struct rb_root hroot_out; 150 struct rb_root_cached hroot_out;
150 }; /* non-leaf entries */ 151 }; /* non-leaf entries */
151 struct rb_root sorted_chain; /* leaf entry has callchains */ 152 struct rb_root sorted_chain; /* leaf entry has callchains */
152 }; 153 };
diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h
index e500a746d5f1..1b5ed769779c 100644
--- a/tools/perf/util/srccode.h
+++ b/tools/perf/util/srccode.h
@@ -1,6 +1,19 @@
1#ifndef SRCCODE_H 1#ifndef SRCCODE_H
2#define SRCCODE_H 1 2#define SRCCODE_H 1
3 3
4struct srccode_state {
5 char *srcfile;
6 unsigned line;
7};
8
9static inline void srccode_state_init(struct srccode_state *state)
10{
11 state->srcfile = NULL;
12 state->line = 0;
13}
14
15void srccode_state_free(struct srccode_state *state);
16
4/* Result is not 0 terminated */ 17/* Result is not 0 terminated */
5char *find_sourceline(char *fn, unsigned line, int *lenp); 18char *find_sourceline(char *fn, unsigned line, int *lenp);
6 19
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index dc86597d0cc4..10ca1533937e 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -104,7 +104,7 @@ static struct symbol *new_inline_sym(struct dso *dso,
104 } else { 104 } else {
105 /* create a fake symbol for the inline frame */ 105 /* create a fake symbol for the inline frame */
106 inline_sym = symbol__new(base_sym ? base_sym->start : 0, 106 inline_sym = symbol__new(base_sym ? base_sym->start : 0,
107 base_sym ? base_sym->end : 0, 107 base_sym ? (base_sym->end - base_sym->start) : 0,
108 base_sym ? base_sym->binding : 0, 108 base_sym ? base_sym->binding : 0,
109 base_sym ? base_sym->type : 0, 109 base_sym ? base_sym->type : 0,
110 funcname); 110 funcname);
@@ -594,11 +594,12 @@ struct srcline_node {
594 struct rb_node rb_node; 594 struct rb_node rb_node;
595}; 595};
596 596
597void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) 597void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline)
598{ 598{
599 struct rb_node **p = &tree->rb_node; 599 struct rb_node **p = &tree->rb_root.rb_node;
600 struct rb_node *parent = NULL; 600 struct rb_node *parent = NULL;
601 struct srcline_node *i, *node; 601 struct srcline_node *i, *node;
602 bool leftmost = true;
602 603
603 node = zalloc(sizeof(struct srcline_node)); 604 node = zalloc(sizeof(struct srcline_node));
604 if (!node) { 605 if (!node) {
@@ -614,16 +615,18 @@ void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
614 i = rb_entry(parent, struct srcline_node, rb_node); 615 i = rb_entry(parent, struct srcline_node, rb_node);
615 if (addr < i->addr) 616 if (addr < i->addr)
616 p = &(*p)->rb_left; 617 p = &(*p)->rb_left;
617 else 618 else {
618 p = &(*p)->rb_right; 619 p = &(*p)->rb_right;
620 leftmost = false;
621 }
619 } 622 }
620 rb_link_node(&node->rb_node, parent, p); 623 rb_link_node(&node->rb_node, parent, p);
621 rb_insert_color(&node->rb_node, tree); 624 rb_insert_color_cached(&node->rb_node, tree, leftmost);
622} 625}
623 626
624char *srcline__tree_find(struct rb_root *tree, u64 addr) 627char *srcline__tree_find(struct rb_root_cached *tree, u64 addr)
625{ 628{
626 struct rb_node *n = tree->rb_node; 629 struct rb_node *n = tree->rb_root.rb_node;
627 630
628 while (n) { 631 while (n) {
629 struct srcline_node *i = rb_entry(n, struct srcline_node, 632 struct srcline_node *i = rb_entry(n, struct srcline_node,
@@ -640,15 +643,15 @@ char *srcline__tree_find(struct rb_root *tree, u64 addr)
640 return NULL; 643 return NULL;
641} 644}
642 645
643void srcline__tree_delete(struct rb_root *tree) 646void srcline__tree_delete(struct rb_root_cached *tree)
644{ 647{
645 struct srcline_node *pos; 648 struct srcline_node *pos;
646 struct rb_node *next = rb_first(tree); 649 struct rb_node *next = rb_first_cached(tree);
647 650
648 while (next) { 651 while (next) {
649 pos = rb_entry(next, struct srcline_node, rb_node); 652 pos = rb_entry(next, struct srcline_node, rb_node);
650 next = rb_next(&pos->rb_node); 653 next = rb_next(&pos->rb_node);
651 rb_erase(&pos->rb_node, tree); 654 rb_erase_cached(&pos->rb_node, tree);
652 free_srcline(pos->srcline); 655 free_srcline(pos->srcline);
653 zfree(&pos); 656 zfree(&pos);
654 } 657 }
@@ -682,28 +685,32 @@ void inline_node__delete(struct inline_node *node)
682 free(node); 685 free(node);
683} 686}
684 687
685void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) 688void inlines__tree_insert(struct rb_root_cached *tree,
689 struct inline_node *inlines)
686{ 690{
687 struct rb_node **p = &tree->rb_node; 691 struct rb_node **p = &tree->rb_root.rb_node;
688 struct rb_node *parent = NULL; 692 struct rb_node *parent = NULL;
689 const u64 addr = inlines->addr; 693 const u64 addr = inlines->addr;
690 struct inline_node *i; 694 struct inline_node *i;
695 bool leftmost = true;
691 696
692 while (*p != NULL) { 697 while (*p != NULL) {
693 parent = *p; 698 parent = *p;
694 i = rb_entry(parent, struct inline_node, rb_node); 699 i = rb_entry(parent, struct inline_node, rb_node);
695 if (addr < i->addr) 700 if (addr < i->addr)
696 p = &(*p)->rb_left; 701 p = &(*p)->rb_left;
697 else 702 else {
698 p = &(*p)->rb_right; 703 p = &(*p)->rb_right;
704 leftmost = false;
705 }
699 } 706 }
700 rb_link_node(&inlines->rb_node, parent, p); 707 rb_link_node(&inlines->rb_node, parent, p);
701 rb_insert_color(&inlines->rb_node, tree); 708 rb_insert_color_cached(&inlines->rb_node, tree, leftmost);
702} 709}
703 710
704struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) 711struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr)
705{ 712{
706 struct rb_node *n = tree->rb_node; 713 struct rb_node *n = tree->rb_root.rb_node;
707 714
708 while (n) { 715 while (n) {
709 struct inline_node *i = rb_entry(n, struct inline_node, 716 struct inline_node *i = rb_entry(n, struct inline_node,
@@ -720,15 +727,15 @@ struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
720 return NULL; 727 return NULL;
721} 728}
722 729
723void inlines__tree_delete(struct rb_root *tree) 730void inlines__tree_delete(struct rb_root_cached *tree)
724{ 731{
725 struct inline_node *pos; 732 struct inline_node *pos;
726 struct rb_node *next = rb_first(tree); 733 struct rb_node *next = rb_first_cached(tree);
727 734
728 while (next) { 735 while (next) {
729 pos = rb_entry(next, struct inline_node, rb_node); 736 pos = rb_entry(next, struct inline_node, rb_node);
730 next = rb_next(&pos->rb_node); 737 next = rb_next(&pos->rb_node);
731 rb_erase(&pos->rb_node, tree); 738 rb_erase_cached(&pos->rb_node, tree);
732 inline_node__delete(pos); 739 inline_node__delete(pos);
733 } 740 }
734} 741}
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
index 5762212dc342..b11a0aaaa676 100644
--- a/tools/perf/util/srcline.h
+++ b/tools/perf/util/srcline.h
@@ -19,11 +19,11 @@ void free_srcline(char *srcline);
19char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); 19char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line);
20 20
21/* insert the srcline into the DSO, which will take ownership */ 21/* insert the srcline into the DSO, which will take ownership */
22void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); 22void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline);
23/* find previously inserted srcline */ 23/* find previously inserted srcline */
24char *srcline__tree_find(struct rb_root *tree, u64 addr); 24char *srcline__tree_find(struct rb_root_cached *tree, u64 addr);
25/* delete all srclines within the tree */ 25/* delete all srclines within the tree */
26void srcline__tree_delete(struct rb_root *tree); 26void srcline__tree_delete(struct rb_root_cached *tree);
27 27
28#define SRCLINE_UNKNOWN ((char *) "??:0") 28#define SRCLINE_UNKNOWN ((char *) "??:0")
29 29
@@ -46,10 +46,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
46void inline_node__delete(struct inline_node *node); 46void inline_node__delete(struct inline_node *node);
47 47
48/* insert the inline node list into the DSO, which will take ownership */ 48/* insert the inline node list into the DSO, which will take ownership */
49void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines); 49void inlines__tree_insert(struct rb_root_cached *tree,
50 struct inline_node *inlines);
50/* find previously inserted inline node list */ 51/* find previously inserted inline node list */
51struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr); 52struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr);
52/* delete all nodes within the tree of inline_node s */ 53/* delete all nodes within the tree of inline_node s */
53void inlines__tree_delete(struct rb_root *tree); 54void inlines__tree_delete(struct rb_root_cached *tree);
54 55
55#endif /* PERF_SRCLINE_H */ 56#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 665ee374fc01..6d043c78f3c2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -2,6 +2,7 @@
2#include <inttypes.h> 2#include <inttypes.h>
3#include <linux/time64.h> 3#include <linux/time64.h>
4#include <math.h> 4#include <math.h>
5#include "color.h"
5#include "evlist.h" 6#include "evlist.h"
6#include "evsel.h" 7#include "evsel.h"
7#include "stat.h" 8#include "stat.h"
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 3c22c58b3e90..83d8094be4fe 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -168,7 +168,7 @@ static void reset_stat(struct runtime_stat *st)
168 struct rb_node *pos, *next; 168 struct rb_node *pos, *next;
169 169
170 rblist = &st->value_list; 170 rblist = &st->value_list;
171 next = rb_first(&rblist->entries); 171 next = rb_first_cached(&rblist->entries);
172 while (next) { 172 while (next) {
173 pos = next; 173 pos = next;
174 next = rb_next(pos); 174 next = rb_next(pos);
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
index d58f1e08b170..7e82c71dcc42 100644
--- a/tools/perf/util/strlist.h
+++ b/tools/perf/util/strlist.h
@@ -57,7 +57,7 @@ static inline unsigned int strlist__nr_entries(const struct strlist *slist)
57/* For strlist iteration */ 57/* For strlist iteration */
58static inline struct str_node *strlist__first(struct strlist *slist) 58static inline struct str_node *strlist__first(struct strlist *slist)
59{ 59{
60 struct rb_node *rn = rb_first(&slist->rblist.entries); 60 struct rb_node *rn = rb_first_cached(&slist->rblist.entries);
61 return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; 61 return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
62} 62}
63static inline struct str_node *strlist__next(struct str_node *sn) 63static inline struct str_node *strlist__next(struct str_node *sn)
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index dca7dfae69ad..4ad106a5f2c0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -6,6 +6,8 @@
6#include <unistd.h> 6#include <unistd.h>
7#include <inttypes.h> 7#include <inttypes.h>
8 8
9#include "map.h"
10#include "map_groups.h"
9#include "symbol.h" 11#include "symbol.h"
10#include "demangle-java.h" 12#include "demangle-java.h"
11#include "demangle-rust.h" 13#include "demangle-rust.h"
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index 7119df77dc0b..17edbd4f6f85 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c
@@ -3,6 +3,7 @@
3#include "util.h" 3#include "util.h"
4 4
5#include <errno.h> 5#include <errno.h>
6#include <unistd.h>
6#include <stdio.h> 7#include <stdio.h>
7#include <fcntl.h> 8#include <fcntl.h>
8#include <string.h> 9#include <string.h>
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 48efad6d0f90..758bf5f74e6e 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -17,6 +17,7 @@
17#include "util.h" 17#include "util.h"
18#include "debug.h" 18#include "debug.h"
19#include "machine.h" 19#include "machine.h"
20#include "map.h"
20#include "symbol.h" 21#include "symbol.h"
21#include "strlist.h" 22#include "strlist.h"
22#include "intlist.h" 23#include "intlist.h"
@@ -163,7 +164,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
163 return arch__choose_best_symbol(syma, symb); 164 return arch__choose_best_symbol(syma, symb);
164} 165}
165 166
166void symbols__fixup_duplicate(struct rb_root *symbols) 167void symbols__fixup_duplicate(struct rb_root_cached *symbols)
167{ 168{
168 struct rb_node *nd; 169 struct rb_node *nd;
169 struct symbol *curr, *next; 170 struct symbol *curr, *next;
@@ -171,7 +172,7 @@ void symbols__fixup_duplicate(struct rb_root *symbols)
171 if (symbol_conf.allow_aliases) 172 if (symbol_conf.allow_aliases)
172 return; 173 return;
173 174
174 nd = rb_first(symbols); 175 nd = rb_first_cached(symbols);
175 176
176 while (nd) { 177 while (nd) {
177 curr = rb_entry(nd, struct symbol, rb_node); 178 curr = rb_entry(nd, struct symbol, rb_node);
@@ -186,20 +187,20 @@ again:
186 continue; 187 continue;
187 188
188 if (choose_best_symbol(curr, next) == SYMBOL_A) { 189 if (choose_best_symbol(curr, next) == SYMBOL_A) {
189 rb_erase(&next->rb_node, symbols); 190 rb_erase_cached(&next->rb_node, symbols);
190 symbol__delete(next); 191 symbol__delete(next);
191 goto again; 192 goto again;
192 } else { 193 } else {
193 nd = rb_next(&curr->rb_node); 194 nd = rb_next(&curr->rb_node);
194 rb_erase(&curr->rb_node, symbols); 195 rb_erase_cached(&curr->rb_node, symbols);
195 symbol__delete(curr); 196 symbol__delete(curr);
196 } 197 }
197 } 198 }
198} 199}
199 200
200void symbols__fixup_end(struct rb_root *symbols) 201void symbols__fixup_end(struct rb_root_cached *symbols)
201{ 202{
202 struct rb_node *nd, *prevnd = rb_first(symbols); 203 struct rb_node *nd, *prevnd = rb_first_cached(symbols);
203 struct symbol *curr, *prev; 204 struct symbol *curr, *prev;
204 205
205 if (prevnd == NULL) 206 if (prevnd == NULL)
@@ -282,25 +283,27 @@ void symbol__delete(struct symbol *sym)
282 free(((void *)sym) - symbol_conf.priv_size); 283 free(((void *)sym) - symbol_conf.priv_size);
283} 284}
284 285
285void symbols__delete(struct rb_root *symbols) 286void symbols__delete(struct rb_root_cached *symbols)
286{ 287{
287 struct symbol *pos; 288 struct symbol *pos;
288 struct rb_node *next = rb_first(symbols); 289 struct rb_node *next = rb_first_cached(symbols);
289 290
290 while (next) { 291 while (next) {
291 pos = rb_entry(next, struct symbol, rb_node); 292 pos = rb_entry(next, struct symbol, rb_node);
292 next = rb_next(&pos->rb_node); 293 next = rb_next(&pos->rb_node);
293 rb_erase(&pos->rb_node, symbols); 294 rb_erase_cached(&pos->rb_node, symbols);
294 symbol__delete(pos); 295 symbol__delete(pos);
295 } 296 }
296} 297}
297 298
298void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) 299void __symbols__insert(struct rb_root_cached *symbols,
300 struct symbol *sym, bool kernel)
299{ 301{
300 struct rb_node **p = &symbols->rb_node; 302 struct rb_node **p = &symbols->rb_root.rb_node;
301 struct rb_node *parent = NULL; 303 struct rb_node *parent = NULL;
302 const u64 ip = sym->start; 304 const u64 ip = sym->start;
303 struct symbol *s; 305 struct symbol *s;
306 bool leftmost = true;
304 307
305 if (kernel) { 308 if (kernel) {
306 const char *name = sym->name; 309 const char *name = sym->name;
@@ -318,26 +321,28 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
318 s = rb_entry(parent, struct symbol, rb_node); 321 s = rb_entry(parent, struct symbol, rb_node);
319 if (ip < s->start) 322 if (ip < s->start)
320 p = &(*p)->rb_left; 323 p = &(*p)->rb_left;
321 else 324 else {
322 p = &(*p)->rb_right; 325 p = &(*p)->rb_right;
326 leftmost = false;
327 }
323 } 328 }
324 rb_link_node(&sym->rb_node, parent, p); 329 rb_link_node(&sym->rb_node, parent, p);
325 rb_insert_color(&sym->rb_node, symbols); 330 rb_insert_color_cached(&sym->rb_node, symbols, leftmost);
326} 331}
327 332
328void symbols__insert(struct rb_root *symbols, struct symbol *sym) 333void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym)
329{ 334{
330 __symbols__insert(symbols, sym, false); 335 __symbols__insert(symbols, sym, false);
331} 336}
332 337
333static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) 338static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip)
334{ 339{
335 struct rb_node *n; 340 struct rb_node *n;
336 341
337 if (symbols == NULL) 342 if (symbols == NULL)
338 return NULL; 343 return NULL;
339 344
340 n = symbols->rb_node; 345 n = symbols->rb_root.rb_node;
341 346
342 while (n) { 347 while (n) {
343 struct symbol *s = rb_entry(n, struct symbol, rb_node); 348 struct symbol *s = rb_entry(n, struct symbol, rb_node);
@@ -353,9 +358,9 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
353 return NULL; 358 return NULL;
354} 359}
355 360
356static struct symbol *symbols__first(struct rb_root *symbols) 361static struct symbol *symbols__first(struct rb_root_cached *symbols)
357{ 362{
358 struct rb_node *n = rb_first(symbols); 363 struct rb_node *n = rb_first_cached(symbols);
359 364
360 if (n) 365 if (n)
361 return rb_entry(n, struct symbol, rb_node); 366 return rb_entry(n, struct symbol, rb_node);
@@ -363,9 +368,9 @@ static struct symbol *symbols__first(struct rb_root *symbols)
363 return NULL; 368 return NULL;
364} 369}
365 370
366static struct symbol *symbols__last(struct rb_root *symbols) 371static struct symbol *symbols__last(struct rb_root_cached *symbols)
367{ 372{
368 struct rb_node *n = rb_last(symbols); 373 struct rb_node *n = rb_last(&symbols->rb_root);
369 374
370 if (n) 375 if (n)
371 return rb_entry(n, struct symbol, rb_node); 376 return rb_entry(n, struct symbol, rb_node);
@@ -383,11 +388,12 @@ static struct symbol *symbols__next(struct symbol *sym)
383 return NULL; 388 return NULL;
384} 389}
385 390
386static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) 391static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym)
387{ 392{
388 struct rb_node **p = &symbols->rb_node; 393 struct rb_node **p = &symbols->rb_root.rb_node;
389 struct rb_node *parent = NULL; 394 struct rb_node *parent = NULL;
390 struct symbol_name_rb_node *symn, *s; 395 struct symbol_name_rb_node *symn, *s;
396 bool leftmost = true;
391 397
392 symn = container_of(sym, struct symbol_name_rb_node, sym); 398 symn = container_of(sym, struct symbol_name_rb_node, sym);
393 399
@@ -396,19 +402,21 @@ static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
396 s = rb_entry(parent, struct symbol_name_rb_node, rb_node); 402 s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
397 if (strcmp(sym->name, s->sym.name) < 0) 403 if (strcmp(sym->name, s->sym.name) < 0)
398 p = &(*p)->rb_left; 404 p = &(*p)->rb_left;
399 else 405 else {
400 p = &(*p)->rb_right; 406 p = &(*p)->rb_right;
407 leftmost = false;
408 }
401 } 409 }
402 rb_link_node(&symn->rb_node, parent, p); 410 rb_link_node(&symn->rb_node, parent, p);
403 rb_insert_color(&symn->rb_node, symbols); 411 rb_insert_color_cached(&symn->rb_node, symbols, leftmost);
404} 412}
405 413
406static void symbols__sort_by_name(struct rb_root *symbols, 414static void symbols__sort_by_name(struct rb_root_cached *symbols,
407 struct rb_root *source) 415 struct rb_root_cached *source)
408{ 416{
409 struct rb_node *nd; 417 struct rb_node *nd;
410 418
411 for (nd = rb_first(source); nd; nd = rb_next(nd)) { 419 for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) {
412 struct symbol *pos = rb_entry(nd, struct symbol, rb_node); 420 struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
413 symbols__insert_by_name(symbols, pos); 421 symbols__insert_by_name(symbols, pos);
414 } 422 }
@@ -431,7 +439,7 @@ int symbol__match_symbol_name(const char *name, const char *str,
431 return arch__compare_symbol_names(name, str); 439 return arch__compare_symbol_names(name, str);
432} 440}
433 441
434static struct symbol *symbols__find_by_name(struct rb_root *symbols, 442static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols,
435 const char *name, 443 const char *name,
436 enum symbol_tag_include includes) 444 enum symbol_tag_include includes)
437{ 445{
@@ -441,7 +449,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
441 if (symbols == NULL) 449 if (symbols == NULL)
442 return NULL; 450 return NULL;
443 451
444 n = symbols->rb_node; 452 n = symbols->rb_root.rb_node;
445 453
446 while (n) { 454 while (n) {
447 int cmp; 455 int cmp;
@@ -644,7 +652,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name,
644{ 652{
645 struct symbol *sym; 653 struct symbol *sym;
646 struct dso *dso = arg; 654 struct dso *dso = arg;
647 struct rb_root *root = &dso->symbols; 655 struct rb_root_cached *root = &dso->symbols;
648 656
649 if (!symbol_type__filter(type)) 657 if (!symbol_type__filter(type))
650 return 0; 658 return 0;
@@ -681,14 +689,14 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct
681 struct map *curr_map; 689 struct map *curr_map;
682 struct symbol *pos; 690 struct symbol *pos;
683 int count = 0; 691 int count = 0;
684 struct rb_root old_root = dso->symbols; 692 struct rb_root_cached old_root = dso->symbols;
685 struct rb_root *root = &dso->symbols; 693 struct rb_root_cached *root = &dso->symbols;
686 struct rb_node *next = rb_first(root); 694 struct rb_node *next = rb_first_cached(root);
687 695
688 if (!kmaps) 696 if (!kmaps)
689 return -1; 697 return -1;
690 698
691 *root = RB_ROOT; 699 *root = RB_ROOT_CACHED;
692 700
693 while (next) { 701 while (next) {
694 char *module; 702 char *module;
@@ -696,8 +704,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct
696 pos = rb_entry(next, struct symbol, rb_node); 704 pos = rb_entry(next, struct symbol, rb_node);
697 next = rb_next(&pos->rb_node); 705 next = rb_next(&pos->rb_node);
698 706
699 rb_erase_init(&pos->rb_node, &old_root); 707 rb_erase_cached(&pos->rb_node, &old_root);
700 708 RB_CLEAR_NODE(&pos->rb_node);
701 module = strchr(pos->name, '\t'); 709 module = strchr(pos->name, '\t');
702 if (module) 710 if (module)
703 *module = '\0'; 711 *module = '\0';
@@ -710,6 +718,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct
710 } 718 }
711 719
712 pos->start -= curr_map->start - curr_map->pgoff; 720 pos->start -= curr_map->start - curr_map->pgoff;
721 if (pos->end > curr_map->end)
722 pos->end = curr_map->end;
713 if (pos->end) 723 if (pos->end)
714 pos->end -= curr_map->start - curr_map->pgoff; 724 pos->end -= curr_map->start - curr_map->pgoff;
715 symbols__insert(&curr_map->dso->symbols, pos); 725 symbols__insert(&curr_map->dso->symbols, pos);
@@ -734,8 +744,8 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso,
734 struct map *curr_map = initial_map; 744 struct map *curr_map = initial_map;
735 struct symbol *pos; 745 struct symbol *pos;
736 int count = 0, moved = 0; 746 int count = 0, moved = 0;
737 struct rb_root *root = &dso->symbols; 747 struct rb_root_cached *root = &dso->symbols;
738 struct rb_node *next = rb_first(root); 748 struct rb_node *next = rb_first_cached(root);
739 int kernel_range = 0; 749 int kernel_range = 0;
740 bool x86_64; 750 bool x86_64;
741 751
@@ -849,7 +859,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso,
849 } 859 }
850add_symbol: 860add_symbol:
851 if (curr_map != initial_map) { 861 if (curr_map != initial_map) {
852 rb_erase(&pos->rb_node, root); 862 rb_erase_cached(&pos->rb_node, root);
853 symbols__insert(&curr_map->dso->symbols, pos); 863 symbols__insert(&curr_map->dso->symbols, pos);
854 ++moved; 864 ++moved;
855 } else 865 } else
@@ -857,7 +867,7 @@ add_symbol:
857 867
858 continue; 868 continue;
859discard_symbol: 869discard_symbol:
860 rb_erase(&pos->rb_node, root); 870 rb_erase_cached(&pos->rb_node, root);
861 symbol__delete(pos); 871 symbol__delete(pos);
862 } 872 }
863 873
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 14d9d438e7e2..9a8fe012910a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -5,16 +5,13 @@
5#include <linux/types.h> 5#include <linux/types.h>
6#include <stdbool.h> 6#include <stdbool.h>
7#include <stdint.h> 7#include <stdint.h>
8#include "map.h"
9#include "../perf.h"
10#include <linux/list.h> 8#include <linux/list.h>
11#include <linux/rbtree.h> 9#include <linux/rbtree.h>
12#include <stdio.h> 10#include <stdio.h>
13#include <byteswap.h> 11#include "map_symbol.h"
14#include <libgen.h> 12#include "branch.h"
15#include "build-id.h"
16#include "event.h"
17#include "path.h" 13#include "path.h"
14#include "symbol_conf.h"
18 15
19#ifdef HAVE_LIBELF_SUPPORT 16#ifdef HAVE_LIBELF_SUPPORT
20#include <libelf.h> 17#include <libelf.h>
@@ -24,6 +21,10 @@
24 21
25#include "dso.h" 22#include "dso.h"
26 23
24struct map;
25struct map_groups;
26struct option;
27
27/* 28/*
28 * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; 29 * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
29 * for newer versions we can use mmap to reduce memory usage: 30 * for newer versions we can use mmap to reduce memory usage:
@@ -68,7 +69,7 @@ struct symbol {
68}; 69};
69 70
70void symbol__delete(struct symbol *sym); 71void symbol__delete(struct symbol *sym);
71void symbols__delete(struct rb_root *symbols); 72void symbols__delete(struct rb_root_cached *symbols);
72 73
73/* symbols__for_each_entry - iterate over symbols (rb_root) 74/* symbols__for_each_entry - iterate over symbols (rb_root)
74 * 75 *
@@ -77,7 +78,7 @@ void symbols__delete(struct rb_root *symbols);
77 * @nd: the 'struct rb_node *' to use as a temporary storage 78 * @nd: the 'struct rb_node *' to use as a temporary storage
78 */ 79 */
79#define symbols__for_each_entry(symbols, pos, nd) \ 80#define symbols__for_each_entry(symbols, pos, nd) \
80 for (nd = rb_first(symbols); \ 81 for (nd = rb_first_cached(symbols); \
81 nd && (pos = rb_entry(nd, struct symbol, rb_node)); \ 82 nd && (pos = rb_entry(nd, struct symbol, rb_node)); \
82 nd = rb_next(nd)) 83 nd = rb_next(nd))
83 84
@@ -89,69 +90,6 @@ static inline size_t symbol__size(const struct symbol *sym)
89struct strlist; 90struct strlist;
90struct intlist; 91struct intlist;
91 92
92struct symbol_conf {
93 unsigned short priv_size;
94 bool try_vmlinux_path,
95 init_annotation,
96 force,
97 ignore_vmlinux,
98 ignore_vmlinux_buildid,
99 show_kernel_path,
100 use_modules,
101 allow_aliases,
102 sort_by_name,
103 show_nr_samples,
104 show_total_period,
105 use_callchain,
106 cumulate_callchain,
107 show_branchflag_count,
108 exclude_other,
109 show_cpu_utilization,
110 initialized,
111 kptr_restrict,
112 event_group,
113 demangle,
114 demangle_kernel,
115 filter_relative,
116 show_hist_headers,
117 branch_callstack,
118 has_filter,
119 show_ref_callgraph,
120 hide_unresolved,
121 raw_trace,
122 report_hierarchy,
123 inline_name;
124 const char *vmlinux_name,
125 *kallsyms_name,
126 *source_prefix,
127 *field_sep,
128 *graph_function;
129 const char *default_guest_vmlinux_name,
130 *default_guest_kallsyms,
131 *default_guest_modules;
132 const char *guestmount;
133 const char *dso_list_str,
134 *comm_list_str,
135 *pid_list_str,
136 *tid_list_str,
137 *sym_list_str,
138 *col_width_list_str,
139 *bt_stop_list_str;
140 struct strlist *dso_list,
141 *comm_list,
142 *sym_list,
143 *dso_from_list,
144 *dso_to_list,
145 *sym_from_list,
146 *sym_to_list,
147 *bt_stop_list;
148 struct intlist *pid_list,
149 *tid_list;
150 const char *symfs;
151};
152
153extern struct symbol_conf symbol_conf;
154
155struct symbol_name_rb_node { 93struct symbol_name_rb_node {
156 struct rb_node rb_node; 94 struct rb_node rb_node;
157 struct symbol sym; 95 struct symbol sym;
@@ -178,19 +116,6 @@ struct ref_reloc_sym {
178 u64 unrelocated_addr; 116 u64 unrelocated_addr;
179}; 117};
180 118
181struct map_symbol {
182 struct map *map;
183 struct symbol *sym;
184};
185
186struct addr_map_symbol {
187 struct map *map;
188 struct symbol *sym;
189 u64 addr;
190 u64 al_addr;
191 u64 phys_addr;
192};
193
194struct branch_info { 119struct branch_info {
195 struct addr_map_symbol from; 120 struct addr_map_symbol from;
196 struct addr_map_symbol to; 121 struct addr_map_symbol to;
@@ -310,10 +235,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
310 235
311char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); 236char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
312 237
313void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); 238void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym,
314void symbols__insert(struct rb_root *symbols, struct symbol *sym); 239 bool kernel);
315void symbols__fixup_duplicate(struct rb_root *symbols); 240void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym);
316void symbols__fixup_end(struct rb_root *symbols); 241void symbols__fixup_duplicate(struct rb_root_cached *symbols);
242void symbols__fixup_end(struct rb_root_cached *symbols);
317void map_groups__fixup_end(struct map_groups *mg); 243void map_groups__fixup_end(struct map_groups *mg);
318 244
319typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); 245typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
new file mode 100644
index 000000000000..fffea68c1203
--- /dev/null
+++ b/tools/perf/util/symbol_conf.h
@@ -0,0 +1,73 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef __PERF_SYMBOL_CONF
3#define __PERF_SYMBOL_CONF 1
4
5#include <stdbool.h>
6
7struct strlist;
8struct intlist;
9
10struct symbol_conf {
11 unsigned short priv_size;
12 bool try_vmlinux_path,
13 init_annotation,
14 force,
15 ignore_vmlinux,
16 ignore_vmlinux_buildid,
17 show_kernel_path,
18 use_modules,
19 allow_aliases,
20 sort_by_name,
21 show_nr_samples,
22 show_total_period,
23 use_callchain,
24 cumulate_callchain,
25 show_branchflag_count,
26 exclude_other,
27 show_cpu_utilization,
28 initialized,
29 kptr_restrict,
30 event_group,
31 demangle,
32 demangle_kernel,
33 filter_relative,
34 show_hist_headers,
35 branch_callstack,
36 has_filter,
37 show_ref_callgraph,
38 hide_unresolved,
39 raw_trace,
40 report_hierarchy,
41 inline_name;
42 const char *vmlinux_name,
43 *kallsyms_name,
44 *source_prefix,
45 *field_sep,
46 *graph_function;
47 const char *default_guest_vmlinux_name,
48 *default_guest_kallsyms,
49 *default_guest_modules;
50 const char *guestmount;
51 const char *dso_list_str,
52 *comm_list_str,
53 *pid_list_str,
54 *tid_list_str,
55 *sym_list_str,
56 *col_width_list_str,
57 *bt_stop_list_str;
58 struct strlist *dso_list,
59 *comm_list,
60 *sym_list,
61 *dso_from_list,
62 *dso_to_list,
63 *sym_from_list,
64 *sym_to_list,
65 *bt_stop_list;
66 struct intlist *pid_list,
67 *tid_list;
68 const char *symfs;
69};
70
71extern struct symbol_conf symbol_conf;
72
73#endif // __PERF_SYMBOL_CONF
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
index ed0205cc7942..02e89b02c2ce 100644
--- a/tools/perf/util/symbol_fprintf.c
+++ b/tools/perf/util/symbol_fprintf.c
@@ -3,6 +3,7 @@
3#include <inttypes.h> 3#include <inttypes.h>
4#include <stdio.h> 4#include <stdio.h>
5 5
6#include "map.h"
6#include "symbol.h" 7#include "symbol.h"
7 8
8size_t symbol__fprintf(struct symbol *sym, FILE *fp) 9size_t symbol__fprintf(struct symbol *sym, FILE *fp)
@@ -64,7 +65,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso,
64 struct rb_node *nd; 65 struct rb_node *nd;
65 struct symbol_name_rb_node *pos; 66 struct symbol_name_rb_node *pos;
66 67
67 for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) { 68 for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) {
68 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); 69 pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
69 fprintf(fp, "%s\n", pos->sym.name); 70 fprintf(fp, "%s\n", pos->sym.name);
70 } 71 }
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index d52f27f373ce..a8b45168513c 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -20,6 +20,7 @@
20#include "thread.h" 20#include "thread.h"
21#include "event.h" 21#include "event.h"
22#include "machine.h" 22#include "machine.h"
23#include "env.h"
23#include "util.h" 24#include "util.h"
24#include "debug.h" 25#include "debug.h"
25#include "symbol.h" 26#include "symbol.h"
@@ -29,6 +30,19 @@
29 30
30#define STACK_GROWTH 2048 31#define STACK_GROWTH 2048
31 32
33/*
34 * State of retpoline detection.
35 *
36 * RETPOLINE_NONE: no retpoline detection
37 * X86_RETPOLINE_POSSIBLE: x86 retpoline possible
38 * X86_RETPOLINE_DETECTED: x86 retpoline detected
39 */
40enum retpoline_state_t {
41 RETPOLINE_NONE,
42 X86_RETPOLINE_POSSIBLE,
43 X86_RETPOLINE_DETECTED,
44};
45
32/** 46/**
33 * struct thread_stack_entry - thread stack entry. 47 * struct thread_stack_entry - thread stack entry.
34 * @ret_addr: return address 48 * @ret_addr: return address
@@ -38,6 +52,7 @@
38 * @cp: call path 52 * @cp: call path
39 * @no_call: a 'call' was not seen 53 * @no_call: a 'call' was not seen
40 * @trace_end: a 'call' but trace ended 54 * @trace_end: a 'call' but trace ended
55 * @non_call: a branch but not a 'call' to the start of a different symbol
41 */ 56 */
42struct thread_stack_entry { 57struct thread_stack_entry {
43 u64 ret_addr; 58 u64 ret_addr;
@@ -47,6 +62,7 @@ struct thread_stack_entry {
47 struct call_path *cp; 62 struct call_path *cp;
48 bool no_call; 63 bool no_call;
49 bool trace_end; 64 bool trace_end;
65 bool non_call;
50}; 66};
51 67
52/** 68/**
@@ -62,6 +78,7 @@ struct thread_stack_entry {
62 * @crp: call/return processor 78 * @crp: call/return processor
63 * @comm: current comm 79 * @comm: current comm
64 * @arr_sz: size of array if this is the first element of an array 80 * @arr_sz: size of array if this is the first element of an array
81 * @rstate: used to detect retpolines
65 */ 82 */
66struct thread_stack { 83struct thread_stack {
67 struct thread_stack_entry *stack; 84 struct thread_stack_entry *stack;
@@ -74,6 +91,7 @@ struct thread_stack {
74 struct call_return_processor *crp; 91 struct call_return_processor *crp;
75 struct comm *comm; 92 struct comm *comm;
76 unsigned int arr_sz; 93 unsigned int arr_sz;
94 enum retpoline_state_t rstate;
77}; 95};
78 96
79/* 97/*
@@ -113,10 +131,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
113 if (err) 131 if (err)
114 return err; 132 return err;
115 133
116 if (thread->mg && thread->mg->machine) 134 if (thread->mg && thread->mg->machine) {
117 ts->kernel_start = machine__kernel_start(thread->mg->machine); 135 struct machine *machine = thread->mg->machine;
118 else 136 const char *arch = perf_env__arch(machine->env);
137
138 ts->kernel_start = machine__kernel_start(machine);
139 if (!strcmp(arch, "x86"))
140 ts->rstate = X86_RETPOLINE_POSSIBLE;
141 } else {
119 ts->kernel_start = 1ULL << 63; 142 ts->kernel_start = 1ULL << 63;
143 }
120 ts->crp = crp; 144 ts->crp = crp;
121 145
122 return 0; 146 return 0;
@@ -268,6 +292,8 @@ static int thread_stack__call_return(struct thread *thread,
268 cr.flags |= CALL_RETURN_NO_CALL; 292 cr.flags |= CALL_RETURN_NO_CALL;
269 if (no_return) 293 if (no_return)
270 cr.flags |= CALL_RETURN_NO_RETURN; 294 cr.flags |= CALL_RETURN_NO_RETURN;
295 if (tse->non_call)
296 cr.flags |= CALL_RETURN_NON_CALL;
271 297
272 return crp->process(&cr, crp->data); 298 return crp->process(&cr, crp->data);
273} 299}
@@ -493,6 +519,9 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
493 struct thread_stack_entry *tse; 519 struct thread_stack_entry *tse;
494 int err; 520 int err;
495 521
522 if (!cp)
523 return -ENOMEM;
524
496 if (ts->cnt == ts->sz) { 525 if (ts->cnt == ts->sz) {
497 err = thread_stack__grow(ts); 526 err = thread_stack__grow(ts);
498 if (err) 527 if (err)
@@ -507,6 +536,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
507 tse->cp = cp; 536 tse->cp = cp;
508 tse->no_call = no_call; 537 tse->no_call = no_call;
509 tse->trace_end = trace_end; 538 tse->trace_end = trace_end;
539 tse->non_call = false;
510 540
511 return 0; 541 return 0;
512} 542}
@@ -528,14 +558,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
528 timestamp, ref, false); 558 timestamp, ref, false);
529 } 559 }
530 560
531 if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { 561 if (ts->stack[ts->cnt - 1].ret_addr == ret_addr &&
562 !ts->stack[ts->cnt - 1].non_call) {
532 return thread_stack__call_return(thread, ts, --ts->cnt, 563 return thread_stack__call_return(thread, ts, --ts->cnt,
533 timestamp, ref, false); 564 timestamp, ref, false);
534 } else { 565 } else {
535 size_t i = ts->cnt - 1; 566 size_t i = ts->cnt - 1;
536 567
537 while (i--) { 568 while (i--) {
538 if (ts->stack[i].ret_addr != ret_addr) 569 if (ts->stack[i].ret_addr != ret_addr ||
570 ts->stack[i].non_call)
539 continue; 571 continue;
540 i += 1; 572 i += 1;
541 while (ts->cnt > i) { 573 while (ts->cnt > i) {
@@ -576,8 +608,6 @@ static int thread_stack__bottom(struct thread_stack *ts,
576 608
577 cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, 609 cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
578 ts->kernel_start); 610 ts->kernel_start);
579 if (!cp)
580 return -ENOMEM;
581 611
582 return thread_stack__push_cp(ts, ip, sample->time, ref, cp, 612 return thread_stack__push_cp(ts, ip, sample->time, ref, cp,
583 true, false); 613 true, false);
@@ -590,36 +620,36 @@ static int thread_stack__no_call_return(struct thread *thread,
590 struct addr_location *to_al, u64 ref) 620 struct addr_location *to_al, u64 ref)
591{ 621{
592 struct call_path_root *cpr = ts->crp->cpr; 622 struct call_path_root *cpr = ts->crp->cpr;
623 struct call_path *root = &cpr->call_path;
624 struct symbol *fsym = from_al->sym;
625 struct symbol *tsym = to_al->sym;
593 struct call_path *cp, *parent; 626 struct call_path *cp, *parent;
594 u64 ks = ts->kernel_start; 627 u64 ks = ts->kernel_start;
628 u64 addr = sample->addr;
629 u64 tm = sample->time;
630 u64 ip = sample->ip;
595 int err; 631 int err;
596 632
597 if (sample->ip >= ks && sample->addr < ks) { 633 if (ip >= ks && addr < ks) {
598 /* Return to userspace, so pop all kernel addresses */ 634 /* Return to userspace, so pop all kernel addresses */
599 while (thread_stack__in_kernel(ts)) { 635 while (thread_stack__in_kernel(ts)) {
600 err = thread_stack__call_return(thread, ts, --ts->cnt, 636 err = thread_stack__call_return(thread, ts, --ts->cnt,
601 sample->time, ref, 637 tm, ref, true);
602 true);
603 if (err) 638 if (err)
604 return err; 639 return err;
605 } 640 }
606 641
607 /* If the stack is empty, push the userspace address */ 642 /* If the stack is empty, push the userspace address */
608 if (!ts->cnt) { 643 if (!ts->cnt) {
609 cp = call_path__findnew(cpr, &cpr->call_path, 644 cp = call_path__findnew(cpr, root, tsym, addr, ks);
610 to_al->sym, sample->addr, 645 return thread_stack__push_cp(ts, 0, tm, ref, cp, true,
611 ts->kernel_start); 646 false);
612 if (!cp)
613 return -ENOMEM;
614 return thread_stack__push_cp(ts, 0, sample->time, ref,
615 cp, true, false);
616 } 647 }
617 } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { 648 } else if (thread_stack__in_kernel(ts) && ip < ks) {
618 /* Return to userspace, so pop all kernel addresses */ 649 /* Return to userspace, so pop all kernel addresses */
619 while (thread_stack__in_kernel(ts)) { 650 while (thread_stack__in_kernel(ts)) {
620 err = thread_stack__call_return(thread, ts, --ts->cnt, 651 err = thread_stack__call_return(thread, ts, --ts->cnt,
621 sample->time, ref, 652 tm, ref, true);
622 true);
623 if (err) 653 if (err)
624 return err; 654 return err;
625 } 655 }
@@ -628,21 +658,59 @@ static int thread_stack__no_call_return(struct thread *thread,
628 if (ts->cnt) 658 if (ts->cnt)
629 parent = ts->stack[ts->cnt - 1].cp; 659 parent = ts->stack[ts->cnt - 1].cp;
630 else 660 else
631 parent = &cpr->call_path; 661 parent = root;
632 662
633 /* This 'return' had no 'call', so push and pop top of stack */ 663 if (parent->sym == from_al->sym) {
634 cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, 664 /*
635 ts->kernel_start); 665 * At the bottom of the stack, assume the missing 'call' was
636 if (!cp) 666 * before the trace started. So, pop the current symbol and push
637 return -ENOMEM; 667 * the 'to' symbol.
668 */
669 if (ts->cnt == 1) {
670 err = thread_stack__call_return(thread, ts, --ts->cnt,
671 tm, ref, false);
672 if (err)
673 return err;
674 }
675
676 if (!ts->cnt) {
677 cp = call_path__findnew(cpr, root, tsym, addr, ks);
638 678
639 err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, 679 return thread_stack__push_cp(ts, addr, tm, ref, cp,
640 true, false); 680 true, false);
681 }
682
683 /*
684 * Otherwise assume the 'return' is being used as a jump (e.g.
685 * retpoline) and just push the 'to' symbol.
686 */
687 cp = call_path__findnew(cpr, parent, tsym, addr, ks);
688
689 err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false);
690 if (!err)
691 ts->stack[ts->cnt - 1].non_call = true;
692
693 return err;
694 }
695
696 /*
697 * Assume 'parent' has not yet returned, so push 'to', and then push and
698 * pop 'from'.
699 */
700
701 cp = call_path__findnew(cpr, parent, tsym, addr, ks);
702
703 err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false);
641 if (err) 704 if (err)
642 return err; 705 return err;
643 706
644 return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, 707 cp = call_path__findnew(cpr, cp, fsym, ip, ks);
645 to_al->sym); 708
709 err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false);
710 if (err)
711 return err;
712
713 return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false);
646} 714}
647 715
648static int thread_stack__trace_begin(struct thread *thread, 716static int thread_stack__trace_begin(struct thread *thread,
@@ -680,8 +748,6 @@ static int thread_stack__trace_end(struct thread_stack *ts,
680 748
681 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, 749 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
682 ts->kernel_start); 750 ts->kernel_start);
683 if (!cp)
684 return -ENOMEM;
685 751
686 ret_addr = sample->ip + sample->insn_len; 752 ret_addr = sample->ip + sample->insn_len;
687 753
@@ -689,6 +755,70 @@ static int thread_stack__trace_end(struct thread_stack *ts,
689 false, true); 755 false, true);
690} 756}
691 757
758static bool is_x86_retpoline(const char *name)
759{
760 const char *p = strstr(name, "__x86_indirect_thunk_");
761
762 return p == name || !strcmp(name, "__indirect_thunk_start");
763}
764
765/*
766 * x86 retpoline functions pollute the call graph. This function removes them.
767 * This does not handle function return thunks, nor is there any improvement
768 * for the handling of inline thunks or extern thunks.
769 */
770static int thread_stack__x86_retpoline(struct thread_stack *ts,
771 struct perf_sample *sample,
772 struct addr_location *to_al)
773{
774 struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1];
775 struct call_path_root *cpr = ts->crp->cpr;
776 struct symbol *sym = tse->cp->sym;
777 struct symbol *tsym = to_al->sym;
778 struct call_path *cp;
779
780 if (sym && is_x86_retpoline(sym->name)) {
781 /*
782 * This is a x86 retpoline fn. It pollutes the call graph by
783 * showing up everywhere there is an indirect branch, but does
784 * not itself mean anything. Here the top-of-stack is removed,
785 * by decrementing the stack count, and then further down, the
786 * resulting top-of-stack is replaced with the actual target.
787 * The result is that the retpoline functions will no longer
788 * appear in the call graph. Note this only affects the call
789 * graph, since all the original branches are left unchanged.
790 */
791 ts->cnt -= 1;
792 sym = ts->stack[ts->cnt - 2].cp->sym;
793 if (sym && sym == tsym && to_al->addr != tsym->start) {
794 /*
795 * Target is back to the middle of the symbol we came
796 * from so assume it is an indirect jmp and forget it
797 * altogether.
798 */
799 ts->cnt -= 1;
800 return 0;
801 }
802 } else if (sym && sym == tsym) {
803 /*
804 * Target is back to the symbol we came from so assume it is an
805 * indirect jmp and forget it altogether.
806 */
807 ts->cnt -= 1;
808 return 0;
809 }
810
811 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym,
812 sample->addr, ts->kernel_start);
813 if (!cp)
814 return -ENOMEM;
815
816 /* Replace the top-of-stack with the actual target */
817 ts->stack[ts->cnt - 1].cp = cp;
818
819 return 0;
820}
821
692int thread_stack__process(struct thread *thread, struct comm *comm, 822int thread_stack__process(struct thread *thread, struct comm *comm,
693 struct perf_sample *sample, 823 struct perf_sample *sample,
694 struct addr_location *from_al, 824 struct addr_location *from_al,
@@ -696,6 +826,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
696 struct call_return_processor *crp) 826 struct call_return_processor *crp)
697{ 827{
698 struct thread_stack *ts = thread__stack(thread, sample->cpu); 828 struct thread_stack *ts = thread__stack(thread, sample->cpu);
829 enum retpoline_state_t rstate;
699 int err = 0; 830 int err = 0;
700 831
701 if (ts && !ts->crp) { 832 if (ts && !ts->crp) {
@@ -711,6 +842,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
711 ts->comm = comm; 842 ts->comm = comm;
712 } 843 }
713 844
845 rstate = ts->rstate;
846 if (rstate == X86_RETPOLINE_DETECTED)
847 ts->rstate = X86_RETPOLINE_POSSIBLE;
848
714 /* Flush stack on exec */ 849 /* Flush stack on exec */
715 if (ts->comm != comm && thread->pid_ == thread->tid) { 850 if (ts->comm != comm && thread->pid_ == thread->tid) {
716 err = __thread_stack__flush(thread, ts); 851 err = __thread_stack__flush(thread, ts);
@@ -745,14 +880,27 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
745 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, 880 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
746 to_al->sym, sample->addr, 881 to_al->sym, sample->addr,
747 ts->kernel_start); 882 ts->kernel_start);
748 if (!cp)
749 return -ENOMEM;
750 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, 883 err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
751 cp, false, trace_end); 884 cp, false, trace_end);
885
886 /*
887 * A call to the same symbol but not the start of the symbol,
888 * may be the start of a x86 retpoline.
889 */
890 if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym &&
891 from_al->sym == to_al->sym &&
892 to_al->addr != to_al->sym->start)
893 ts->rstate = X86_RETPOLINE_DETECTED;
894
752 } else if (sample->flags & PERF_IP_FLAG_RETURN) { 895 } else if (sample->flags & PERF_IP_FLAG_RETURN) {
753 if (!sample->ip || !sample->addr) 896 if (!sample->ip || !sample->addr)
754 return 0; 897 return 0;
755 898
899 /* x86 retpoline 'return' doesn't match the stack */
900 if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 &&
901 ts->stack[ts->cnt - 1].ret_addr != sample->addr)
902 return thread_stack__x86_retpoline(ts, sample, to_al);
903
756 err = thread_stack__pop_cp(thread, ts, sample->addr, 904 err = thread_stack__pop_cp(thread, ts, sample->addr,
757 sample->time, ref, from_al->sym); 905 sample->time, ref, from_al->sym);
758 if (err) { 906 if (err) {
@@ -765,6 +913,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
765 err = thread_stack__trace_begin(thread, ts, sample->time, ref); 913 err = thread_stack__trace_begin(thread, ts, sample->time, ref);
766 } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { 914 } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
767 err = thread_stack__trace_end(ts, sample, ref); 915 err = thread_stack__trace_end(ts, sample, ref);
916 } else if (sample->flags & PERF_IP_FLAG_BRANCH &&
917 from_al->sym != to_al->sym && to_al->sym &&
918 to_al->addr == to_al->sym->start) {
919 struct call_path_root *cpr = ts->crp->cpr;
920 struct call_path *cp;
921
922 /*
923 * The compiler might optimize a call/ret combination by making
924 * it a jmp. Make that visible by recording on the stack a
925 * branch to the start of a different symbol. Note, that means
926 * when a ret pops the stack, all jmps must be popped off first.
927 */
928 cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
929 to_al->sym, sample->addr,
930 ts->kernel_start);
931 err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false,
932 false);
933 if (!err)
934 ts->stack[ts->cnt - 1].non_call = true;
768 } 935 }
769 936
770 return err; 937 return err;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index 1f626f4a1c40..b7c04e19ad41 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -35,10 +35,13 @@ struct call_path;
35 * 35 *
36 * CALL_RETURN_NO_CALL: 'return' but no matching 'call' 36 * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
37 * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' 37 * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
38 * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different
39 * symbol
38 */ 40 */
39enum { 41enum {
40 CALL_RETURN_NO_CALL = 1 << 0, 42 CALL_RETURN_NO_CALL = 1 << 0,
41 CALL_RETURN_NO_RETURN = 1 << 1, 43 CALL_RETURN_NO_RETURN = 1 << 1,
44 CALL_RETURN_NON_CALL = 1 << 2,
42}; 45};
43 46
44/** 47/**
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index c83372329f89..4c179fef442d 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -12,6 +12,7 @@
12#include "debug.h" 12#include "debug.h"
13#include "namespaces.h" 13#include "namespaces.h"
14#include "comm.h" 14#include "comm.h"
15#include "symbol.h"
15#include "unwind.h" 16#include "unwind.h"
16 17
17#include <api/fs/fs.h> 18#include <api/fs/fs.h>
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 712dd48cc0ca..8276ffeec556 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -5,14 +5,18 @@
5#include <linux/refcount.h> 5#include <linux/refcount.h>
6#include <linux/rbtree.h> 6#include <linux/rbtree.h>
7#include <linux/list.h> 7#include <linux/list.h>
8#include <stdio.h>
8#include <unistd.h> 9#include <unistd.h>
9#include <sys/types.h> 10#include <sys/types.h>
10#include "symbol.h" 11#include "srccode.h"
11#include "map.h" 12#include "symbol_conf.h"
12#include <strlist.h> 13#include <strlist.h>
13#include <intlist.h> 14#include <intlist.h>
14#include "rwsem.h" 15#include "rwsem.h"
15 16
17struct addr_location;
18struct map;
19struct namespaces_event;
16struct thread_stack; 20struct thread_stack;
17struct unwind_libunwind_ops; 21struct unwind_libunwind_ops;
18 22
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 56e4ca54020a..250391672f9f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -53,7 +53,10 @@ struct perf_tool {
53 itrace_start, 53 itrace_start,
54 context_switch, 54 context_switch,
55 throttle, 55 throttle,
56 unthrottle; 56 unthrottle,
57 ksymbol,
58 bpf_event;
59
57 event_attr_op attr; 60 event_attr_op attr;
58 event_attr_op event_update; 61 event_attr_op event_update;
59 event_op2 tracing_data; 62 event_op2 tracing_data;
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
index 5eff9bfc5758..407d0167b942 100644
--- a/tools/perf/util/unwind-libdw.c
+++ b/tools/perf/util/unwind-libdw.c
@@ -8,6 +8,8 @@
8#include "unwind.h" 8#include "unwind.h"
9#include "unwind-libdw.h" 9#include "unwind-libdw.h"
10#include "machine.h" 10#include "machine.h"
11#include "map.h"
12#include "symbol.h"
11#include "thread.h" 13#include "thread.h"
12#include <linux/types.h> 14#include <linux/types.h>
13#include "event.h" 15#include "event.h"
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index 79f521a552cf..f3c666a84e4d 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -34,6 +34,7 @@
34#include "session.h" 34#include "session.h"
35#include "perf_regs.h" 35#include "perf_regs.h"
36#include "unwind.h" 36#include "unwind.h"
37#include "map.h"
37#include "symbol.h" 38#include "symbol.h"
38#include "util.h" 39#include "util.h"
39#include "debug.h" 40#include "debug.h"
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index b029a5e9ae49..9778b3133b77 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -1,5 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "unwind.h" 2#include "unwind.h"
3#include "map.h"
3#include "thread.h" 4#include "thread.h"
4#include "session.h" 5#include "session.h"
5#include "debug.h" 6#include "debug.h"
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 093352e93d50..d388f80d8703 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -2,6 +2,7 @@
2#include "../perf.h" 2#include "../perf.h"
3#include "util.h" 3#include "util.h"
4#include "debug.h" 4#include "debug.h"
5#include "namespaces.h"
5#include <api/fs/fs.h> 6#include <api/fs/fs.h>
6#include <sys/mman.h> 7#include <sys/mman.h>
7#include <sys/stat.h> 8#include <sys/stat.h>
@@ -20,6 +21,7 @@
20#include <linux/time64.h> 21#include <linux/time64.h>
21#include <unistd.h> 22#include <unistd.h>
22#include "strlist.h" 23#include "strlist.h"
24#include "string2.h"
23 25
24/* 26/*
25 * XXX We need to find a better place for these things... 27 * XXX We need to find a better place for these things...
@@ -116,23 +118,67 @@ int mkdir_p(char *path, mode_t mode)
116 return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; 118 return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
117} 119}
118 120
119int rm_rf(const char *path) 121static bool match_pat(char *file, const char **pat)
122{
123 int i = 0;
124
125 if (!pat)
126 return true;
127
128 while (pat[i]) {
129 if (strglobmatch(file, pat[i]))
130 return true;
131
132 i++;
133 }
134
135 return false;
136}
137
138/*
139 * The depth specify how deep the removal will go.
140 * 0 - will remove only files under the 'path' directory
141 * 1 .. x - will dive in x-level deep under the 'path' directory
142 *
143 * If specified the pat is array of string patterns ended with NULL,
144 * which are checked upon every file/directory found. Only matching
145 * ones are removed.
146 *
147 * The function returns:
148 * 0 on success
149 * -1 on removal failure with errno set
150 * -2 on pattern failure
151 */
152static int rm_rf_depth_pat(const char *path, int depth, const char **pat)
120{ 153{
121 DIR *dir; 154 DIR *dir;
122 int ret = 0; 155 int ret;
123 struct dirent *d; 156 struct dirent *d;
124 char namebuf[PATH_MAX]; 157 char namebuf[PATH_MAX];
158 struct stat statbuf;
125 159
160 /* Do not fail if there's no file. */
161 ret = lstat(path, &statbuf);
162 if (ret)
163 return 0;
164
165 /* Try to remove any file we get. */
166 if (!(statbuf.st_mode & S_IFDIR))
167 return unlink(path);
168
169 /* We have directory in path. */
126 dir = opendir(path); 170 dir = opendir(path);
127 if (dir == NULL) 171 if (dir == NULL)
128 return 0; 172 return -1;
129 173
130 while ((d = readdir(dir)) != NULL && !ret) { 174 while ((d = readdir(dir)) != NULL && !ret) {
131 struct stat statbuf;
132 175
133 if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) 176 if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
134 continue; 177 continue;
135 178
179 if (!match_pat(d->d_name, pat))
180 return -2;
181
136 scnprintf(namebuf, sizeof(namebuf), "%s/%s", 182 scnprintf(namebuf, sizeof(namebuf), "%s/%s",
137 path, d->d_name); 183 path, d->d_name);
138 184
@@ -144,7 +190,7 @@ int rm_rf(const char *path)
144 } 190 }
145 191
146 if (S_ISDIR(statbuf.st_mode)) 192 if (S_ISDIR(statbuf.st_mode))
147 ret = rm_rf(namebuf); 193 ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0;
148 else 194 else
149 ret = unlink(namebuf); 195 ret = unlink(namebuf);
150 } 196 }
@@ -156,6 +202,22 @@ int rm_rf(const char *path)
156 return rmdir(path); 202 return rmdir(path);
157} 203}
158 204
205int rm_rf_perf_data(const char *path)
206{
207 const char *pat[] = {
208 "header",
209 "data.*",
210 NULL,
211 };
212
213 return rm_rf_depth_pat(path, 0, pat);
214}
215
216int rm_rf(const char *path)
217{
218 return rm_rf_depth_pat(path, INT_MAX, NULL);
219}
220
159/* A filter which removes dot files */ 221/* A filter which removes dot files */
160bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) 222bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
161{ 223{
@@ -506,3 +568,13 @@ out:
506 568
507 return tip; 569 return tip;
508} 570}
571
572char *perf_exe(char *buf, int len)
573{
574 int n = readlink("/proc/self/exe", buf, len);
575 if (n > 0) {
576 buf[n] = 0;
577 return buf;
578 }
579 return strcpy(buf, "perf");
580}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index ece040b799f6..09c1b0f91f65 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -31,6 +31,7 @@ struct strlist;
31 31
32int mkdir_p(char *path, mode_t mode); 32int mkdir_p(char *path, mode_t mode);
33int rm_rf(const char *path); 33int rm_rf(const char *path);
34int rm_rf_perf_data(const char *path);
34struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); 35struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
35bool lsdir_no_dot_filter(const char *name, struct dirent *d); 36bool lsdir_no_dot_filter(const char *name, struct dirent *d);
36int copyfile(const char *from, const char *to); 37int copyfile(const char *from, const char *to);
@@ -76,6 +77,8 @@ extern bool perf_singlethreaded;
76void perf_set_singlethreaded(void); 77void perf_set_singlethreaded(void);
77void perf_set_multithreaded(void); 78void perf_set_multithreaded(void);
78 79
80char *perf_exe(char *buf, int len);
81
79#ifndef O_CLOEXEC 82#ifndef O_CLOEXEC
80#ifdef __sparc__ 83#ifdef __sparc__
81#define O_CLOEXEC 0x400000 84#define O_CLOEXEC 0x400000
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 3702cba11d7d..5031b7b22bbd 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -11,6 +11,7 @@
11 11
12#include "vdso.h" 12#include "vdso.h"
13#include "util.h" 13#include "util.h"
14#include "map.h"
14#include "symbol.h" 15#include "symbol.h"
15#include "machine.h" 16#include "machine.h"
16#include "thread.h" 17#include "thread.h"
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
index 902ce6384f57..512ad7c09b13 100644
--- a/tools/perf/util/zlib.c
+++ b/tools/perf/util/zlib.c
@@ -6,7 +6,6 @@
6#include <sys/mman.h> 6#include <sys/mman.h>
7#include <zlib.h> 7#include <zlib.h>
8#include <linux/compiler.h> 8#include <linux/compiler.h>
9#include <unistd.h>
10 9
11#include "util/compress.h" 10#include "util/compress.h"
12#include "util/util.h" 11#include "util/util.h"