diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-06 10:59:36 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-03-06 10:59:36 -0500 |
| commit | 203b6609e0ede49eb0b97008b1150c69e9d2ffd3 (patch) | |
| tree | 7d9c1227eeec17f75b2a827e385387f640a365a6 /tools/perf | |
| parent | 3478588b5136966c80c571cf0006f08e9e5b8f04 (diff) | |
| parent | c978b9460fe1d4a1e1effa0abd6bd69b18a098a8 (diff) | |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Lots of tooling updates - too many to list, here's a few highlights:
- Various subcommand updates to 'perf trace', 'perf report', 'perf
record', 'perf annotate', 'perf script', 'perf test', etc.
- CPU and NUMA topology and affinity handling improvements,
- HW tracing and HW support updates:
- Intel PT updates
- ARM CoreSight updates
- vendor HW event updates
- BPF updates
- Tons of infrastructure updates, both on the build system and the
library support side
- Documentation updates.
- ... and lots of other changes, see the changelog for details.
Kernel side updates:
- Tighten up kprobes blacklist handling, reduce the number of places
where developers can install a kprobe and hang/crash the system.
- Fix/enhance vma address filter handling.
- Various PMU driver updates, small fixes and additions.
- refcount_t conversions
- BPF updates
- error code propagation enhancements
- misc other changes"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (238 commits)
perf script python: Add Python3 support to syscall-counts-by-pid.py
perf script python: Add Python3 support to syscall-counts.py
perf script python: Add Python3 support to stat-cpi.py
perf script python: Add Python3 support to stackcollapse.py
perf script python: Add Python3 support to sctop.py
perf script python: Add Python3 support to powerpc-hcalls.py
perf script python: Add Python3 support to net_dropmonitor.py
perf script python: Add Python3 support to mem-phys-addr.py
perf script python: Add Python3 support to failed-syscalls-by-pid.py
perf script python: Add Python3 support to netdev-times.py
perf tools: Add perf_exe() helper to find perf binary
perf script: Handle missing fields with -F +..
perf data: Add perf_data__open_dir_data function
perf data: Add perf_data__(create_dir|close_dir) functions
perf data: Fail check_backup in case of error
perf data: Make check_backup work over directories
perf tools: Add rm_rf_perf_data function
perf tools: Add pattern name checking to rm_rf
perf tools: Add depth checking to rm_rf
perf data: Add global path holder
...
Diffstat (limited to 'tools/perf')
236 files changed, 9100 insertions, 2137 deletions
diff --git a/tools/perf/Build b/tools/perf/Build index e5232d567611..5f392dbb88fc 100644 --- a/tools/perf/Build +++ b/tools/perf/Build | |||
| @@ -46,10 +46,10 @@ CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_ | |||
| 46 | CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" | 46 | CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))" |
| 47 | CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" | 47 | CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)" |
| 48 | 48 | ||
| 49 | libperf-y += util/ | 49 | perf-y += util/ |
| 50 | libperf-y += arch/ | 50 | perf-y += arch/ |
| 51 | libperf-y += ui/ | 51 | perf-y += ui/ |
| 52 | libperf-y += scripts/ | 52 | perf-y += scripts/ |
| 53 | libperf-$(CONFIG_TRACE) += trace/beauty/ | 53 | perf-$(CONFIG_TRACE) += trace/beauty/ |
| 54 | 54 | ||
| 55 | gtk-y += ui/gtk/ | 55 | gtk-y += ui/gtk/ |
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 4ac7775fbc11..86f3dcc15f83 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt | |||
| @@ -120,6 +120,10 @@ Given a $HOME/.perfconfig like this: | |||
| 120 | children = true | 120 | children = true |
| 121 | group = true | 121 | group = true |
| 122 | 122 | ||
| 123 | [llvm] | ||
| 124 | dump-obj = true | ||
| 125 | clang-opt = -g | ||
| 126 | |||
| 123 | You can hide source code of annotate feature setting the config to false with | 127 | You can hide source code of annotate feature setting the config to false with |
| 124 | 128 | ||
| 125 | % perf config annotate.hide_src_code=true | 129 | % perf config annotate.hide_src_code=true |
| @@ -553,6 +557,33 @@ trace.*:: | |||
| 553 | trace.show_zeros:: | 557 | trace.show_zeros:: |
| 554 | Do not suppress syscall arguments that are equal to zero. | 558 | Do not suppress syscall arguments that are equal to zero. |
| 555 | 559 | ||
| 560 | llvm.*:: | ||
| 561 | llvm.clang-path:: | ||
| 562 | Path to clang. If omitted, search for it in $PATH. | ||
| 563 | |||
| 564 | llvm.clang-bpf-cmd-template:: | ||
| 565 | Cmdline template. Below lines show its default value. Environment | ||
| 566 | variable is used to pass options. | ||
| 567 | "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ | ||
| 568 | -Wno-unused-value -Wno-pointer-sign -working-directory \ | ||
| 569 | $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" | ||
| 570 | |||
| 571 | llvm.clang-opt:: | ||
| 572 | Options passed to clang. | ||
| 573 | |||
| 574 | llvm.kbuild-dir:: | ||
| 575 | kbuild directory. If not set, use /lib/modules/`uname -r`/build. | ||
| 576 | If set to "" deliberately, skip kernel header auto-detector. | ||
| 577 | |||
| 578 | llvm.kbuild-opts:: | ||
| 579 | Options passed to 'make' when detecting kernel header options. | ||
| 580 | |||
| 581 | llvm.dump-obj:: | ||
| 582 | Enable perf dump BPF object files compiled by LLVM. | ||
| 583 | |||
| 584 | llvm.opts:: | ||
| 585 | Options passed to llc. | ||
| 586 | |||
| 556 | SEE ALSO | 587 | SEE ALSO |
| 557 | -------- | 588 | -------- |
| 558 | linkperf:perf[1] | 589 | linkperf:perf[1] |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index d232b13ea713..8f0c2be34848 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt | |||
| @@ -88,6 +88,20 @@ OPTIONS | |||
| 88 | If you want to profile write accesses in [0x1000~1008), just set | 88 | If you want to profile write accesses in [0x1000~1008), just set |
| 89 | 'mem:0x1000/8:w'. | 89 | 'mem:0x1000/8:w'. |
| 90 | 90 | ||
| 91 | - a BPF source file (ending in .c) or a precompiled object file (ending | ||
| 92 | in .o) selects one or more BPF events. | ||
| 93 | The BPF program can attach to various perf events based on the ELF section | ||
| 94 | names. | ||
| 95 | |||
| 96 | When processing a '.c' file, perf searches an installed LLVM to compile it | ||
| 97 | into an object file first. Optional clang options can be passed via the | ||
| 98 | '--clang-opt' command line option, e.g.: | ||
| 99 | |||
| 100 | perf record --clang-opt "-DLINUX_VERSION_CODE=0x50000" \ | ||
| 101 | -e tests/bpf-script-example.c | ||
| 102 | |||
| 103 | Note: '--clang-opt' must be placed before '--event/-e'. | ||
| 104 | |||
| 91 | - a group of events surrounded by a pair of brace ("{event1,event2,...}"). | 105 | - a group of events surrounded by a pair of brace ("{event1,event2,...}"). |
| 92 | Each event is separated by commas and the group should be quoted to | 106 | Each event is separated by commas and the group should be quoted to |
| 93 | prevent the shell interpretation. You also need to use --group on | 107 | prevent the shell interpretation. You also need to use --group on |
| @@ -440,6 +454,11 @@ Use <n> control blocks in asynchronous (Posix AIO) trace writing mode (default: | |||
| 440 | Asynchronous mode is supported only when linking Perf tool with libc library | 454 | Asynchronous mode is supported only when linking Perf tool with libc library |
| 441 | providing implementation for Posix AIO API. | 455 | providing implementation for Posix AIO API. |
| 442 | 456 | ||
| 457 | --affinity=mode:: | ||
| 458 | Set affinity mask of trace reading thread according to the policy defined by 'mode' value: | ||
| 459 | node - thread affinity mask is set to NUMA node cpu mask of the processed mmap buffer | ||
| 460 | cpu - thread affinity mask is set to cpu of the processed mmap buffer | ||
| 461 | |||
| 443 | --all-kernel:: | 462 | --all-kernel:: |
| 444 | Configure all used events to run in kernel space. | 463 | Configure all used events to run in kernel space. |
| 445 | 464 | ||
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 9e4def08d569..2e19fd7ffe35 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt | |||
| @@ -159,6 +159,12 @@ OPTIONS | |||
| 159 | the override, and the result of the above is that only S/W and H/W | 159 | the override, and the result of the above is that only S/W and H/W |
| 160 | events are displayed with the given fields. | 160 | events are displayed with the given fields. |
| 161 | 161 | ||
| 162 | It's possible to add/remove fields only for specific event type: | ||
| 163 | |||
| 164 | -Fsw:-cpu,-period | ||
| 165 | |||
| 166 | removes cpu and period from software events. | ||
| 167 | |||
| 162 | For the 'wildcard' option if a user selected field is invalid for an | 168 | For the 'wildcard' option if a user selected field is invalid for an |
| 163 | event type, a message is displayed to the user that the option is | 169 | event type, a message is displayed to the user that the option is |
| 164 | ignored for that type. For example: | 170 | ignored for that type. For example: |
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 631e687be4eb..fc6e43262c41 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt | |||
| @@ -210,6 +210,14 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. | |||
| 210 | may happen, for instance, when a thread gets migrated to a different CPU | 210 | may happen, for instance, when a thread gets migrated to a different CPU |
| 211 | while processing a syscall. | 211 | while processing a syscall. |
| 212 | 212 | ||
| 213 | --map-dump:: | ||
| 214 | Dump BPF maps setup by events passed via -e, for instance the augmented_raw_syscalls | ||
| 215 | living in tools/perf/examples/bpf/augmented_raw_syscalls.c. For now this | ||
| 216 | dumps just boolean map values and integer keys, in time this will print in hex | ||
| 217 | by default and use BTF when available, as well as use functions to do pretty | ||
| 218 | printing using the existing 'perf trace' syscall arg beautifiers to map integer | ||
| 219 | arguments to strings (pid to comm, syscall id to syscall name, etc). | ||
| 220 | |||
| 213 | 221 | ||
| 214 | PAGEFAULTS | 222 | PAGEFAULTS |
| 215 | ---------- | 223 | ---------- |
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index dfb218feaad9..593ef49b273c 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt | |||
| @@ -43,11 +43,10 @@ struct perf_file_section { | |||
| 43 | 43 | ||
| 44 | Flags section: | 44 | Flags section: |
| 45 | 45 | ||
| 46 | The header is followed by different optional headers, described by the bits set | 46 | For each of the optional features a perf_file_section is placed after the data
| 47 | in flags. Only headers for which the bit is set are included. Each header | 47 | section if the feature bit is set in the perf_header flags bitset. The |
| 48 | consists of a perf_file_section located after the initial header. | 48 | respective perf_file_section points to the data of the additional header and |
| 49 | The respective perf_file_section points to the data of the additional | 49 | defines its size. |
| 50 | header and defines its size. | ||
| 51 | 50 | ||
| 52 | Some headers consist of strings, which are defined like this: | 51 | Some headers consist of strings, which are defined like this: |
| 53 | 52 | ||
| @@ -131,7 +130,7 @@ An uint64_t with the total memory in bytes. | |||
| 131 | 130 | ||
| 132 | HEADER_CMDLINE = 11, | 131 | HEADER_CMDLINE = 11, |
| 133 | 132 | ||
| 134 | A perf_header_string with the perf command line used to collect the data. | 133 | A perf_header_string_list with the perf arg-vector used to collect the data. |
| 135 | 134 | ||
| 136 | HEADER_EVENT_DESC = 12, | 135 | HEADER_EVENT_DESC = 12, |
| 137 | 136 | ||
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b441c88cafa1..0f11d5891301 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config | |||
| @@ -109,6 +109,13 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) | |||
| 109 | FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) | 109 | FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) |
| 110 | FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) | 110 | FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) |
| 111 | 111 | ||
| 112 | FEATURE_CHECK_LDFLAGS-libunwind-arm = -lunwind -lunwind-arm | ||
| 113 | FEATURE_CHECK_LDFLAGS-libunwind-aarch64 = -lunwind -lunwind-aarch64 | ||
| 114 | FEATURE_CHECK_LDFLAGS-libunwind-x86 = -lunwind -llzma -lunwind-x86 | ||
| 115 | FEATURE_CHECK_LDFLAGS-libunwind-x86_64 = -lunwind -llzma -lunwind-x86_64 | ||
| 116 | |||
| 117 | FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto | ||
| 118 | |||
| 112 | ifdef CSINCLUDES | 119 | ifdef CSINCLUDES |
| 113 | LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) | 120 | LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) |
| 114 | endif | 121 | endif |
| @@ -218,6 +225,8 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS) | |||
| 218 | FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) | 225 | FEATURE_CHECK_CFLAGS-libpython-version := $(PYTHON_EMBED_CCOPTS) |
| 219 | FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) | 226 | FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) |
| 220 | 227 | ||
| 228 | FEATURE_CHECK_LDFLAGS-libaio = -lrt | ||
| 229 | |||
| 221 | CFLAGS += -fno-omit-frame-pointer | 230 | CFLAGS += -fno-omit-frame-pointer |
| 222 | CFLAGS += -ggdb3 | 231 | CFLAGS += -ggdb3 |
| 223 | CFLAGS += -funwind-tables | 232 | CFLAGS += -funwind-tables |
| @@ -386,7 +395,8 @@ ifeq ($(feature-setns), 1) | |||
| 386 | $(call detected,CONFIG_SETNS) | 395 | $(call detected,CONFIG_SETNS) |
| 387 | endif | 396 | endif |
| 388 | 397 | ||
| 389 | ifndef NO_CORESIGHT | 398 | ifdef CORESIGHT |
| 399 | $(call feature_check,libopencsd) | ||
| 390 | ifeq ($(feature-libopencsd), 1) | 400 | ifeq ($(feature-libopencsd), 1) |
| 391 | CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) | 401 | CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) |
| 392 | LDFLAGS += $(LIBOPENCSD_LDFLAGS) | 402 | LDFLAGS += $(LIBOPENCSD_LDFLAGS) |
| @@ -482,6 +492,7 @@ endif | |||
| 482 | ifndef NO_LIBUNWIND | 492 | ifndef NO_LIBUNWIND |
| 483 | have_libunwind := | 493 | have_libunwind := |
| 484 | 494 | ||
| 495 | $(call feature_check,libunwind-x86) | ||
| 485 | ifeq ($(feature-libunwind-x86), 1) | 496 | ifeq ($(feature-libunwind-x86), 1) |
| 486 | $(call detected,CONFIG_LIBUNWIND_X86) | 497 | $(call detected,CONFIG_LIBUNWIND_X86) |
| 487 | CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT | 498 | CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT |
| @@ -490,6 +501,7 @@ ifndef NO_LIBUNWIND | |||
| 490 | have_libunwind = 1 | 501 | have_libunwind = 1 |
| 491 | endif | 502 | endif |
| 492 | 503 | ||
| 504 | $(call feature_check,libunwind-aarch64) | ||
| 493 | ifeq ($(feature-libunwind-aarch64), 1) | 505 | ifeq ($(feature-libunwind-aarch64), 1) |
| 494 | $(call detected,CONFIG_LIBUNWIND_AARCH64) | 506 | $(call detected,CONFIG_LIBUNWIND_AARCH64) |
| 495 | CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT | 507 | CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT |
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 0ee6795d82cc..01f7555fd933 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf | |||
| @@ -102,7 +102,7 @@ include ../scripts/utilities.mak | |||
| 102 | # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if | 102 | # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if |
| 103 | # llvm-config is not in $PATH. | 103 | # llvm-config is not in $PATH. |
| 104 | # | 104 | # |
| 105 | # Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. | 105 | # Define CORESIGHT if you DO WANT support for CoreSight trace decoding. |
| 106 | # | 106 | # |
| 107 | # Define NO_AIO if you do not want support of Posix AIO based trace | 107 | # Define NO_AIO if you do not want support of Posix AIO based trace |
| 108 | # streaming for record mode. Currently Posix AIO trace streaming is | 108 | # streaming for record mode. Currently Posix AIO trace streaming is |
| @@ -344,9 +344,9 @@ endif | |||
| 344 | 344 | ||
| 345 | export PERL_PATH | 345 | export PERL_PATH |
| 346 | 346 | ||
| 347 | LIB_FILE=$(OUTPUT)libperf.a | 347 | LIBPERF_A=$(OUTPUT)libperf.a |
| 348 | 348 | ||
| 349 | PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) | 349 | PERFLIBS = $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD) |
| 350 | ifndef NO_LIBBPF | 350 | ifndef NO_LIBBPF |
| 351 | PERFLIBS += $(LIBBPF) | 351 | PERFLIBS += $(LIBBPF) |
| 352 | endif | 352 | endif |
| @@ -549,6 +549,8 @@ JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o | |||
| 549 | 549 | ||
| 550 | PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o | 550 | PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o |
| 551 | 551 | ||
| 552 | LIBPERF_IN := $(OUTPUT)libperf-in.o | ||
| 553 | |||
| 552 | export JEVENTS | 554 | export JEVENTS |
| 553 | 555 | ||
| 554 | build := -f $(srctree)/tools/build/Makefile.build dir=. obj | 556 | build := -f $(srctree)/tools/build/Makefile.build dir=. obj |
| @@ -565,9 +567,12 @@ $(JEVENTS): $(JEVENTS_IN) | |||
| 565 | $(PMU_EVENTS_IN): $(JEVENTS) FORCE | 567 | $(PMU_EVENTS_IN): $(JEVENTS) FORCE |
| 566 | $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events | 568 | $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events |
| 567 | 569 | ||
| 568 | $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) | 570 | $(LIBPERF_IN): prepare FORCE |
| 571 | $(Q)$(MAKE) $(build)=libperf | ||
| 572 | |||
| 573 | $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) | ||
| 569 | $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ | 574 | $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ |
| 570 | $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ | 575 | $(PERF_IN) $(PMU_EVENTS_IN) $(LIBPERF_IN) $(LIBS) -o $@ |
| 571 | 576 | ||
| 572 | $(GTK_IN): FORCE | 577 | $(GTK_IN): FORCE |
| 573 | $(Q)$(MAKE) $(build)=gtk | 578 | $(Q)$(MAKE) $(build)=gtk |
| @@ -683,12 +688,7 @@ endif | |||
| 683 | 688 | ||
| 684 | $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) | 689 | $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) |
| 685 | 690 | ||
| 686 | LIBPERF_IN := $(OUTPUT)libperf-in.o | 691 | $(LIBPERF_A): $(LIBPERF_IN) |
| 687 | |||
| 688 | $(LIBPERF_IN): prepare FORCE | ||
| 689 | $(Q)$(MAKE) $(build)=libperf | ||
| 690 | |||
| 691 | $(LIB_FILE): $(LIBPERF_IN) | ||
| 692 | $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) | 692 | $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) |
| 693 | 693 | ||
| 694 | LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' | 694 | LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' |
| @@ -863,8 +863,8 @@ ifndef NO_LIBPYTHON | |||
| 863 | $(call QUIET_INSTALL, python-scripts) \ | 863 | $(call QUIET_INSTALL, python-scripts) \ |
| 864 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ | 864 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ |
| 865 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \ | 865 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \ |
| 866 | $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ | 866 | $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \ |
| 867 | $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ | 867 | $(INSTALL) scripts/python/*.py -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \ |
| 868 | $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' | 868 | $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin' |
| 869 | endif | 869 | endif |
| 870 | $(call QUIET_INSTALL, perf_completion-script) \ | 870 | $(call QUIET_INSTALL, perf_completion-script) \ |
| @@ -910,7 +910,7 @@ python-clean: | |||
| 910 | $(python-clean) | 910 | $(python-clean) |
| 911 | 911 | ||
| 912 | clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean | 912 | clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean |
| 913 | $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) | 913 | $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) |
| 914 | $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete | 914 | $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete |
| 915 | $(Q)$(RM) $(OUTPUT).config-detected | 915 | $(Q)$(RM) $(OUTPUT).config-detected |
| 916 | $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so | 916 | $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so |
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build index d9b6af837c7d..688818844c11 100644 --- a/tools/perf/arch/Build +++ b/tools/perf/arch/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-y += common.o | 1 | perf-y += common.o |
| 2 | libperf-y += $(SRCARCH)/ | 2 | perf-y += $(SRCARCH)/ |
diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm/Build +++ b/tools/perf/arch/arm/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-y += util/ | 1 | perf-y += util/ |
| 2 | libperf-$(CONFIG_DWARF_UNWIND) += tests/ | 2 | perf-$(CONFIG_DWARF_UNWIND) += tests/ |
diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build index d9ae2733f9cc..bc8e97380c82 100644 --- a/tools/perf/arch/arm/tests/Build +++ b/tools/perf/arch/arm/tests/Build | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | libperf-y += regs_load.o | 1 | perf-y += regs_load.o |
| 2 | libperf-y += dwarf-unwind.o | 2 | perf-y += dwarf-unwind.o |
| 3 | libperf-y += vectors-page.o | 3 | perf-y += vectors-page.o |
| 4 | 4 | ||
| 5 | libperf-y += arch-tests.o | 5 | perf-y += arch-tests.o |
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c index 9a0242e74cfc..2c35e532bc9a 100644 --- a/tools/perf/arch/arm/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm/tests/dwarf-unwind.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include "perf_regs.h" | 3 | #include "perf_regs.h" |
| 4 | #include "thread.h" | 4 | #include "thread.h" |
| 5 | #include "map.h" | 5 | #include "map.h" |
| 6 | #include "map_groups.h" | ||
| 6 | #include "event.h" | 7 | #include "event.h" |
| 7 | #include "debug.h" | 8 | #include "debug.h" |
| 8 | #include "tests/tests.h" | 9 | #include "tests/tests.h" |
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e64c5f216448..296f0eac5e18 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build | |||
| @@ -1,6 +1,6 @@ | |||
| 1 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 1 | perf-$(CONFIG_DWARF) += dwarf-regs.o |
| 2 | 2 | ||
| 3 | libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o | 3 | perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o |
| 4 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 4 | perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
| 5 | 5 | ||
| 6 | libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o | 6 | perf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o |
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 2f595cd73da6..911426721170 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | */ | 5 | */ |
| 6 | 6 | ||
| 7 | #include <api/fs/fs.h> | 7 | #include <api/fs/fs.h> |
| 8 | #include <linux/bits.h> | ||
| 8 | #include <linux/bitops.h> | 9 | #include <linux/bitops.h> |
| 9 | #include <linux/compiler.h> | 10 | #include <linux/compiler.h> |
| 10 | #include <linux/coresight-pmu.h> | 11 | #include <linux/coresight-pmu.h> |
| @@ -22,12 +23,10 @@ | |||
| 22 | #include "../../util/thread_map.h" | 23 | #include "../../util/thread_map.h" |
| 23 | #include "../../util/cs-etm.h" | 24 | #include "../../util/cs-etm.h" |
| 24 | 25 | ||
| 26 | #include <errno.h> | ||
| 25 | #include <stdlib.h> | 27 | #include <stdlib.h> |
| 26 | #include <sys/stat.h> | 28 | #include <sys/stat.h> |
| 27 | 29 | ||
| 28 | #define ENABLE_SINK_MAX 128 | ||
| 29 | #define CS_BUS_DEVICE_PATH "/bus/coresight/devices/" | ||
| 30 | |||
| 31 | struct cs_etm_recording { | 30 | struct cs_etm_recording { |
| 32 | struct auxtrace_record itr; | 31 | struct auxtrace_record itr; |
| 33 | struct perf_pmu *cs_etm_pmu; | 32 | struct perf_pmu *cs_etm_pmu; |
| @@ -60,10 +59,48 @@ static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, | |||
| 60 | return 0; | 59 | return 0; |
| 61 | } | 60 | } |
| 62 | 61 | ||
| 62 | static int cs_etm_set_sink_attr(struct perf_pmu *pmu, | ||
| 63 | struct perf_evsel *evsel) | ||
| 64 | { | ||
| 65 | char msg[BUFSIZ], path[PATH_MAX], *sink; | ||
| 66 | struct perf_evsel_config_term *term; | ||
| 67 | int ret = -EINVAL; | ||
| 68 | u32 hash; | ||
| 69 | |||
| 70 | if (evsel->attr.config2 & GENMASK(31, 0)) | ||
| 71 | return 0; | ||
| 72 | |||
| 73 | list_for_each_entry(term, &evsel->config_terms, list) { | ||
| 74 | if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) | ||
| 75 | continue; | ||
| 76 | |||
| 77 | sink = term->val.drv_cfg; | ||
| 78 | snprintf(path, PATH_MAX, "sinks/%s", sink); | ||
| 79 | |||
| 80 | ret = perf_pmu__scan_file(pmu, path, "%x", &hash); | ||
| 81 | if (ret != 1) { | ||
| 82 | pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n", | ||
| 83 | sink, perf_evsel__name(evsel), errno, | ||
| 84 | str_error_r(errno, msg, sizeof(msg))); | ||
| 85 | return ret; | ||
| 86 | } | ||
| 87 | |||
| 88 | evsel->attr.config2 |= hash; | ||
| 89 | return 0; | ||
| 90 | } | ||
| 91 | |||
| 92 | /* | ||
| 93 | * No sink was provided on the command line - for _now_ treat | ||
| 94 | * this as an error. | ||
| 95 | */ | ||
| 96 | return ret; | ||
| 97 | } | ||
| 98 | |||
| 63 | static int cs_etm_recording_options(struct auxtrace_record *itr, | 99 | static int cs_etm_recording_options(struct auxtrace_record *itr, |
| 64 | struct perf_evlist *evlist, | 100 | struct perf_evlist *evlist, |
| 65 | struct record_opts *opts) | 101 | struct record_opts *opts) |
| 66 | { | 102 | { |
| 103 | int ret; | ||
| 67 | struct cs_etm_recording *ptr = | 104 | struct cs_etm_recording *ptr = |
| 68 | container_of(itr, struct cs_etm_recording, itr); | 105 | container_of(itr, struct cs_etm_recording, itr); |
| 69 | struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; | 106 | struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; |
| @@ -92,6 +129,10 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, | |||
| 92 | if (!cs_etm_evsel) | 129 | if (!cs_etm_evsel) |
| 93 | return 0; | 130 | return 0; |
| 94 | 131 | ||
| 132 | ret = cs_etm_set_sink_attr(cs_etm_pmu, cs_etm_evsel); | ||
| 133 | if (ret) | ||
| 134 | return ret; | ||
| 135 | |||
| 95 | if (opts->use_clockid) { | 136 | if (opts->use_clockid) { |
| 96 | pr_err("Cannot use clockid (-k option) with %s\n", | 137 | pr_err("Cannot use clockid (-k option) with %s\n", |
| 97 | CORESIGHT_ETM_PMU_NAME); | 138 | CORESIGHT_ETM_PMU_NAME); |
| @@ -598,54 +639,3 @@ struct auxtrace_record *cs_etm_record_init(int *err) | |||
| 598 | out: | 639 | out: |
| 599 | return NULL; | 640 | return NULL; |
| 600 | } | 641 | } |
| 601 | |||
| 602 | static FILE *cs_device__open_file(const char *name) | ||
| 603 | { | ||
| 604 | struct stat st; | ||
| 605 | char path[PATH_MAX]; | ||
| 606 | const char *sysfs; | ||
| 607 | |||
| 608 | sysfs = sysfs__mountpoint(); | ||
| 609 | if (!sysfs) | ||
| 610 | return NULL; | ||
| 611 | |||
| 612 | snprintf(path, PATH_MAX, | ||
| 613 | "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name); | ||
| 614 | |||
| 615 | if (stat(path, &st) < 0) | ||
| 616 | return NULL; | ||
| 617 | |||
| 618 | return fopen(path, "w"); | ||
| 619 | |||
| 620 | } | ||
| 621 | |||
| 622 | static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...) | ||
| 623 | { | ||
| 624 | va_list args; | ||
| 625 | FILE *file; | ||
| 626 | int ret = -EINVAL; | ||
| 627 | |||
| 628 | va_start(args, fmt); | ||
| 629 | file = cs_device__open_file(name); | ||
| 630 | if (file) { | ||
| 631 | ret = vfprintf(file, fmt, args); | ||
| 632 | fclose(file); | ||
| 633 | } | ||
| 634 | va_end(args); | ||
| 635 | return ret; | ||
| 636 | } | ||
| 637 | |||
| 638 | int cs_etm_set_drv_config(struct perf_evsel_config_term *term) | ||
| 639 | { | ||
| 640 | int ret; | ||
| 641 | char enable_sink[ENABLE_SINK_MAX]; | ||
| 642 | |||
| 643 | snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s", | ||
| 644 | term->val.drv_cfg, "enable_sink"); | ||
| 645 | |||
| 646 | ret = cs_device__print_file(enable_sink, "%d", 1); | ||
| 647 | if (ret < 0) | ||
| 648 | return ret; | ||
| 649 | |||
| 650 | return 0; | ||
| 651 | } | ||
diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h index 1a12e64f5127..a3354bda4fe8 100644 --- a/tools/perf/arch/arm/util/cs-etm.h +++ b/tools/perf/arch/arm/util/cs-etm.h | |||
| @@ -7,9 +7,6 @@ | |||
| 7 | #ifndef INCLUDE__PERF_CS_ETM_H__ | 7 | #ifndef INCLUDE__PERF_CS_ETM_H__ |
| 8 | #define INCLUDE__PERF_CS_ETM_H__ | 8 | #define INCLUDE__PERF_CS_ETM_H__ |
| 9 | 9 | ||
| 10 | #include "../../util/evsel.h" | ||
| 11 | |||
| 12 | struct auxtrace_record *cs_etm_record_init(int *err); | 10 | struct auxtrace_record *cs_etm_record_init(int *err); |
| 13 | int cs_etm_set_drv_config(struct perf_evsel_config_term *term); | ||
| 14 | 11 | ||
| 15 | #endif | 12 | #endif |
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index e047571e6080..bbc297a7e2e3 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c | |||
| @@ -7,8 +7,8 @@ | |||
| 7 | #include <string.h> | 7 | #include <string.h> |
| 8 | #include <linux/coresight-pmu.h> | 8 | #include <linux/coresight-pmu.h> |
| 9 | #include <linux/perf_event.h> | 9 | #include <linux/perf_event.h> |
| 10 | #include <linux/string.h> | ||
| 10 | 11 | ||
| 11 | #include "cs-etm.h" | ||
| 12 | #include "arm-spe.h" | 12 | #include "arm-spe.h" |
| 13 | #include "../../util/pmu.h" | 13 | #include "../../util/pmu.h" |
| 14 | 14 | ||
| @@ -19,7 +19,6 @@ struct perf_event_attr | |||
| 19 | if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { | 19 | if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) { |
| 20 | /* add ETM default config here */ | 20 | /* add ETM default config here */ |
| 21 | pmu->selectable = true; | 21 | pmu->selectable = true; |
| 22 | pmu->set_drv_config = cs_etm_set_drv_config; | ||
| 23 | #if defined(__aarch64__) | 22 | #if defined(__aarch64__) |
| 24 | } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { | 23 | } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { |
| 25 | return arm_spe_pmu_default_config(pmu); | 24 | return arm_spe_pmu_default_config(pmu); |
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build index 41bf61da476a..36222e64bbf7 100644 --- a/tools/perf/arch/arm64/Build +++ b/tools/perf/arch/arm64/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-y += util/ | 1 | perf-y += util/ |
| 2 | libperf-$(CONFIG_DWARF_UNWIND) += tests/ | 2 | perf-$(CONFIG_DWARF_UNWIND) += tests/ |
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build index 883c57ff0c08..41707fea74b3 100644 --- a/tools/perf/arch/arm64/tests/Build +++ b/tools/perf/arch/arm64/tests/Build | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | libperf-y += regs_load.o | 1 | perf-y += regs_load.o |
| 2 | libperf-y += dwarf-unwind.o | 2 | perf-y += dwarf-unwind.o |
| 3 | 3 | ||
| 4 | libperf-y += arch-tests.o | 4 | perf-y += arch-tests.o |
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c index 5522ce384723..a6a407fa1b8b 100644 --- a/tools/perf/arch/arm64/tests/dwarf-unwind.c +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include "perf_regs.h" | 3 | #include "perf_regs.h" |
| 4 | #include "thread.h" | 4 | #include "thread.h" |
| 5 | #include "map.h" | 5 | #include "map.h" |
| 6 | #include "map_groups.h" | ||
| 6 | #include "event.h" | 7 | #include "event.h" |
| 7 | #include "debug.h" | 8 | #include "debug.h" |
| 8 | #include "tests/tests.h" | 9 | #include "tests/tests.h" |
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 68f8a8eb3ad0..3cde540d2fcf 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build | |||
| @@ -1,10 +1,10 @@ | |||
| 1 | libperf-y += header.o | 1 | perf-y += header.o |
| 2 | libperf-y += sym-handling.o | 2 | perf-y += sym-handling.o |
| 3 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 3 | perf-$(CONFIG_DWARF) += dwarf-regs.o |
| 4 | libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o | 4 | perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o |
| 5 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 5 | perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
| 6 | 6 | ||
| 7 | libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ | 7 | perf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ |
| 8 | ../../arm/util/auxtrace.o \ | 8 | ../../arm/util/auxtrace.o \ |
| 9 | ../../arm/util/cs-etm.o \ | 9 | ../../arm/util/cs-etm.o \ |
| 10 | arm-spe.o | 10 | arm-spe.o |
diff --git a/tools/perf/arch/nds32/Build b/tools/perf/arch/nds32/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/nds32/Build +++ b/tools/perf/arch/nds32/Build | |||
| @@ -1 +1 @@ | |||
| libperf-y += util/ | perf-y += util/ | ||
diff --git a/tools/perf/arch/nds32/util/Build b/tools/perf/arch/nds32/util/Build index ca623bbf993c..d0bc205fe49a 100644 --- a/tools/perf/arch/nds32/util/Build +++ b/tools/perf/arch/nds32/util/Build | |||
| @@ -1 +1 @@ | |||
| libperf-y += header.o | perf-y += header.o | ||
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/powerpc/Build +++ b/tools/perf/arch/powerpc/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-y += util/ | 1 | perf-y += util/ |
| 2 | libperf-y += tests/ | 2 | perf-y += tests/ |
diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build index d827ef384b33..3526ab0af9f9 100644 --- a/tools/perf/arch/powerpc/tests/Build +++ b/tools/perf/arch/powerpc/tests/Build | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o | 1 | perf-$(CONFIG_DWARF_UNWIND) += regs_load.o |
| 2 | libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o | 2 | perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o |
| 3 | 3 | ||
| 4 | libperf-y += arch-tests.o | 4 | perf-y += arch-tests.o |
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c index 5f39efef0856..5c178e4a1995 100644 --- a/tools/perf/arch/powerpc/tests/dwarf-unwind.c +++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include "perf_regs.h" | 3 | #include "perf_regs.h" |
| 4 | #include "thread.h" | 4 | #include "thread.h" |
| 5 | #include "map.h" | 5 | #include "map.h" |
| 6 | #include "map_groups.h" | ||
| 6 | #include "event.h" | 7 | #include "event.h" |
| 7 | #include "debug.h" | 8 | #include "debug.h" |
| 8 | #include "tests/tests.h" | 9 | #include "tests/tests.h" |
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index ba98bd006488..7cf0b8803097 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build | |||
| @@ -1,11 +1,11 @@ | |||
| 1 | libperf-y += header.o | 1 | perf-y += header.o |
| 2 | libperf-y += sym-handling.o | 2 | perf-y += sym-handling.o |
| 3 | libperf-y += kvm-stat.o | 3 | perf-y += kvm-stat.o |
| 4 | libperf-y += perf_regs.o | 4 | perf-y += perf_regs.o |
| 5 | libperf-y += mem-events.o | 5 | perf-y += mem-events.o |
| 6 | 6 | ||
| 7 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 7 | perf-$(CONFIG_DWARF) += dwarf-regs.o |
| 8 | libperf-$(CONFIG_DWARF) += skip-callchain-idx.o | 8 | perf-$(CONFIG_DWARF) += skip-callchain-idx.o |
| 9 | 9 | ||
| 10 | libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o | 10 | perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o |
| 11 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 11 | perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c index 596ad6aedaac..f9db341c47b6 100644 --- a/tools/perf/arch/powerpc/util/kvm-stat.c +++ b/tools/perf/arch/powerpc/util/kvm-stat.c | |||
| @@ -3,6 +3,8 @@ | |||
| 3 | #include "util/kvm-stat.h" | 3 | #include "util/kvm-stat.h" |
| 4 | #include "util/parse-events.h" | 4 | #include "util/parse-events.h" |
| 5 | #include "util/debug.h" | 5 | #include "util/debug.h" |
| 6 | #include "util/evsel.h" | ||
| 7 | #include "util/evlist.h" | ||
| 6 | 8 | ||
| 7 | #include "book3s_hv_exits.h" | 9 | #include "book3s_hv_exits.h" |
| 8 | #include "book3s_hcalls.h" | 10 | #include "book3s_hcalls.h" |
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 7c6eeb4633fe..2918bb16c892 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c | |||
| @@ -16,6 +16,9 @@ | |||
| 16 | #include "util/thread.h" | 16 | #include "util/thread.h" |
| 17 | #include "util/callchain.h" | 17 | #include "util/callchain.h" |
| 18 | #include "util/debug.h" | 18 | #include "util/debug.h" |
| 19 | #include "util/dso.h" | ||
| 20 | #include "util/map.h" | ||
| 21 | #include "util/symbol.h" | ||
| 19 | 22 | ||
| 20 | /* | 23 | /* |
| 21 | * When saving the callchain on Power, the kernel conservatively saves | 24 | * When saving the callchain on Power, the kernel conservatively saves |
diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/s390/Build +++ b/tools/perf/arch/s390/Build | |||
| @@ -1 +1 @@ | |||
| libperf-y += util/ | perf-y += util/ | ||
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 4a233683c684..22797f043b84 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build | |||
| @@ -1,9 +1,9 @@ | |||
| 1 | libperf-y += header.o | 1 | perf-y += header.o |
| 2 | libperf-y += kvm-stat.o | 2 | perf-y += kvm-stat.o |
| 3 | 3 | ||
| 4 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 4 | perf-$(CONFIG_DWARF) += dwarf-regs.o |
| 5 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 5 | perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
| 6 | 6 | ||
| 7 | libperf-y += machine.o | 7 | perf-y += machine.o |
| 8 | 8 | ||
| 9 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o | 9 | perf-$(CONFIG_AUXTRACE) += auxtrace.o |
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c index aaabab5e2830..7e3961a4b292 100644 --- a/tools/perf/arch/s390/util/kvm-stat.c +++ b/tools/perf/arch/s390/util/kvm-stat.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | #include <errno.h> | 12 | #include <errno.h> |
| 13 | #include "../../util/kvm-stat.h" | 13 | #include "../../util/kvm-stat.h" |
| 14 | #include "../../util/evsel.h" | ||
| 14 | #include <asm/sie.h> | 15 | #include <asm/sie.h> |
| 15 | 16 | ||
| 16 | define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); | 17 | define_exit_reasons_table(sie_exit_reasons, sie_intercept_code); |
diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sh/Build +++ b/tools/perf/arch/sh/Build | |||
| @@ -1 +1 @@ | |||
| libperf-y += util/ | perf-y += util/ | ||
diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sh/util/Build +++ b/tools/perf/arch/sh/util/Build | |||
| @@ -1 +1 @@ | |||
| libperf-$(CONFIG_DWARF) += dwarf-regs.o | perf-$(CONFIG_DWARF) += dwarf-regs.o | ||
diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/sparc/Build +++ b/tools/perf/arch/sparc/Build | |||
| @@ -1 +1 @@ | |||
| libperf-y += util/ | perf-y += util/ | ||
diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/sparc/util/Build +++ b/tools/perf/arch/sparc/util/Build | |||
| @@ -1 +1 @@ | |||
| libperf-$(CONFIG_DWARF) += dwarf-regs.o | perf-$(CONFIG_DWARF) += dwarf-regs.o | ||
diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build index db52fa22d3a1..a7dd46a5b678 100644 --- a/tools/perf/arch/x86/Build +++ b/tools/perf/arch/x86/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-y += util/ | 1 | perf-y += util/ |
| 2 | libperf-y += tests/ | 2 | perf-y += tests/ |
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 586849ff83a0..3d83d0c6982d 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o | 1 | perf-$(CONFIG_DWARF_UNWIND) += regs_load.o |
| 2 | libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o | 2 | perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o |
| 3 | 3 | ||
| 4 | libperf-y += arch-tests.o | 4 | perf-y += arch-tests.o |
| 5 | libperf-y += rdpmc.o | 5 | perf-y += rdpmc.o |
| 6 | libperf-y += perf-time-to-tsc.o | 6 | perf-y += perf-time-to-tsc.o |
| 7 | libperf-$(CONFIG_AUXTRACE) += insn-x86.o | 7 | perf-$(CONFIG_AUXTRACE) += insn-x86.o |
| 8 | libperf-$(CONFIG_X86_64) += bp-modify.o | 8 | perf-$(CONFIG_X86_64) += bp-modify.o |
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c index 7879df34569a..6ad0a1cedb13 100644 --- a/tools/perf/arch/x86/tests/dwarf-unwind.c +++ b/tools/perf/arch/x86/tests/dwarf-unwind.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include "perf_regs.h" | 3 | #include "perf_regs.h" |
| 4 | #include "thread.h" | 4 | #include "thread.h" |
| 5 | #include "map.h" | 5 | #include "map.h" |
| 6 | #include "map_groups.h" | ||
| 6 | #include "event.h" | 7 | #include "event.h" |
| 7 | #include "debug.h" | 8 | #include "debug.h" |
| 8 | #include "tests/tests.h" | 9 | #include "tests/tests.h" |
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 844b8f335532..7aab0be5fc5f 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build | |||
| @@ -1,18 +1,18 @@ | |||
| 1 | libperf-y += header.o | 1 | perf-y += header.o |
| 2 | libperf-y += tsc.o | 2 | perf-y += tsc.o |
| 3 | libperf-y += pmu.o | 3 | perf-y += pmu.o |
| 4 | libperf-y += kvm-stat.o | 4 | perf-y += kvm-stat.o |
| 5 | libperf-y += perf_regs.o | 5 | perf-y += perf_regs.o |
| 6 | libperf-y += group.o | 6 | perf-y += group.o |
| 7 | libperf-y += machine.o | 7 | perf-y += machine.o |
| 8 | libperf-y += event.o | 8 | perf-y += event.o |
| 9 | 9 | ||
| 10 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 10 | perf-$(CONFIG_DWARF) += dwarf-regs.o |
| 11 | libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o | 11 | perf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o |
| 12 | 12 | ||
| 13 | libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o | 13 | perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o |
| 14 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 14 | perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
| 15 | 15 | ||
| 16 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o | 16 | perf-$(CONFIG_AUXTRACE) += auxtrace.o |
| 17 | libperf-$(CONFIG_AUXTRACE) += intel-pt.o | 17 | perf-$(CONFIG_AUXTRACE) += intel-pt.o |
| 18 | libperf-$(CONFIG_AUXTRACE) += intel-bts.o | 18 | perf-$(CONFIG_AUXTRACE) += intel-bts.o |
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c index 081353d7b095..865a9762f22e 100644 --- a/tools/perf/arch/x86/util/kvm-stat.c +++ b/tools/perf/arch/x86/util/kvm-stat.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <errno.h> | 2 | #include <errno.h> |
| 3 | #include "../../util/kvm-stat.h" | 3 | #include "../../util/kvm-stat.h" |
| 4 | #include "../../util/evsel.h" | ||
| 4 | #include <asm/svm.h> | 5 | #include <asm/svm.h> |
| 5 | #include <asm/vmx.h> | 6 | #include <asm/vmx.h> |
| 6 | #include <asm/kvm.h> | 7 | #include <asm/kvm.h> |
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build index 54afe4a467e7..e4e5f33c84d8 100644 --- a/tools/perf/arch/xtensa/Build +++ b/tools/perf/arch/xtensa/Build | |||
| @@ -1 +1 @@ | |||
| libperf-y += util/ | perf-y += util/ | ||
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build index 954e287bbb89..e813e618954b 100644 --- a/tools/perf/arch/xtensa/util/Build +++ b/tools/perf/arch/xtensa/util/Build | |||
| @@ -1 +1 @@ | |||
| libperf-$(CONFIG_DWARF) += dwarf-regs.o | perf-$(CONFIG_DWARF) += dwarf-regs.o | ||
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 93d679eaf1f4..67f9d9ffacfb 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "util/thread.h" | 27 | #include "util/thread.h" |
| 28 | #include "util/sort.h" | 28 | #include "util/sort.h" |
| 29 | #include "util/hist.h" | 29 | #include "util/hist.h" |
| 30 | #include "util/map.h" | ||
| 30 | #include "util/session.h" | 31 | #include "util/session.h" |
| 31 | #include "util/tool.h" | 32 | #include "util/tool.h" |
| 32 | #include "util/data.h" | 33 | #include "util/data.h" |
| @@ -227,7 +228,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, | |||
| 227 | * the DSO? | 228 | * the DSO? |
| 228 | */ | 229 | */ |
| 229 | if (al->sym != NULL) { | 230 | if (al->sym != NULL) { |
| 230 | rb_erase(&al->sym->rb_node, | 231 | rb_erase_cached(&al->sym->rb_node, |
| 231 | &al->map->dso->symbols); | 232 | &al->map->dso->symbols); |
| 232 | symbol__delete(al->sym); | 233 | symbol__delete(al->sym); |
| 233 | dso__reset_find_symbol_cache(al->map->dso); | 234 | dso__reset_find_symbol_cache(al->map->dso); |
| @@ -305,7 +306,7 @@ static void hists__find_annotations(struct hists *hists, | |||
| 305 | struct perf_evsel *evsel, | 306 | struct perf_evsel *evsel, |
| 306 | struct perf_annotate *ann) | 307 | struct perf_annotate *ann) |
| 307 | { | 308 | { |
| 308 | struct rb_node *nd = rb_first(&hists->entries), *next; | 309 | struct rb_node *nd = rb_first_cached(&hists->entries), *next; |
| 309 | int key = K_RIGHT; | 310 | int key = K_RIGHT; |
| 310 | 311 | ||
| 311 | while (nd) { | 312 | while (nd) { |
| @@ -440,7 +441,7 @@ static int __cmd_annotate(struct perf_annotate *ann) | |||
| 440 | } | 441 | } |
| 441 | 442 | ||
| 442 | if (total_nr_samples == 0) { | 443 | if (total_nr_samples == 0) { |
| 443 | ui__error("The %s file has no samples!\n", session->data->file.path); | 444 | ui__error("The %s data has no samples!\n", session->data->path); |
| 444 | goto out; | 445 | goto out; |
| 445 | } | 446 | } |
| 446 | 447 | ||
| @@ -577,7 +578,7 @@ int cmd_annotate(int argc, const char **argv) | |||
| 577 | if (quiet) | 578 | if (quiet) |
| 578 | perf_quiet_option(); | 579 | perf_quiet_option(); |
| 579 | 580 | ||
| 580 | data.file.path = input_name; | 581 | data.path = input_name; |
| 581 | 582 | ||
| 582 | annotate.session = perf_session__new(&data, false, &annotate.tool); | 583 | annotate.session = perf_session__new(&data, false, &annotate.tool); |
| 583 | if (annotate.session == NULL) | 584 | if (annotate.session == NULL) |
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 115110a4796a..10457b10e568 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c | |||
| @@ -416,8 +416,8 @@ int cmd_buildid_cache(int argc, const char **argv) | |||
| 416 | nsi = nsinfo__new(ns_id); | 416 | nsi = nsinfo__new(ns_id); |
| 417 | 417 | ||
| 418 | if (missing_filename) { | 418 | if (missing_filename) { |
| 419 | data.file.path = missing_filename; | 419 | data.path = missing_filename; |
| 420 | data.force = force; | 420 | data.force = force; |
| 421 | 421 | ||
| 422 | session = perf_session__new(&data, false, NULL); | 422 | session = perf_session__new(&data, false, NULL); |
| 423 | if (session == NULL) | 423 | if (session == NULL) |
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 78abbe8d9d5f..f403e19488b5 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c | |||
| @@ -52,11 +52,9 @@ static int perf_session__list_build_ids(bool force, bool with_hits) | |||
| 52 | { | 52 | { |
| 53 | struct perf_session *session; | 53 | struct perf_session *session; |
| 54 | struct perf_data data = { | 54 | struct perf_data data = { |
| 55 | .file = { | 55 | .path = input_name, |
| 56 | .path = input_name, | 56 | .mode = PERF_DATA_MODE_READ, |
| 57 | }, | 57 | .force = force, |
| 58 | .mode = PERF_DATA_MODE_READ, | ||
| 59 | .force = force, | ||
| 60 | }; | 58 | }; |
| 61 | 59 | ||
| 62 | symbol__elf_init(); | 60 | symbol__elf_init(); |
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index d340d2e42776..4272763a5e96 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include "ui/browsers/hists.h" | 33 | #include "ui/browsers/hists.h" |
| 34 | #include "thread.h" | 34 | #include "thread.h" |
| 35 | #include "mem2node.h" | 35 | #include "mem2node.h" |
| 36 | #include "symbol.h" | ||
| 36 | 37 | ||
| 37 | struct c2c_hists { | 38 | struct c2c_hists { |
| 38 | struct hists hists; | 39 | struct hists hists; |
| @@ -1969,7 +1970,7 @@ static void calc_width(struct c2c_hist_entry *c2c_he) | |||
| 1969 | set_nodestr(c2c_he); | 1970 | set_nodestr(c2c_he); |
| 1970 | } | 1971 | } |
| 1971 | 1972 | ||
| 1972 | static int filter_cb(struct hist_entry *he) | 1973 | static int filter_cb(struct hist_entry *he, void *arg __maybe_unused) |
| 1973 | { | 1974 | { |
| 1974 | struct c2c_hist_entry *c2c_he; | 1975 | struct c2c_hist_entry *c2c_he; |
| 1975 | 1976 | ||
| @@ -1986,7 +1987,7 @@ static int filter_cb(struct hist_entry *he) | |||
| 1986 | return 0; | 1987 | return 0; |
| 1987 | } | 1988 | } |
| 1988 | 1989 | ||
| 1989 | static int resort_cl_cb(struct hist_entry *he) | 1990 | static int resort_cl_cb(struct hist_entry *he, void *arg __maybe_unused) |
| 1990 | { | 1991 | { |
| 1991 | struct c2c_hist_entry *c2c_he; | 1992 | struct c2c_hist_entry *c2c_he; |
| 1992 | struct c2c_hists *c2c_hists; | 1993 | struct c2c_hists *c2c_hists; |
| @@ -2073,7 +2074,7 @@ static int setup_nodes(struct perf_session *session) | |||
| 2073 | 2074 | ||
| 2074 | #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) | 2075 | #define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) |
| 2075 | 2076 | ||
| 2076 | static int resort_hitm_cb(struct hist_entry *he) | 2077 | static int resort_hitm_cb(struct hist_entry *he, void *arg __maybe_unused) |
| 2077 | { | 2078 | { |
| 2078 | struct c2c_hist_entry *c2c_he; | 2079 | struct c2c_hist_entry *c2c_he; |
| 2079 | c2c_he = container_of(he, struct c2c_hist_entry, he); | 2080 | c2c_he = container_of(he, struct c2c_hist_entry, he); |
| @@ -2088,14 +2089,14 @@ static int resort_hitm_cb(struct hist_entry *he) | |||
| 2088 | 2089 | ||
| 2089 | static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) | 2090 | static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) |
| 2090 | { | 2091 | { |
| 2091 | struct rb_node *next = rb_first(&hists->entries); | 2092 | struct rb_node *next = rb_first_cached(&hists->entries); |
| 2092 | int ret = 0; | 2093 | int ret = 0; |
| 2093 | 2094 | ||
| 2094 | while (next) { | 2095 | while (next) { |
| 2095 | struct hist_entry *he; | 2096 | struct hist_entry *he; |
| 2096 | 2097 | ||
| 2097 | he = rb_entry(next, struct hist_entry, rb_node); | 2098 | he = rb_entry(next, struct hist_entry, rb_node); |
| 2098 | ret = cb(he); | 2099 | ret = cb(he, NULL); |
| 2099 | if (ret) | 2100 | if (ret) |
| 2100 | break; | 2101 | break; |
| 2101 | next = rb_next(&he->rb_node); | 2102 | next = rb_next(&he->rb_node); |
| @@ -2215,7 +2216,7 @@ static void print_pareto(FILE *out) | |||
| 2215 | if (WARN_ONCE(ret, "failed to setup sort entries\n")) | 2216 | if (WARN_ONCE(ret, "failed to setup sort entries\n")) |
| 2216 | return; | 2217 | return; |
| 2217 | 2218 | ||
| 2218 | nd = rb_first(&c2c.hists.hists.entries); | 2219 | nd = rb_first_cached(&c2c.hists.hists.entries); |
| 2219 | 2220 | ||
| 2220 | for (; nd; nd = rb_next(nd)) { | 2221 | for (; nd; nd = rb_next(nd)) { |
| 2221 | struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); | 2222 | struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); |
| @@ -2283,7 +2284,7 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) | |||
| 2283 | static void c2c_browser__update_nr_entries(struct hist_browser *hb) | 2284 | static void c2c_browser__update_nr_entries(struct hist_browser *hb) |
| 2284 | { | 2285 | { |
| 2285 | u64 nr_entries = 0; | 2286 | u64 nr_entries = 0; |
| 2286 | struct rb_node *nd = rb_first(&hb->hists->entries); | 2287 | struct rb_node *nd = rb_first_cached(&hb->hists->entries); |
| 2287 | 2288 | ||
| 2288 | while (nd) { | 2289 | while (nd) { |
| 2289 | struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); | 2290 | struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); |
| @@ -2343,7 +2344,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) | |||
| 2343 | struct c2c_cacheline_browser *cl_browser; | 2344 | struct c2c_cacheline_browser *cl_browser; |
| 2344 | struct hist_browser *browser; | 2345 | struct hist_browser *browser; |
| 2345 | int key = -1; | 2346 | int key = -1; |
| 2346 | const char help[] = | 2347 | static const char help[] = |
| 2347 | " ENTER Toggle callchains (if present) \n" | 2348 | " ENTER Toggle callchains (if present) \n" |
| 2348 | " n Toggle Node details info \n" | 2349 | " n Toggle Node details info \n" |
| 2349 | " s Toggle full length of symbol and source line columns \n" | 2350 | " s Toggle full length of symbol and source line columns \n" |
| @@ -2424,7 +2425,7 @@ static int perf_c2c__hists_browse(struct hists *hists) | |||
| 2424 | { | 2425 | { |
| 2425 | struct hist_browser *browser; | 2426 | struct hist_browser *browser; |
| 2426 | int key = -1; | 2427 | int key = -1; |
| 2427 | const char help[] = | 2428 | static const char help[] = |
| 2428 | " d Display cacheline details \n" | 2429 | " d Display cacheline details \n" |
| 2429 | " ENTER Toggle callchains (if present) \n" | 2430 | " ENTER Toggle callchains (if present) \n" |
| 2430 | " q Quit \n"; | 2431 | " q Quit \n"; |
| @@ -2749,8 +2750,8 @@ static int perf_c2c__report(int argc, const char **argv) | |||
| 2749 | if (!input_name || !strlen(input_name)) | 2750 | if (!input_name || !strlen(input_name)) |
| 2750 | input_name = "perf.data"; | 2751 | input_name = "perf.data"; |
| 2751 | 2752 | ||
| 2752 | data.file.path = input_name; | 2753 | data.path = input_name; |
| 2753 | data.force = symbol_conf.force; | 2754 | data.force = symbol_conf.force; |
| 2754 | 2755 | ||
| 2755 | err = setup_display(display); | 2756 | err = setup_display(display); |
| 2756 | if (err) | 2757 | if (err) |
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 39db2ee32d48..58fe0e88215c 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c | |||
| @@ -429,7 +429,7 @@ get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt) | |||
| 429 | 429 | ||
| 430 | static void hists__baseline_only(struct hists *hists) | 430 | static void hists__baseline_only(struct hists *hists) |
| 431 | { | 431 | { |
| 432 | struct rb_root *root; | 432 | struct rb_root_cached *root; |
| 433 | struct rb_node *next; | 433 | struct rb_node *next; |
| 434 | 434 | ||
| 435 | if (hists__has(hists, need_collapse)) | 435 | if (hists__has(hists, need_collapse)) |
| @@ -437,13 +437,13 @@ static void hists__baseline_only(struct hists *hists) | |||
| 437 | else | 437 | else |
| 438 | root = hists->entries_in; | 438 | root = hists->entries_in; |
| 439 | 439 | ||
| 440 | next = rb_first(root); | 440 | next = rb_first_cached(root); |
| 441 | while (next != NULL) { | 441 | while (next != NULL) { |
| 442 | struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in); | 442 | struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in); |
| 443 | 443 | ||
| 444 | next = rb_next(&he->rb_node_in); | 444 | next = rb_next(&he->rb_node_in); |
| 445 | if (!hist_entry__next_pair(he)) { | 445 | if (!hist_entry__next_pair(he)) { |
| 446 | rb_erase(&he->rb_node_in, root); | 446 | rb_erase_cached(&he->rb_node_in, root); |
| 447 | hist_entry__delete(he); | 447 | hist_entry__delete(he); |
| 448 | } | 448 | } |
| 449 | } | 449 | } |
| @@ -451,7 +451,7 @@ static void hists__baseline_only(struct hists *hists) | |||
| 451 | 451 | ||
| 452 | static void hists__precompute(struct hists *hists) | 452 | static void hists__precompute(struct hists *hists) |
| 453 | { | 453 | { |
| 454 | struct rb_root *root; | 454 | struct rb_root_cached *root; |
| 455 | struct rb_node *next; | 455 | struct rb_node *next; |
| 456 | 456 | ||
| 457 | if (hists__has(hists, need_collapse)) | 457 | if (hists__has(hists, need_collapse)) |
| @@ -459,7 +459,7 @@ static void hists__precompute(struct hists *hists) | |||
| 459 | else | 459 | else |
| 460 | root = hists->entries_in; | 460 | root = hists->entries_in; |
| 461 | 461 | ||
| 462 | next = rb_first(root); | 462 | next = rb_first_cached(root); |
| 463 | while (next != NULL) { | 463 | while (next != NULL) { |
| 464 | struct hist_entry *he, *pair; | 464 | struct hist_entry *he, *pair; |
| 465 | struct data__file *d; | 465 | struct data__file *d; |
| @@ -708,7 +708,7 @@ static void data__fprintf(void) | |||
| 708 | 708 | ||
| 709 | data__for_each_file(i, d) | 709 | data__for_each_file(i, d) |
| 710 | fprintf(stdout, "# [%d] %s %s\n", | 710 | fprintf(stdout, "# [%d] %s %s\n", |
| 711 | d->idx, d->data.file.path, | 711 | d->idx, d->data.path, |
| 712 | !d->idx ? "(Baseline)" : ""); | 712 | !d->idx ? "(Baseline)" : ""); |
| 713 | 713 | ||
| 714 | fprintf(stdout, "#\n"); | 714 | fprintf(stdout, "#\n"); |
| @@ -779,14 +779,14 @@ static int __cmd_diff(void) | |||
| 779 | data__for_each_file(i, d) { | 779 | data__for_each_file(i, d) { |
| 780 | d->session = perf_session__new(&d->data, false, &tool); | 780 | d->session = perf_session__new(&d->data, false, &tool); |
| 781 | if (!d->session) { | 781 | if (!d->session) { |
| 782 | pr_err("Failed to open %s\n", d->data.file.path); | 782 | pr_err("Failed to open %s\n", d->data.path); |
| 783 | ret = -1; | 783 | ret = -1; |
| 784 | goto out_delete; | 784 | goto out_delete; |
| 785 | } | 785 | } |
| 786 | 786 | ||
| 787 | ret = perf_session__process_events(d->session); | 787 | ret = perf_session__process_events(d->session); |
| 788 | if (ret) { | 788 | if (ret) { |
| 789 | pr_err("Failed to process %s\n", d->data.file.path); | 789 | pr_err("Failed to process %s\n", d->data.path); |
| 790 | goto out_delete; | 790 | goto out_delete; |
| 791 | } | 791 | } |
| 792 | 792 | ||
| @@ -1289,9 +1289,9 @@ static int data_init(int argc, const char **argv) | |||
| 1289 | data__for_each_file(i, d) { | 1289 | data__for_each_file(i, d) { |
| 1290 | struct perf_data *data = &d->data; | 1290 | struct perf_data *data = &d->data; |
| 1291 | 1291 | ||
| 1292 | data->file.path = use_default ? defaults[i] : argv[i]; | 1292 | data->path = use_default ? defaults[i] : argv[i]; |
| 1293 | data->mode = PERF_DATA_MODE_READ, | 1293 | data->mode = PERF_DATA_MODE_READ, |
| 1294 | data->force = force, | 1294 | data->force = force, |
| 1295 | 1295 | ||
| 1296 | d->idx = i; | 1296 | d->idx = i; |
| 1297 | } | 1297 | } |
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index e06e822ce634..6e4f63b0da4a 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c | |||
| @@ -23,9 +23,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details | |||
| 23 | struct perf_session *session; | 23 | struct perf_session *session; |
| 24 | struct perf_evsel *pos; | 24 | struct perf_evsel *pos; |
| 25 | struct perf_data data = { | 25 | struct perf_data data = { |
| 26 | .file = { | 26 | .path = file_name, |
| 27 | .path = file_name, | ||
| 28 | }, | ||
| 29 | .mode = PERF_DATA_MODE_READ, | 27 | .mode = PERF_DATA_MODE_READ, |
| 30 | .force = details->force, | 28 | .force = details->force, |
| 31 | }; | 29 | }; |
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index eda41673c4f3..24086b7f1b14 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "util/color.h" | 12 | #include "util/color.h" |
| 13 | #include "util/evlist.h" | 13 | #include "util/evlist.h" |
| 14 | #include "util/evsel.h" | 14 | #include "util/evsel.h" |
| 15 | #include "util/map.h" | ||
| 15 | #include "util/session.h" | 16 | #include "util/session.h" |
| 16 | #include "util/tool.h" | 17 | #include "util/tool.h" |
| 17 | #include "util/debug.h" | 18 | #include "util/debug.h" |
| @@ -19,6 +20,7 @@ | |||
| 19 | #include "util/data.h" | 20 | #include "util/data.h" |
| 20 | #include "util/auxtrace.h" | 21 | #include "util/auxtrace.h" |
| 21 | #include "util/jit.h" | 22 | #include "util/jit.h" |
| 23 | #include "util/symbol.h" | ||
| 22 | #include "util/thread.h" | 24 | #include "util/thread.h" |
| 23 | 25 | ||
| 24 | #include <subcmd/parse-options.h> | 26 | #include <subcmd/parse-options.h> |
| @@ -768,10 +770,8 @@ int cmd_inject(int argc, const char **argv) | |||
| 768 | .input_name = "-", | 770 | .input_name = "-", |
| 769 | .samples = LIST_HEAD_INIT(inject.samples), | 771 | .samples = LIST_HEAD_INIT(inject.samples), |
| 770 | .output = { | 772 | .output = { |
| 771 | .file = { | 773 | .path = "-", |
| 772 | .path = "-", | 774 | .mode = PERF_DATA_MODE_WRITE, |
| 773 | }, | ||
| 774 | .mode = PERF_DATA_MODE_WRITE, | ||
| 775 | }, | 775 | }, |
| 776 | }; | 776 | }; |
| 777 | struct perf_data data = { | 777 | struct perf_data data = { |
| @@ -784,7 +784,7 @@ int cmd_inject(int argc, const char **argv) | |||
| 784 | "Inject build-ids into the output stream"), | 784 | "Inject build-ids into the output stream"), |
| 785 | OPT_STRING('i', "input", &inject.input_name, "file", | 785 | OPT_STRING('i', "input", &inject.input_name, "file", |
| 786 | "input file name"), | 786 | "input file name"), |
| 787 | OPT_STRING('o', "output", &inject.output.file.path, "file", | 787 | OPT_STRING('o', "output", &inject.output.path, "file", |
| 788 | "output file name"), | 788 | "output file name"), |
| 789 | OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, | 789 | OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat, |
| 790 | "Merge sched-stat and sched-switch for getting events " | 790 | "Merge sched-stat and sched-switch for getting events " |
| @@ -832,7 +832,7 @@ int cmd_inject(int argc, const char **argv) | |||
| 832 | 832 | ||
| 833 | inject.tool.ordered_events = inject.sched_stat; | 833 | inject.tool.ordered_events = inject.sched_stat; |
| 834 | 834 | ||
| 835 | data.file.path = inject.input_name; | 835 | data.path = inject.input_name; |
| 836 | inject.session = perf_session__new(&data, true, &inject.tool); | 836 | inject.session = perf_session__new(&data, true, &inject.tool); |
| 837 | if (inject.session == NULL) | 837 | if (inject.session == NULL) |
| 838 | return -1; | 838 | return -1; |
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c index 90d1a2305b72..bc7a2bc7aed7 100644 --- a/tools/perf/builtin-kallsyms.c +++ b/tools/perf/builtin-kallsyms.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <subcmd/parse-options.h> | 13 | #include <subcmd/parse-options.h> |
| 14 | #include "debug.h" | 14 | #include "debug.h" |
| 15 | #include "machine.h" | 15 | #include "machine.h" |
| 16 | #include "map.h" | ||
| 16 | #include "symbol.h" | 17 | #include "symbol.h" |
| 17 | 18 | ||
| 18 | static int __cmd_kallsyms(int argc, const char **argv) | 19 | static int __cmd_kallsyms(int argc, const char **argv) |
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index b63bca4b0c2a..fa520f4b8095 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "util/evsel.h" | 6 | #include "util/evsel.h" |
| 7 | #include "util/util.h" | 7 | #include "util/util.h" |
| 8 | #include "util/config.h" | 8 | #include "util/config.h" |
| 9 | #include "util/map.h" | ||
| 9 | #include "util/symbol.h" | 10 | #include "util/symbol.h" |
| 10 | #include "util/thread.h" | 11 | #include "util/thread.h" |
| 11 | #include "util/header.h" | 12 | #include "util/header.h" |
| @@ -334,7 +335,7 @@ static int build_alloc_func_list(void) | |||
| 334 | struct alloc_func *func; | 335 | struct alloc_func *func; |
| 335 | struct machine *machine = &kmem_session->machines.host; | 336 | struct machine *machine = &kmem_session->machines.host; |
| 336 | regex_t alloc_func_regex; | 337 | regex_t alloc_func_regex; |
| 337 | const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; | 338 | static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; |
| 338 | 339 | ||
| 339 | ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); | 340 | ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); |
| 340 | if (ret) { | 341 | if (ret) { |
| @@ -1924,7 +1925,7 @@ int cmd_kmem(int argc, const char **argv) | |||
| 1924 | NULL | 1925 | NULL |
| 1925 | }; | 1926 | }; |
| 1926 | struct perf_session *session; | 1927 | struct perf_session *session; |
| 1927 | const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n"; | 1928 | static const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n"; |
| 1928 | int ret = perf_config(kmem_config, NULL); | 1929 | int ret = perf_config(kmem_config, NULL); |
| 1929 | 1930 | ||
| 1930 | if (ret) | 1931 | if (ret) |
| @@ -1948,7 +1949,7 @@ int cmd_kmem(int argc, const char **argv) | |||
| 1948 | return __cmd_record(argc, argv); | 1949 | return __cmd_record(argc, argv); |
| 1949 | } | 1950 | } |
| 1950 | 1951 | ||
| 1951 | data.file.path = input_name; | 1952 | data.path = input_name; |
| 1952 | 1953 | ||
| 1953 | kmem_session = session = perf_session__new(&data, false, &perf_kmem); | 1954 | kmem_session = session = perf_session__new(&data, false, &perf_kmem); |
| 1954 | if (session == NULL) | 1955 | if (session == NULL) |
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 3d4cbc4e87c7..dbb6f737a3e2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c | |||
| @@ -1080,11 +1080,9 @@ static int read_events(struct perf_kvm_stat *kvm) | |||
| 1080 | .ordered_events = true, | 1080 | .ordered_events = true, |
| 1081 | }; | 1081 | }; |
| 1082 | struct perf_data file = { | 1082 | struct perf_data file = { |
| 1083 | .file = { | 1083 | .path = kvm->file_name, |
| 1084 | .path = kvm->file_name, | 1084 | .mode = PERF_DATA_MODE_READ, |
| 1085 | }, | 1085 | .force = kvm->force, |
| 1086 | .mode = PERF_DATA_MODE_READ, | ||
| 1087 | .force = kvm->force, | ||
| 1088 | }; | 1086 | }; |
| 1089 | 1087 | ||
| 1090 | kvm->tool = eops; | 1088 | kvm->tool = eops; |
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index ead221e49f00..c9f98d00c0e9 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c | |||
| @@ -82,9 +82,9 @@ int cmd_list(int argc, const char **argv) | |||
| 82 | else if (strcmp(argv[i], "sdt") == 0) | 82 | else if (strcmp(argv[i], "sdt") == 0) |
| 83 | print_sdt_events(NULL, NULL, raw_dump); | 83 | print_sdt_events(NULL, NULL, raw_dump); |
| 84 | else if (strcmp(argv[i], "metric") == 0) | 84 | else if (strcmp(argv[i], "metric") == 0) |
| 85 | metricgroup__print(true, false, NULL, raw_dump); | 85 | metricgroup__print(true, false, NULL, raw_dump, details_flag); |
| 86 | else if (strcmp(argv[i], "metricgroup") == 0) | 86 | else if (strcmp(argv[i], "metricgroup") == 0) |
| 87 | metricgroup__print(false, true, NULL, raw_dump); | 87 | metricgroup__print(false, true, NULL, raw_dump, details_flag); |
| 88 | else if ((sep = strchr(argv[i], ':')) != NULL) { | 88 | else if ((sep = strchr(argv[i], ':')) != NULL) { |
| 89 | int sep_idx; | 89 | int sep_idx; |
| 90 | 90 | ||
| @@ -102,7 +102,7 @@ int cmd_list(int argc, const char **argv) | |||
| 102 | s[sep_idx] = '\0'; | 102 | s[sep_idx] = '\0'; |
| 103 | print_tracepoint_events(s, s + sep_idx + 1, raw_dump); | 103 | print_tracepoint_events(s, s + sep_idx + 1, raw_dump); |
| 104 | print_sdt_events(s, s + sep_idx + 1, raw_dump); | 104 | print_sdt_events(s, s + sep_idx + 1, raw_dump); |
| 105 | metricgroup__print(true, true, s, raw_dump); | 105 | metricgroup__print(true, true, s, raw_dump, details_flag); |
| 106 | free(s); | 106 | free(s); |
| 107 | } else { | 107 | } else { |
| 108 | if (asprintf(&s, "*%s*", argv[i]) < 0) { | 108 | if (asprintf(&s, "*%s*", argv[i]) < 0) { |
| @@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) | |||
| 119 | details_flag); | 119 | details_flag); |
| 120 | print_tracepoint_events(NULL, s, raw_dump); | 120 | print_tracepoint_events(NULL, s, raw_dump); |
| 121 | print_sdt_events(NULL, s, raw_dump); | 121 | print_sdt_events(NULL, s, raw_dump); |
| 122 | metricgroup__print(true, true, NULL, raw_dump); | 122 | metricgroup__print(true, true, NULL, raw_dump, details_flag); |
| 123 | free(s); | 123 | free(s); |
| 124 | } | 124 | } |
| 125 | } | 125 | } |
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 6e0189df2b3b..b9810a8d350a 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c | |||
| @@ -866,11 +866,9 @@ static int __cmd_report(bool display_info) | |||
| 866 | .ordered_events = true, | 866 | .ordered_events = true, |
| 867 | }; | 867 | }; |
| 868 | struct perf_data data = { | 868 | struct perf_data data = { |
| 869 | .file = { | 869 | .path = input_name, |
| 870 | .path = input_name, | 870 | .mode = PERF_DATA_MODE_READ, |
| 871 | }, | 871 | .force = force, |
| 872 | .mode = PERF_DATA_MODE_READ, | ||
| 873 | .force = force, | ||
| 874 | }; | 872 | }; |
| 875 | 873 | ||
| 876 | session = perf_session__new(&data, false, &eops); | 874 | session = perf_session__new(&data, false, &eops); |
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 57393e94d156..f45c8b502f63 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "util/data.h" | 13 | #include "util/data.h" |
| 14 | #include "util/mem-events.h" | 14 | #include "util/mem-events.h" |
| 15 | #include "util/debug.h" | 15 | #include "util/debug.h" |
| 16 | #include "util/map.h" | ||
| 16 | #include "util/symbol.h" | 17 | #include "util/symbol.h" |
| 17 | 18 | ||
| 18 | #define MEM_OPERATION_LOAD 0x1 | 19 | #define MEM_OPERATION_LOAD 0x1 |
| @@ -238,11 +239,9 @@ static int process_sample_event(struct perf_tool *tool, | |||
| 238 | static int report_raw_events(struct perf_mem *mem) | 239 | static int report_raw_events(struct perf_mem *mem) |
| 239 | { | 240 | { |
| 240 | struct perf_data data = { | 241 | struct perf_data data = { |
| 241 | .file = { | 242 | .path = input_name, |
| 242 | .path = input_name, | 243 | .mode = PERF_DATA_MODE_READ, |
| 243 | }, | 244 | .force = mem->force, |
| 244 | .mode = PERF_DATA_MODE_READ, | ||
| 245 | .force = mem->force, | ||
| 246 | }; | 245 | }; |
| 247 | int ret; | 246 | int ret; |
| 248 | struct perf_session *session = perf_session__new(&data, false, | 247 | struct perf_session *session = perf_session__new(&data, false, |
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 99de91698de1..46d3c2deeb40 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c | |||
| @@ -32,6 +32,7 @@ | |||
| 32 | 32 | ||
| 33 | #include "perf.h" | 33 | #include "perf.h" |
| 34 | #include "builtin.h" | 34 | #include "builtin.h" |
| 35 | #include "namespaces.h" | ||
| 35 | #include "util/util.h" | 36 | #include "util/util.h" |
| 36 | #include "util/strlist.h" | 37 | #include "util/strlist.h" |
| 37 | #include "util/strfilter.h" | 38 | #include "util/strfilter.h" |
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 882285fb9f64..f3f7f3100336 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
| @@ -23,7 +23,6 @@ | |||
| 23 | #include "util/evlist.h" | 23 | #include "util/evlist.h" |
| 24 | #include "util/evsel.h" | 24 | #include "util/evsel.h" |
| 25 | #include "util/debug.h" | 25 | #include "util/debug.h" |
| 26 | #include "util/drv_configs.h" | ||
| 27 | #include "util/session.h" | 26 | #include "util/session.h" |
| 28 | #include "util/tool.h" | 27 | #include "util/tool.h" |
| 29 | #include "util/symbol.h" | 28 | #include "util/symbol.h" |
| @@ -39,8 +38,10 @@ | |||
| 39 | #include "util/bpf-loader.h" | 38 | #include "util/bpf-loader.h" |
| 40 | #include "util/trigger.h" | 39 | #include "util/trigger.h" |
| 41 | #include "util/perf-hooks.h" | 40 | #include "util/perf-hooks.h" |
| 41 | #include "util/cpu-set-sched.h" | ||
| 42 | #include "util/time-utils.h" | 42 | #include "util/time-utils.h" |
| 43 | #include "util/units.h" | 43 | #include "util/units.h" |
| 44 | #include "util/bpf-event.h" | ||
| 44 | #include "asm/bug.h" | 45 | #include "asm/bug.h" |
| 45 | 46 | ||
| 46 | #include <errno.h> | 47 | #include <errno.h> |
| @@ -81,12 +82,17 @@ struct record { | |||
| 81 | bool timestamp_boundary; | 82 | bool timestamp_boundary; |
| 82 | struct switch_output switch_output; | 83 | struct switch_output switch_output; |
| 83 | unsigned long long samples; | 84 | unsigned long long samples; |
| 85 | cpu_set_t affinity_mask; | ||
| 84 | }; | 86 | }; |
| 85 | 87 | ||
| 86 | static volatile int auxtrace_record__snapshot_started; | 88 | static volatile int auxtrace_record__snapshot_started; |
| 87 | static DEFINE_TRIGGER(auxtrace_snapshot_trigger); | 89 | static DEFINE_TRIGGER(auxtrace_snapshot_trigger); |
| 88 | static DEFINE_TRIGGER(switch_output_trigger); | 90 | static DEFINE_TRIGGER(switch_output_trigger); |
| 89 | 91 | ||
| 92 | static const char *affinity_tags[PERF_AFFINITY_MAX] = { | ||
| 93 | "SYS", "NODE", "CPU" | ||
| 94 | }; | ||
| 95 | |||
| 90 | static bool switch_output_signal(struct record *rec) | 96 | static bool switch_output_signal(struct record *rec) |
| 91 | { | 97 | { |
| 92 | return rec->switch_output.signal && | 98 | return rec->switch_output.signal && |
| @@ -531,9 +537,13 @@ static int record__mmap_evlist(struct record *rec, | |||
| 531 | struct record_opts *opts = &rec->opts; | 537 | struct record_opts *opts = &rec->opts; |
| 532 | char msg[512]; | 538 | char msg[512]; |
| 533 | 539 | ||
| 540 | if (opts->affinity != PERF_AFFINITY_SYS) | ||
| 541 | cpu__setup_cpunode_map(); | ||
| 542 | |||
| 534 | if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, | 543 | if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, |
| 535 | opts->auxtrace_mmap_pages, | 544 | opts->auxtrace_mmap_pages, |
| 536 | opts->auxtrace_snapshot_mode, opts->nr_cblocks) < 0) { | 545 | opts->auxtrace_snapshot_mode, |
| 546 | opts->nr_cblocks, opts->affinity) < 0) { | ||
| 537 | if (errno == EPERM) { | 547 | if (errno == EPERM) { |
| 538 | pr_err("Permission error mapping pages.\n" | 548 | pr_err("Permission error mapping pages.\n" |
| 539 | "Consider increasing " | 549 | "Consider increasing " |
| @@ -566,7 +576,6 @@ static int record__open(struct record *rec) | |||
| 566 | struct perf_evlist *evlist = rec->evlist; | 576 | struct perf_evlist *evlist = rec->evlist; |
| 567 | struct perf_session *session = rec->session; | 577 | struct perf_session *session = rec->session; |
| 568 | struct record_opts *opts = &rec->opts; | 578 | struct record_opts *opts = &rec->opts; |
| 569 | struct perf_evsel_config_term *err_term; | ||
| 570 | int rc = 0; | 579 | int rc = 0; |
| 571 | 580 | ||
| 572 | /* | 581 | /* |
| @@ -619,14 +628,6 @@ try_again: | |||
| 619 | goto out; | 628 | goto out; |
| 620 | } | 629 | } |
| 621 | 630 | ||
| 622 | if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) { | ||
| 623 | pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", | ||
| 624 | err_term->val.drv_cfg, perf_evsel__name(pos), errno, | ||
| 625 | str_error_r(errno, msg, sizeof(msg))); | ||
| 626 | rc = -1; | ||
| 627 | goto out; | ||
| 628 | } | ||
| 629 | |||
| 630 | rc = record__mmap(rec); | 631 | rc = record__mmap(rec); |
| 631 | if (rc) | 632 | if (rc) |
| 632 | goto out; | 633 | goto out; |
| @@ -659,10 +660,9 @@ static int process_sample_event(struct perf_tool *tool, | |||
| 659 | 660 | ||
| 660 | static int process_buildids(struct record *rec) | 661 | static int process_buildids(struct record *rec) |
| 661 | { | 662 | { |
| 662 | struct perf_data *data = &rec->data; | ||
| 663 | struct perf_session *session = rec->session; | 663 | struct perf_session *session = rec->session; |
| 664 | 664 | ||
| 665 | if (data->size == 0) | 665 | if (perf_data__size(&rec->data) == 0) |
| 666 | return 0; | 666 | return 0; |
| 667 | 667 | ||
| 668 | /* | 668 | /* |
| @@ -722,6 +722,16 @@ static struct perf_event_header finished_round_event = { | |||
| 722 | .type = PERF_RECORD_FINISHED_ROUND, | 722 | .type = PERF_RECORD_FINISHED_ROUND, |
| 723 | }; | 723 | }; |
| 724 | 724 | ||
| 725 | static void record__adjust_affinity(struct record *rec, struct perf_mmap *map) | ||
| 726 | { | ||
| 727 | if (rec->opts.affinity != PERF_AFFINITY_SYS && | ||
| 728 | !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) { | ||
| 729 | CPU_ZERO(&rec->affinity_mask); | ||
| 730 | CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask); | ||
| 731 | sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask); | ||
| 732 | } | ||
| 733 | } | ||
| 734 | |||
| 725 | static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, | 735 | static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, |
| 726 | bool overwrite) | 736 | bool overwrite) |
| 727 | { | 737 | { |
| @@ -749,6 +759,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli | |||
| 749 | struct perf_mmap *map = &maps[i]; | 759 | struct perf_mmap *map = &maps[i]; |
| 750 | 760 | ||
| 751 | if (map->base) { | 761 | if (map->base) { |
| 762 | record__adjust_affinity(rec, map); | ||
| 752 | if (!record__aio_enabled(rec)) { | 763 | if (!record__aio_enabled(rec)) { |
| 753 | if (perf_mmap__push(map, rec, record__pushfn) != 0) { | 764 | if (perf_mmap__push(map, rec, record__pushfn) != 0) { |
| 754 | rc = -1; | 765 | rc = -1; |
| @@ -839,7 +850,7 @@ record__finish_output(struct record *rec) | |||
| 839 | return; | 850 | return; |
| 840 | 851 | ||
| 841 | rec->session->header.data_size += rec->bytes_written; | 852 | rec->session->header.data_size += rec->bytes_written; |
| 842 | data->size = lseek(perf_data__fd(data), 0, SEEK_CUR); | 853 | data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR); |
| 843 | 854 | ||
| 844 | if (!rec->no_buildid) { | 855 | if (!rec->no_buildid) { |
| 845 | process_buildids(rec); | 856 | process_buildids(rec); |
| @@ -907,7 +918,7 @@ record__switch_output(struct record *rec, bool at_exit) | |||
| 907 | 918 | ||
| 908 | if (!quiet) | 919 | if (!quiet) |
| 909 | fprintf(stderr, "[ perf record: Dump %s.%s ]\n", | 920 | fprintf(stderr, "[ perf record: Dump %s.%s ]\n", |
| 910 | data->file.path, timestamp); | 921 | data->path, timestamp); |
| 911 | 922 | ||
| 912 | /* Output tracking events */ | 923 | /* Output tracking events */ |
| 913 | if (!at_exit) { | 924 | if (!at_exit) { |
| @@ -1082,6 +1093,11 @@ static int record__synthesize(struct record *rec, bool tail) | |||
| 1082 | return err; | 1093 | return err; |
| 1083 | } | 1094 | } |
| 1084 | 1095 | ||
| 1096 | err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, | ||
| 1097 | machine, opts); | ||
| 1098 | if (err < 0) | ||
| 1099 | pr_warning("Couldn't synthesize bpf events.\n"); | ||
| 1100 | |||
| 1085 | err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, | 1101 | err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, |
| 1086 | process_synthesized_event, opts->sample_address, | 1102 | process_synthesized_event, opts->sample_address, |
| 1087 | 1); | 1103 | 1); |
| @@ -1445,7 +1461,7 @@ out_child: | |||
| 1445 | 1461 | ||
| 1446 | fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", | 1462 | fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", |
| 1447 | perf_data__size(data) / 1024.0 / 1024.0, | 1463 | perf_data__size(data) / 1024.0 / 1024.0, |
| 1448 | data->file.path, postfix, samples); | 1464 | data->path, postfix, samples); |
| 1449 | } | 1465 | } |
| 1450 | 1466 | ||
| 1451 | out_delete_session: | 1467 | out_delete_session: |
| @@ -1639,6 +1655,21 @@ static int parse_clockid(const struct option *opt, const char *str, int unset) | |||
| 1639 | return -1; | 1655 | return -1; |
| 1640 | } | 1656 | } |
| 1641 | 1657 | ||
| 1658 | static int record__parse_affinity(const struct option *opt, const char *str, int unset) | ||
| 1659 | { | ||
| 1660 | struct record_opts *opts = (struct record_opts *)opt->value; | ||
| 1661 | |||
| 1662 | if (unset || !str) | ||
| 1663 | return 0; | ||
| 1664 | |||
| 1665 | if (!strcasecmp(str, "node")) | ||
| 1666 | opts->affinity = PERF_AFFINITY_NODE; | ||
| 1667 | else if (!strcasecmp(str, "cpu")) | ||
| 1668 | opts->affinity = PERF_AFFINITY_CPU; | ||
| 1669 | |||
| 1670 | return 0; | ||
| 1671 | } | ||
| 1672 | |||
| 1642 | static int record__parse_mmap_pages(const struct option *opt, | 1673 | static int record__parse_mmap_pages(const struct option *opt, |
| 1643 | const char *str, | 1674 | const char *str, |
| 1644 | int unset __maybe_unused) | 1675 | int unset __maybe_unused) |
| @@ -1831,7 +1862,7 @@ static struct option __record_options[] = { | |||
| 1831 | OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", | 1862 | OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu", |
| 1832 | "list of cpus to monitor"), | 1863 | "list of cpus to monitor"), |
| 1833 | OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), | 1864 | OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"), |
| 1834 | OPT_STRING('o', "output", &record.data.file.path, "file", | 1865 | OPT_STRING('o', "output", &record.data.path, "file", |
| 1835 | "output file name"), | 1866 | "output file name"), |
| 1836 | OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, | 1867 | OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit, |
| 1837 | &record.opts.no_inherit_set, | 1868 | &record.opts.no_inherit_set, |
| @@ -1839,6 +1870,7 @@ static struct option __record_options[] = { | |||
| 1839 | OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, | 1870 | OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, |
| 1840 | "synthesize non-sample events at the end of output"), | 1871 | "synthesize non-sample events at the end of output"), |
| 1841 | OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), | 1872 | OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), |
| 1873 | OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), | ||
| 1842 | OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, | 1874 | OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, |
| 1843 | "Fail if the specified frequency can't be used"), | 1875 | "Fail if the specified frequency can't be used"), |
| 1844 | OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", | 1876 | OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", |
| @@ -1946,6 +1978,9 @@ static struct option __record_options[] = { | |||
| 1946 | &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", | 1978 | &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)", |
| 1947 | record__aio_parse), | 1979 | record__aio_parse), |
| 1948 | #endif | 1980 | #endif |
| 1981 | OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", | ||
| 1982 | "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", | ||
| 1983 | record__parse_affinity), | ||
| 1949 | OPT_END() | 1984 | OPT_END() |
| 1950 | }; | 1985 | }; |
| 1951 | 1986 | ||
| @@ -1980,6 +2015,9 @@ int cmd_record(int argc, const char **argv) | |||
| 1980 | # undef REASON | 2015 | # undef REASON |
| 1981 | #endif | 2016 | #endif |
| 1982 | 2017 | ||
| 2018 | CPU_ZERO(&rec->affinity_mask); | ||
| 2019 | rec->opts.affinity = PERF_AFFINITY_SYS; | ||
| 2020 | |||
| 1983 | rec->evlist = perf_evlist__new(); | 2021 | rec->evlist = perf_evlist__new(); |
| 1984 | if (rec->evlist == NULL) | 2022 | if (rec->evlist == NULL) |
| 1985 | return -ENOMEM; | 2023 | return -ENOMEM; |
| @@ -2143,6 +2181,8 @@ int cmd_record(int argc, const char **argv) | |||
| 2143 | if (verbose > 0) | 2181 | if (verbose > 0) |
| 2144 | pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); | 2182 | pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks); |
| 2145 | 2183 | ||
| 2184 | pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); | ||
| 2185 | |||
| 2146 | err = __cmd_record(&record, argc, argv); | 2186 | err = __cmd_record(&record, argc, argv); |
| 2147 | out: | 2187 | out: |
| 2148 | perf_evlist__delete(rec->evlist); | 2188 | perf_evlist__delete(rec->evlist); |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4958095be4fc..1532ebde6c4b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/list.h> | 16 | #include <linux/list.h> |
| 17 | #include <linux/rbtree.h> | 17 | #include <linux/rbtree.h> |
| 18 | #include <linux/err.h> | 18 | #include <linux/err.h> |
| 19 | #include "util/map.h" | ||
| 19 | #include "util/symbol.h" | 20 | #include "util/symbol.h" |
| 20 | #include "util/callchain.h" | 21 | #include "util/callchain.h" |
| 21 | #include "util/values.h" | 22 | #include "util/values.h" |
| @@ -615,6 +616,21 @@ static int report__collapse_hists(struct report *rep) | |||
| 615 | return ret; | 616 | return ret; |
| 616 | } | 617 | } |
| 617 | 618 | ||
| 619 | static int hists__resort_cb(struct hist_entry *he, void *arg) | ||
| 620 | { | ||
| 621 | struct report *rep = arg; | ||
| 622 | struct symbol *sym = he->ms.sym; | ||
| 623 | |||
| 624 | if (rep->symbol_ipc && sym && !sym->annotate2) { | ||
| 625 | struct perf_evsel *evsel = hists_to_evsel(he->hists); | ||
| 626 | |||
| 627 | symbol__annotate2(sym, he->ms.map, evsel, | ||
| 628 | &annotation__default_options, NULL); | ||
| 629 | } | ||
| 630 | |||
| 631 | return 0; | ||
| 632 | } | ||
| 633 | |||
| 618 | static void report__output_resort(struct report *rep) | 634 | static void report__output_resort(struct report *rep) |
| 619 | { | 635 | { |
| 620 | struct ui_progress prog; | 636 | struct ui_progress prog; |
| @@ -622,8 +638,10 @@ static void report__output_resort(struct report *rep) | |||
| 622 | 638 | ||
| 623 | ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); | 639 | ui_progress__init(&prog, rep->nr_entries, "Sorting events for output..."); |
| 624 | 640 | ||
| 625 | evlist__for_each_entry(rep->session->evlist, pos) | 641 | evlist__for_each_entry(rep->session->evlist, pos) { |
| 626 | perf_evsel__output_resort(pos, &prog); | 642 | perf_evsel__output_resort_cb(pos, &prog, |
| 643 | hists__resort_cb, rep); | ||
| 644 | } | ||
| 627 | 645 | ||
| 628 | ui_progress__finish(); | 646 | ui_progress__finish(); |
| 629 | } | 647 | } |
| @@ -753,7 +771,8 @@ static int tasks_print(struct report *rep, FILE *fp) | |||
| 753 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { | 771 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { |
| 754 | struct threads *threads = &machine->threads[i]; | 772 | struct threads *threads = &machine->threads[i]; |
| 755 | 773 | ||
| 756 | for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { | 774 | for (nd = rb_first_cached(&threads->entries); nd; |
| 775 | nd = rb_next(nd)) { | ||
| 757 | task = tasks + itask++; | 776 | task = tasks + itask++; |
| 758 | 777 | ||
| 759 | task->thread = rb_entry(nd, struct thread, rb_node); | 778 | task->thread = rb_entry(nd, struct thread, rb_node); |
| @@ -880,7 +899,7 @@ static int __cmd_report(struct report *rep) | |||
| 880 | rep->nr_entries += evsel__hists(pos)->nr_entries; | 899 | rep->nr_entries += evsel__hists(pos)->nr_entries; |
| 881 | 900 | ||
| 882 | if (rep->nr_entries == 0) { | 901 | if (rep->nr_entries == 0) { |
| 883 | ui__error("The %s file has no samples!\n", data->file.path); | 902 | ui__error("The %s data has no samples!\n", data->path); |
| 884 | return 0; | 903 | return 0; |
| 885 | } | 904 | } |
| 886 | 905 | ||
| @@ -956,9 +975,9 @@ int cmd_report(int argc, const char **argv) | |||
| 956 | int branch_mode = -1; | 975 | int branch_mode = -1; |
| 957 | bool branch_call_mode = false; | 976 | bool branch_call_mode = false; |
| 958 | #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" | 977 | #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" |
| 959 | const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" | 978 | static const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n" |
| 960 | CALLCHAIN_REPORT_HELP | 979 | CALLCHAIN_REPORT_HELP |
| 961 | "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; | 980 | "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; |
| 962 | char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; | 981 | char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; |
| 963 | const char * const report_usage[] = { | 982 | const char * const report_usage[] = { |
| 964 | "perf report [<options>]", | 983 | "perf report [<options>]", |
| @@ -1188,8 +1207,8 @@ int cmd_report(int argc, const char **argv) | |||
| 1188 | input_name = "perf.data"; | 1207 | input_name = "perf.data"; |
| 1189 | } | 1208 | } |
| 1190 | 1209 | ||
| 1191 | data.file.path = input_name; | 1210 | data.path = input_name; |
| 1192 | data.force = symbol_conf.force; | 1211 | data.force = symbol_conf.force; |
| 1193 | 1212 | ||
| 1194 | repeat: | 1213 | repeat: |
| 1195 | session = perf_session__new(&data, false, &report.tool); | 1214 | session = perf_session__new(&data, false, &report.tool); |
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index cbf39dab19c1..275f2d92a7bf 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c | |||
| @@ -213,7 +213,7 @@ struct perf_sched { | |||
| 213 | u64 all_runtime; | 213 | u64 all_runtime; |
| 214 | u64 all_count; | 214 | u64 all_count; |
| 215 | u64 cpu_last_switched[MAX_CPUS]; | 215 | u64 cpu_last_switched[MAX_CPUS]; |
| 216 | struct rb_root atom_root, sorted_atom_root, merged_atom_root; | 216 | struct rb_root_cached atom_root, sorted_atom_root, merged_atom_root; |
| 217 | struct list_head sort_list, cmp_pid; | 217 | struct list_head sort_list, cmp_pid; |
| 218 | bool force; | 218 | bool force; |
| 219 | bool skip_merge; | 219 | bool skip_merge; |
| @@ -271,7 +271,7 @@ struct evsel_runtime { | |||
| 271 | struct idle_thread_runtime { | 271 | struct idle_thread_runtime { |
| 272 | struct thread_runtime tr; | 272 | struct thread_runtime tr; |
| 273 | struct thread *last_thread; | 273 | struct thread *last_thread; |
| 274 | struct rb_root sorted_root; | 274 | struct rb_root_cached sorted_root; |
| 275 | struct callchain_root callchain; | 275 | struct callchain_root callchain; |
| 276 | struct callchain_cursor cursor; | 276 | struct callchain_cursor cursor; |
| 277 | }; | 277 | }; |
| @@ -950,10 +950,10 @@ thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms * | |||
| 950 | } | 950 | } |
| 951 | 951 | ||
| 952 | static struct work_atoms * | 952 | static struct work_atoms * |
| 953 | thread_atoms_search(struct rb_root *root, struct thread *thread, | 953 | thread_atoms_search(struct rb_root_cached *root, struct thread *thread, |
| 954 | struct list_head *sort_list) | 954 | struct list_head *sort_list) |
| 955 | { | 955 | { |
| 956 | struct rb_node *node = root->rb_node; | 956 | struct rb_node *node = root->rb_root.rb_node; |
| 957 | struct work_atoms key = { .thread = thread }; | 957 | struct work_atoms key = { .thread = thread }; |
| 958 | 958 | ||
| 959 | while (node) { | 959 | while (node) { |
| @@ -976,10 +976,11 @@ thread_atoms_search(struct rb_root *root, struct thread *thread, | |||
| 976 | } | 976 | } |
| 977 | 977 | ||
| 978 | static void | 978 | static void |
| 979 | __thread_latency_insert(struct rb_root *root, struct work_atoms *data, | 979 | __thread_latency_insert(struct rb_root_cached *root, struct work_atoms *data, |
| 980 | struct list_head *sort_list) | 980 | struct list_head *sort_list) |
| 981 | { | 981 | { |
| 982 | struct rb_node **new = &(root->rb_node), *parent = NULL; | 982 | struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; |
| 983 | bool leftmost = true; | ||
| 983 | 984 | ||
| 984 | while (*new) { | 985 | while (*new) { |
| 985 | struct work_atoms *this; | 986 | struct work_atoms *this; |
| @@ -992,12 +993,14 @@ __thread_latency_insert(struct rb_root *root, struct work_atoms *data, | |||
| 992 | 993 | ||
| 993 | if (cmp > 0) | 994 | if (cmp > 0) |
| 994 | new = &((*new)->rb_left); | 995 | new = &((*new)->rb_left); |
| 995 | else | 996 | else { |
| 996 | new = &((*new)->rb_right); | 997 | new = &((*new)->rb_right); |
| 998 | leftmost = false; | ||
| 999 | } | ||
| 997 | } | 1000 | } |
| 998 | 1001 | ||
| 999 | rb_link_node(&data->node, parent, new); | 1002 | rb_link_node(&data->node, parent, new); |
| 1000 | rb_insert_color(&data->node, root); | 1003 | rb_insert_color_cached(&data->node, root, leftmost); |
| 1001 | } | 1004 | } |
| 1002 | 1005 | ||
| 1003 | static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) | 1006 | static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) |
| @@ -1447,15 +1450,15 @@ static int sort_dimension__add(const char *tok, struct list_head *list) | |||
| 1447 | static void perf_sched__sort_lat(struct perf_sched *sched) | 1450 | static void perf_sched__sort_lat(struct perf_sched *sched) |
| 1448 | { | 1451 | { |
| 1449 | struct rb_node *node; | 1452 | struct rb_node *node; |
| 1450 | struct rb_root *root = &sched->atom_root; | 1453 | struct rb_root_cached *root = &sched->atom_root; |
| 1451 | again: | 1454 | again: |
| 1452 | for (;;) { | 1455 | for (;;) { |
| 1453 | struct work_atoms *data; | 1456 | struct work_atoms *data; |
| 1454 | node = rb_first(root); | 1457 | node = rb_first_cached(root); |
| 1455 | if (!node) | 1458 | if (!node) |
| 1456 | break; | 1459 | break; |
| 1457 | 1460 | ||
| 1458 | rb_erase(node, root); | 1461 | rb_erase_cached(node, root); |
| 1459 | data = rb_entry(node, struct work_atoms, node); | 1462 | data = rb_entry(node, struct work_atoms, node); |
| 1460 | __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); | 1463 | __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); |
| 1461 | } | 1464 | } |
| @@ -1782,11 +1785,9 @@ static int perf_sched__read_events(struct perf_sched *sched) | |||
| 1782 | }; | 1785 | }; |
| 1783 | struct perf_session *session; | 1786 | struct perf_session *session; |
| 1784 | struct perf_data data = { | 1787 | struct perf_data data = { |
| 1785 | .file = { | 1788 | .path = input_name, |
| 1786 | .path = input_name, | 1789 | .mode = PERF_DATA_MODE_READ, |
| 1787 | }, | 1790 | .force = sched->force, |
| 1788 | .mode = PERF_DATA_MODE_READ, | ||
| 1789 | .force = sched->force, | ||
| 1790 | }; | 1791 | }; |
| 1791 | int rc = -1; | 1792 | int rc = -1; |
| 1792 | 1793 | ||
| @@ -2762,12 +2763,12 @@ static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node) | |||
| 2762 | return ret; | 2763 | return ret; |
| 2763 | } | 2764 | } |
| 2764 | 2765 | ||
| 2765 | static size_t timehist_print_idlehist_callchain(struct rb_root *root) | 2766 | static size_t timehist_print_idlehist_callchain(struct rb_root_cached *root) |
| 2766 | { | 2767 | { |
| 2767 | size_t ret = 0; | 2768 | size_t ret = 0; |
| 2768 | FILE *fp = stdout; | 2769 | FILE *fp = stdout; |
| 2769 | struct callchain_node *chain; | 2770 | struct callchain_node *chain; |
| 2770 | struct rb_node *rb_node = rb_first(root); | 2771 | struct rb_node *rb_node = rb_first_cached(root); |
| 2771 | 2772 | ||
| 2772 | printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); | 2773 | printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains"); |
| 2773 | printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, | 2774 | printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line, |
| @@ -2868,7 +2869,7 @@ static void timehist_print_summary(struct perf_sched *sched, | |||
| 2868 | if (itr == NULL) | 2869 | if (itr == NULL) |
| 2869 | continue; | 2870 | continue; |
| 2870 | 2871 | ||
| 2871 | callchain_param.sort(&itr->sorted_root, &itr->callchain, | 2872 | callchain_param.sort(&itr->sorted_root.rb_root, &itr->callchain, |
| 2872 | 0, &callchain_param); | 2873 | 0, &callchain_param); |
| 2873 | 2874 | ||
| 2874 | printf(" CPU %2d:", i); | 2875 | printf(" CPU %2d:", i); |
| @@ -2955,11 +2956,9 @@ static int perf_sched__timehist(struct perf_sched *sched) | |||
| 2955 | { "sched:sched_migrate_task", timehist_migrate_task_event, }, | 2956 | { "sched:sched_migrate_task", timehist_migrate_task_event, }, |
| 2956 | }; | 2957 | }; |
| 2957 | struct perf_data data = { | 2958 | struct perf_data data = { |
| 2958 | .file = { | 2959 | .path = input_name, |
| 2959 | .path = input_name, | 2960 | .mode = PERF_DATA_MODE_READ, |
| 2960 | }, | 2961 | .force = sched->force, |
| 2961 | .mode = PERF_DATA_MODE_READ, | ||
| 2962 | .force = sched->force, | ||
| 2963 | }; | 2962 | }; |
| 2964 | 2963 | ||
| 2965 | struct perf_session *session; | 2964 | struct perf_session *session; |
| @@ -3074,11 +3073,12 @@ static void print_bad_events(struct perf_sched *sched) | |||
| 3074 | } | 3073 | } |
| 3075 | } | 3074 | } |
| 3076 | 3075 | ||
| 3077 | static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) | 3076 | static void __merge_work_atoms(struct rb_root_cached *root, struct work_atoms *data) |
| 3078 | { | 3077 | { |
| 3079 | struct rb_node **new = &(root->rb_node), *parent = NULL; | 3078 | struct rb_node **new = &(root->rb_root.rb_node), *parent = NULL; |
| 3080 | struct work_atoms *this; | 3079 | struct work_atoms *this; |
| 3081 | const char *comm = thread__comm_str(data->thread), *this_comm; | 3080 | const char *comm = thread__comm_str(data->thread), *this_comm; |
| 3081 | bool leftmost = true; | ||
| 3082 | 3082 | ||
| 3083 | while (*new) { | 3083 | while (*new) { |
| 3084 | int cmp; | 3084 | int cmp; |
| @@ -3092,6 +3092,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) | |||
| 3092 | new = &((*new)->rb_left); | 3092 | new = &((*new)->rb_left); |
| 3093 | } else if (cmp < 0) { | 3093 | } else if (cmp < 0) { |
| 3094 | new = &((*new)->rb_right); | 3094 | new = &((*new)->rb_right); |
| 3095 | leftmost = false; | ||
| 3095 | } else { | 3096 | } else { |
| 3096 | this->num_merged++; | 3097 | this->num_merged++; |
| 3097 | this->total_runtime += data->total_runtime; | 3098 | this->total_runtime += data->total_runtime; |
| @@ -3109,7 +3110,7 @@ static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) | |||
| 3109 | 3110 | ||
| 3110 | data->num_merged++; | 3111 | data->num_merged++; |
| 3111 | rb_link_node(&data->node, parent, new); | 3112 | rb_link_node(&data->node, parent, new); |
| 3112 | rb_insert_color(&data->node, root); | 3113 | rb_insert_color_cached(&data->node, root, leftmost); |
| 3113 | } | 3114 | } |
| 3114 | 3115 | ||
| 3115 | static void perf_sched__merge_lat(struct perf_sched *sched) | 3116 | static void perf_sched__merge_lat(struct perf_sched *sched) |
| @@ -3120,8 +3121,8 @@ static void perf_sched__merge_lat(struct perf_sched *sched) | |||
| 3120 | if (sched->skip_merge) | 3121 | if (sched->skip_merge) |
| 3121 | return; | 3122 | return; |
| 3122 | 3123 | ||
| 3123 | while ((node = rb_first(&sched->atom_root))) { | 3124 | while ((node = rb_first_cached(&sched->atom_root))) { |
| 3124 | rb_erase(node, &sched->atom_root); | 3125 | rb_erase_cached(node, &sched->atom_root); |
| 3125 | data = rb_entry(node, struct work_atoms, node); | 3126 | data = rb_entry(node, struct work_atoms, node); |
| 3126 | __merge_work_atoms(&sched->merged_atom_root, data); | 3127 | __merge_work_atoms(&sched->merged_atom_root, data); |
| 3127 | } | 3128 | } |
| @@ -3143,7 +3144,7 @@ static int perf_sched__lat(struct perf_sched *sched) | |||
| 3143 | printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); | 3144 | printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n"); |
| 3144 | printf(" -----------------------------------------------------------------------------------------------------------------\n"); | 3145 | printf(" -----------------------------------------------------------------------------------------------------------------\n"); |
| 3145 | 3146 | ||
| 3146 | next = rb_first(&sched->sorted_atom_root); | 3147 | next = rb_first_cached(&sched->sorted_atom_root); |
| 3147 | 3148 | ||
| 3148 | while (next) { | 3149 | while (next) { |
| 3149 | struct work_atoms *work_list; | 3150 | struct work_atoms *work_list; |
| @@ -3336,7 +3337,7 @@ static int __cmd_record(int argc, const char **argv) | |||
| 3336 | 3337 | ||
| 3337 | int cmd_sched(int argc, const char **argv) | 3338 | int cmd_sched(int argc, const char **argv) |
| 3338 | { | 3339 | { |
| 3339 | const char default_sort_order[] = "avg, max, switch, runtime"; | 3340 | static const char default_sort_order[] = "avg, max, switch, runtime"; |
| 3340 | struct perf_sched sched = { | 3341 | struct perf_sched sched = { |
| 3341 | .tool = { | 3342 | .tool = { |
| 3342 | .sample = perf_sched__process_tracepoint_sample, | 3343 | .sample = perf_sched__process_tracepoint_sample, |
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ac221f137ed2..2d8cb1d1682c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "util/perf_regs.h" | 10 | #include "util/perf_regs.h" |
| 11 | #include "util/session.h" | 11 | #include "util/session.h" |
| 12 | #include "util/tool.h" | 12 | #include "util/tool.h" |
| 13 | #include "util/map.h" | ||
| 13 | #include "util/symbol.h" | 14 | #include "util/symbol.h" |
| 14 | #include "util/thread.h" | 15 | #include "util/thread.h" |
| 15 | #include "util/trace-event.h" | 16 | #include "util/trace-event.h" |
| @@ -148,6 +149,7 @@ static struct { | |||
| 148 | unsigned int print_ip_opts; | 149 | unsigned int print_ip_opts; |
| 149 | u64 fields; | 150 | u64 fields; |
| 150 | u64 invalid_fields; | 151 | u64 invalid_fields; |
| 152 | u64 user_set_fields; | ||
| 151 | } output[OUTPUT_TYPE_MAX] = { | 153 | } output[OUTPUT_TYPE_MAX] = { |
| 152 | 154 | ||
| 153 | [PERF_TYPE_HARDWARE] = { | 155 | [PERF_TYPE_HARDWARE] = { |
| @@ -344,7 +346,7 @@ static int perf_evsel__do_check_stype(struct perf_evsel *evsel, | |||
| 344 | if (attr->sample_type & sample_type) | 346 | if (attr->sample_type & sample_type) |
| 345 | return 0; | 347 | return 0; |
| 346 | 348 | ||
| 347 | if (output[type].user_set) { | 349 | if (output[type].user_set_fields & field) { |
| 348 | if (allow_user_set) | 350 | if (allow_user_set) |
| 349 | return 0; | 351 | return 0; |
| 350 | evname = perf_evsel__name(evsel); | 352 | evname = perf_evsel__name(evsel); |
| @@ -2559,6 +2561,10 @@ static int parse_output_fields(const struct option *opt __maybe_unused, | |||
| 2559 | pr_warning("Overriding previous field request for %s events.\n", | 2561 | pr_warning("Overriding previous field request for %s events.\n", |
| 2560 | event_type(type)); | 2562 | event_type(type)); |
| 2561 | 2563 | ||
| 2564 | /* Don't override defaults for +- */ | ||
| 2565 | if (strchr(tok, '+') || strchr(tok, '-')) | ||
| 2566 | goto parse; | ||
| 2567 | |||
| 2562 | output[type].fields = 0; | 2568 | output[type].fields = 0; |
| 2563 | output[type].user_set = true; | 2569 | output[type].user_set = true; |
| 2564 | output[type].wildcard_set = false; | 2570 | output[type].wildcard_set = false; |
| @@ -2627,10 +2633,13 @@ parse: | |||
| 2627 | pr_warning("\'%s\' not valid for %s events. Ignoring.\n", | 2633 | pr_warning("\'%s\' not valid for %s events. Ignoring.\n", |
| 2628 | all_output_options[i].str, event_type(j)); | 2634 | all_output_options[i].str, event_type(j)); |
| 2629 | } else { | 2635 | } else { |
| 2630 | if (change == REMOVE) | 2636 | if (change == REMOVE) { |
| 2631 | output[j].fields &= ~all_output_options[i].field; | 2637 | output[j].fields &= ~all_output_options[i].field; |
| 2632 | else | 2638 | output[j].user_set_fields &= ~all_output_options[i].field; |
| 2639 | } else { | ||
| 2633 | output[j].fields |= all_output_options[i].field; | 2640 | output[j].fields |= all_output_options[i].field; |
| 2641 | output[j].user_set_fields |= all_output_options[i].field; | ||
| 2642 | } | ||
| 2634 | output[j].user_set = true; | 2643 | output[j].user_set = true; |
| 2635 | output[j].wildcard_set = true; | 2644 | output[j].wildcard_set = true; |
| 2636 | } | 2645 | } |
| @@ -2643,6 +2652,10 @@ parse: | |||
| 2643 | rc = -EINVAL; | 2652 | rc = -EINVAL; |
| 2644 | goto out; | 2653 | goto out; |
| 2645 | } | 2654 | } |
| 2655 | if (change == REMOVE) | ||
| 2656 | output[type].fields &= ~all_output_options[i].field; | ||
| 2657 | else | ||
| 2658 | output[type].fields |= all_output_options[i].field; | ||
| 2646 | output[type].user_set = true; | 2659 | output[type].user_set = true; |
| 2647 | output[type].wildcard_set = true; | 2660 | output[type].wildcard_set = true; |
| 2648 | } | 2661 | } |
| @@ -2942,10 +2955,8 @@ int find_scripts(char **scripts_array, char **scripts_path_array) | |||
| 2942 | DIR *scripts_dir, *lang_dir; | 2955 | DIR *scripts_dir, *lang_dir; |
| 2943 | struct perf_session *session; | 2956 | struct perf_session *session; |
| 2944 | struct perf_data data = { | 2957 | struct perf_data data = { |
| 2945 | .file = { | 2958 | .path = input_name, |
| 2946 | .path = input_name, | 2959 | .mode = PERF_DATA_MODE_READ, |
| 2947 | }, | ||
| 2948 | .mode = PERF_DATA_MODE_READ, | ||
| 2949 | }; | 2960 | }; |
| 2950 | char *temp; | 2961 | char *temp; |
| 2951 | int i = 0; | 2962 | int i = 0; |
| @@ -3418,8 +3429,8 @@ int cmd_script(int argc, const char **argv) | |||
| 3418 | argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, | 3429 | argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, |
| 3419 | PARSE_OPT_STOP_AT_NON_OPTION); | 3430 | PARSE_OPT_STOP_AT_NON_OPTION); |
| 3420 | 3431 | ||
| 3421 | data.file.path = input_name; | 3432 | data.path = input_name; |
| 3422 | data.force = symbol_conf.force; | 3433 | data.force = symbol_conf.force; |
| 3423 | 3434 | ||
| 3424 | if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { | 3435 | if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) { |
| 3425 | rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); | 3436 | rec_script_path = get_script_path(argv[1], RECORD_SUFFIX); |
| @@ -3645,7 +3656,7 @@ int cmd_script(int argc, const char **argv) | |||
| 3645 | goto out_delete; | 3656 | goto out_delete; |
| 3646 | } | 3657 | } |
| 3647 | 3658 | ||
| 3648 | input = open(data.file.path, O_RDONLY); /* input_name */ | 3659 | input = open(data.path, O_RDONLY); /* input_name */ |
| 3649 | if (input < 0) { | 3660 | if (input < 0) { |
| 3650 | err = -errno; | 3661 | err = -errno; |
| 3651 | perror("failed to open file"); | 3662 | perror("failed to open file"); |
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 63a3afc7f32b..7b8f09b0b8bf 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
| @@ -52,7 +52,6 @@ | |||
| 52 | #include "util/evlist.h" | 52 | #include "util/evlist.h" |
| 53 | #include "util/evsel.h" | 53 | #include "util/evsel.h" |
| 54 | #include "util/debug.h" | 54 | #include "util/debug.h" |
| 55 | #include "util/drv_configs.h" | ||
| 56 | #include "util/color.h" | 55 | #include "util/color.h" |
| 57 | #include "util/stat.h" | 56 | #include "util/stat.h" |
| 58 | #include "util/header.h" | 57 | #include "util/header.h" |
| @@ -83,7 +82,6 @@ | |||
| 83 | #include <unistd.h> | 82 | #include <unistd.h> |
| 84 | #include <sys/time.h> | 83 | #include <sys/time.h> |
| 85 | #include <sys/resource.h> | 84 | #include <sys/resource.h> |
| 86 | #include <sys/wait.h> | ||
| 87 | 85 | ||
| 88 | #include "sane_ctype.h" | 86 | #include "sane_ctype.h" |
| 89 | 87 | ||
| @@ -418,7 +416,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) | |||
| 418 | int status = 0; | 416 | int status = 0; |
| 419 | const bool forks = (argc > 0); | 417 | const bool forks = (argc > 0); |
| 420 | bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; | 418 | bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false; |
| 421 | struct perf_evsel_config_term *err_term; | ||
| 422 | 419 | ||
| 423 | if (interval) { | 420 | if (interval) { |
| 424 | ts.tv_sec = interval / USEC_PER_MSEC; | 421 | ts.tv_sec = interval / USEC_PER_MSEC; |
| @@ -515,13 +512,6 @@ try_again: | |||
| 515 | return -1; | 512 | return -1; |
| 516 | } | 513 | } |
| 517 | 514 | ||
| 518 | if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) { | ||
| 519 | pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", | ||
| 520 | err_term->val.drv_cfg, perf_evsel__name(counter), errno, | ||
| 521 | str_error_r(errno, msg, sizeof(msg))); | ||
| 522 | return -1; | ||
| 523 | } | ||
| 524 | |||
| 525 | if (STAT_RECORD) { | 515 | if (STAT_RECORD) { |
| 526 | int err, fd = perf_data__fd(&perf_stat.data); | 516 | int err, fd = perf_data__fd(&perf_stat.data); |
| 527 | 517 | ||
| @@ -1332,7 +1322,7 @@ static int __cmd_record(int argc, const char **argv) | |||
| 1332 | PARSE_OPT_STOP_AT_NON_OPTION); | 1322 | PARSE_OPT_STOP_AT_NON_OPTION); |
| 1333 | 1323 | ||
| 1334 | if (output_name) | 1324 | if (output_name) |
| 1335 | data->file.path = output_name; | 1325 | data->path = output_name; |
| 1336 | 1326 | ||
| 1337 | if (stat_config.run_count != 1 || forever) { | 1327 | if (stat_config.run_count != 1 || forever) { |
| 1338 | pr_err("Cannot use -r option with perf stat record.\n"); | 1328 | pr_err("Cannot use -r option with perf stat record.\n"); |
| @@ -1533,8 +1523,8 @@ static int __cmd_report(int argc, const char **argv) | |||
| 1533 | input_name = "perf.data"; | 1523 | input_name = "perf.data"; |
| 1534 | } | 1524 | } |
| 1535 | 1525 | ||
| 1536 | perf_stat.data.file.path = input_name; | 1526 | perf_stat.data.path = input_name; |
| 1537 | perf_stat.data.mode = PERF_DATA_MODE_READ; | 1527 | perf_stat.data.mode = PERF_DATA_MODE_READ; |
| 1538 | 1528 | ||
| 1539 | session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); | 1529 | session = perf_session__new(&perf_stat.data, false, &perf_stat.tool); |
| 1540 | if (session == NULL) | 1530 | if (session == NULL) |
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 775b99833e51..9b98687a27b9 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c | |||
| @@ -1602,11 +1602,9 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) | |||
| 1602 | { "syscalls:sys_exit_select", process_exit_poll }, | 1602 | { "syscalls:sys_exit_select", process_exit_poll }, |
| 1603 | }; | 1603 | }; |
| 1604 | struct perf_data data = { | 1604 | struct perf_data data = { |
| 1605 | .file = { | 1605 | .path = input_name, |
| 1606 | .path = input_name, | 1606 | .mode = PERF_DATA_MODE_READ, |
| 1607 | }, | 1607 | .force = tchart->force, |
| 1608 | .mode = PERF_DATA_MODE_READ, | ||
| 1609 | .force = tchart->force, | ||
| 1610 | }; | 1608 | }; |
| 1611 | 1609 | ||
| 1612 | struct perf_session *session = perf_session__new(&data, false, | 1610 | struct perf_session *session = perf_session__new(&data, false, |
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index f64e312db787..231a90daa958 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
| @@ -22,13 +22,14 @@ | |||
| 22 | #include "perf.h" | 22 | #include "perf.h" |
| 23 | 23 | ||
| 24 | #include "util/annotate.h" | 24 | #include "util/annotate.h" |
| 25 | #include "util/bpf-event.h" | ||
| 25 | #include "util/config.h" | 26 | #include "util/config.h" |
| 26 | #include "util/color.h" | 27 | #include "util/color.h" |
| 27 | #include "util/drv_configs.h" | ||
| 28 | #include "util/evlist.h" | 28 | #include "util/evlist.h" |
| 29 | #include "util/evsel.h" | 29 | #include "util/evsel.h" |
| 30 | #include "util/event.h" | 30 | #include "util/event.h" |
| 31 | #include "util/machine.h" | 31 | #include "util/machine.h" |
| 32 | #include "util/map.h" | ||
| 32 | #include "util/session.h" | 33 | #include "util/session.h" |
| 33 | #include "util/symbol.h" | 34 | #include "util/symbol.h" |
| 34 | #include "util/thread.h" | 35 | #include "util/thread.h" |
| @@ -366,7 +367,7 @@ static void perf_top__prompt_symbol(struct perf_top *top, const char *msg) | |||
| 366 | if (p) | 367 | if (p) |
| 367 | *p = 0; | 368 | *p = 0; |
| 368 | 369 | ||
| 369 | next = rb_first(&hists->entries); | 370 | next = rb_first_cached(&hists->entries); |
| 370 | while (next) { | 371 | while (next) { |
| 371 | n = rb_entry(next, struct hist_entry, rb_node); | 372 | n = rb_entry(next, struct hist_entry, rb_node); |
| 372 | if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { | 373 | if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) { |
| @@ -1184,10 +1185,6 @@ static void init_process_thread(struct perf_top *top) | |||
| 1184 | 1185 | ||
| 1185 | static int __cmd_top(struct perf_top *top) | 1186 | static int __cmd_top(struct perf_top *top) |
| 1186 | { | 1187 | { |
| 1187 | char msg[512]; | ||
| 1188 | struct perf_evsel *pos; | ||
| 1189 | struct perf_evsel_config_term *err_term; | ||
| 1190 | struct perf_evlist *evlist = top->evlist; | ||
| 1191 | struct record_opts *opts = &top->record_opts; | 1188 | struct record_opts *opts = &top->record_opts; |
| 1192 | pthread_t thread, thread_process; | 1189 | pthread_t thread, thread_process; |
| 1193 | int ret; | 1190 | int ret; |
| @@ -1215,6 +1212,12 @@ static int __cmd_top(struct perf_top *top) | |||
| 1215 | 1212 | ||
| 1216 | init_process_thread(top); | 1213 | init_process_thread(top); |
| 1217 | 1214 | ||
| 1215 | ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process, | ||
| 1216 | &top->session->machines.host, | ||
| 1217 | &top->record_opts); | ||
| 1218 | if (ret < 0) | ||
| 1219 | pr_warning("Couldn't synthesize bpf events.\n"); | ||
| 1220 | |||
| 1218 | machine__synthesize_threads(&top->session->machines.host, &opts->target, | 1221 | machine__synthesize_threads(&top->session->machines.host, &opts->target, |
| 1219 | top->evlist->threads, false, | 1222 | top->evlist->threads, false, |
| 1220 | top->nr_threads_synthesize); | 1223 | top->nr_threads_synthesize); |
| @@ -1232,14 +1235,6 @@ static int __cmd_top(struct perf_top *top) | |||
| 1232 | if (ret) | 1235 | if (ret) |
| 1233 | goto out_delete; | 1236 | goto out_delete; |
| 1234 | 1237 | ||
| 1235 | ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term); | ||
| 1236 | if (ret) { | ||
| 1237 | pr_err("failed to set config \"%s\" on event %s with %d (%s)\n", | ||
| 1238 | err_term->val.drv_cfg, perf_evsel__name(pos), errno, | ||
| 1239 | str_error_r(errno, msg, sizeof(msg))); | ||
| 1240 | goto out_delete; | ||
| 1241 | } | ||
| 1242 | |||
| 1243 | top->session->evlist = top->evlist; | 1238 | top->session->evlist = top->evlist; |
| 1244 | perf_session__set_id_hdr_size(top->session); | 1239 | perf_session__set_id_hdr_size(top->session); |
| 1245 | 1240 | ||
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index b36061cd1ab8..f5b3a1e9c1dd 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
| @@ -19,6 +19,7 @@ | |||
| 19 | #include <traceevent/event-parse.h> | 19 | #include <traceevent/event-parse.h> |
| 20 | #include <api/fs/tracing_path.h> | 20 | #include <api/fs/tracing_path.h> |
| 21 | #include <bpf/bpf.h> | 21 | #include <bpf/bpf.h> |
| 22 | #include "util/bpf_map.h" | ||
| 22 | #include "builtin.h" | 23 | #include "builtin.h" |
| 23 | #include "util/cgroup.h" | 24 | #include "util/cgroup.h" |
| 24 | #include "util/color.h" | 25 | #include "util/color.h" |
| @@ -29,6 +30,8 @@ | |||
| 29 | #include "util/evlist.h" | 30 | #include "util/evlist.h" |
| 30 | #include <subcmd/exec-cmd.h> | 31 | #include <subcmd/exec-cmd.h> |
| 31 | #include "util/machine.h" | 32 | #include "util/machine.h" |
| 33 | #include "util/map.h" | ||
| 34 | #include "util/symbol.h" | ||
| 32 | #include "util/path.h" | 35 | #include "util/path.h" |
| 33 | #include "util/session.h" | 36 | #include "util/session.h" |
| 34 | #include "util/thread.h" | 37 | #include "util/thread.h" |
| @@ -85,6 +88,9 @@ struct trace { | |||
| 85 | *augmented; | 88 | *augmented; |
| 86 | } events; | 89 | } events; |
| 87 | } syscalls; | 90 | } syscalls; |
| 91 | struct { | ||
| 92 | struct bpf_map *map; | ||
| 93 | } dump; | ||
| 88 | struct record_opts opts; | 94 | struct record_opts opts; |
| 89 | struct perf_evlist *evlist; | 95 | struct perf_evlist *evlist; |
| 90 | struct machine *host; | 96 | struct machine *host; |
| @@ -1039,6 +1045,9 @@ static const size_t trace__entry_str_size = 2048; | |||
| 1039 | 1045 | ||
| 1040 | static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) | 1046 | static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd) |
| 1041 | { | 1047 | { |
| 1048 | if (fd < 0) | ||
| 1049 | return NULL; | ||
| 1050 | |||
| 1042 | if (fd > ttrace->files.max) { | 1051 | if (fd > ttrace->files.max) { |
| 1043 | struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); | 1052 | struct file *nfiles = realloc(ttrace->files.table, (fd + 1) * sizeof(struct file)); |
| 1044 | 1053 | ||
| @@ -2766,7 +2775,8 @@ static int trace__set_filter_loop_pids(struct trace *trace) | |||
| 2766 | if (parent == NULL) | 2775 | if (parent == NULL) |
| 2767 | break; | 2776 | break; |
| 2768 | 2777 | ||
| 2769 | if (!strcmp(thread__comm_str(parent), "sshd")) { | 2778 | if (!strcmp(thread__comm_str(parent), "sshd") || |
| 2779 | strstarts(thread__comm_str(parent), "gnome-terminal")) { | ||
| 2770 | pids[nr++] = parent->tid; | 2780 | pids[nr++] = parent->tid; |
| 2771 | break; | 2781 | break; |
| 2772 | } | 2782 | } |
| @@ -2991,6 +3001,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
| 2991 | if (err < 0) | 3001 | if (err < 0) |
| 2992 | goto out_error_apply_filters; | 3002 | goto out_error_apply_filters; |
| 2993 | 3003 | ||
| 3004 | if (trace->dump.map) | ||
| 3005 | bpf_map__fprintf(trace->dump.map, trace->output); | ||
| 3006 | |||
| 2994 | err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); | 3007 | err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); |
| 2995 | if (err < 0) | 3008 | if (err < 0) |
| 2996 | goto out_error_mmap; | 3009 | goto out_error_mmap; |
| @@ -3141,11 +3154,9 @@ static int trace__replay(struct trace *trace) | |||
| 3141 | { "probe:vfs_getname", trace__vfs_getname, }, | 3154 | { "probe:vfs_getname", trace__vfs_getname, }, |
| 3142 | }; | 3155 | }; |
| 3143 | struct perf_data data = { | 3156 | struct perf_data data = { |
| 3144 | .file = { | 3157 | .path = input_name, |
| 3145 | .path = input_name, | 3158 | .mode = PERF_DATA_MODE_READ, |
| 3146 | }, | 3159 | .force = trace->force, |
| 3147 | .mode = PERF_DATA_MODE_READ, | ||
| 3148 | .force = trace->force, | ||
| 3149 | }; | 3160 | }; |
| 3150 | struct perf_session *session; | 3161 | struct perf_session *session; |
| 3151 | struct perf_evsel *evsel; | 3162 | struct perf_evsel *evsel; |
| @@ -3680,6 +3691,7 @@ int cmd_trace(int argc, const char **argv) | |||
| 3680 | .max_stack = UINT_MAX, | 3691 | .max_stack = UINT_MAX, |
| 3681 | .max_events = ULONG_MAX, | 3692 | .max_events = ULONG_MAX, |
| 3682 | }; | 3693 | }; |
| 3694 | const char *map_dump_str = NULL; | ||
| 3683 | const char *output_name = NULL; | 3695 | const char *output_name = NULL; |
| 3684 | const struct option trace_options[] = { | 3696 | const struct option trace_options[] = { |
| 3685 | OPT_CALLBACK('e', "event", &trace, "event", | 3697 | OPT_CALLBACK('e', "event", &trace, "event", |
| @@ -3712,6 +3724,9 @@ int cmd_trace(int argc, const char **argv) | |||
| 3712 | OPT_CALLBACK(0, "duration", &trace, "float", | 3724 | OPT_CALLBACK(0, "duration", &trace, "float", |
| 3713 | "show only events with duration > N.M ms", | 3725 | "show only events with duration > N.M ms", |
| 3714 | trace__set_duration), | 3726 | trace__set_duration), |
| 3727 | #ifdef HAVE_LIBBPF_SUPPORT | ||
| 3728 | OPT_STRING(0, "map-dump", &map_dump_str, "BPF map", "BPF map to periodically dump"), | ||
| 3729 | #endif | ||
| 3715 | OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), | 3730 | OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"), |
| 3716 | OPT_INCR('v', "verbose", &verbose, "be more verbose"), | 3731 | OPT_INCR('v', "verbose", &verbose, "be more verbose"), |
| 3717 | OPT_BOOLEAN('T', "time", &trace.full_time, | 3732 | OPT_BOOLEAN('T', "time", &trace.full_time, |
| @@ -3806,6 +3821,14 @@ int cmd_trace(int argc, const char **argv) | |||
| 3806 | 3821 | ||
| 3807 | err = -1; | 3822 | err = -1; |
| 3808 | 3823 | ||
| 3824 | if (map_dump_str) { | ||
| 3825 | trace.dump.map = bpf__find_map_by_name(map_dump_str); | ||
| 3826 | if (trace.dump.map == NULL) { | ||
| 3827 | pr_err("ERROR: BPF map \"%s\" not found\n", map_dump_str); | ||
| 3828 | goto out; | ||
| 3829 | } | ||
| 3830 | } | ||
| 3831 | |||
| 3809 | if (trace.trace_pgfaults) { | 3832 | if (trace.trace_pgfaults) { |
| 3810 | trace.opts.sample_address = true; | 3833 | trace.opts.sample_address = true; |
| 3811 | trace.opts.sample_time = true; | 3834 | trace.opts.sample_time = true; |
| @@ -3865,7 +3888,8 @@ int cmd_trace(int argc, const char **argv) | |||
| 3865 | goto init_augmented_syscall_tp; | 3888 | goto init_augmented_syscall_tp; |
| 3866 | } | 3889 | } |
| 3867 | 3890 | ||
| 3868 | if (strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_enter") == 0) { | 3891 | if (trace.syscalls.events.augmented->priv == NULL && |
| 3892 | strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) { | ||
| 3869 | struct perf_evsel *augmented = trace.syscalls.events.augmented; | 3893 | struct perf_evsel *augmented = trace.syscalls.events.augmented; |
| 3870 | if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || | 3894 | if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) || |
| 3871 | perf_evsel__init_augmented_syscall_tp_args(augmented)) | 3895 | perf_evsel__init_augmented_syscall_tp_args(augmented)) |
diff --git a/tools/perf/design.txt b/tools/perf/design.txt index a28dca2582aa..0453ba26cdbd 100644 --- a/tools/perf/design.txt +++ b/tools/perf/design.txt | |||
| @@ -222,6 +222,10 @@ The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a | |||
| 222 | way to request that counting of events be restricted to times when the | 222 | way to request that counting of events be restricted to times when the |
| 223 | CPU is in user, kernel and/or hypervisor mode. | 223 | CPU is in user, kernel and/or hypervisor mode. |
| 224 | 224 | ||
| 225 | Furthermore the 'exclude_host' and 'exclude_guest' bits provide a way | ||
| 226 | to request counting of events restricted to guest and host contexts when | ||
| 227 | using Linux as the hypervisor. | ||
| 228 | |||
| 225 | The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap | 229 | The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap |
| 226 | operations, these can be used to relate userspace IP addresses to actual | 230 | operations, these can be used to relate userspace IP addresses to actual |
| 227 | code, even after the mapping (or even the whole process) is gone, | 231 | code, even after the mapping (or even the whole process) is gone, |
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index 53c233370fae..f9b2161e1ca4 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c | |||
| @@ -18,23 +18,13 @@ | |||
| 18 | #include <pid_filter.h> | 18 | #include <pid_filter.h> |
| 19 | 19 | ||
| 20 | /* bpf-output associated map */ | 20 | /* bpf-output associated map */ |
| 21 | struct bpf_map SEC("maps") __augmented_syscalls__ = { | 21 | bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); |
| 22 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, | ||
| 23 | .key_size = sizeof(int), | ||
| 24 | .value_size = sizeof(u32), | ||
| 25 | .max_entries = __NR_CPUS__, | ||
| 26 | }; | ||
| 27 | 22 | ||
| 28 | struct syscall { | 23 | struct syscall { |
| 29 | bool enabled; | 24 | bool enabled; |
| 30 | }; | 25 | }; |
| 31 | 26 | ||
| 32 | struct bpf_map SEC("maps") syscalls = { | 27 | bpf_map(syscalls, ARRAY, int, struct syscall, 512); |
| 33 | .type = BPF_MAP_TYPE_ARRAY, | ||
| 34 | .key_size = sizeof(int), | ||
| 35 | .value_size = sizeof(struct syscall), | ||
| 36 | .max_entries = 512, | ||
| 37 | }; | ||
| 38 | 28 | ||
| 39 | struct syscall_enter_args { | 29 | struct syscall_enter_args { |
| 40 | unsigned long long common_tp_fields; | 30 | unsigned long long common_tp_fields; |
| @@ -141,8 +131,8 @@ int sys_enter(struct syscall_enter_args *args) | |||
| 141 | len = sizeof(augmented_args.args); | 131 | len = sizeof(augmented_args.args); |
| 142 | } | 132 | } |
| 143 | 133 | ||
| 144 | perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); | 134 | /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ |
| 145 | return 0; | 135 | return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); |
| 146 | } | 136 | } |
| 147 | 137 | ||
| 148 | SEC("raw_syscalls:sys_exit") | 138 | SEC("raw_syscalls:sys_exit") |
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c index 2ae44813ef2d..524fdb8534b3 100644 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ b/tools/perf/examples/bpf/augmented_syscalls.c | |||
| @@ -19,12 +19,8 @@ | |||
| 19 | #include <stdio.h> | 19 | #include <stdio.h> |
| 20 | #include <linux/socket.h> | 20 | #include <linux/socket.h> |
| 21 | 21 | ||
| 22 | struct bpf_map SEC("maps") __augmented_syscalls__ = { | 22 | /* bpf-output associated map */ |
| 23 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, | 23 | bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); |
| 24 | .key_size = sizeof(int), | ||
| 25 | .value_size = sizeof(u32), | ||
| 26 | .max_entries = __NR_CPUS__, | ||
| 27 | }; | ||
| 28 | 24 | ||
| 29 | struct syscall_exit_args { | 25 | struct syscall_exit_args { |
| 30 | unsigned long long common_tp_fields; | 26 | unsigned long long common_tp_fields; |
| @@ -55,9 +51,9 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ | |||
| 55 | len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ | 51 | len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ |
| 56 | len &= sizeof(augmented_args.filename.value) - 1; \ | 52 | len &= sizeof(augmented_args.filename.value) - 1; \ |
| 57 | } \ | 53 | } \ |
| 58 | perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ | 54 | /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ |
| 59 | &augmented_args, len); \ | 55 | return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ |
| 60 | return 0; \ | 56 | &augmented_args, len); \ |
| 61 | } \ | 57 | } \ |
| 62 | int syscall_exit(syscall)(struct syscall_exit_args *args) \ | 58 | int syscall_exit(syscall)(struct syscall_exit_args *args) \ |
| 63 | { \ | 59 | { \ |
| @@ -125,10 +121,10 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ | |||
| 125 | /* addrlen = augmented_args.args.addrlen; */ \ | 121 | /* addrlen = augmented_args.args.addrlen; */ \ |
| 126 | /* */ \ | 122 | /* */ \ |
| 127 | probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ | 123 | probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ |
| 128 | perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ | 124 | /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ |
| 129 | &augmented_args, \ | 125 | return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ |
| 130 | sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen); \ | 126 | &augmented_args, \ |
| 131 | return 0; \ | 127 | sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ |
| 132 | } \ | 128 | } \ |
| 133 | int syscall_exit(syscall)(struct syscall_exit_args *args) \ | 129 | int syscall_exit(syscall)(struct syscall_exit_args *args) \ |
| 134 | { \ | 130 | { \ |
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c index b59e8812ee8c..e81b535346c0 100644 --- a/tools/perf/examples/bpf/etcsnoop.c +++ b/tools/perf/examples/bpf/etcsnoop.c | |||
| @@ -21,12 +21,8 @@ | |||
| 21 | 21 | ||
| 22 | #include <stdio.h> | 22 | #include <stdio.h> |
| 23 | 23 | ||
| 24 | struct bpf_map SEC("maps") __augmented_syscalls__ = { | 24 | /* bpf-output associated map */ |
| 25 | .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, | 25 | bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); |
| 26 | .key_size = sizeof(int), | ||
| 27 | .value_size = sizeof(u32), | ||
| 28 | .max_entries = __NR_CPUS__, | ||
| 29 | }; | ||
| 30 | 26 | ||
| 31 | struct augmented_filename { | 27 | struct augmented_filename { |
| 32 | int size; | 28 | int size; |
| @@ -49,11 +45,11 @@ int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ | |||
| 49 | args->filename_ptr); \ | 45 | args->filename_ptr); \ |
| 50 | if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ | 46 | if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ |
| 51 | return 0; \ | 47 | return 0; \ |
| 52 | perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ | 48 | /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ |
| 53 | &augmented_args, \ | 49 | return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ |
| 54 | (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ | 50 | &augmented_args, \ |
| 55 | augmented_args.filename.size)); \ | 51 | (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ |
| 56 | return 0; \ | 52 | augmented_args.filename.size)); \ |
| 57 | } | 53 | } |
| 58 | 54 | ||
| 59 | struct syscall_enter_openat_args { | 55 | struct syscall_enter_openat_args { |
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h index e667577207dc..5df7ed9d9020 100644 --- a/tools/perf/include/bpf/bpf.h +++ b/tools/perf/include/bpf/bpf.h | |||
| @@ -18,6 +18,14 @@ struct bpf_map { | |||
| 18 | unsigned int numa_node; | 18 | unsigned int numa_node; |
| 19 | }; | 19 | }; |
| 20 | 20 | ||
| 21 | #define bpf_map(name, _type, type_key, type_val, _max_entries) \ | ||
| 22 | struct bpf_map SEC("maps") name = { \ | ||
| 23 | .type = BPF_MAP_TYPE_##_type, \ | ||
| 24 | .key_size = sizeof(type_key), \ | ||
| 25 | .value_size = sizeof(type_val), \ | ||
| 26 | .max_entries = _max_entries, \ | ||
| 27 | } | ||
| 28 | |||
| 21 | /* | 29 | /* |
| 22 | * FIXME: this should receive .max_entries as a parameter, as careful | 30 | * FIXME: this should receive .max_entries as a parameter, as careful |
| 23 | * tuning of these limits is needed to avoid hitting limits that | 31 | * tuning of these limits is needed to avoid hitting limits that |
| @@ -26,13 +34,7 @@ struct bpf_map { | |||
| 26 | * For the current need, 'perf trace --filter-pids', 64 should | 34 | * For the current need, 'perf trace --filter-pids', 64 should |
| 27 | * be good enough, but this surely needs to be revisited. | 35 | * be good enough, but this surely needs to be revisited. |
| 28 | */ | 36 | */ |
| 29 | #define pid_map(name, value_type) \ | 37 | #define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64) |
| 30 | struct bpf_map SEC("maps") name = { \ | ||
| 31 | .type = BPF_MAP_TYPE_HASH, \ | ||
| 32 | .key_size = sizeof(pid_t), \ | ||
| 33 | .value_size = sizeof(value_type), \ | ||
| 34 | .max_entries = 64, \ | ||
| 35 | } | ||
| 36 | 38 | ||
| 37 | static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; | 39 | static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; |
| 38 | static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; | 40 | static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 388c6dd128b8..b120e547ddc7 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
| @@ -66,6 +66,7 @@ struct record_opts { | |||
| 66 | bool ignore_missing_thread; | 66 | bool ignore_missing_thread; |
| 67 | bool strict_freq; | 67 | bool strict_freq; |
| 68 | bool sample_id; | 68 | bool sample_id; |
| 69 | bool bpf_event; | ||
| 69 | unsigned int freq; | 70 | unsigned int freq; |
| 70 | unsigned int mmap_pages; | 71 | unsigned int mmap_pages; |
| 71 | unsigned int auxtrace_mmap_pages; | 72 | unsigned int auxtrace_mmap_pages; |
| @@ -83,6 +84,14 @@ struct record_opts { | |||
| 83 | clockid_t clockid; | 84 | clockid_t clockid; |
| 84 | u64 clockid_res_ns; | 85 | u64 clockid_res_ns; |
| 85 | int nr_cblocks; | 86 | int nr_cblocks; |
| 87 | int affinity; | ||
| 88 | }; | ||
| 89 | |||
| 90 | enum perf_affinity { | ||
| 91 | PERF_AFFINITY_SYS = 0, | ||
| 92 | PERF_AFFINITY_NODE, | ||
| 93 | PERF_AFFINITY_CPU, | ||
| 94 | PERF_AFFINITY_MAX | ||
| 86 | }; | 95 | }; |
| 87 | 96 | ||
| 88 | struct option; | 97 | struct option; |
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json new file mode 100644 index 000000000000..bffb2d4a6420 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json | |||
| @@ -0,0 +1,2245 @@ | |||
| 1 | [ | ||
| 2 | { | ||
| 3 | "BriefDescription": "% of finished branches that were treated as BC+8", | ||
| 4 | "MetricExpr": "PM_BR_BC_8_CONV / PM_BRU_FIN * 100", | ||
| 5 | "MetricGroup": "branch_prediction", | ||
| 6 | "MetricName": "bc_8_branch_ratio_percent" | ||
| 7 | }, | ||
| 8 | { | ||
| 9 | "BriefDescription": "% of finished branches that were pairable but not treated as BC+8", | ||
| 10 | "MetricExpr": "PM_BR_BC_8 / PM_BRU_FIN * 100", | ||
| 11 | "MetricGroup": "branch_prediction", | ||
| 12 | "MetricName": "bc_8_not_converted_branch_ratio_percent" | ||
| 13 | }, | ||
| 14 | { | ||
| 15 | "BriefDescription": "Percent of mispredicted branches out of all predicted (correctly and incorrectly) branches that completed", | ||
| 16 | "MetricExpr": "PM_BR_MPRED_CMPL / (PM_BR_PRED_BR0 + PM_BR_PRED_BR1) * 100", | ||
| 17 | "MetricGroup": "branch_prediction", | ||
| 18 | "MetricName": "br_misprediction_percent" | ||
| 19 | }, | ||
| 20 | { | ||
| 21 | "BriefDescription": "% of Branch miss predictions per instruction", | ||
| 22 | "MetricExpr": "PM_BR_MPRED_CMPL / PM_RUN_INST_CMPL * 100", | ||
| 23 | "MetricGroup": "branch_prediction", | ||
| 24 | "MetricName": "branch_mispredict_rate_percent" | ||
| 25 | }, | ||
| 26 | { | ||
| 27 | "BriefDescription": "Count cache branch misprediction per instruction", | ||
| 28 | "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100", | ||
| 29 | "MetricGroup": "branch_prediction", | ||
| 30 | "MetricName": "ccache_mispredict_rate_percent" | ||
| 31 | }, | ||
| 32 | { | ||
| 33 | "BriefDescription": "Percent of count catch mispredictions out of all completed branches that required count cache predictionn", | ||
| 34 | "MetricExpr": "PM_BR_MPRED_CCACHE / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1) * 100", | ||
| 35 | "MetricGroup": "branch_prediction", | ||
| 36 | "MetricName": "ccache_misprediction_percent" | ||
| 37 | }, | ||
| 38 | { | ||
| 39 | "BriefDescription": "CR MisPredictions per Instruction", | ||
| 40 | "MetricExpr": "PM_BR_MPRED_CR / PM_RUN_INST_CMPL * 100", | ||
| 41 | "MetricGroup": "branch_prediction", | ||
| 42 | "MetricName": "cr_mispredict_rate_percent" | ||
| 43 | }, | ||
| 44 | { | ||
| 45 | "BriefDescription": "Link stack branch misprediction", | ||
| 46 | "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / PM_RUN_INST_CMPL * 100", | ||
| 47 | "MetricGroup": "branch_prediction", | ||
| 48 | "MetricName": "lstack_mispredict_rate_percent" | ||
| 49 | }, | ||
| 50 | { | ||
| 51 | "BriefDescription": "Percent of link stack mispredictions out of all completed branches that required link stack prediction", | ||
| 52 | "MetricExpr": "(PM_BR_MPRED_TA - PM_BR_MPRED_CCACHE) / (PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100", | ||
| 53 | "MetricGroup": "branch_prediction", | ||
| 54 | "MetricName": "lstack_misprediction_percent" | ||
| 55 | }, | ||
| 56 | { | ||
| 57 | "BriefDescription": "TA MisPredictions per Instruction", | ||
| 58 | "MetricExpr": "PM_BR_MPRED_TA / PM_RUN_INST_CMPL * 100", | ||
| 59 | "MetricGroup": "branch_prediction", | ||
| 60 | "MetricName": "ta_mispredict_rate_percent" | ||
| 61 | }, | ||
| 62 | { | ||
| 63 | "BriefDescription": "Percent of target address mispredictions out of all completed branches that required address prediction", | ||
| 64 | "MetricExpr": "PM_BR_MPRED_TA / (PM_BR_PRED_CCACHE_BR0 + PM_BR_PRED_CCACHE_BR1 + PM_BR_PRED_LSTACK_BR0 + PM_BR_PRED_LSTACK_BR1) * 100", | ||
| 65 | "MetricGroup": "branch_prediction", | ||
| 66 | "MetricName": "ta_misprediction_percent" | ||
| 67 | }, | ||
| 68 | { | ||
| 69 | "BriefDescription": "Percent of branches completed that were taken", | ||
| 70 | "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BR_CMPL", | ||
| 71 | "MetricGroup": "branch_prediction", | ||
| 72 | "MetricName": "taken_branches_percent" | ||
| 73 | }, | ||
| 74 | { | ||
| 75 | "BriefDescription": "Percent of chip+group+sys pumps that were incorrectly predicted", | ||
| 76 | "MetricExpr": "PM_PUMP_MPRED * 100 / (PM_PUMP_CPRED + PM_PUMP_MPRED)", | ||
| 77 | "MetricGroup": "bus_stats", | ||
| 78 | "MetricName": "any_pump_mpred_percent" | ||
| 79 | }, | ||
| 80 | { | ||
| 81 | "BriefDescription": "Percent of chip pumps that were correctly predicted as chip pumps the first time", | ||
| 82 | "MetricExpr": "PM_CHIP_PUMP_CPRED * 100 / PM_L2_CHIP_PUMP", | ||
| 83 | "MetricGroup": "bus_stats", | ||
| 84 | "MetricName": "chip_pump_cpred_percent" | ||
| 85 | }, | ||
| 86 | { | ||
| 87 | "BriefDescription": "Percent of group pumps that were correctly predicted as group pumps the first time", | ||
| 88 | "MetricExpr": "PM_GRP_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP", | ||
| 89 | "MetricGroup": "bus_stats", | ||
| 90 | "MetricName": "group_pump_cpred_percent" | ||
| 91 | }, | ||
| 92 | { | ||
| 93 | "BriefDescription": "Percent of system pumps that were correctly predicted as group pumps the first time", | ||
| 94 | "MetricExpr": "PM_SYS_PUMP_CPRED * 100 / PM_L2_GROUP_PUMP", | ||
| 95 | "MetricGroup": "bus_stats", | ||
| 96 | "MetricName": "sys_pump_cpred_percent" | ||
| 97 | }, | ||
| 98 | { | ||
| 99 | "BriefDescription": "Cycles stalled due to CRU or BRU operations", | ||
| 100 | "MetricExpr": "PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL", | ||
| 101 | "MetricGroup": "cpi_breakdown", | ||
| 102 | "MetricName": "bru_cru_stall_cpi" | ||
| 103 | }, | ||
| 104 | { | ||
| 105 | "BriefDescription": "Cycles stalled due to ISU Branch Operations", | ||
| 106 | "MetricExpr": "PM_CMPLU_STALL_BRU / PM_RUN_INST_CMPL", | ||
| 107 | "MetricGroup": "cpi_breakdown", | ||
| 108 | "MetricName": "bru_stall_cpi" | ||
| 109 | }, | ||
| 110 | { | ||
| 111 | "BriefDescription": "Cycles in which a Group Completed", | ||
| 112 | "MetricExpr": "PM_GRP_CMPL / PM_RUN_INST_CMPL", | ||
| 113 | "MetricGroup": "cpi_breakdown", | ||
| 114 | "MetricName": "completion_cpi" | ||
| 115 | }, | ||
| 116 | { | ||
| 117 | "BriefDescription": "Cycles stalled by CO queue full", | ||
| 118 | "MetricExpr": "PM_CMPLU_STALL_COQ_FULL / PM_RUN_INST_CMPL", | ||
| 119 | "MetricGroup": "cpi_breakdown", | ||
| 120 | "MetricName": "coq_full_stall_cpi" | ||
| 121 | }, | ||
| 122 | { | ||
| 123 | "BriefDescription": "Cycles stalled due to CRU Operations", | ||
| 124 | "MetricExpr": "(PM_CMPLU_STALL_BRU_CRU - PM_CMPLU_STALL_BRU) / PM_RUN_INST_CMPL", | ||
| 125 | "MetricGroup": "cpi_breakdown", | ||
| 126 | "MetricName": "cru_stall_cpi" | ||
| 127 | }, | ||
| 128 | { | ||
| 129 | "BriefDescription": "Cycles stalled by flushes", | ||
| 130 | "MetricExpr": "PM_CMPLU_STALL_FLUSH / PM_RUN_INST_CMPL", | ||
| 131 | "MetricGroup": "cpi_breakdown", | ||
| 132 | "MetricName": "flush_stall_cpi" | ||
| 133 | }, | ||
| 134 | { | ||
| 135 | "BriefDescription": "Cycles stalled by FXU Multi-Cycle Instructions", | ||
| 136 | "MetricExpr": "PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL", | ||
| 137 | "MetricGroup": "cpi_breakdown", | ||
| 138 | "MetricName": "fxu_multi_cyc_cpi" | ||
| 139 | }, | ||
| 140 | { | ||
| 141 | "BriefDescription": "Cycles stalled by FXU", | ||
| 142 | "MetricExpr": "PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL", | ||
| 143 | "MetricGroup": "cpi_breakdown", | ||
| 144 | "MetricName": "fxu_stall_cpi" | ||
| 145 | }, | ||
| 146 | { | ||
| 147 | "BriefDescription": "Other cycles stalled by FXU", | ||
| 148 | "MetricExpr": "(PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXLONG / PM_RUN_INST_CMPL)", | ||
| 149 | "MetricGroup": "cpi_breakdown", | ||
| 150 | "MetricName": "fxu_stall_other_cpi" | ||
| 151 | }, | ||
| 152 | { | ||
| 153 | "BriefDescription": "Cycles GCT empty due to Branch Mispredicts", | ||
| 154 | "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL", | ||
| 155 | "MetricGroup": "cpi_breakdown", | ||
| 156 | "MetricName": "gct_empty_br_mpred_cpi" | ||
| 157 | }, | ||
| 158 | { | ||
| 159 | "BriefDescription": "Cycles GCT empty due to Branch Mispredicts and Icache Misses", | ||
| 160 | "MetricExpr": "PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL", | ||
| 161 | "MetricGroup": "cpi_breakdown", | ||
| 162 | "MetricName": "gct_empty_br_mpred_ic_miss_cpi" | ||
| 163 | }, | ||
| 164 | { | ||
| 165 | "BriefDescription": "GCT empty cycles", | ||
| 166 | "MetricExpr": "PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL", | ||
| 167 | "MetricGroup": "cpi_breakdown", | ||
| 168 | "MetricName": "gct_empty_cpi" | ||
| 169 | }, | ||
| 170 | { | ||
| 171 | "BriefDescription": "Cycles GCT empty where dispatch was held", | ||
| 172 | "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)", | ||
| 173 | "MetricGroup": "cpi_breakdown", | ||
| 174 | "MetricName": "gct_empty_disp_held_cpi" | ||
| 175 | }, | ||
| 176 | { | ||
| 177 | "BriefDescription": "Cycles GCT empty where dispatch was held due to issue queue", | ||
| 178 | "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL", | ||
| 179 | "MetricGroup": "cpi_breakdown", | ||
| 180 | "MetricName": "gct_empty_disp_held_issq_cpi" | ||
| 181 | }, | ||
| 182 | { | ||
| 183 | "BriefDescription": "Cycles GCT empty where dispatch was held due to maps", | ||
| 184 | "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL", | ||
| 185 | "MetricGroup": "cpi_breakdown", | ||
| 186 | "MetricName": "gct_empty_disp_held_map_cpi" | ||
| 187 | }, | ||
| 188 | { | ||
| 189 | "BriefDescription": "Cycles GCT empty where dispatch was held due to syncs and other effects", | ||
| 190 | "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL", | ||
| 191 | "MetricGroup": "cpi_breakdown", | ||
| 192 | "MetricName": "gct_empty_disp_held_other_cpi" | ||
| 193 | }, | ||
| 194 | { | ||
| 195 | "BriefDescription": "Cycles GCT empty where dispatch was held due to SRQ", | ||
| 196 | "MetricExpr": "PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL", | ||
| 197 | "MetricGroup": "cpi_breakdown", | ||
| 198 | "MetricName": "gct_empty_disp_held_srq_cpi" | ||
| 199 | }, | ||
| 200 | { | ||
| 201 | "BriefDescription": "Cycles stalled by GCT empty due to Icache misses", | ||
| 202 | "MetricExpr": "PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL", | ||
| 203 | "MetricGroup": "cpi_breakdown", | ||
| 204 | "MetricName": "gct_empty_ic_miss_cpi" | ||
| 205 | }, | ||
| 206 | { | ||
| 207 | "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve in the local L2 or L3", | ||
| 208 | "MetricExpr": "(PM_GCT_NOSLOT_IC_MISS - PM_GCT_NOSLOT_IC_L3MISS) / PM_RUN_INST_CMPL", | ||
| 209 | "MetricGroup": "cpi_breakdown", | ||
| 210 | "MetricName": "gct_empty_ic_miss_l2l3_cpi" | ||
| 211 | }, | ||
| 212 | { | ||
| 213 | "BriefDescription": "Cycles stalled by GCT empty due to Icache misses that resolve off-chip", | ||
| 214 | "MetricExpr": "PM_GCT_NOSLOT_IC_L3MISS / PM_RUN_INST_CMPL", | ||
| 215 | "MetricGroup": "cpi_breakdown", | ||
| 216 | "MetricName": "gct_empty_ic_miss_l3miss_cpi" | ||
| 217 | }, | ||
| 218 | { | ||
| 219 | "BriefDescription": "Other GCT empty cycles", | ||
| 220 | "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_IC_MISS / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_BR_MPRED_ICMISS / PM_RUN_INST_CMPL) - ((PM_GCT_NOSLOT_DISP_HELD_MAP / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_SRQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_ISSQ / PM_RUN_INST_CMPL) + (PM_GCT_NOSLOT_DISP_HELD_OTHER / PM_RUN_INST_CMPL))", | ||
| 221 | "MetricGroup": "cpi_breakdown", | ||
| 222 | "MetricName": "gct_empty_other_cpi" | ||
| 223 | }, | ||
| 224 | { | ||
| 225 | "BriefDescription": "Cycles stalled by heavyweight syncs", | ||
| 226 | "MetricExpr": "PM_CMPLU_STALL_HWSYNC / PM_RUN_INST_CMPL", | ||
| 227 | "MetricGroup": "cpi_breakdown", | ||
| 228 | "MetricName": "hwsync_stall_cpi" | ||
| 229 | }, | ||
| 230 | { | ||
| 231 | "BriefDescription": "Cycles stalled by LSU", | ||
| 232 | "MetricExpr": "PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL", | ||
| 233 | "MetricGroup": "cpi_breakdown", | ||
| 234 | "MetricName": "lsu_stall_cpi" | ||
| 235 | }, | ||
| 236 | { | ||
| 237 | "BriefDescription": "Cycles stalled by D-Cache Misses", | ||
| 238 | "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL", | ||
| 239 | "MetricGroup": "cpi_breakdown", | ||
| 240 | "MetricName": "lsu_stall_dcache_miss_cpi" | ||
| 241 | }, | ||
| 242 | { | ||
| 243 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in distant interventions and memory", | ||
| 244 | "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_REMOTE) / PM_RUN_INST_CMPL", | ||
| 245 | "MetricGroup": "cpi_breakdown", | ||
| 246 | "MetricName": "lsu_stall_dcache_miss_distant_cpi" | ||
| 247 | }, | ||
| 248 | { | ||
| 249 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote or distant caches", | ||
| 250 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31 / PM_RUN_INST_CMPL", | ||
| 251 | "MetricGroup": "cpi_breakdown", | ||
| 252 | "MetricName": "lsu_stall_dcache_miss_l21l31_cpi" | ||
| 253 | }, | ||
| 254 | { | ||
| 255 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was a conflict", | ||
| 256 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT / PM_RUN_INST_CMPL", | ||
| 257 | "MetricGroup": "cpi_breakdown", | ||
| 258 | "MetricName": "lsu_stall_dcache_miss_l2l3_conflict_cpi" | ||
| 259 | }, | ||
| 260 | { | ||
| 261 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3", | ||
| 262 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3 / PM_RUN_INST_CMPL", | ||
| 263 | "MetricGroup": "cpi_breakdown", | ||
| 264 | "MetricName": "lsu_stall_dcache_miss_l2l3_cpi" | ||
| 265 | }, | ||
| 266 | { | ||
| 267 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in the local L2 or L3, where there was no conflict", | ||
| 268 | "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT) / PM_RUN_INST_CMPL", | ||
| 269 | "MetricGroup": "cpi_breakdown", | ||
| 270 | "MetricName": "lsu_stall_dcache_miss_l2l3_noconflict_cpi" | ||
| 271 | }, | ||
| 272 | { | ||
| 273 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in other core's caches or memory", | ||
| 274 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS / PM_RUN_INST_CMPL", | ||
| 275 | "MetricGroup": "cpi_breakdown", | ||
| 276 | "MetricName": "lsu_stall_dcache_miss_l3miss_cpi" | ||
| 277 | }, | ||
| 278 | { | ||
| 279 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in local memory or local L4", | ||
| 280 | "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM / PM_RUN_INST_CMPL", | ||
| 281 | "MetricGroup": "cpi_breakdown", | ||
| 282 | "MetricName": "lsu_stall_dcache_miss_lmem_cpi" | ||
| 283 | }, | ||
| 284 | { | ||
| 285 | "BriefDescription": "Cycles stalled by D-Cache Misses that resolved in remote interventions and memory", | ||
| 286 | "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE / PM_RUN_INST_CMPL", | ||
| 287 | "MetricGroup": "cpi_breakdown", | ||
| 288 | "MetricName": "lsu_stall_dcache_miss_remote_cpi" | ||
| 289 | }, | ||
| 290 | { | ||
| 291 | "BriefDescription": "Cycles stalled by ERAT Translation rejects", | ||
| 292 | "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL", | ||
| 293 | "MetricGroup": "cpi_breakdown", | ||
| 294 | "MetricName": "lsu_stall_erat_miss_cpi" | ||
| 295 | }, | ||
| 296 | { | ||
| 297 | "BriefDescription": "Cycles stalled by LSU load finishes", | ||
| 298 | "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL", | ||
| 299 | "MetricGroup": "cpi_breakdown", | ||
| 300 | "MetricName": "lsu_stall_ld_fin_cpi" | ||
| 301 | }, | ||
| 302 | { | ||
| 303 | "BriefDescription": "Cycles stalled by LHS rejects", | ||
| 304 | "MetricExpr": "PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL", | ||
| 305 | "MetricGroup": "cpi_breakdown", | ||
| 306 | "MetricName": "lsu_stall_lhs_cpi" | ||
| 307 | }, | ||
| 308 | { | ||
| 309 | "BriefDescription": "Cycles stalled by LMQ Full rejects", | ||
| 310 | "MetricExpr": "PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL", | ||
| 311 | "MetricGroup": "cpi_breakdown", | ||
| 312 | "MetricName": "lsu_stall_lmq_full_cpi" | ||
| 313 | }, | ||
| 314 | { | ||
| 315 | "BriefDescription": "Cycles stalled by Other LSU Operations", | ||
| 316 | "MetricExpr": "(PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LOAD_FINISH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL)", | ||
| 317 | "MetricGroup": "cpi_breakdown", | ||
| 318 | "MetricName": "lsu_stall_other_cpi" | ||
| 319 | }, | ||
| 320 | { | ||
| 321 | "BriefDescription": "Cycles stalled by LSU Rejects", | ||
| 322 | "MetricExpr": "PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL", | ||
| 323 | "MetricGroup": "cpi_breakdown", | ||
| 324 | "MetricName": "lsu_stall_reject_cpi" | ||
| 325 | }, | ||
| 326 | { | ||
| 327 | "BriefDescription": "Cycles stalled by Other LSU Rejects", | ||
| 328 | "MetricExpr": "(PM_CMPLU_STALL_REJECT / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJECT_LHS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_ERAT_MISS / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_REJ_LMQ_FULL / PM_RUN_INST_CMPL)", | ||
| 329 | "MetricGroup": "cpi_breakdown", | ||
| 330 | "MetricName": "lsu_stall_reject_other_cpi" | ||
| 331 | }, | ||
| 332 | { | ||
| 333 | "BriefDescription": "Cycles stalled by LSU store forwarding", | ||
| 334 | "MetricExpr": "PM_CMPLU_STALL_ST_FWD / PM_RUN_INST_CMPL", | ||
| 335 | "MetricGroup": "cpi_breakdown", | ||
| 336 | "MetricName": "lsu_stall_st_fwd_cpi" | ||
| 337 | }, | ||
| 338 | { | ||
| 339 | "BriefDescription": "Cycles stalled by LSU Stores", | ||
| 340 | "MetricExpr": "PM_CMPLU_STALL_STORE / PM_RUN_INST_CMPL", | ||
| 341 | "MetricGroup": "cpi_breakdown", | ||
| 342 | "MetricName": "lsu_stall_store_cpi" | ||
| 343 | }, | ||
| 344 | { | ||
| 345 | "BriefDescription": "Cycles stalled by lightweight syncs", | ||
| 346 | "MetricExpr": "PM_CMPLU_STALL_LWSYNC / PM_RUN_INST_CMPL", | ||
| 347 | "MetricGroup": "cpi_breakdown", | ||
| 348 | "MetricName": "lwsync_stall_cpi" | ||
| 349 | }, | ||
| 350 | { | ||
| 351 | "MetricExpr": "PM_CMPLU_STALL_MEM_ECC_DELAY / PM_RUN_INST_CMPL", | ||
| 352 | "MetricGroup": "cpi_breakdown", | ||
| 353 | "MetricName": "mem_ecc_delay_stall_cpi" | ||
| 354 | }, | ||
| 355 | { | ||
| 356 | "BriefDescription": "Cycles stalled by nops (nothing next to finish)", | ||
| 357 | "MetricExpr": "PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL", | ||
| 358 | "MetricGroup": "cpi_breakdown", | ||
| 359 | "MetricName": "no_ntf_stall_cpi" | ||
| 360 | }, | ||
| 361 | { | ||
| 362 | "MetricExpr": "PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL", | ||
| 363 | "MetricGroup": "cpi_breakdown", | ||
| 364 | "MetricName": "ntcg_all_fin_cpi" | ||
| 365 | }, | ||
| 366 | { | ||
| 367 | "MetricExpr": "PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL", | ||
| 368 | "MetricGroup": "cpi_breakdown", | ||
| 369 | "MetricName": "ntcg_flush_cpi" | ||
| 370 | }, | ||
| 371 | { | ||
| 372 | "BriefDescription": "Other thread block stall cycles", | ||
| 373 | "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_LWSYNC - PM_CMPLU_STALL_HWSYNC - PM_CMPLU_STALL_MEM_ECC_DELAY - PM_CMPLU_STALL_FLUSH - PM_CMPLU_STALL_COQ_FULL) / PM_RUN_INST_CMPL", | ||
| 374 | "MetricGroup": "cpi_breakdown", | ||
| 375 | "MetricName": "other_block_stall_cpi" | ||
| 376 | }, | ||
| 377 | { | ||
| 378 | "BriefDescription": "Cycles unaccounted for", | ||
| 379 | "MetricExpr": "(PM_RUN_CYC / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_GCT_NOSLOT_CYC / PM_RUN_INST_CMPL) - (PM_NTCG_ALL_FIN / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL) - (PM_GRP_CMPL / PM_RUN_INST_CMPL)", | ||
| 380 | "MetricGroup": "cpi_breakdown", | ||
| 381 | "MetricName": "other_cpi" | ||
| 382 | }, | ||
| 383 | { | ||
| 384 | "BriefDescription": "Stall cycles unaccounted for", | ||
| 385 | "MetricExpr": "(PM_CMPLU_STALL / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_BRU_CRU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_FXU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_LSU / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NTCG_FLUSH / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_NO_NTF / PM_RUN_INST_CMPL)", | ||
| 386 | "MetricGroup": "cpi_breakdown", | ||
| 387 | "MetricName": "other_stall_cpi" | ||
| 388 | }, | ||
| 389 | { | ||
| 390 | "BriefDescription": "Run cycles per run instruction", | ||
| 391 | "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", | ||
| 392 | "MetricGroup": "cpi_breakdown", | ||
| 393 | "MetricName": "run_cpi" | ||
| 394 | }, | ||
| 395 | { | ||
| 396 | "BriefDescription": "Completion Stall Cycles", | ||
| 397 | "MetricExpr": "PM_CMPLU_STALL / PM_RUN_INST_CMPL", | ||
| 398 | "MetricGroup": "cpi_breakdown", | ||
| 399 | "MetricName": "stall_cpi" | ||
| 400 | }, | ||
| 401 | { | ||
| 402 | "BriefDescription": "Cycles a thread was blocked", | ||
| 403 | "MetricExpr": "PM_CMPLU_STALL_THRD / PM_RUN_INST_CMPL", | ||
| 404 | "MetricGroup": "cpi_breakdown", | ||
| 405 | "MetricName": "thread_block_stall_cpi" | ||
| 406 | }, | ||
| 407 | { | ||
| 408 | "BriefDescription": "Cycles stalled by VSU", | ||
| 409 | "MetricExpr": "PM_CMPLU_STALL_VSU / PM_RUN_INST_CMPL", | ||
| 410 | "MetricGroup": "cpi_breakdown", | ||
| 411 | "MetricName": "vsu_stall_cpi" | ||
| 412 | }, | ||
| 413 | { | ||
| 414 | "BriefDescription": "Cycles stalled by other VSU Operations", | ||
| 415 | "MetricExpr": "(PM_CMPLU_STALL_VSU - PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_SCALAR) / PM_RUN_INST_CMPL", | ||
| 416 | "MetricGroup": "cpi_breakdown", | ||
| 417 | "MetricName": "vsu_stall_other_cpi" | ||
| 418 | }, | ||
| 419 | { | ||
| 420 | "BriefDescription": "Cycles stalled by VSU Scalar Operations", | ||
| 421 | "MetricExpr": "PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL", | ||
| 422 | "MetricGroup": "cpi_breakdown", | ||
| 423 | "MetricName": "vsu_stall_scalar_cpi" | ||
| 424 | }, | ||
| 425 | { | ||
| 426 | "BriefDescription": "Cycles stalled by VSU Scalar Long Operations", | ||
| 427 | "MetricExpr": "PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL", | ||
| 428 | "MetricGroup": "cpi_breakdown", | ||
| 429 | "MetricName": "vsu_stall_scalar_long_cpi" | ||
| 430 | }, | ||
| 431 | { | ||
| 432 | "BriefDescription": "Cycles stalled by Other VSU Scalar Operations", | ||
| 433 | "MetricExpr": "(PM_CMPLU_STALL_SCALAR / PM_RUN_INST_CMPL) - (PM_CMPLU_STALL_SCALAR_LONG / PM_RUN_INST_CMPL)", | ||
| 434 | "MetricGroup": "cpi_breakdown", | ||
| 435 | "MetricName": "vsu_stall_scalar_other_cpi" | ||
| 436 | }, | ||
| 437 | { | ||
| 438 | "BriefDescription": "Cycles stalled by VSU Vector Operations", | ||
| 439 | "MetricExpr": "PM_CMPLU_STALL_VECTOR / PM_RUN_INST_CMPL", | ||
| 440 | "MetricGroup": "cpi_breakdown", | ||
| 441 | "MetricName": "vsu_stall_vector_cpi" | ||
| 442 | }, | ||
| 443 | { | ||
| 444 | "BriefDescription": "Cycles stalled by VSU Vector Long Operations", | ||
| 445 | "MetricExpr": "PM_CMPLU_STALL_VECTOR_LONG / PM_RUN_INST_CMPL", | ||
| 446 | "MetricGroup": "cpi_breakdown", | ||
| 447 | "MetricName": "vsu_stall_vector_long_cpi" | ||
| 448 | }, | ||
| 449 | { | ||
| 450 | "BriefDescription": "Cycles stalled by other VSU Vector Operations", | ||
| 451 | "MetricExpr": "(PM_CMPLU_STALL_VECTOR - PM_CMPLU_STALL_VECTOR_LONG) / PM_RUN_INST_CMPL", | ||
| 452 | "MetricGroup": "cpi_breakdown", | ||
| 453 | "MetricName": "vsu_stall_vector_other_cpi" | ||
| 454 | }, | ||
| 455 | { | ||
| 456 | "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst", | ||
| 457 | "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 458 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 459 | "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent" | ||
| 460 | }, | ||
| 461 | { | ||
| 462 | "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst", | ||
| 463 | "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 464 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 465 | "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent" | ||
| 466 | }, | ||
| 467 | { | ||
| 468 | "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst", | ||
| 469 | "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL", | ||
| 470 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 471 | "MetricName": "dl1_reload_from_dl4_rate_percent" | ||
| 472 | }, | ||
| 473 | { | ||
| 474 | "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst", | ||
| 475 | "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL", | ||
| 476 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 477 | "MetricName": "dl1_reload_from_dmem_rate_percent" | ||
| 478 | }, | ||
| 479 | { | ||
| 480 | "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", | ||
| 481 | "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 482 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 483 | "MetricName": "dl1_reload_from_l21_mod_rate_percent" | ||
| 484 | }, | ||
| 485 | { | ||
| 486 | "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", | ||
| 487 | "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 488 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 489 | "MetricName": "dl1_reload_from_l21_shr_rate_percent" | ||
| 490 | }, | ||
| 491 | { | ||
| 492 | "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced a Load-Hit-Store conflict", | ||
| 493 | "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_RUN_INST_CMPL", | ||
| 494 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 495 | "MetricName": "dl1_reload_from_l2_lhs_rate_percent" | ||
| 496 | }, | ||
| 497 | { | ||
| 498 | "BriefDescription": "% of DL1 reloads from beyond the local L2 per Inst", | ||
| 499 | "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 500 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 501 | "MetricName": "dl1_reload_from_l2_miss_rate_percent" | ||
| 502 | }, | ||
| 503 | { | ||
| 504 | "BriefDescription": "Percentage of L2 load hits per instruction where the L2 did not experience a conflict", | ||
| 505 | "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_RUN_INST_CMPL", | ||
| 506 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 507 | "MetricName": "dl1_reload_from_l2_no_conflict_rate_percent" | ||
| 508 | }, | ||
| 509 | { | ||
| 510 | "BriefDescription": "Percentage of L2 load hits per instruction where the L2 experienced some conflict other than Load-Hit-Store", | ||
| 511 | "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_RUN_INST_CMPL", | ||
| 512 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 513 | "MetricName": "dl1_reload_from_l2_other_conflict_rate_percent" | ||
| 514 | }, | ||
| 515 | { | ||
| 516 | "BriefDescription": "% of DL1 reloads from L2 per Inst", | ||
| 517 | "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 518 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 519 | "MetricName": "dl1_reload_from_l2_rate_percent" | ||
| 520 | }, | ||
| 521 | { | ||
| 522 | "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst", | ||
| 523 | "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 524 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 525 | "MetricName": "dl1_reload_from_l31_mod_rate_percent" | ||
| 526 | }, | ||
| 527 | { | ||
| 528 | "BriefDescription": "% of DL1 reloads from Private L3 S state, other core per Inst", | ||
| 529 | "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 530 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 531 | "MetricName": "dl1_reload_from_l31_shr_rate_percent" | ||
| 532 | }, | ||
| 533 | { | ||
| 534 | "BriefDescription": "Percentage of L3 load hits per instruction where the load collided with a pending prefetch", | ||
| 535 | "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_RUN_INST_CMPL", | ||
| 536 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 537 | "MetricName": "dl1_reload_from_l3_conflict_rate_percent" | ||
| 538 | }, | ||
| 539 | { | ||
| 540 | "BriefDescription": "% of DL1 reloads from beyond the local L3 per Inst", | ||
| 541 | "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 542 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 543 | "MetricName": "dl1_reload_from_l3_miss_rate_percent" | ||
| 544 | }, | ||
| 545 | { | ||
| 546 | "BriefDescription": "Percentage of L3 load hits per instruction where the L3 did not experience a conflict", | ||
| 547 | "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_RUN_INST_CMPL", | ||
| 548 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 549 | "MetricName": "dl1_reload_from_l3_no_conflict_rate_percent" | ||
| 550 | }, | ||
| 551 | { | ||
| 552 | "BriefDescription": "% of DL1 Reloads from L3 per Inst", | ||
| 553 | "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 554 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 555 | "MetricName": "dl1_reload_from_l3_rate_percent" | ||
| 556 | }, | ||
| 557 | { | ||
| 558 | "BriefDescription": "% of DL1 Reloads from Local L4 per Inst", | ||
| 559 | "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 560 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 561 | "MetricName": "dl1_reload_from_ll4_rate_percent" | ||
| 562 | }, | ||
| 563 | { | ||
| 564 | "BriefDescription": "% of DL1 Reloads from Local Memory per Inst", | ||
| 565 | "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 566 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 567 | "MetricName": "dl1_reload_from_lmem_rate_percent" | ||
| 568 | }, | ||
| 569 | { | ||
| 570 | "BriefDescription": "% of DL1 Reloads from Remote L2 or L3 (Modified) per Inst", | ||
| 571 | "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 572 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 573 | "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent" | ||
| 574 | }, | ||
| 575 | { | ||
| 576 | "BriefDescription": "% of DL1 Reloads from Remote L2 or L3 (Shared) per Inst", | ||
| 577 | "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 578 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 579 | "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent" | ||
| 580 | }, | ||
| 581 | { | ||
| 582 | "BriefDescription": "% of DL1 Reloads from Remote L4 per Inst", | ||
| 583 | "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL", | ||
| 584 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 585 | "MetricName": "dl1_reload_from_rl4_rate_percent" | ||
| 586 | }, | ||
| 587 | { | ||
| 588 | "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", | ||
| 589 | "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL", | ||
| 590 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 591 | "MetricName": "dl1_reload_from_rmem_rate_percent" | ||
| 592 | }, | ||
| 593 | { | ||
| 594 | "BriefDescription": "Percentage of L1 demand load misses per run instruction", | ||
| 595 | "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL", | ||
| 596 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 597 | "MetricName": "l1_ld_miss_rate_percent" | ||
| 598 | }, | ||
| 599 | { | ||
| 600 | "BriefDescription": "% of DL1 misses that result in a cache reload", | ||
| 601 | "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1", | ||
| 602 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 603 | "MetricName": "dl1_miss_reloads_percent" | ||
| 604 | }, | ||
| 605 | { | ||
| 606 | "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)", | ||
| 607 | "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 608 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 609 | "MetricName": "dl1_reload_from_dl2l3_mod_percent" | ||
| 610 | }, | ||
| 611 | { | ||
| 612 | "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)", | ||
| 613 | "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 614 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 615 | "MetricName": "dl1_reload_from_dl2l3_shr_percent" | ||
| 616 | }, | ||
| 617 | { | ||
| 618 | "BriefDescription": "% of DL1 dL1_Reloads from Distant L4", | ||
| 619 | "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 620 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 621 | "MetricName": "dl1_reload_from_dl4_percent" | ||
| 622 | }, | ||
| 623 | { | ||
| 624 | "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory", | ||
| 625 | "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 626 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 627 | "MetricName": "dl1_reload_from_dmem_percent" | ||
| 628 | }, | ||
| 629 | { | ||
| 630 | "BriefDescription": "% of DL1 reloads from Private L2, other core", | ||
| 631 | "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 632 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 633 | "MetricName": "dl1_reload_from_l21_mod_percent" | ||
| 634 | }, | ||
| 635 | { | ||
| 636 | "BriefDescription": "% of DL1 reloads from Private L2, other core", | ||
| 637 | "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 638 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 639 | "MetricName": "dl1_reload_from_l21_shr_percent" | ||
| 640 | }, | ||
| 641 | { | ||
| 642 | "BriefDescription": "Percentage of DL1 reloads from L2 with a Load-Hit-Store conflict", | ||
| 643 | "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 644 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 645 | "MetricName": "dl1_reload_from_l2_lhs_percent" | ||
| 646 | }, | ||
| 647 | { | ||
| 648 | "BriefDescription": "Percentage of DL1 reloads from L2 with no conflicts", | ||
| 649 | "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 650 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 651 | "MetricName": "dl1_reload_from_l2_no_conflict_percent" | ||
| 652 | }, | ||
| 653 | { | ||
| 654 | "BriefDescription": "Percentage of DL1 reloads from L2 with some conflict other than Load-Hit-Store", | ||
| 655 | "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 656 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 657 | "MetricName": "dl1_reload_from_l2_other_conflict_percent" | ||
| 658 | }, | ||
| 659 | { | ||
| 660 | "BriefDescription": "% of DL1 reloads from L2", | ||
| 661 | "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 662 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 663 | "MetricName": "dl1_reload_from_l2_percent" | ||
| 664 | }, | ||
| 665 | { | ||
| 666 | "BriefDescription": "% of DL1 reloads from Private L3, other core", | ||
| 667 | "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 668 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 669 | "MetricName": "dl1_reload_from_l31_mod_percent" | ||
| 670 | }, | ||
| 671 | { | ||
| 672 | "BriefDescription": "% of DL1 reloads from Private L3, other core", | ||
| 673 | "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 674 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 675 | "MetricName": "dl1_reload_from_l31_shr_percent" | ||
| 676 | }, | ||
| 677 | { | ||
| 678 | "BriefDescription": "Percentage of DL1 reloads from L3 where the load collided with a pending prefetch", | ||
| 679 | "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 680 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 681 | "MetricName": "dl1_reload_from_l3_conflict_percent" | ||
| 682 | }, | ||
| 683 | { | ||
| 684 | "BriefDescription": "Percentage of L3 load hits per instruction where the line was brought into the L3 by a prefetch operation", | ||
| 685 | "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL", | ||
| 686 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 687 | "MetricName": "dl1_reload_from_l3_mepf_rate_percent" | ||
| 688 | }, | ||
| 689 | { | ||
| 690 | "BriefDescription": "Percentage of DL1 reloads from L3 without conflicts", | ||
| 691 | "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 692 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 693 | "MetricName": "dl1_reload_from_l3_no_conflict_percent" | ||
| 694 | }, | ||
| 695 | { | ||
| 696 | "BriefDescription": "% of DL1 Reloads from L3", | ||
| 697 | "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 698 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 699 | "MetricName": "dl1_reload_from_l3_percent" | ||
| 700 | }, | ||
| 701 | { | ||
| 702 | "BriefDescription": "% of DL1 dL1_Reloads from Local L4", | ||
| 703 | "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 704 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 705 | "MetricName": "dl1_reload_from_ll4_percent" | ||
| 706 | }, | ||
| 707 | { | ||
| 708 | "BriefDescription": "% of DL1 dL1_Reloads from Local Memory", | ||
| 709 | "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 710 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 711 | "MetricName": "dl1_reload_from_lmem_percent" | ||
| 712 | }, | ||
| 713 | { | ||
| 714 | "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)", | ||
| 715 | "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 716 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 717 | "MetricName": "dl1_reload_from_rl2l3_mod_percent" | ||
| 718 | }, | ||
| 719 | { | ||
| 720 | "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)", | ||
| 721 | "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 722 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 723 | "MetricName": "dl1_reload_from_rl2l3_shr_percent" | ||
| 724 | }, | ||
| 725 | { | ||
| 726 | "BriefDescription": "% of DL1 dL1_Reloads from Remote L4", | ||
| 727 | "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 728 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 729 | "MetricName": "dl1_reload_from_rl4_percent" | ||
| 730 | }, | ||
| 731 | { | ||
| 732 | "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory", | ||
| 733 | "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 734 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 735 | "MetricName": "dl1_reload_from_rmem_percent" | ||
| 736 | }, | ||
| 737 | { | ||
| 738 | "BriefDescription": "dL1 miss portion of CPI", | ||
| 739 | "MetricExpr": "( (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)/ (PM_RUN_CYC / PM_RUN_INST_CMPL)) * 100", | ||
| 740 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 741 | "MetricName": "dcache_miss_cpi_percent" | ||
| 742 | }, | ||
| 743 | { | ||
| 744 | "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", | ||
| 745 | "MetricExpr": "(((PM_DATA_FROM_DL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 746 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 747 | "MetricName": "dl2l3_mod_cpi_percent" | ||
| 748 | }, | ||
| 749 | { | ||
| 750 | "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi", | ||
| 751 | "MetricExpr": "(((PM_DATA_FROM_DL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 752 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 753 | "MetricName": "dl2l3_shr_cpi_percent" | ||
| 754 | }, | ||
| 755 | { | ||
| 756 | "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi", | ||
| 757 | "MetricExpr": "(((PM_DATA_FROM_DL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 758 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 759 | "MetricName": "dl4_cpi_percent" | ||
| 760 | }, | ||
| 761 | { | ||
| 762 | "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi", | ||
| 763 | "MetricExpr": "(((PM_DATA_FROM_DMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 764 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 765 | "MetricName": "dmem_cpi_percent" | ||
| 766 | }, | ||
| 767 | { | ||
| 768 | "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi", | ||
| 769 | "MetricExpr": "(((PM_DATA_FROM_L21_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 770 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 771 | "MetricName": "l21_mod_cpi_percent" | ||
| 772 | }, | ||
| 773 | { | ||
| 774 | "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi", | ||
| 775 | "MetricExpr": "(((PM_DATA_FROM_L21_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 776 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 777 | "MetricName": "l21_shr_cpi_percent" | ||
| 778 | }, | ||
| 779 | { | ||
| 780 | "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi", | ||
| 781 | "MetricExpr": "(((PM_DATA_FROM_L2 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL) ) *100", | ||
| 782 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 783 | "MetricName": "l2_cpi_percent" | ||
| 784 | }, | ||
| 785 | { | ||
| 786 | "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi", | ||
| 787 | "MetricExpr": "(((PM_DATA_FROM_L31_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 788 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 789 | "MetricName": "l31_mod_cpi_percent" | ||
| 790 | }, | ||
| 791 | { | ||
| 792 | "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi", | ||
| 793 | "MetricExpr": "(((PM_DATA_FROM_L31_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 794 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 795 | "MetricName": "l31_shr_cpi_percent" | ||
| 796 | }, | ||
| 797 | { | ||
| 798 | "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi", | ||
| 799 | "MetricExpr": "(((PM_DATA_FROM_L3 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100", | ||
| 800 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 801 | "MetricName": "l3_cpi_percent" | ||
| 802 | }, | ||
| 803 | { | ||
| 804 | "BriefDescription": "estimate of Local L4 miss rates with measured LL4 latency as a %of dcache miss cpi", | ||
| 805 | "MetricExpr": "(((PM_DATA_FROM_LL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 806 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 807 | "MetricName": "ll4_cpi_percent" | ||
| 808 | }, | ||
| 809 | { | ||
| 810 | "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi", | ||
| 811 | "MetricExpr": "(((PM_DATA_FROM_LMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 812 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 813 | "MetricName": "lmem_cpi_percent" | ||
| 814 | }, | ||
| 815 | { | ||
| 816 | "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi", | ||
| 817 | "MetricExpr": "(((PM_DATA_FROM_RL2L3_MOD / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 818 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 819 | "MetricName": "rl2l3_mod_cpi_percent" | ||
| 820 | }, | ||
| 821 | { | ||
| 822 | "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi", | ||
| 823 | "MetricExpr": "(((PM_DATA_FROM_RL2L3_SHR / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) * 100", | ||
| 824 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 825 | "MetricName": "rl2l3_shr_cpi_percent" | ||
| 826 | }, | ||
| 827 | { | ||
| 828 | "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi", | ||
| 829 | "MetricExpr": "(((PM_DATA_FROM_RL4 / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 830 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 831 | "MetricName": "rl4_cpi_percent" | ||
| 832 | }, | ||
| 833 | { | ||
| 834 | "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi", | ||
| 835 | "MetricExpr": "(((PM_DATA_FROM_RMEM / PM_RUN_INST_CMPL) * (PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM)) / (PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL)) *100", | ||
| 836 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 837 | "MetricName": "rmem_cpi_percent" | ||
| 838 | }, | ||
| 839 | { | ||
| 840 | "BriefDescription": "Branch Mispredict flushes per instruction", | ||
| 841 | "MetricExpr": "PM_FLUSH_BR_MPRED / PM_RUN_INST_CMPL * 100", | ||
| 842 | "MetricGroup": "general", | ||
| 843 | "MetricName": "br_mpred_flush_rate_percent" | ||
| 844 | }, | ||
| 845 | { | ||
| 846 | "BriefDescription": "Cycles per instruction", | ||
| 847 | "MetricExpr": "PM_CYC / PM_INST_CMPL", | ||
| 848 | "MetricGroup": "general", | ||
| 849 | "MetricName": "cpi" | ||
| 850 | }, | ||
| 851 | { | ||
| 852 | "BriefDescription": "Percentage Cycles a group completed", | ||
| 853 | "MetricExpr": "PM_GRP_CMPL / PM_CYC * 100", | ||
| 854 | "MetricGroup": "general", | ||
| 855 | "MetricName": "cyc_grp_completed_percent" | ||
| 856 | }, | ||
| 857 | { | ||
| 858 | "BriefDescription": "Percentage Cycles a group dispatched", | ||
| 859 | "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", | ||
| 860 | "MetricGroup": "general", | ||
| 861 | "MetricName": "cyc_grp_dispatched_percent" | ||
| 862 | }, | ||
| 863 | { | ||
| 864 | "BriefDescription": "Cycles per group", | ||
| 865 | "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", | ||
| 866 | "MetricGroup": "general", | ||
| 867 | "MetricName": "cyc_per_group" | ||
| 868 | }, | ||
| 869 | { | ||
| 870 | "BriefDescription": "Dispatch flush rate (%)", | ||
| 871 | "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100", | ||
| 872 | "MetricGroup": "general", | ||
| 873 | "MetricName": "disp_flush_rate_percent" | ||
| 874 | }, | ||
| 875 | { | ||
| 876 | "BriefDescription": "% DTLB miss rate per inst", | ||
| 877 | "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100", | ||
| 878 | "MetricGroup": "general", | ||
| 879 | "MetricName": "dtlb_miss_rate_percent" | ||
| 880 | }, | ||
| 881 | { | ||
| 882 | "BriefDescription": "Flush rate (%)", | ||
| 883 | "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL", | ||
| 884 | "MetricGroup": "general", | ||
| 885 | "MetricName": "flush_rate_percent" | ||
| 886 | }, | ||
| 887 | { | ||
| 888 | "BriefDescription": "GCT slot utilization (11 to 14) as a % of cycles this thread had at least 1 slot valid", | ||
| 889 | "MetricExpr": "PM_GCT_UTIL_11_14_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", | ||
| 890 | "MetricGroup": "general", | ||
| 891 | "MetricName": "gct_util_11to14_slots_percent" | ||
| 892 | }, | ||
| 893 | { | ||
| 894 | "BriefDescription": "GCT slot utilization (15 to 17) as a % of cycles this thread had at least 1 slot valid", | ||
| 895 | "MetricExpr": "PM_GCT_UTIL_15_17_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", | ||
| 896 | "MetricGroup": "general", | ||
| 897 | "MetricName": "gct_util_15to17_slots_percent" | ||
| 898 | }, | ||
| 899 | { | ||
| 900 | "BriefDescription": "GCT slot utilization 18+ as a % of cycles this thread had at least 1 slot valid", | ||
| 901 | "MetricExpr": "PM_GCT_UTIL_18_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", | ||
| 902 | "MetricGroup": "general", | ||
| 903 | "MetricName": "gct_util_18plus_slots_percent" | ||
| 904 | }, | ||
| 905 | { | ||
| 906 | "BriefDescription": "GCT slot utilization (1 to 2) as a % of cycles this thread had at least 1 slot valid", | ||
| 907 | "MetricExpr": "PM_GCT_UTIL_1_2_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", | ||
| 908 | "MetricGroup": "general", | ||
| 909 | "MetricName": "gct_util_1to2_slots_percent" | ||
| 910 | }, | ||
| 911 | { | ||
| 912 | "BriefDescription": "GCT slot utilization (3 to 6) as a % of cycles this thread had at least 1 slot valid", | ||
| 913 | "MetricExpr": "PM_GCT_UTIL_3_6_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", | ||
| 914 | "MetricGroup": "general", | ||
| 915 | "MetricName": "gct_util_3to6_slots_percent" | ||
| 916 | }, | ||
| 917 | { | ||
| 918 | "BriefDescription": "GCT slot utilization (7 to 10) as a % of cycles this thread had at least 1 slot valid", | ||
| 919 | "MetricExpr": "PM_GCT_UTIL_7_10_ENTRIES / ( PM_RUN_CYC - PM_GCT_NOSLOT_CYC) * 100", | ||
| 920 | "MetricGroup": "general", | ||
| 921 | "MetricName": "gct_util_7to10_slots_percent" | ||
| 922 | }, | ||
| 923 | { | ||
| 924 | "BriefDescription": "Avg. group size", | ||
| 925 | "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", | ||
| 926 | "MetricGroup": "general", | ||
| 927 | "MetricName": "group_size" | ||
| 928 | }, | ||
| 929 | { | ||
| 930 | "BriefDescription": "Instructions per group", | ||
| 931 | "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", | ||
| 932 | "MetricGroup": "general", | ||
| 933 | "MetricName": "inst_per_group" | ||
| 934 | }, | ||
| 935 | { | ||
| 936 | "BriefDescription": "Instructions per cycle", | ||
| 937 | "MetricExpr": "PM_INST_CMPL / PM_CYC", | ||
| 938 | "MetricGroup": "general", | ||
| 939 | "MetricName": "ipc" | ||
| 940 | }, | ||
| 941 | { | ||
| 942 | "BriefDescription": "% ITLB miss rate per inst", | ||
| 943 | "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100", | ||
| 944 | "MetricGroup": "general", | ||
| 945 | "MetricName": "itlb_miss_rate_percent" | ||
| 946 | }, | ||
| 947 | { | ||
| 948 | "BriefDescription": "Percentage of L1 load misses per L1 load ref", | ||
| 949 | "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", | ||
| 950 | "MetricGroup": "general", | ||
| 951 | "MetricName": "l1_ld_miss_ratio_percent" | ||
| 952 | }, | ||
| 953 | { | ||
| 954 | "BriefDescription": "Percentage of L1 store misses per run instruction", | ||
| 955 | "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", | ||
| 956 | "MetricGroup": "general", | ||
| 957 | "MetricName": "l1_st_miss_rate_percent" | ||
| 958 | }, | ||
| 959 | { | ||
| 960 | "BriefDescription": "Percentage of L1 store misses per L1 store ref", | ||
| 961 | "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100", | ||
| 962 | "MetricGroup": "general", | ||
| 963 | "MetricName": "l1_st_miss_ratio_percent" | ||
| 964 | }, | ||
| 965 | { | ||
| 966 | "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)", | ||
| 967 | "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 968 | "MetricGroup": "general", | ||
| 969 | "MetricName": "l2_inst_miss_rate_percent" | ||
| 970 | }, | ||
| 971 | { | ||
| 972 | "BriefDescription": "L2 demand Load Miss Rate (per run instruction)(%)", | ||
| 973 | "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 974 | "MetricGroup": "general", | ||
| 975 | "MetricName": "l2_ld_miss_rate_percent" | ||
| 976 | }, | ||
| 977 | { | ||
| 978 | "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)", | ||
| 979 | "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 980 | "MetricGroup": "general", | ||
| 981 | "MetricName": "l2_pteg_miss_rate_percent" | ||
| 982 | }, | ||
| 983 | { | ||
| 984 | "BriefDescription": "Percentage of L2 store misses per run instruction", | ||
| 985 | "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", | ||
| 986 | "MetricGroup": "general", | ||
| 987 | "MetricName": "l2_st_miss_rate_percent" | ||
| 988 | }, | ||
| 989 | { | ||
| 990 | "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)", | ||
| 991 | "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 992 | "MetricGroup": "general", | ||
| 993 | "MetricName": "l3_inst_miss_rate_percent" | ||
| 994 | }, | ||
| 995 | { | ||
| 996 | "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)", | ||
| 997 | "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 998 | "MetricGroup": "general", | ||
| 999 | "MetricName": "l3_ld_miss_rate_percent" | ||
| 1000 | }, | ||
| 1001 | { | ||
| 1002 | "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)", | ||
| 1003 | "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1004 | "MetricGroup": "general", | ||
| 1005 | "MetricName": "l3_pteg_miss_rate_percent" | ||
| 1006 | }, | ||
| 1007 | { | ||
| 1008 | "BriefDescription": "Run cycles per cycle", | ||
| 1009 | "MetricExpr": "PM_RUN_CYC / PM_CYC*100", | ||
| 1010 | "MetricGroup": "general", | ||
| 1011 | "MetricName": "run_cycles_percent" | ||
| 1012 | }, | ||
| 1013 | { | ||
| 1014 | "BriefDescription": "Percentage of cycles spent in SMT2 Mode", | ||
| 1015 | "MetricExpr": "(PM_RUN_CYC_SMT2_MODE/PM_RUN_CYC) * 100", | ||
| 1016 | "MetricGroup": "general", | ||
| 1017 | "MetricName": "smt2_cycles_percent" | ||
| 1018 | }, | ||
| 1019 | { | ||
| 1020 | "BriefDescription": "Percentage of cycles spent in SMT4 Mode", | ||
| 1021 | "MetricExpr": "(PM_RUN_CYC_SMT4_MODE/PM_RUN_CYC) * 100", | ||
| 1022 | "MetricGroup": "general", | ||
| 1023 | "MetricName": "smt4_cycles_percent" | ||
| 1024 | }, | ||
| 1025 | { | ||
| 1026 | "BriefDescription": "Percentage of cycles spent in SMT8 Mode", | ||
| 1027 | "MetricExpr": "(PM_RUN_CYC_SMT8_MODE/PM_RUN_CYC) * 100", | ||
| 1028 | "MetricGroup": "general", | ||
| 1029 | "MetricName": "smt8_cycles_percent" | ||
| 1030 | }, | ||
| 1031 | { | ||
| 1032 | "BriefDescription": "IPC of all instructions completed by the core while this thread was stalled", | ||
| 1033 | "MetricExpr": "PM_CMPLU_STALL_OTHER_CMPL/PM_RUN_CYC", | ||
| 1034 | "MetricGroup": "general", | ||
| 1035 | "MetricName": "smt_benefit" | ||
| 1036 | }, | ||
| 1037 | { | ||
| 1038 | "BriefDescription": "Instruction dispatch-to-completion ratio", | ||
| 1039 | "MetricExpr": "PM_INST_DISP / PM_INST_CMPL", | ||
| 1040 | "MetricGroup": "general", | ||
| 1041 | "MetricName": "speculation" | ||
| 1042 | }, | ||
| 1043 | { | ||
| 1044 | "BriefDescription": "Percentage of cycles spent in Single Thread Mode", | ||
| 1045 | "MetricExpr": "(PM_RUN_CYC_ST_MODE/PM_RUN_CYC) * 100", | ||
| 1046 | "MetricGroup": "general", | ||
| 1047 | "MetricName": "st_cycles_percent" | ||
| 1048 | }, | ||
| 1049 | { | ||
| 1050 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", | ||
| 1051 | "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1052 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1053 | "MetricName": "inst_from_dl2l3_mod_rate_percent" | ||
| 1054 | }, | ||
| 1055 | { | ||
| 1056 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst", | ||
| 1057 | "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1058 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1059 | "MetricName": "inst_from_dl2l3_shr_rate_percent" | ||
| 1060 | }, | ||
| 1061 | { | ||
| 1062 | "BriefDescription": "% of ICache reloads from Distant L4 per Inst", | ||
| 1063 | "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1064 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1065 | "MetricName": "inst_from_dl4_rate_percent" | ||
| 1066 | }, | ||
| 1067 | { | ||
| 1068 | "BriefDescription": "% of ICache reloads from Distant Memory per Inst", | ||
| 1069 | "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1070 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1071 | "MetricName": "inst_from_dmem_rate_percent" | ||
| 1072 | }, | ||
| 1073 | { | ||
| 1074 | "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", | ||
| 1075 | "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1076 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1077 | "MetricName": "inst_from_l21_mod_rate_percent" | ||
| 1078 | }, | ||
| 1079 | { | ||
| 1080 | "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", | ||
| 1081 | "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1082 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1083 | "MetricName": "inst_from_l21_shr_rate_percent" | ||
| 1084 | }, | ||
| 1085 | { | ||
| 1086 | "BriefDescription": "% of ICache reloads from L2 per Inst", | ||
| 1087 | "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 1088 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1089 | "MetricName": "inst_from_l2_rate_percent" | ||
| 1090 | }, | ||
| 1091 | { | ||
| 1092 | "BriefDescription": "% of ICache reloads from Private L3, other core per Inst", | ||
| 1093 | "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1094 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1095 | "MetricName": "inst_from_l31_mod_rate_percent" | ||
| 1096 | }, | ||
| 1097 | { | ||
| 1098 | "BriefDescription": "% of ICache reloads from Private L3 other core per Inst", | ||
| 1099 | "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1100 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1101 | "MetricName": "inst_from_l31_shr_rate_percent" | ||
| 1102 | }, | ||
| 1103 | { | ||
| 1104 | "BriefDescription": "% of ICache reloads from L3 per Inst", | ||
| 1105 | "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 1106 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1107 | "MetricName": "inst_from_l3_rate_percent" | ||
| 1108 | }, | ||
| 1109 | { | ||
| 1110 | "BriefDescription": "% of ICache reloads from Local L4 per Inst", | ||
| 1111 | "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1112 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1113 | "MetricName": "inst_from_ll4_rate_percent" | ||
| 1114 | }, | ||
| 1115 | { | ||
| 1116 | "BriefDescription": "% of ICache reloads from Local Memory per Inst", | ||
| 1117 | "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1118 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1119 | "MetricName": "inst_from_lmem_rate_percent" | ||
| 1120 | }, | ||
| 1121 | { | ||
| 1122 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst", | ||
| 1123 | "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1124 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1125 | "MetricName": "inst_from_rl2l3_mod_rate_percent" | ||
| 1126 | }, | ||
| 1127 | { | ||
| 1128 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst", | ||
| 1129 | "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1130 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1131 | "MetricName": "inst_from_rl2l3_shr_rate_percent" | ||
| 1132 | }, | ||
| 1133 | { | ||
| 1134 | "BriefDescription": "% of ICache reloads from Remote L4 per Inst", | ||
| 1135 | "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1136 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1137 | "MetricName": "inst_from_rl4_rate_percent" | ||
| 1138 | }, | ||
| 1139 | { | ||
| 1140 | "BriefDescription": "% of ICache reloads from Remote Memory per Inst", | ||
| 1141 | "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1142 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1143 | "MetricName": "inst_from_rmem_rate_percent" | ||
| 1144 | }, | ||
| 1145 | { | ||
| 1146 | "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)", | ||
| 1147 | "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1148 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1149 | "MetricName": "l1_inst_miss_rate_percent" | ||
| 1150 | }, | ||
| 1151 | { | ||
| 1152 | "BriefDescription": "% Branches per instruction", | ||
| 1153 | "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL", | ||
| 1154 | "MetricGroup": "instruction_mix", | ||
| 1155 | "MetricName": "branches_per_inst" | ||
| 1156 | }, | ||
| 1157 | { | ||
| 1158 | "BriefDescription": "Total Fixed point operations", | ||
| 1159 | "MetricExpr": "(PM_FXU0_FIN + PM_FXU1_FIN)/PM_RUN_INST_CMPL", | ||
| 1160 | "MetricGroup": "instruction_mix", | ||
| 1161 | "MetricName": "fixed_per_inst" | ||
| 1162 | }, | ||
| 1163 | { | ||
| 1164 | "BriefDescription": "FXU0 balance", | ||
| 1165 | "MetricExpr": "PM_FXU0_FIN / (PM_FXU0_FIN + PM_FXU1_FIN)", | ||
| 1166 | "MetricGroup": "instruction_mix", | ||
| 1167 | "MetricName": "fxu0_balance" | ||
| 1168 | }, | ||
| 1169 | { | ||
| 1170 | "BriefDescription": "Fraction of cycles that FXU0 is in use", | ||
| 1171 | "MetricExpr": "PM_FXU0_FIN / PM_RUN_CYC", | ||
| 1172 | "MetricGroup": "instruction_mix", | ||
| 1173 | "MetricName": "fxu0_fin" | ||
| 1174 | }, | ||
| 1175 | { | ||
| 1176 | "BriefDescription": "FXU0 only Busy", | ||
| 1177 | "MetricExpr": "PM_FXU0_BUSY_FXU1_IDLE / PM_CYC", | ||
| 1178 | "MetricGroup": "instruction_mix", | ||
| 1179 | "MetricName": "fxu0_only_busy" | ||
| 1180 | }, | ||
| 1181 | { | ||
| 1182 | "BriefDescription": "Fraction of cycles that FXU1 is in use", | ||
| 1183 | "MetricExpr": "PM_FXU1_FIN / PM_RUN_CYC", | ||
| 1184 | "MetricGroup": "instruction_mix", | ||
| 1185 | "MetricName": "fxu1_fin" | ||
| 1186 | }, | ||
| 1187 | { | ||
| 1188 | "BriefDescription": "FXU1 only Busy", | ||
| 1189 | "MetricExpr": "PM_FXU1_BUSY_FXU0_IDLE / PM_CYC", | ||
| 1190 | "MetricGroup": "instruction_mix", | ||
| 1191 | "MetricName": "fxu1_only_busy" | ||
| 1192 | }, | ||
| 1193 | { | ||
| 1194 | "BriefDescription": "Both FXU Busy", | ||
| 1195 | "MetricExpr": "PM_FXU_BUSY / PM_CYC", | ||
| 1196 | "MetricGroup": "instruction_mix", | ||
| 1197 | "MetricName": "fxu_both_busy" | ||
| 1198 | }, | ||
| 1199 | { | ||
| 1200 | "BriefDescription": "Both FXU Idle", | ||
| 1201 | "MetricExpr": "PM_FXU_IDLE / PM_CYC", | ||
| 1202 | "MetricGroup": "instruction_mix", | ||
| 1203 | "MetricName": "fxu_both_idle" | ||
| 1204 | }, | ||
| 1205 | { | ||
| 1206 | "BriefDescription": "Loads per completed instruction", | ||
| 1207 | "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", | ||
| 1208 | "MetricGroup": "instruction_mix", | ||
| 1209 | "MetricName": "loads_per_inst" | ||
| 1210 | }, | ||
| 1211 | { | ||
| 1212 | "BriefDescription": "Stores per completed instruction", | ||
| 1213 | "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", | ||
| 1214 | "MetricGroup": "instruction_mix", | ||
| 1215 | "MetricName": "stores_per_inst" | ||
| 1216 | }, | ||
| 1217 | { | ||
| 1218 | "BriefDescription": "Icache Fetchs per Icache Miss", | ||
| 1219 | "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS", | ||
| 1220 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1221 | "MetricName": "icache_miss_reload" | ||
| 1222 | }, | ||
| 1223 | { | ||
| 1224 | "BriefDescription": "% of ICache reloads due to prefetch", | ||
| 1225 | "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS", | ||
| 1226 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1227 | "MetricName": "icache_pref_percent" | ||
| 1228 | }, | ||
| 1229 | { | ||
| 1230 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)", | ||
| 1231 | "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1232 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1233 | "MetricName": "inst_from_dl2l3_mod_percent" | ||
| 1234 | }, | ||
| 1235 | { | ||
| 1236 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)", | ||
| 1237 | "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1238 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1239 | "MetricName": "inst_from_dl2l3_shr_percent" | ||
| 1240 | }, | ||
| 1241 | { | ||
| 1242 | "BriefDescription": "% of ICache reloads from Distant L4", | ||
| 1243 | "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS", | ||
| 1244 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1245 | "MetricName": "inst_from_dl4_percent" | ||
| 1246 | }, | ||
| 1247 | { | ||
| 1248 | "BriefDescription": "% of ICache reloads from Distant Memory", | ||
| 1249 | "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS", | ||
| 1250 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1251 | "MetricName": "inst_from_dmem_percent" | ||
| 1252 | }, | ||
| 1253 | { | ||
| 1254 | "BriefDescription": "% of ICache reloads from Private L2, other core", | ||
| 1255 | "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1256 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1257 | "MetricName": "inst_from_l21_mod_percent" | ||
| 1258 | }, | ||
| 1259 | { | ||
| 1260 | "BriefDescription": "% of ICache reloads from Private L2, other core", | ||
| 1261 | "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1262 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1263 | "MetricName": "inst_from_l21_shr_percent" | ||
| 1264 | }, | ||
| 1265 | { | ||
| 1266 | "BriefDescription": "% of ICache reloads from L2", | ||
| 1267 | "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS", | ||
| 1268 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1269 | "MetricName": "inst_from_l2_percent" | ||
| 1270 | }, | ||
| 1271 | { | ||
| 1272 | "BriefDescription": "% of ICache reloads from Private L3, other core", | ||
| 1273 | "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1274 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1275 | "MetricName": "inst_from_l31_mod_percent" | ||
| 1276 | }, | ||
| 1277 | { | ||
| 1278 | "BriefDescription": "% of ICache reloads from Private L3, other core", | ||
| 1279 | "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1280 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1281 | "MetricName": "inst_from_l31_shr_percent" | ||
| 1282 | }, | ||
| 1283 | { | ||
| 1284 | "BriefDescription": "% of ICache reloads from L3", | ||
| 1285 | "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS", | ||
| 1286 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1287 | "MetricName": "inst_from_l3_percent" | ||
| 1288 | }, | ||
| 1289 | { | ||
| 1290 | "BriefDescription": "% of ICache reloads from Local L4", | ||
| 1291 | "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS", | ||
| 1292 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1293 | "MetricName": "inst_from_ll4_percent" | ||
| 1294 | }, | ||
| 1295 | { | ||
| 1296 | "BriefDescription": "% of ICache reloads from Local Memory", | ||
| 1297 | "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS", | ||
| 1298 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1299 | "MetricName": "inst_from_lmem_percent" | ||
| 1300 | }, | ||
| 1301 | { | ||
| 1302 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)", | ||
| 1303 | "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1304 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1305 | "MetricName": "inst_from_rl2l3_mod_percent" | ||
| 1306 | }, | ||
| 1307 | { | ||
| 1308 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)", | ||
| 1309 | "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1310 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1311 | "MetricName": "inst_from_rl2l3_shr_percent" | ||
| 1312 | }, | ||
| 1313 | { | ||
| 1314 | "BriefDescription": "% of ICache reloads from Remote L4", | ||
| 1315 | "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS", | ||
| 1316 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1317 | "MetricName": "inst_from_rl4_percent" | ||
| 1318 | }, | ||
| 1319 | { | ||
| 1320 | "BriefDescription": "% of ICache reloads from Remote Memory", | ||
| 1321 | "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS", | ||
| 1322 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1323 | "MetricName": "inst_from_rmem_percent" | ||
| 1324 | }, | ||
| 1325 | { | ||
| 1326 | "BriefDescription": "Average number of stores that gather in the store buffer before being sent to an L2 RC machine", | ||
| 1327 | "MetricExpr": "PM_ST_CMPL / (PM_L2_ST / 2)", | ||
| 1328 | "MetricGroup": "l2_stats", | ||
| 1329 | "MetricName": "avg_stores_gathered" | ||
| 1330 | }, | ||
| 1331 | { | ||
| 1332 | "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", | ||
| 1333 | "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", | ||
| 1334 | "MetricGroup": "l2_stats", | ||
| 1335 | "MetricName": "l2_st_miss_ratio_percent" | ||
| 1336 | }, | ||
| 1337 | { | ||
| 1338 | "BriefDescription": "Percentage of L2 store misses per drained store. A drained store may contain multiple individual stores if they target the same line", | ||
| 1339 | "MetricExpr": "PM_L2_ST_MISS * 100 / (PM_L2_ST / 2)", | ||
| 1340 | "MetricGroup": "l2_stats", | ||
| 1341 | "MetricName": "l2_store_miss_ratio_percent" | ||
| 1342 | }, | ||
| 1343 | { | ||
| 1344 | "BriefDescription": "average L1 miss latency using marked events", | ||
| 1345 | "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1", | ||
| 1346 | "MetricGroup": "latency", | ||
| 1347 | "MetricName": "average_dl1miss_latency" | ||
| 1348 | }, | ||
| 1349 | { | ||
| 1350 | "BriefDescription": "Average icache miss latency", | ||
| 1351 | "MetricExpr": "(PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ)", | ||
| 1352 | "MetricGroup": "latency", | ||
| 1353 | "MetricName": "average_il1_miss_latency" | ||
| 1354 | }, | ||
| 1355 | { | ||
| 1356 | "BriefDescription": "average service time for SYNC", | ||
| 1357 | "MetricExpr": "PM_LSU_SRQ_SYNC_CYC / PM_LSU_SRQ_SYNC", | ||
| 1358 | "MetricGroup": "latency", | ||
| 1359 | "MetricName": "average_sync_cyc" | ||
| 1360 | }, | ||
| 1361 | { | ||
| 1362 | "BriefDescription": "Cycles LMQ slot0 was active on an average", | ||
| 1363 | "MetricExpr": "PM_LSU_LMQ_S0_VALID / PM_LSU_LMQ_S0_ALLOC", | ||
| 1364 | "MetricGroup": "latency", | ||
| 1365 | "MetricName": "avg_lmq_life_time" | ||
| 1366 | }, | ||
| 1367 | { | ||
| 1368 | "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS", | ||
| 1369 | "MetricExpr": "PM_LSU_LRQ_S0_VALID / PM_LSU_LRQ_S0_ALLOC", | ||
| 1370 | "MetricGroup": "latency", | ||
| 1371 | "MetricName": "avg_lrq_life_time_even" | ||
| 1372 | }, | ||
| 1373 | { | ||
| 1374 | "BriefDescription": "Average number of cycles LRQ stays active for one load. Slot 43 is valid ONLY FOR ODD THREADS", | ||
| 1375 | "MetricExpr": "PM_LSU_LRQ_S43_VALID / PM_LSU_LRQ_S43_ALLOC", | ||
| 1376 | "MetricGroup": "latency", | ||
| 1377 | "MetricName": "avg_lrq_life_time_odd" | ||
| 1378 | }, | ||
| 1379 | { | ||
| 1380 | "BriefDescription": "Average number of cycles SRQ stays active for one load. Slot 0 is VALID ONLY FOR EVEN THREADS", | ||
| 1381 | "MetricExpr": "PM_LSU_SRQ_S0_VALID / PM_LSU_SRQ_S0_ALLOC", | ||
| 1382 | "MetricGroup": "latency", | ||
| 1383 | "MetricName": "avg_srq_life_time_even" | ||
| 1384 | }, | ||
| 1385 | { | ||
| 1386 | "BriefDescription": "Average number of cycles SRQ stays active for one load. Slot 39 is valid ONLY FOR ODD THREADS", | ||
| 1387 | "MetricExpr": "PM_LSU_SRQ_S39_VALID / PM_LSU_SRQ_S39_ALLOC", | ||
| 1388 | "MetricGroup": "latency", | ||
| 1389 | "MetricName": "avg_srq_life_time_odd" | ||
| 1390 | }, | ||
| 1391 | { | ||
| 1392 | "BriefDescription": "Marked background kill latency, measured in L2", | ||
| 1393 | "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL", | ||
| 1394 | "MetricGroup": "latency", | ||
| 1395 | "MetricName": "bkill_latency" | ||
| 1396 | }, | ||
| 1397 | { | ||
| 1398 | "BriefDescription": "Marked dclaim latency, measured in L2", | ||
| 1399 | "MetricExpr": "PM_MRK_FAB_RSP_DCLAIM_CYC / PM_MRK_FAB_RSP_DCLAIM", | ||
| 1400 | "MetricGroup": "latency", | ||
| 1401 | "MetricName": "dclaim_latency" | ||
| 1402 | }, | ||
| 1403 | { | ||
| 1404 | "BriefDescription": "Marked L2L3 distant Load latency", | ||
| 1405 | "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD", | ||
| 1406 | "MetricGroup": "latency", | ||
| 1407 | "MetricName": "dl2l3_mod_latency" | ||
| 1408 | }, | ||
| 1409 | { | ||
| 1410 | "BriefDescription": "Marked L2L3 distant Load latency", | ||
| 1411 | "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR", | ||
| 1412 | "MetricGroup": "latency", | ||
| 1413 | "MetricName": "dl2l3_shr_latency" | ||
| 1414 | }, | ||
| 1415 | { | ||
| 1416 | "BriefDescription": "Distant L4 average load latency", | ||
| 1417 | "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4", | ||
| 1418 | "MetricGroup": "latency", | ||
| 1419 | "MetricName": "dl4_latency" | ||
| 1420 | }, | ||
| 1421 | { | ||
| 1422 | "BriefDescription": "Marked Dmem Load latency", | ||
| 1423 | "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM", | ||
| 1424 | "MetricGroup": "latency", | ||
| 1425 | "MetricName": "dmem_latency" | ||
| 1426 | }, | ||
| 1427 | { | ||
| 1428 | "BriefDescription": "estimated exposed miss latency for dL1 misses, ie load miss when we were NTC", | ||
| 1429 | "MetricExpr": "PM_MRK_LD_MISS_EXPOSED_CYC / PM_MRK_LD_MISS_EXPOSED", | ||
| 1430 | "MetricGroup": "latency", | ||
| 1431 | "MetricName": "exposed_dl1miss_latency" | ||
| 1432 | }, | ||
| 1433 | { | ||
| 1434 | "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the M state", | ||
| 1435 | "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD", | ||
| 1436 | "MetricGroup": "latency", | ||
| 1437 | "MetricName": "l21_mod_latency" | ||
| 1438 | }, | ||
| 1439 | { | ||
| 1440 | "BriefDescription": "Average load latency for all marked demand loads that came from L2.1 in the S state", | ||
| 1441 | "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR", | ||
| 1442 | "MetricGroup": "latency", | ||
| 1443 | "MetricName": "l21_shr_latency" | ||
| 1444 | }, | ||
| 1445 | { | ||
| 1446 | "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time due to load-hit-store", | ||
| 1447 | "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", | ||
| 1448 | "MetricGroup": "latency", | ||
| 1449 | "MetricName": "l2_disp_conflict_ldhitst_latency" | ||
| 1450 | }, | ||
| 1451 | { | ||
| 1452 | "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered a conflict at RC machine dispatch time NOT due load-hit-store", | ||
| 1453 | "MetricExpr": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC/ PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER", | ||
| 1454 | "MetricGroup": "latency", | ||
| 1455 | "MetricName": "l2_disp_conflict_other_latency" | ||
| 1456 | }, | ||
| 1457 | { | ||
| 1458 | "BriefDescription": "Average load latency for all marked demand loads that came from the L2", | ||
| 1459 | "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2", | ||
| 1460 | "MetricGroup": "latency", | ||
| 1461 | "MetricName": "l2_latency" | ||
| 1462 | }, | ||
| 1463 | { | ||
| 1464 | "BriefDescription": "Average load latency for all marked demand loads that were satisfied by lines prefetched into the L3. This information is forwarded from the L3", | ||
| 1465 | "MetricExpr": "PM_MRK_DATA_FROM_L2_MEPF_CYC/ PM_MRK_DATA_FROM_L2_MEPF", | ||
| 1466 | "MetricGroup": "latency", | ||
| 1467 | "MetricName": "l2_mepf_latency" | ||
| 1468 | }, | ||
| 1469 | { | ||
| 1470 | "BriefDescription": "Average load latency for all marked demand loads that came from the L2 and suffered no conflicts", | ||
| 1471 | "MetricExpr": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L2_NO_CONFLICT", | ||
| 1472 | "MetricGroup": "latency", | ||
| 1473 | "MetricName": "l2_no_conflict_latency" | ||
| 1474 | }, | ||
| 1475 | { | ||
| 1476 | "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and beyond", | ||
| 1477 | "MetricExpr": "PM_MRK_DATA_FROM_L2MISS_CYC/ PM_MRK_DATA_FROM_L2MISS", | ||
| 1478 | "MetricGroup": "latency", | ||
| 1479 | "MetricName": "l2miss_latency" | ||
| 1480 | }, | ||
| 1481 | { | ||
| 1482 | "BriefDescription": "Marked L31 Load latency", | ||
| 1483 | "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD", | ||
| 1484 | "MetricGroup": "latency", | ||
| 1485 | "MetricName": "l31_mod_latency" | ||
| 1486 | }, | ||
| 1487 | { | ||
| 1488 | "BriefDescription": "Marked L31 Load latency", | ||
| 1489 | "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR", | ||
| 1490 | "MetricGroup": "latency", | ||
| 1491 | "MetricName": "l31_shr_latency" | ||
| 1492 | }, | ||
| 1493 | { | ||
| 1494 | "BriefDescription": "Average load latency for all marked demand loads that came from the L3", | ||
| 1495 | "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3", | ||
| 1496 | "MetricGroup": "latency", | ||
| 1497 | "MetricName": "l3_latency" | ||
| 1498 | }, | ||
| 1499 | { | ||
| 1500 | "BriefDescription": "Average load latency for all marked demand loads that came from the L3 and suffered no conflicts", | ||
| 1501 | "MetricExpr": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC/ PM_MRK_DATA_FROM_L3_NO_CONFLICT", | ||
| 1502 | "MetricGroup": "latency", | ||
| 1503 | "MetricName": "l3_no_conflict_latency" | ||
| 1504 | }, | ||
| 1505 | { | ||
| 1506 | "BriefDescription": "Average load latency for all marked demand loads that come from beyond the L3", | ||
| 1507 | "MetricExpr": "PM_MRK_DATA_FROM_L3MISS_CYC/ PM_MRK_DATA_FROM_L3MISS", | ||
| 1508 | "MetricGroup": "latency", | ||
| 1509 | "MetricName": "l3miss_latency" | ||
| 1510 | }, | ||
| 1511 | { | ||
| 1512 | "BriefDescription": "Average latency for marked reloads that hit in the L3 on the MEPF state. i.e. lines that were prefetched into the L3", | ||
| 1513 | "MetricExpr": "PM_MRK_DATA_FROM_L3_MEPF_CYC/ PM_MRK_DATA_FROM_L3_MEPF", | ||
| 1514 | "MetricGroup": "latency", | ||
| 1515 | "MetricName": "l3pref_latency" | ||
| 1516 | }, | ||
| 1517 | { | ||
| 1518 | "BriefDescription": "Local L4 average load latency", | ||
| 1519 | "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4", | ||
| 1520 | "MetricGroup": "latency", | ||
| 1521 | "MetricName": "ll4_latency" | ||
| 1522 | }, | ||
| 1523 | { | ||
| 1524 | "BriefDescription": "Marked Lmem Load latency", | ||
| 1525 | "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM", | ||
| 1526 | "MetricGroup": "latency", | ||
| 1527 | "MetricName": "lmem_latency" | ||
| 1528 | }, | ||
| 1529 | { | ||
| 1530 | "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on a different chip", | ||
| 1531 | "MetricExpr": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_OFF_CHIP_CACHE", | ||
| 1532 | "MetricGroup": "latency", | ||
| 1533 | "MetricName": "off_chip_cache_latency" | ||
| 1534 | }, | ||
| 1535 | { | ||
| 1536 | "BriefDescription": "Latency for marked reloads that hit in the L2 or L3 of any other core on the same chip", | ||
| 1537 | "MetricExpr": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC/ PM_MRK_DATA_FROM_ON_CHIP_CACHE", | ||
| 1538 | "MetricGroup": "latency", | ||
| 1539 | "MetricName": "on_chip_cache_latency" | ||
| 1540 | }, | ||
| 1541 | { | ||
| 1542 | "BriefDescription": "Marked L2L3 remote Load latency", | ||
| 1543 | "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD", | ||
| 1544 | "MetricGroup": "latency", | ||
| 1545 | "MetricName": "rl2l3_mod_latency" | ||
| 1546 | }, | ||
| 1547 | { | ||
| 1548 | "BriefDescription": "Marked L2L3 remote Load latency", | ||
| 1549 | "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR", | ||
| 1550 | "MetricGroup": "latency", | ||
| 1551 | "MetricName": "rl2l3_shr_latency" | ||
| 1552 | }, | ||
| 1553 | { | ||
| 1554 | "BriefDescription": "Remote L4 average load latency", | ||
| 1555 | "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4", | ||
| 1556 | "MetricGroup": "latency", | ||
| 1557 | "MetricName": "rl4_latency" | ||
| 1558 | }, | ||
| 1559 | { | ||
| 1560 | "BriefDescription": "Marked Rmem Load latency", | ||
| 1561 | "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM", | ||
| 1562 | "MetricGroup": "latency", | ||
| 1563 | "MetricName": "rmem_latency" | ||
| 1564 | }, | ||
| 1565 | { | ||
| 1566 | "BriefDescription": "ERAT miss reject ratio", | ||
| 1567 | "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1568 | "MetricGroup": "lsu_rejects", | ||
| 1569 | "MetricName": "erat_reject_rate_percent" | ||
| 1570 | }, | ||
| 1571 | { | ||
| 1572 | "BriefDescription": "ERAT miss reject ratio", | ||
| 1573 | "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / (PM_LSU_FIN - PM_LSU_FX_FIN)", | ||
| 1574 | "MetricGroup": "lsu_rejects", | ||
| 1575 | "MetricName": "erat_reject_ratio_percent" | ||
| 1576 | }, | ||
| 1577 | { | ||
| 1578 | "BriefDescription": "LHS reject ratio", | ||
| 1579 | "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL", | ||
| 1580 | "MetricGroup": "lsu_rejects", | ||
| 1581 | "MetricName": "lhs_reject_rate_percent" | ||
| 1582 | }, | ||
| 1583 | { | ||
| 1584 | "BriefDescription": "LHS reject ratio", | ||
| 1585 | "MetricExpr": "PM_LSU_REJECT_LHS *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)", | ||
| 1586 | "MetricGroup": "lsu_rejects", | ||
| 1587 | "MetricName": "lhs_reject_ratio_percent" | ||
| 1588 | }, | ||
| 1589 | { | ||
| 1590 | "BriefDescription": "LMQ full reject ratio", | ||
| 1591 | "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL", | ||
| 1592 | "MetricGroup": "lsu_rejects", | ||
| 1593 | "MetricName": "lmq_full_reject_rate_percent" | ||
| 1594 | }, | ||
| 1595 | { | ||
| 1596 | "BriefDescription": "LMQ full reject ratio", | ||
| 1597 | "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1", | ||
| 1598 | "MetricGroup": "lsu_rejects", | ||
| 1599 | "MetricName": "lmq_full_reject_ratio_percent" | ||
| 1600 | }, | ||
| 1601 | { | ||
| 1602 | "BriefDescription": "LSU reject ratio", | ||
| 1603 | "MetricExpr": "PM_LSU_REJECT *100/ PM_RUN_INST_CMPL", | ||
| 1604 | "MetricGroup": "lsu_rejects", | ||
| 1605 | "MetricName": "lsu_reject_rate_percent" | ||
| 1606 | }, | ||
| 1607 | { | ||
| 1608 | "BriefDescription": "LSU reject ratio", | ||
| 1609 | "MetricExpr": "PM_LSU_REJECT *100/ (PM_LSU_FIN - PM_LSU_FX_FIN)", | ||
| 1610 | "MetricGroup": "lsu_rejects", | ||
| 1611 | "MetricName": "lsu_reject_ratio_percent" | ||
| 1612 | }, | ||
| 1613 | { | ||
| 1614 | "BriefDescription": "Ratio of reloads from local L4 to distant L4", | ||
| 1615 | "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4", | ||
| 1616 | "MetricGroup": "memory", | ||
| 1617 | "MetricName": "ld_ll4_per_ld_dmem" | ||
| 1618 | }, | ||
| 1619 | { | ||
| 1620 | "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4", | ||
| 1621 | "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)", | ||
| 1622 | "MetricGroup": "memory", | ||
| 1623 | "MetricName": "ld_ll4_per_ld_mem" | ||
| 1624 | }, | ||
| 1625 | { | ||
| 1626 | "BriefDescription": "Ratio of reloads from local L4 to remote L4", | ||
| 1627 | "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4", | ||
| 1628 | "MetricGroup": "memory", | ||
| 1629 | "MetricName": "ld_ll4_per_ld_rl4" | ||
| 1630 | }, | ||
| 1631 | { | ||
| 1632 | "BriefDescription": "Number of loads from local memory per loads from distant memory", | ||
| 1633 | "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM", | ||
| 1634 | "MetricGroup": "memory", | ||
| 1635 | "MetricName": "ld_lmem_per_ld_dmem" | ||
| 1636 | }, | ||
| 1637 | { | ||
| 1638 | "BriefDescription": "Number of loads from local memory per loads from remote and distant memory", | ||
| 1639 | "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)", | ||
| 1640 | "MetricGroup": "memory", | ||
| 1641 | "MetricName": "ld_lmem_per_ld_mem" | ||
| 1642 | }, | ||
| 1643 | { | ||
| 1644 | "BriefDescription": "Number of loads from local memory per loads from remote memory", | ||
| 1645 | "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM", | ||
| 1646 | "MetricGroup": "memory", | ||
| 1647 | "MetricName": "ld_lmem_per_ld_rmem" | ||
| 1648 | }, | ||
| 1649 | { | ||
| 1650 | "BriefDescription": "Number of loads from remote memory per loads from distant memory", | ||
| 1651 | "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM", | ||
| 1652 | "MetricGroup": "memory", | ||
| 1653 | "MetricName": "ld_rmem_per_ld_dmem" | ||
| 1654 | }, | ||
| 1655 | { | ||
| 1656 | "BriefDescription": "Memory locality", | ||
| 1657 | "MetricExpr": "(PM_DATA_FROM_LL4 + PM_DATA_FROM_LMEM) * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4 + PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4 + PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4)", | ||
| 1658 | "MetricGroup": "memory", | ||
| 1659 | "MetricName": "mem_locality_percent" | ||
| 1660 | }, | ||
| 1661 | { | ||
| 1662 | "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", | ||
| 1663 | "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1664 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1665 | "MetricName": "derat_miss_rate_percent" | ||
| 1666 | }, | ||
| 1667 | { | ||
| 1668 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst", | ||
| 1669 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1670 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1671 | "MetricName": "pteg_from_dl2l3_mod_rate_percent" | ||
| 1672 | }, | ||
| 1673 | { | ||
| 1674 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst", | ||
| 1675 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1676 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1677 | "MetricName": "pteg_from_dl2l3_shr_rate_percent" | ||
| 1678 | }, | ||
| 1679 | { | ||
| 1680 | "BriefDescription": "% of DERAT reloads from Distant L4 per inst", | ||
| 1681 | "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1682 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1683 | "MetricName": "pteg_from_dl4_rate_percent" | ||
| 1684 | }, | ||
| 1685 | { | ||
| 1686 | "BriefDescription": "% of DERAT reloads from Distant Memory per inst", | ||
| 1687 | "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1688 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1689 | "MetricName": "pteg_from_dmem_rate_percent" | ||
| 1690 | }, | ||
| 1691 | { | ||
| 1692 | "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", | ||
| 1693 | "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1694 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1695 | "MetricName": "pteg_from_l21_mod_rate_percent" | ||
| 1696 | }, | ||
| 1697 | { | ||
| 1698 | "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", | ||
| 1699 | "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1700 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1701 | "MetricName": "pteg_from_l21_shr_rate_percent" | ||
| 1702 | }, | ||
| 1703 | { | ||
| 1704 | "BriefDescription": "% of DERAT reloads from L2 per inst", | ||
| 1705 | "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 1706 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1707 | "MetricName": "pteg_from_l2_rate_percent" | ||
| 1708 | }, | ||
| 1709 | { | ||
| 1710 | "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", | ||
| 1711 | "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1712 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1713 | "MetricName": "pteg_from_l31_mod_rate_percent" | ||
| 1714 | }, | ||
| 1715 | { | ||
| 1716 | "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", | ||
| 1717 | "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1718 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1719 | "MetricName": "pteg_from_l31_shr_rate_percent" | ||
| 1720 | }, | ||
| 1721 | { | ||
| 1722 | "BriefDescription": "% of DERAT reloads from L3 per inst", | ||
| 1723 | "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 1724 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1725 | "MetricName": "pteg_from_l3_rate_percent" | ||
| 1726 | }, | ||
| 1727 | { | ||
| 1728 | "BriefDescription": "% of DERAT reloads from Local L4 per inst", | ||
| 1729 | "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1730 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1731 | "MetricName": "pteg_from_ll4_rate_percent" | ||
| 1732 | }, | ||
| 1733 | { | ||
| 1734 | "BriefDescription": "% of DERAT reloads from Local Memory per inst", | ||
| 1735 | "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1736 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1737 | "MetricName": "pteg_from_lmem_rate_percent" | ||
| 1738 | }, | ||
| 1739 | { | ||
| 1740 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst", | ||
| 1741 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1742 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1743 | "MetricName": "pteg_from_rl2l3_mod_rate_percent" | ||
| 1744 | }, | ||
| 1745 | { | ||
| 1746 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst", | ||
| 1747 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1748 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1749 | "MetricName": "pteg_from_rl2l3_shr_rate_percent" | ||
| 1750 | }, | ||
| 1751 | { | ||
| 1752 | "BriefDescription": "% of DERAT reloads from Remote L4 per inst", | ||
| 1753 | "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1754 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1755 | "MetricName": "pteg_from_rl4_rate_percent" | ||
| 1756 | }, | ||
| 1757 | { | ||
| 1758 | "BriefDescription": "% of DERAT reloads from Remote Memory per inst", | ||
| 1759 | "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1760 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1761 | "MetricName": "pteg_from_rmem_rate_percent" | ||
| 1762 | }, | ||
| 1763 | { | ||
| 1764 | "BriefDescription": "% of DERAT misses that result in an ERAT reload", | ||
| 1765 | "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS", | ||
| 1766 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1767 | "MetricName": "derat_miss_reload_percent" | ||
| 1768 | }, | ||
| 1769 | { | ||
| 1770 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)", | ||
| 1771 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS", | ||
| 1772 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1773 | "MetricName": "pteg_from_dl2l3_mod_percent" | ||
| 1774 | }, | ||
| 1775 | { | ||
| 1776 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)", | ||
| 1777 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS", | ||
| 1778 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1779 | "MetricName": "pteg_from_dl2l3_shr_percent" | ||
| 1780 | }, | ||
| 1781 | { | ||
| 1782 | "BriefDescription": "% of DERAT reloads from Distant L4", | ||
| 1783 | "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS", | ||
| 1784 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1785 | "MetricName": "pteg_from_dl4_percent" | ||
| 1786 | }, | ||
| 1787 | { | ||
| 1788 | "BriefDescription": "% of DERAT reloads from Distant Memory", | ||
| 1789 | "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS", | ||
| 1790 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1791 | "MetricName": "pteg_from_dmem_percent" | ||
| 1792 | }, | ||
| 1793 | { | ||
| 1794 | "BriefDescription": "% of DERAT reloads from Private L2, other core", | ||
| 1795 | "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS", | ||
| 1796 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1797 | "MetricName": "pteg_from_l21_mod_percent" | ||
| 1798 | }, | ||
| 1799 | { | ||
| 1800 | "BriefDescription": "% of DERAT reloads from Private L2, other core", | ||
| 1801 | "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS", | ||
| 1802 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1803 | "MetricName": "pteg_from_l21_shr_percent" | ||
| 1804 | }, | ||
| 1805 | { | ||
| 1806 | "BriefDescription": "% of DERAT reloads from L2", | ||
| 1807 | "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS", | ||
| 1808 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1809 | "MetricName": "pteg_from_l2_percent" | ||
| 1810 | }, | ||
| 1811 | { | ||
| 1812 | "BriefDescription": "% of DERAT reloads from Private L3, other core", | ||
| 1813 | "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS", | ||
| 1814 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1815 | "MetricName": "pteg_from_l31_mod_percent" | ||
| 1816 | }, | ||
| 1817 | { | ||
| 1818 | "BriefDescription": "% of DERAT reloads from Private L3, other core", | ||
| 1819 | "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS", | ||
| 1820 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1821 | "MetricName": "pteg_from_l31_shr_percent" | ||
| 1822 | }, | ||
| 1823 | { | ||
| 1824 | "BriefDescription": "% of DERAT reloads from L3", | ||
| 1825 | "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS", | ||
| 1826 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1827 | "MetricName": "pteg_from_l3_percent" | ||
| 1828 | }, | ||
| 1829 | { | ||
| 1830 | "BriefDescription": "% of DERAT reloads from Local L4", | ||
| 1831 | "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS", | ||
| 1832 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1833 | "MetricName": "pteg_from_ll4_percent" | ||
| 1834 | }, | ||
| 1835 | { | ||
| 1836 | "BriefDescription": "% of DERAT reloads from Local Memory", | ||
| 1837 | "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS", | ||
| 1838 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1839 | "MetricName": "pteg_from_lmem_percent" | ||
| 1840 | }, | ||
| 1841 | { | ||
| 1842 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)", | ||
| 1843 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS", | ||
| 1844 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1845 | "MetricName": "pteg_from_rl2l3_mod_percent" | ||
| 1846 | }, | ||
| 1847 | { | ||
| 1848 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)", | ||
| 1849 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS", | ||
| 1850 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1851 | "MetricName": "pteg_from_rl2l3_shr_percent" | ||
| 1852 | }, | ||
| 1853 | { | ||
| 1854 | "BriefDescription": "% of DERAT reloads from Remote L4", | ||
| 1855 | "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS", | ||
| 1856 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1857 | "MetricName": "pteg_from_rl4_percent" | ||
| 1858 | }, | ||
| 1859 | { | ||
| 1860 | "BriefDescription": "% of DERAT reloads from Remote Memory", | ||
| 1861 | "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", | ||
| 1862 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1863 | "MetricName": "pteg_from_rmem_percent" | ||
| 1864 | }, | ||
| 1865 | { | ||
| 1866 | "BriefDescription": "% DERAT miss rate for 16G page per inst", | ||
| 1867 | "MetricExpr": "100 * PM_DERAT_MISS_16G / PM_RUN_INST_CMPL", | ||
| 1868 | "MetricGroup": "translation", | ||
| 1869 | "MetricName": "derat_16g_miss_rate_percent" | ||
| 1870 | }, | ||
| 1871 | { | ||
| 1872 | "BriefDescription": "DERAT miss ratio for 16G page", | ||
| 1873 | "MetricExpr": "PM_DERAT_MISS_16G / PM_LSU_DERAT_MISS", | ||
| 1874 | "MetricGroup": "translation", | ||
| 1875 | "MetricName": "derat_16g_miss_ratio" | ||
| 1876 | }, | ||
| 1877 | { | ||
| 1878 | "BriefDescription": "% DERAT miss rate for 16M page per inst", | ||
| 1879 | "MetricExpr": "PM_DERAT_MISS_16M * 100 / PM_RUN_INST_CMPL", | ||
| 1880 | "MetricGroup": "translation", | ||
| 1881 | "MetricName": "derat_16m_miss_rate_percent" | ||
| 1882 | }, | ||
| 1883 | { | ||
| 1884 | "BriefDescription": "DERAT miss ratio for 16M page", | ||
| 1885 | "MetricExpr": "PM_DERAT_MISS_16M / PM_LSU_DERAT_MISS", | ||
| 1886 | "MetricGroup": "translation", | ||
| 1887 | "MetricName": "derat_16m_miss_ratio" | ||
| 1888 | }, | ||
| 1889 | { | ||
| 1890 | "BriefDescription": "% DERAT miss rate for 4K page per inst", | ||
| 1891 | "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL", | ||
| 1892 | "MetricGroup": "translation", | ||
| 1893 | "MetricName": "derat_4k_miss_rate_percent" | ||
| 1894 | }, | ||
| 1895 | { | ||
| 1896 | "BriefDescription": "DERAT miss ratio for 4K page", | ||
| 1897 | "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS", | ||
| 1898 | "MetricGroup": "translation", | ||
| 1899 | "MetricName": "derat_4k_miss_ratio" | ||
| 1900 | }, | ||
| 1901 | { | ||
| 1902 | "BriefDescription": "% DERAT miss rate for 64K page per inst", | ||
| 1903 | "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL", | ||
| 1904 | "MetricGroup": "translation", | ||
| 1905 | "MetricName": "derat_64k_miss_rate_percent" | ||
| 1906 | }, | ||
| 1907 | { | ||
| 1908 | "BriefDescription": "DERAT miss ratio for 64K page", | ||
| 1909 | "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS", | ||
| 1910 | "MetricGroup": "translation", | ||
| 1911 | "MetricName": "derat_64k_miss_ratio" | ||
| 1912 | }, | ||
| 1913 | { | ||
| 1914 | "BriefDescription": "% DSLB_Miss_Rate per inst", | ||
| 1915 | "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1916 | "MetricGroup": "translation", | ||
| 1917 | "MetricName": "dslb_miss_rate_percent" | ||
| 1918 | }, | ||
| 1919 | { | ||
| 1920 | "BriefDescription": "% ISLB miss rate per inst", | ||
| 1921 | "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1922 | "MetricGroup": "translation", | ||
| 1923 | "MetricName": "islb_miss_rate_percent" | ||
| 1924 | }, | ||
| 1925 | { | ||
| 1926 | "BriefDescription": "Fraction of hits on any Centaur (local, remote, or distant) on either L4 or DRAM per L1 load ref", | ||
| 1927 | "MetricExpr": "PM_DATA_FROM_MEMORY / PM_LD_REF_L1", | ||
| 1928 | "MetricName": "any_centaur_ld_hit_ratio" | ||
| 1929 | }, | ||
| 1930 | { | ||
| 1931 | "BriefDescription": "Base Completion Cycles", | ||
| 1932 | "MetricExpr": "PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL", | ||
| 1933 | "MetricName": "base_completion_cpi" | ||
| 1934 | }, | ||
| 1935 | { | ||
| 1936 | "BriefDescription": "Marked background kill latency, measured in L2", | ||
| 1937 | "MetricExpr": "PM_MRK_FAB_RSP_BKILL_CYC / PM_MRK_FAB_RSP_BKILL", | ||
| 1938 | "MetricName": "bkill_ratio_percent" | ||
| 1939 | }, | ||
| 1940 | { | ||
| 1941 | "BriefDescription": "cycles", | ||
| 1942 | "MetricExpr": "PM_RUN_CYC", | ||
| 1943 | "MetricName": "custom_secs" | ||
| 1944 | }, | ||
| 1945 | { | ||
| 1946 | "BriefDescription": "Fraction of hits on a distant chip's Centaur (L4 or DRAM) per L1 load ref", | ||
| 1947 | "MetricExpr": "(PM_DATA_FROM_DMEM + PM_DATA_FROM_DL4) / PM_LD_REF_L1", | ||
| 1948 | "MetricName": "distant_centaur_ld_hit_ratio" | ||
| 1949 | }, | ||
| 1950 | { | ||
| 1951 | "BriefDescription": "% of DL1 reloads that came from the L3 and beyond", | ||
| 1952 | "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 1953 | "MetricName": "dl1_reload_from_l2_miss_percent" | ||
| 1954 | }, | ||
| 1955 | { | ||
| 1956 | "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", | ||
| 1957 | "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", | ||
| 1958 | "MetricName": "dl1_reload_from_l31_rate_percent" | ||
| 1959 | }, | ||
| 1960 | { | ||
| 1961 | "BriefDescription": "Percentage of DL1 reloads from L3 where the lines were brought into the L3 by a prefetch operation", | ||
| 1962 | "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 1963 | "MetricName": "dl1_reload_from_l3_mepf_percent" | ||
| 1964 | }, | ||
| 1965 | { | ||
| 1966 | "BriefDescription": "% of DL1 Reloads from beyond the local L3", | ||
| 1967 | "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 1968 | "MetricName": "dl1_reload_from_l3_miss_percent" | ||
| 1969 | }, | ||
| 1970 | { | ||
| 1971 | "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a distant chip per L1 load ref", | ||
| 1972 | "MetricExpr": "PM_DATA_FROM_DL2L3_MOD / PM_LD_REF_L1", | ||
| 1973 | "MetricName": "dl2l3_mod_ld_hit_ratio" | ||
| 1974 | }, | ||
| 1975 | { | ||
| 1976 | "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a distant chip per L1 load ref", | ||
| 1977 | "MetricExpr": "PM_DATA_FROM_DL2L3_SHR / PM_LD_REF_L1", | ||
| 1978 | "MetricName": "dl2l3_shr_ld_hit_ratio" | ||
| 1979 | }, | ||
| 1980 | { | ||
| 1981 | "BriefDescription": "Fraction of hits on a distant Centaur's cache per L1 load ref", | ||
| 1982 | "MetricExpr": "PM_DATA_FROM_DL4 / PM_LD_REF_L1", | ||
| 1983 | "MetricName": "dl4_ld_hit_ratio" | ||
| 1984 | }, | ||
| 1985 | { | ||
| 1986 | "BriefDescription": "Fraction of hits on a distant Centaur's DRAM per L1 load ref", | ||
| 1987 | "MetricExpr": "PM_DATA_FROM_DMEM / PM_LD_REF_L1", | ||
| 1988 | "MetricName": "dmem_ld_hit_ratio" | ||
| 1989 | }, | ||
| 1990 | { | ||
| 1991 | "BriefDescription": "Rate of DERAT reloads from L2", | ||
| 1992 | "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 1993 | "MetricName": "dpteg_from_l2_rate_percent" | ||
| 1994 | }, | ||
| 1995 | { | ||
| 1996 | "BriefDescription": "Rate of DERAT reloads from L3", | ||
| 1997 | "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 1998 | "MetricName": "dpteg_from_l3_rate_percent" | ||
| 1999 | }, | ||
| 2000 | { | ||
| 2001 | "BriefDescription": "Overhead of expansion cycles", | ||
| 2002 | "MetricExpr": "(PM_GRP_CMPL / PM_RUN_INST_CMPL) - (PM_1PLUS_PPC_CMPL / PM_RUN_INST_CMPL)", | ||
| 2003 | "MetricName": "expansion_overhead_cpi" | ||
| 2004 | }, | ||
| 2005 | { | ||
| 2006 | "BriefDescription": "Total Fixed point operations executed in the Load/Store Unit following a load/store operation", | ||
| 2007 | "MetricExpr": "PM_LSU_FX_FIN/PM_RUN_INST_CMPL", | ||
| 2008 | "MetricName": "fixed_in_lsu_per_inst" | ||
| 2009 | }, | ||
| 2010 | { | ||
| 2011 | "BriefDescription": "GCT empty cycles", | ||
| 2012 | "MetricExpr": "(PM_GCT_NOSLOT_CYC / PM_RUN_CYC) * 100", | ||
| 2013 | "MetricName": "gct_empty_percent" | ||
| 2014 | }, | ||
| 2015 | { | ||
| 2016 | "BriefDescription": "Rate of IERAT reloads from L2", | ||
| 2017 | "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 2018 | "MetricName": "ipteg_from_l2_rate_percent" | ||
| 2019 | }, | ||
| 2020 | { | ||
| 2021 | "BriefDescription": "Rate of IERAT reloads from L3", | ||
| 2022 | "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 2023 | "MetricName": "ipteg_from_l3_rate_percent" | ||
| 2024 | }, | ||
| 2025 | { | ||
| 2026 | "BriefDescription": "Rate of IERAT reloads from local memory", | ||
| 2027 | "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 2028 | "MetricName": "ipteg_from_ll4_rate_percent" | ||
| 2029 | }, | ||
| 2030 | { | ||
| 2031 | "BriefDescription": "Rate of IERAT reloads from local memory", | ||
| 2032 | "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 2033 | "MetricName": "ipteg_from_lmem_rate_percent" | ||
| 2034 | }, | ||
| 2035 | { | ||
| 2036 | "BriefDescription": "Fraction of L1 hits per load ref", | ||
| 2037 | "MetricExpr": "(PM_LD_REF_L1 - PM_LD_MISS_L1) / PM_LD_REF_L1", | ||
| 2038 | "MetricName": "l1_ld_hit_ratio" | ||
| 2039 | }, | ||
| 2040 | { | ||
| 2041 | "BriefDescription": "Fraction of L1 load misses per L1 load ref", | ||
| 2042 | "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1", | ||
| 2043 | "MetricName": "l1_ld_miss_ratio" | ||
| 2044 | }, | ||
| 2045 | { | ||
| 2046 | "BriefDescription": "Fraction of hits on another core's L2 on the same chip per L1 load ref", | ||
| 2047 | "MetricExpr": "(PM_DATA_FROM_L21_MOD + PM_DATA_FROM_L21_SHR) / PM_LD_REF_L1", | ||
| 2048 | "MetricName": "l2_1_ld_hit_ratio" | ||
| 2049 | }, | ||
| 2050 | { | ||
| 2051 | "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L2 on the same chip per L1 load ref", | ||
| 2052 | "MetricExpr": "PM_DATA_FROM_L21_MOD / PM_LD_REF_L1", | ||
| 2053 | "MetricName": "l2_1_mod_ld_hit_ratio" | ||
| 2054 | }, | ||
| 2055 | { | ||
| 2056 | "BriefDescription": "Fraction of hits of a line in the S state on another core's L2 on the same chip per L1 load ref", | ||
| 2057 | "MetricExpr": "PM_DATA_FROM_L21_SHR / PM_LD_REF_L1", | ||
| 2058 | "MetricName": "l2_1_shr_ld_hit_ratio" | ||
| 2059 | }, | ||
| 2060 | { | ||
| 2061 | "BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle", | ||
| 2062 | "MetricExpr": "(PM_CO_USAGE / PM_RUN_CYC) * 16", | ||
| 2063 | "MetricName": "l2_co_usage" | ||
| 2064 | }, | ||
| 2065 | { | ||
| 2066 | "BriefDescription": "Fraction of L2 load hits per L1 load ref", | ||
| 2067 | "MetricExpr": "PM_DATA_FROM_L2 / PM_LD_REF_L1", | ||
| 2068 | "MetricName": "l2_ld_hit_ratio" | ||
| 2069 | }, | ||
| 2070 | { | ||
| 2071 | "BriefDescription": "Fraction of L2 load misses per L1 load ref", | ||
| 2072 | "MetricExpr": "PM_DATA_FROM_L2MISS / PM_LD_REF_L1", | ||
| 2073 | "MetricName": "l2_ld_miss_ratio" | ||
| 2074 | }, | ||
| 2075 | { | ||
| 2076 | "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced a Load-Hit-Store conflict", | ||
| 2077 | "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST / PM_LD_REF_L1", | ||
| 2078 | "MetricName": "l2_lhs_ld_hit_ratio" | ||
| 2079 | }, | ||
| 2080 | { | ||
| 2081 | "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 did not experience a conflict", | ||
| 2082 | "MetricExpr": "PM_DATA_FROM_L2_NO_CONFLICT / PM_LD_REF_L1", | ||
| 2083 | "MetricName": "l2_no_conflict_ld_hit_ratio" | ||
| 2084 | }, | ||
| 2085 | { | ||
| 2086 | "BriefDescription": "Fraction of L2 load hits per L1 load ref where the L2 experienced some conflict other than Load-Hit-Store", | ||
| 2087 | "MetricExpr": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER / PM_LD_REF_L1", | ||
| 2088 | "MetricName": "l2_other_conflict_ld_hit_ratio" | ||
| 2089 | }, | ||
| 2090 | { | ||
| 2091 | "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle", | ||
| 2092 | "MetricExpr": "(PM_RC_USAGE / PM_RUN_CYC) * 16", | ||
| 2093 | "MetricName": "l2_rc_usage" | ||
| 2094 | }, | ||
| 2095 | { | ||
| 2096 | "BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle", | ||
| 2097 | "MetricExpr": "(PM_SN_USAGE / PM_RUN_CYC) * 8", | ||
| 2098 | "MetricName": "l2_sn_usage" | ||
| 2099 | }, | ||
| 2100 | { | ||
| 2101 | "BriefDescription": "Marked L31 Load latency", | ||
| 2102 | "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)", | ||
| 2103 | "MetricName": "l31_latency" | ||
| 2104 | }, | ||
| 2105 | { | ||
| 2106 | "BriefDescription": "Fraction of hits on another core's L3 on the same chip per L1 load ref", | ||
| 2107 | "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) / PM_LD_REF_L1", | ||
| 2108 | "MetricName": "l3_1_ld_hit_ratio" | ||
| 2109 | }, | ||
| 2110 | { | ||
| 2111 | "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on another core's L3 on the same chip per L1 load ref", | ||
| 2112 | "MetricExpr": "PM_DATA_FROM_L31_MOD / PM_LD_REF_L1", | ||
| 2113 | "MetricName": "l3_1_mod_ld_hit_ratio" | ||
| 2114 | }, | ||
| 2115 | { | ||
| 2116 | "BriefDescription": "Fraction of hits of a line in the S state on another core's L3 on the same chip per L1 load ref", | ||
| 2117 | "MetricExpr": "PM_DATA_FROM_L31_SHR / PM_LD_REF_L1", | ||
| 2118 | "MetricName": "l3_1_shr_ld_hit_ratio" | ||
| 2119 | }, | ||
| 2120 | { | ||
| 2121 | "BriefDescription": "Fraction of L3 load hits per load ref where the demand load collided with a pending prefetch", | ||
| 2122 | "MetricExpr": "PM_DATA_FROM_L3_DISP_CONFLICT / PM_LD_REF_L1", | ||
| 2123 | "MetricName": "l3_conflict_ld_hit_ratio" | ||
| 2124 | }, | ||
| 2125 | { | ||
| 2126 | "BriefDescription": "Fraction of L3 load hits per L1 load ref", | ||
| 2127 | "MetricExpr": "PM_DATA_FROM_L3 / PM_LD_REF_L1", | ||
| 2128 | "MetricName": "l3_ld_hit_ratio" | ||
| 2129 | }, | ||
| 2130 | { | ||
| 2131 | "BriefDescription": "Fraction of L3 load misses per L1 load ref", | ||
| 2132 | "MetricExpr": "PM_DATA_FROM_L3MISS / PM_LD_REF_L1", | ||
| 2133 | "MetricName": "l3_ld_miss_ratio" | ||
| 2134 | }, | ||
| 2135 | { | ||
| 2136 | "BriefDescription": "Fraction of L3 load hits per load ref where the L3 did not experience a conflict", | ||
| 2137 | "MetricExpr": "PM_DATA_FROM_L3_NO_CONFLICT / PM_LD_REF_L1", | ||
| 2138 | "MetricName": "l3_no_conflict_ld_hit_ratio" | ||
| 2139 | }, | ||
| 2140 | { | ||
| 2141 | "BriefDescription": "Fraction of L3 hits on lines that were not in the MEPF state per L1 load ref", | ||
| 2142 | "MetricExpr": "(PM_DATA_FROM_L3 - PM_DATA_FROM_L3_MEPF) / PM_LD_REF_L1", | ||
| 2143 | "MetricName": "l3other_ld_hit_ratio" | ||
| 2144 | }, | ||
| 2145 | { | ||
| 2146 | "BriefDescription": "Fraction of L3 hits on lines that were recently prefetched into the L3 (MEPF state) per L1 load ref", | ||
| 2147 | "MetricExpr": "PM_DATA_FROM_L3_MEPF / PM_LD_REF_L1", | ||
| 2148 | "MetricName": "l3pref_ld_hit_ratio" | ||
| 2149 | }, | ||
| 2150 | { | ||
| 2151 | "BriefDescription": "Fraction of hits on a local Centaur's cache per L1 load ref", | ||
| 2152 | "MetricExpr": "PM_DATA_FROM_LL4 / PM_LD_REF_L1", | ||
| 2153 | "MetricName": "ll4_ld_hit_ratio" | ||
| 2154 | }, | ||
| 2155 | { | ||
| 2156 | "BriefDescription": "Fraction of hits on a local Centaur's DRAM per L1 load ref", | ||
| 2157 | "MetricExpr": "PM_DATA_FROM_LMEM / PM_LD_REF_L1", | ||
| 2158 | "MetricName": "lmem_ld_hit_ratio" | ||
| 2159 | }, | ||
| 2160 | { | ||
| 2161 | "BriefDescription": "Fraction of hits on a local Centaur (L4 or DRAM) per L1 load ref", | ||
| 2162 | "MetricExpr": "(PM_DATA_FROM_LMEM + PM_DATA_FROM_LL4) / PM_LD_REF_L1", | ||
| 2163 | "MetricName": "local_centaur_ld_hit_ratio" | ||
| 2164 | }, | ||
| 2165 | { | ||
| 2166 | "BriefDescription": "Cycles stalled by Other LSU Operations", | ||
| 2167 | "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_REJECT - PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_STORE) / (PM_LD_REF_L1 - PM_LD_MISS_L1)", | ||
| 2168 | "MetricName": "lsu_stall_avg_cyc_per_l1hit_stfw" | ||
| 2169 | }, | ||
| 2170 | { | ||
| 2171 | "BriefDescription": "Fraction of hits on another core's L2 or L3 on a different chip (remote or distant) per L1 load ref", | ||
| 2172 | "MetricExpr": "PM_DATA_FROM_OFF_CHIP_CACHE / PM_LD_REF_L1", | ||
| 2173 | "MetricName": "off_chip_cache_ld_hit_ratio" | ||
| 2174 | }, | ||
| 2175 | { | ||
| 2176 | "BriefDescription": "Fraction of hits on another core's L2 or L3 on the same chip per L1 load ref", | ||
| 2177 | "MetricExpr": "PM_DATA_FROM_ON_CHIP_CACHE / PM_LD_REF_L1", | ||
| 2178 | "MetricName": "on_chip_cache_ld_hit_ratio" | ||
| 2179 | }, | ||
| 2180 | { | ||
| 2181 | "BriefDescription": "Fraction of hits on a remote chip's Centaur (L4 or DRAM) per L1 load ref", | ||
| 2182 | "MetricExpr": "(PM_DATA_FROM_RMEM + PM_DATA_FROM_RL4) / PM_LD_REF_L1", | ||
| 2183 | "MetricName": "remote_centaur_ld_hit_ratio" | ||
| 2184 | }, | ||
| 2185 | { | ||
| 2186 | "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable resources or facilities", | ||
| 2187 | "MetricExpr": "PM_ISU_REJECT_RES_NA *100/ PM_RUN_INST_CMPL", | ||
| 2188 | "MetricName": "resource_na_reject_rate_percent" | ||
| 2189 | }, | ||
| 2190 | { | ||
| 2191 | "BriefDescription": "Fraction of hits of a line in the M (exclusive) state on the L2 or L3 of a core on a remote chip per L1 load ref", | ||
| 2192 | "MetricExpr": "PM_DATA_FROM_RL2L3_MOD / PM_LD_REF_L1", | ||
| 2193 | "MetricName": "rl2l3_mod_ld_hit_ratio" | ||
| 2194 | }, | ||
| 2195 | { | ||
| 2196 | "BriefDescription": "Fraction of hits of a line in the S state on the L2 or L3 of a core on a remote chip per L1 load ref", | ||
| 2197 | "MetricExpr": "PM_DATA_FROM_RL2L3_SHR / PM_LD_REF_L1", | ||
| 2198 | "MetricName": "rl2l3_shr_ld_hit_ratio" | ||
| 2199 | }, | ||
| 2200 | { | ||
| 2201 | "BriefDescription": "Fraction of hits on a remote Centaur's cache per L1 load ref", | ||
| 2202 | "MetricExpr": "PM_DATA_FROM_RL4 / PM_LD_REF_L1", | ||
| 2203 | "MetricName": "rl4_ld_hit_ratio" | ||
| 2204 | }, | ||
| 2205 | { | ||
| 2206 | "BriefDescription": "Fraction of hits on a remote Centaur's DRAM per L1 load ref", | ||
| 2207 | "MetricExpr": "PM_DATA_FROM_RMEM / PM_LD_REF_L1", | ||
| 2208 | "MetricName": "rmem_ld_hit_ratio" | ||
| 2209 | }, | ||
| 2210 | { | ||
| 2211 | "BriefDescription": "Percent of all FXU/VSU instructions that got rejected due to SAR Bypass", | ||
| 2212 | "MetricExpr": "PM_ISU_REJECT_SAR_BYPASS *100/ PM_RUN_INST_CMPL", | ||
| 2213 | "MetricName": "sar_bypass_reject_rate_percent" | ||
| 2214 | }, | ||
| 2215 | { | ||
| 2216 | "BriefDescription": "Percent of all FXU/VSU instructions that got rejected because of unavailable sources", | ||
| 2217 | "MetricExpr": "PM_ISU_REJECT_SRC_NA *100/ PM_RUN_INST_CMPL", | ||
| 2218 | "MetricName": "source_na_reject_rate_percent" | ||
| 2219 | }, | ||
| 2220 | { | ||
| 2221 | "BriefDescription": "Store forward rate", | ||
| 2222 | "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / PM_RUN_INST_CMPL", | ||
| 2223 | "MetricName": "store_forward_rate_percent" | ||
| 2224 | }, | ||
| 2225 | { | ||
| 2226 | "BriefDescription": "Store forward rate", | ||
| 2227 | "MetricExpr": "100 * (PM_LSU0_SRQ_STFWD + PM_LSU1_SRQ_STFWD) / (PM_LD_REF_L1 - PM_LD_MISS_L1)", | ||
| 2228 | "MetricName": "store_forward_ratio_percent" | ||
| 2229 | }, | ||
| 2230 | { | ||
| 2231 | "BriefDescription": "Marked store latency, from core completion to L2 RC machine completion", | ||
| 2232 | "MetricExpr": "(PM_MRK_ST_L2DISP_TO_CMPL_CYC + PM_MRK_ST_DRAIN_TO_L2DISP_CYC) / PM_MRK_ST_NEST", | ||
| 2233 | "MetricName": "store_latency" | ||
| 2234 | }, | ||
| 2235 | { | ||
| 2236 | "BriefDescription": "Cycles stalled by any sync", | ||
| 2237 | "MetricExpr": "(PM_CMPLU_STALL_LWSYNC + PM_CMPLU_STALL_HWSYNC) / PM_RUN_INST_CMPL", | ||
| 2238 | "MetricName": "sync_stall_cpi" | ||
| 2239 | }, | ||
| 2240 | { | ||
| 2241 | "BriefDescription": "Percentage of lines that were prefetched into the L3 and evicted before they were consumed", | ||
| 2242 | "MetricExpr": "(PM_L3_CO_MEPF / 2) / PM_L3_PREF_ALL * 100", | ||
| 2243 | "MetricName": "wasted_l3_prefetch_percent" | ||
| 2244 | } | ||
| 2245 | ] | ||
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json new file mode 100644 index 000000000000..811c2a8c1c9e --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json | |||
| @@ -0,0 +1,1982 @@ | |||
| 1 | [ | ||
| 2 | { | ||
| 3 | "MetricExpr": "PM_BR_MPRED_CMPL / PM_BR_PRED * 100", | ||
| 4 | "MetricGroup": "branch_prediction", | ||
| 5 | "MetricName": "br_misprediction_percent" | ||
| 6 | }, | ||
| 7 | { | ||
| 8 | "BriefDescription": "Count cache branch misprediction per instruction", | ||
| 9 | "MetricExpr": "PM_BR_MPRED_CCACHE / PM_RUN_INST_CMPL * 100", | ||
| 10 | "MetricGroup": "branch_prediction", | ||
| 11 | "MetricName": "ccache_mispredict_rate_percent" | ||
| 12 | }, | ||
| 13 | { | ||
| 14 | "BriefDescription": "Count cache branch misprediction", | ||
| 15 | "MetricExpr": "PM_BR_MPRED_CCACHE / PM_BR_PRED_CCACHE * 100", | ||
| 16 | "MetricGroup": "branch_prediction", | ||
| 17 | "MetricName": "ccache_misprediction_percent" | ||
| 18 | }, | ||
| 19 | { | ||
| 20 | "BriefDescription": "Link stack branch misprediction", | ||
| 21 | "MetricExpr": "PM_BR_MPRED_LSTACK / PM_RUN_INST_CMPL * 100", | ||
| 22 | "MetricGroup": "branch_prediction", | ||
| 23 | "MetricName": "lstack_mispredict_rate_percent" | ||
| 24 | }, | ||
| 25 | { | ||
| 26 | "BriefDescription": "Link stack branch misprediction", | ||
| 27 | "MetricExpr": "PM_BR_MPRED_LSTACK/ PM_BR_PRED_LSTACK * 100", | ||
| 28 | "MetricGroup": "branch_prediction", | ||
| 29 | "MetricName": "lstack_misprediction_percent" | ||
| 30 | }, | ||
| 31 | { | ||
| 32 | "BriefDescription": "% Branches Taken", | ||
| 33 | "MetricExpr": "PM_BR_TAKEN_CMPL * 100 / PM_BRU_FIN", | ||
| 34 | "MetricGroup": "branch_prediction", | ||
| 35 | "MetricName": "taken_branches_percent" | ||
| 36 | }, | ||
| 37 | { | ||
| 38 | "BriefDescription": "Completion stall due to a Branch Unit", | ||
| 39 | "MetricExpr": "PM_CMPLU_STALL_BRU/PM_RUN_INST_CMPL", | ||
| 40 | "MetricGroup": "cpi_breakdown", | ||
| 41 | "MetricName": "bru_stall_cpi" | ||
| 42 | }, | ||
| 43 | { | ||
| 44 | "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish", | ||
| 45 | "MetricExpr": "PM_CMPLU_STALL_CRYPTO/PM_RUN_INST_CMPL", | ||
| 46 | "MetricGroup": "cpi_breakdown", | ||
| 47 | "MetricName": "crypto_stall_cpi" | ||
| 48 | }, | ||
| 49 | { | ||
| 50 | "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest", | ||
| 51 | "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS/PM_RUN_INST_CMPL", | ||
| 52 | "MetricGroup": "cpi_breakdown", | ||
| 53 | "MetricName": "dcache_miss_stall_cpi" | ||
| 54 | }, | ||
| 55 | { | ||
| 56 | "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish.", | ||
| 57 | "MetricExpr": "PM_CMPLU_STALL_DFLONG/PM_RUN_INST_CMPL", | ||
| 58 | "MetricGroup": "cpi_breakdown", | ||
| 59 | "MetricName": "dflong_stall_cpi" | ||
| 60 | }, | ||
| 61 | { | ||
| 62 | "BriefDescription": "Stalls due to short latency decimal floating ops.", | ||
| 63 | "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL", | ||
| 64 | "MetricGroup": "cpi_breakdown", | ||
| 65 | "MetricName": "dfu_other_stall_cpi" | ||
| 66 | }, | ||
| 67 | { | ||
| 68 | "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish.", | ||
| 69 | "MetricExpr": "PM_CMPLU_STALL_DFU/PM_RUN_INST_CMPL", | ||
| 70 | "MetricGroup": "cpi_breakdown", | ||
| 71 | "MetricName": "dfu_stall_cpi" | ||
| 72 | }, | ||
| 73 | { | ||
| 74 | "BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache", | ||
| 75 | "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL", | ||
| 76 | "MetricGroup": "cpi_breakdown", | ||
| 77 | "MetricName": "dmiss_distant_stall_cpi" | ||
| 78 | }, | ||
| 79 | { | ||
| 80 | "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)", | ||
| 81 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L21_L31/PM_RUN_INST_CMPL", | ||
| 82 | "MetricGroup": "cpi_breakdown", | ||
| 83 | "MetricName": "dmiss_l21_l31_stall_cpi" | ||
| 84 | }, | ||
| 85 | { | ||
| 86 | "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict", | ||
| 87 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT/PM_RUN_INST_CMPL", | ||
| 88 | "MetricGroup": "cpi_breakdown", | ||
| 89 | "MetricName": "dmiss_l2l3_conflict_stall_cpi" | ||
| 90 | }, | ||
| 91 | { | ||
| 92 | "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict", | ||
| 93 | "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL", | ||
| 94 | "MetricGroup": "cpi_breakdown", | ||
| 95 | "MetricName": "dmiss_l2l3_noconflict_stall_cpi" | ||
| 96 | }, | ||
| 97 | { | ||
| 98 | "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3", | ||
| 99 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L2L3/PM_RUN_INST_CMPL", | ||
| 100 | "MetricGroup": "cpi_breakdown", | ||
| 101 | "MetricName": "dmiss_l2l3_stall_cpi" | ||
| 102 | }, | ||
| 103 | { | ||
| 104 | "BriefDescription": "Completion stall due to cache miss resolving missed the L3", | ||
| 105 | "MetricExpr": "PM_CMPLU_STALL_DMISS_L3MISS/PM_RUN_INST_CMPL", | ||
| 106 | "MetricGroup": "cpi_breakdown", | ||
| 107 | "MetricName": "dmiss_l3miss_stall_cpi" | ||
| 108 | }, | ||
| 109 | { | ||
| 110 | "BriefDescription": "Completion stall due to cache miss that resolves in local memory", | ||
| 111 | "MetricExpr": "PM_CMPLU_STALL_DMISS_LMEM/PM_RUN_INST_CMPL", | ||
| 112 | "MetricGroup": "cpi_breakdown", | ||
| 113 | "MetricName": "dmiss_lmem_stall_cpi" | ||
| 114 | }, | ||
| 115 | { | ||
| 116 | "BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory", | ||
| 117 | "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL", | ||
| 118 | "MetricGroup": "cpi_breakdown", | ||
| 119 | "MetricName": "dmiss_non_local_stall_cpi" | ||
| 120 | }, | ||
| 121 | { | ||
| 122 | "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)", | ||
| 123 | "MetricExpr": "PM_CMPLU_STALL_DMISS_REMOTE/PM_RUN_INST_CMPL", | ||
| 124 | "MetricGroup": "cpi_breakdown", | ||
| 125 | "MetricName": "dmiss_remote_stall_cpi" | ||
| 126 | }, | ||
| 127 | { | ||
| 128 | "BriefDescription": "Stalls due to short latency double precision ops.", | ||
| 129 | "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL", | ||
| 130 | "MetricGroup": "cpi_breakdown", | ||
| 131 | "MetricName": "dp_other_stall_cpi" | ||
| 132 | }, | ||
| 133 | { | ||
| 134 | "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", | ||
| 135 | "MetricExpr": "PM_CMPLU_STALL_DP/PM_RUN_INST_CMPL", | ||
| 136 | "MetricGroup": "cpi_breakdown", | ||
| 137 | "MetricName": "dp_stall_cpi" | ||
| 138 | }, | ||
| 139 | { | ||
| 140 | "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", | ||
| 141 | "MetricExpr": "PM_CMPLU_STALL_DPLONG/PM_RUN_INST_CMPL", | ||
| 142 | "MetricGroup": "cpi_breakdown", | ||
| 143 | "MetricName": "dplong_stall_cpi" | ||
| 144 | }, | ||
| 145 | { | ||
| 146 | "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2", | ||
| 147 | "MetricExpr": "PM_CMPLU_STALL_EIEIO/PM_RUN_INST_CMPL", | ||
| 148 | "MetricGroup": "cpi_breakdown", | ||
| 149 | "MetricName": "eieio_stall_cpi" | ||
| 150 | }, | ||
| 151 | { | ||
| 152 | "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full", | ||
| 153 | "MetricExpr": "PM_CMPLU_STALL_EMQ_FULL/PM_RUN_INST_CMPL", | ||
| 154 | "MetricGroup": "cpi_breakdown", | ||
| 155 | "MetricName": "emq_full_stall_cpi" | ||
| 156 | }, | ||
| 157 | { | ||
| 158 | "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL", | ||
| 159 | "MetricGroup": "cpi_breakdown", | ||
| 160 | "MetricName": "emq_stall_cpi" | ||
| 161 | }, | ||
| 162 | { | ||
| 163 | "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss", | ||
| 164 | "MetricExpr": "PM_CMPLU_STALL_ERAT_MISS/PM_RUN_INST_CMPL", | ||
| 165 | "MetricGroup": "cpi_breakdown", | ||
| 166 | "MetricName": "erat_miss_stall_cpi" | ||
| 167 | }, | ||
| 168 | { | ||
| 169 | "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete", | ||
| 170 | "MetricExpr": "PM_CMPLU_STALL_EXCEPTION/PM_RUN_INST_CMPL", | ||
| 171 | "MetricGroup": "cpi_breakdown", | ||
| 172 | "MetricName": "exception_stall_cpi" | ||
| 173 | }, | ||
| 174 | { | ||
| 175 | "BriefDescription": "Completion stall due to execution units for other reasons.", | ||
| 176 | "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", | ||
| 177 | "MetricGroup": "cpi_breakdown", | ||
| 178 | "MetricName": "exec_unit_other_stall_cpi" | ||
| 179 | }, | ||
| 180 | { | ||
| 181 | "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)", | ||
| 182 | "MetricExpr": "PM_CMPLU_STALL_EXEC_UNIT/PM_RUN_INST_CMPL", | ||
| 183 | "MetricGroup": "cpi_breakdown", | ||
| 184 | "MetricName": "exec_unit_stall_cpi" | ||
| 185 | }, | ||
| 186 | { | ||
| 187 | "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion", | ||
| 188 | "MetricExpr": "PM_CMPLU_STALL_FLUSH_ANY_THREAD/PM_RUN_INST_CMPL", | ||
| 189 | "MetricGroup": "cpi_breakdown", | ||
| 190 | "MetricName": "flush_any_thread_stall_cpi" | ||
| 191 | }, | ||
| 192 | { | ||
| 193 | "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)", | ||
| 194 | "MetricExpr": "PM_CMPLU_STALL_FXLONG/PM_RUN_INST_CMPL", | ||
| 195 | "MetricGroup": "cpi_breakdown", | ||
| 196 | "MetricName": "fxlong_stall_cpi" | ||
| 197 | }, | ||
| 198 | { | ||
| 199 | "BriefDescription": "Stalls due to short latency integer ops", | ||
| 200 | "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL", | ||
| 201 | "MetricGroup": "cpi_breakdown", | ||
| 202 | "MetricName": "fxu_other_stall_cpi" | ||
| 203 | }, | ||
| 204 | { | ||
| 205 | "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", | ||
| 206 | "MetricExpr": "PM_CMPLU_STALL_FXU/PM_RUN_INST_CMPL", | ||
| 207 | "MetricGroup": "cpi_breakdown", | ||
| 208 | "MetricName": "fxu_stall_cpi" | ||
| 209 | }, | ||
| 210 | { | ||
| 211 | "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL", | ||
| 212 | "MetricGroup": "cpi_breakdown", | ||
| 213 | "MetricName": "issue_hold_cpi" | ||
| 214 | }, | ||
| 215 | { | ||
| 216 | "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied", | ||
| 217 | "MetricExpr": "PM_CMPLU_STALL_LARX/PM_RUN_INST_CMPL", | ||
| 218 | "MetricGroup": "cpi_breakdown", | ||
| 219 | "MetricName": "larx_stall_cpi" | ||
| 220 | }, | ||
| 221 | { | ||
| 222 | "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data", | ||
| 223 | "MetricExpr": "PM_CMPLU_STALL_LHS/PM_RUN_INST_CMPL", | ||
| 224 | "MetricGroup": "cpi_breakdown", | ||
| 225 | "MetricName": "lhs_stall_cpi" | ||
| 226 | }, | ||
| 227 | { | ||
| 228 | "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full", | ||
| 229 | "MetricExpr": "PM_CMPLU_STALL_LMQ_FULL/PM_RUN_INST_CMPL", | ||
| 230 | "MetricGroup": "cpi_breakdown", | ||
| 231 | "MetricName": "lmq_full_stall_cpi" | ||
| 232 | }, | ||
| 233 | { | ||
| 234 | "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish", | ||
| 235 | "MetricExpr": "PM_CMPLU_STALL_LOAD_FINISH/PM_RUN_INST_CMPL", | ||
| 236 | "MetricGroup": "cpi_breakdown", | ||
| 237 | "MetricName": "load_finish_stall_cpi" | ||
| 238 | }, | ||
| 239 | { | ||
| 240 | "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ because the LRQ was full", | ||
| 241 | "MetricExpr": "PM_CMPLU_STALL_LRQ_FULL/PM_RUN_INST_CMPL", | ||
| 242 | "MetricGroup": "cpi_breakdown", | ||
| 243 | "MetricName": "lrq_full_stall_cpi" | ||
| 244 | }, | ||
| 245 | { | ||
| 246 | "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others", | ||
| 247 | "MetricExpr": "PM_CMPLU_STALL_LRQ_OTHER/PM_RUN_INST_CMPL", | ||
| 248 | "MetricGroup": "cpi_breakdown", | ||
| 249 | "MetricName": "lrq_other_stall_cpi" | ||
| 250 | }, | ||
| 251 | { | ||
| 252 | "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL", | ||
| 253 | "MetricGroup": "cpi_breakdown", | ||
| 254 | "MetricName": "lrq_stall_cpi" | ||
| 255 | }, | ||
| 256 | { | ||
| 257 | "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch", | ||
| 258 | "MetricExpr": "PM_CMPLU_STALL_LSAQ_ARB/PM_RUN_INST_CMPL", | ||
| 259 | "MetricGroup": "cpi_breakdown", | ||
| 260 | "MetricName": "lsaq_arb_stall_cpi" | ||
| 261 | }, | ||
| 262 | { | ||
| 263 | "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL", | ||
| 264 | "MetricGroup": "cpi_breakdown", | ||
| 265 | "MetricName": "lsaq_stall_cpi" | ||
| 266 | }, | ||
| 267 | { | ||
| 268 | "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish", | ||
| 269 | "MetricExpr": "PM_CMPLU_STALL_LSU_FIN/PM_RUN_INST_CMPL", | ||
| 270 | "MetricGroup": "cpi_breakdown", | ||
| 271 | "MetricName": "lsu_fin_stall_cpi" | ||
| 272 | }, | ||
| 273 | { | ||
| 274 | "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete", | ||
| 275 | "MetricExpr": "PM_CMPLU_STALL_LSU_FLUSH_NEXT/PM_RUN_INST_CMPL", | ||
| 276 | "MetricGroup": "cpi_breakdown", | ||
| 277 | "MetricName": "lsu_flush_next_stall_cpi" | ||
| 278 | }, | ||
| 279 | { | ||
| 280 | "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned", | ||
| 281 | "MetricExpr": "PM_CMPLU_STALL_LSU_MFSPR/PM_RUN_INST_CMPL", | ||
| 282 | "MetricGroup": "cpi_breakdown", | ||
| 283 | "MetricName": "lsu_mfspr_stall_cpi" | ||
| 284 | }, | ||
| 285 | { | ||
| 286 | "BriefDescription": "Completion LSU stall for other reasons", | ||
| 287 | "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL", | ||
| 288 | "MetricGroup": "cpi_breakdown", | ||
| 289 | "MetricName": "lsu_other_stall_cpi" | ||
| 290 | }, | ||
| 291 | { | ||
| 292 | "BriefDescription": "Completion stall by LSU instruction", | ||
| 293 | "MetricExpr": "PM_CMPLU_STALL_LSU/PM_RUN_INST_CMPL", | ||
| 294 | "MetricGroup": "cpi_breakdown", | ||
| 295 | "MetricName": "lsu_stall_cpi" | ||
| 296 | }, | ||
| 297 | { | ||
| 298 | "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)", | ||
| 299 | "MetricExpr": "PM_CMPLU_STALL_MTFPSCR/PM_RUN_INST_CMPL", | ||
| 300 | "MetricGroup": "cpi_breakdown", | ||
| 301 | "MetricName": "mtfpscr_stall_cpi" | ||
| 302 | }, | ||
| 303 | { | ||
| 304 | "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT", | ||
| 305 | "MetricExpr": "PM_CMPLU_STALL_NESTED_TBEGIN/PM_RUN_INST_CMPL", | ||
| 306 | "MetricGroup": "cpi_breakdown", | ||
| 307 | "MetricName": "nested_tbegin_stall_cpi" | ||
| 308 | }, | ||
| 309 | { | ||
| 310 | "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay", | ||
| 311 | "MetricExpr": "PM_CMPLU_STALL_NESTED_TEND/PM_RUN_INST_CMPL", | ||
| 312 | "MetricGroup": "cpi_breakdown", | ||
| 313 | "MetricName": "nested_tend_stall_cpi" | ||
| 314 | }, | ||
| 315 | { | ||
| 316 | "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread", | ||
| 317 | "MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL", | ||
| 318 | "MetricGroup": "cpi_breakdown", | ||
| 319 | "MetricName": "nothing_dispatched_cpi" | ||
| 320 | }, | ||
| 321 | { | ||
| 322 | "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch.", | ||
| 323 | "MetricExpr": "PM_CMPLU_STALL_NTC_DISP_FIN/PM_RUN_INST_CMPL", | ||
| 324 | "MetricGroup": "cpi_breakdown", | ||
| 325 | "MetricName": "ntc_disp_fin_stall_cpi" | ||
| 326 | }, | ||
| 327 | { | ||
| 328 | "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack", | ||
| 329 | "MetricExpr": "PM_NTC_FIN/PM_RUN_INST_CMPL", | ||
| 330 | "MetricGroup": "cpi_breakdown", | ||
| 331 | "MetricName": "ntc_fin_cpi" | ||
| 332 | }, | ||
| 333 | { | ||
| 334 | "BriefDescription": "Completion stall due to ntc flush", | ||
| 335 | "MetricExpr": "PM_CMPLU_STALL_NTC_FLUSH/PM_RUN_INST_CMPL", | ||
| 336 | "MetricGroup": "cpi_breakdown", | ||
| 337 | "MetricName": "ntc_flush_stall_cpi" | ||
| 338 | }, | ||
| 339 | { | ||
| 340 | "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)", | ||
| 341 | "MetricExpr": "PM_NTC_ISSUE_HELD_ARB/PM_RUN_INST_CMPL", | ||
| 342 | "MetricGroup": "cpi_breakdown", | ||
| 343 | "MetricName": "ntc_issue_held_arb_cpi" | ||
| 344 | }, | ||
| 345 | { | ||
| 346 | "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it", | ||
| 347 | "MetricExpr": "PM_NTC_ISSUE_HELD_DARQ_FULL/PM_RUN_INST_CMPL", | ||
| 348 | "MetricGroup": "cpi_breakdown", | ||
| 349 | "MetricName": "ntc_issue_held_darq_full_cpi" | ||
| 350 | }, | ||
| 351 | { | ||
| 352 | "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU", | ||
| 353 | "MetricExpr": "PM_NTC_ISSUE_HELD_OTHER/PM_RUN_INST_CMPL", | ||
| 354 | "MetricGroup": "cpi_breakdown", | ||
| 355 | "MetricName": "ntc_issue_held_other_cpi" | ||
| 356 | }, | ||
| 357 | { | ||
| 358 | "BriefDescription": "Cycles unaccounted for.", | ||
| 359 | "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL", | ||
| 360 | "MetricGroup": "cpi_breakdown", | ||
| 361 | "MetricName": "other_cpi" | ||
| 362 | }, | ||
| 363 | { | ||
| 364 | "BriefDescription": "Completion stall for other reasons", | ||
| 365 | "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL", | ||
| 366 | "MetricGroup": "cpi_breakdown", | ||
| 367 | "MetricName": "other_stall_cpi" | ||
| 368 | }, | ||
| 369 | { | ||
| 370 | "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2", | ||
| 371 | "MetricExpr": "PM_CMPLU_STALL_PASTE/PM_RUN_INST_CMPL", | ||
| 372 | "MetricGroup": "cpi_breakdown", | ||
| 373 | "MetricName": "paste_stall_cpi" | ||
| 374 | }, | ||
| 375 | { | ||
| 376 | "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish.", | ||
| 377 | "MetricExpr": "PM_CMPLU_STALL_PM/PM_RUN_INST_CMPL", | ||
| 378 | "MetricGroup": "cpi_breakdown", | ||
| 379 | "MetricName": "pm_stall_cpi" | ||
| 380 | }, | ||
| 381 | { | ||
| 382 | "BriefDescription": "Run cycles per run instruction", | ||
| 383 | "MetricExpr": "PM_RUN_CYC / PM_RUN_INST_CMPL", | ||
| 384 | "MetricGroup": "cpi_breakdown", | ||
| 385 | "MetricName": "run_cpi" | ||
| 386 | }, | ||
| 387 | { | ||
| 388 | "BriefDescription": "Run_cycles", | ||
| 389 | "MetricExpr": "PM_RUN_CYC/PM_RUN_INST_CMPL", | ||
| 390 | "MetricGroup": "cpi_breakdown", | ||
| 391 | "MetricName": "run_cyc_cpi" | ||
| 392 | }, | ||
| 393 | { | ||
| 394 | "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL", | ||
| 395 | "MetricGroup": "cpi_breakdown", | ||
| 396 | "MetricName": "scalar_stall_cpi" | ||
| 397 | }, | ||
| 398 | { | ||
| 399 | "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB", | ||
| 400 | "MetricExpr": "PM_CMPLU_STALL_SLB/PM_RUN_INST_CMPL", | ||
| 401 | "MetricGroup": "cpi_breakdown", | ||
| 402 | "MetricName": "slb_stall_cpi" | ||
| 403 | }, | ||
| 404 | { | ||
| 405 | "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC", | ||
| 406 | "MetricExpr": "PM_CMPLU_STALL_SPEC_FINISH/PM_RUN_INST_CMPL", | ||
| 407 | "MetricGroup": "cpi_breakdown", | ||
| 408 | "MetricName": "spec_finish_stall_cpi" | ||
| 409 | }, | ||
| 410 | { | ||
| 411 | "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full", | ||
| 412 | "MetricExpr": "PM_CMPLU_STALL_SRQ_FULL/PM_RUN_INST_CMPL", | ||
| 413 | "MetricGroup": "cpi_breakdown", | ||
| 414 | "MetricName": "srq_full_stall_cpi" | ||
| 415 | }, | ||
| 416 | { | ||
| 417 | "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL", | ||
| 418 | "MetricGroup": "cpi_breakdown", | ||
| 419 | "MetricName": "srq_stall_cpi" | ||
| 420 | }, | ||
| 421 | { | ||
| 422 | "BriefDescription": "Completion stall due to store forward", | ||
| 423 | "MetricExpr": "PM_CMPLU_STALL_ST_FWD/PM_RUN_INST_CMPL", | ||
| 424 | "MetricGroup": "cpi_breakdown", | ||
| 425 | "MetricName": "st_fwd_stall_cpi" | ||
| 426 | }, | ||
| 427 | { | ||
| 428 | "BriefDescription": "Nothing completed and ICT not empty", | ||
| 429 | "MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL", | ||
| 430 | "MetricGroup": "cpi_breakdown", | ||
| 431 | "MetricName": "stall_cpi" | ||
| 432 | }, | ||
| 433 | { | ||
| 434 | "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2", | ||
| 435 | "MetricExpr": "PM_CMPLU_STALL_STCX/PM_RUN_INST_CMPL", | ||
| 436 | "MetricGroup": "cpi_breakdown", | ||
| 437 | "MetricName": "stcx_stall_cpi" | ||
| 438 | }, | ||
| 439 | { | ||
| 440 | "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data", | ||
| 441 | "MetricExpr": "PM_CMPLU_STALL_STORE_DATA/PM_RUN_INST_CMPL", | ||
| 442 | "MetricGroup": "cpi_breakdown", | ||
| 443 | "MetricName": "store_data_stall_cpi" | ||
| 444 | }, | ||
| 445 | { | ||
| 446 | "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe", | ||
| 447 | "MetricExpr": "PM_CMPLU_STALL_STORE_FIN_ARB/PM_RUN_INST_CMPL", | ||
| 448 | "MetricGroup": "cpi_breakdown", | ||
| 449 | "MetricName": "store_fin_arb_stall_cpi" | ||
| 450 | }, | ||
| 451 | { | ||
| 452 | "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish", | ||
| 453 | "MetricExpr": "PM_CMPLU_STALL_STORE_FINISH/PM_RUN_INST_CMPL", | ||
| 454 | "MetricGroup": "cpi_breakdown", | ||
| 455 | "MetricName": "store_finish_stall_cpi" | ||
| 456 | }, | ||
| 457 | { | ||
| 458 | "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration", | ||
| 459 | "MetricExpr": "PM_CMPLU_STALL_STORE_PIPE_ARB/PM_RUN_INST_CMPL", | ||
| 460 | "MetricGroup": "cpi_breakdown", | ||
| 461 | "MetricName": "store_pipe_arb_stall_cpi" | ||
| 462 | }, | ||
| 463 | { | ||
| 464 | "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2", | ||
| 465 | "MetricExpr": "PM_CMPLU_STALL_TEND/PM_RUN_INST_CMPL", | ||
| 466 | "MetricGroup": "cpi_breakdown", | ||
| 467 | "MetricName": "tend_stall_cpi" | ||
| 468 | }, | ||
| 469 | { | ||
| 470 | "BriefDescription": "Completion Stalled because the thread was blocked", | ||
| 471 | "MetricExpr": "PM_CMPLU_STALL_THRD/PM_RUN_INST_CMPL", | ||
| 472 | "MetricGroup": "cpi_breakdown", | ||
| 473 | "MetricName": "thread_block_stall_cpi" | ||
| 474 | }, | ||
| 475 | { | ||
| 476 | "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2", | ||
| 477 | "MetricExpr": "PM_CMPLU_STALL_TLBIE/PM_RUN_INST_CMPL", | ||
| 478 | "MetricGroup": "cpi_breakdown", | ||
| 479 | "MetricName": "tlbie_stall_cpi" | ||
| 480 | }, | ||
| 481 | { | ||
| 482 | "BriefDescription": "Vector stalls due to small latency double precision ops", | ||
| 483 | "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL", | ||
| 484 | "MetricGroup": "cpi_breakdown", | ||
| 485 | "MetricName": "vdp_other_stall_cpi" | ||
| 486 | }, | ||
| 487 | { | ||
| 488 | "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish.", | ||
| 489 | "MetricExpr": "PM_CMPLU_STALL_VDP/PM_RUN_INST_CMPL", | ||
| 490 | "MetricGroup": "cpi_breakdown", | ||
| 491 | "MetricName": "vdp_stall_cpi" | ||
| 492 | }, | ||
| 493 | { | ||
| 494 | "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format.", | ||
| 495 | "MetricExpr": "PM_CMPLU_STALL_VDPLONG/PM_RUN_INST_CMPL", | ||
| 496 | "MetricGroup": "cpi_breakdown", | ||
| 497 | "MetricName": "vdplong_stall_cpi" | ||
| 498 | }, | ||
| 499 | { | ||
| 500 | "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL", | ||
| 501 | "MetricGroup": "cpi_breakdown", | ||
| 502 | "MetricName": "vector_stall_cpi" | ||
| 503 | }, | ||
| 504 | { | ||
| 505 | "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)", | ||
| 506 | "MetricExpr": "PM_CMPLU_STALL_VFXLONG/PM_RUN_INST_CMPL", | ||
| 507 | "MetricGroup": "cpi_breakdown", | ||
| 508 | "MetricName": "vfxlong_stall_cpi" | ||
| 509 | }, | ||
| 510 | { | ||
| 511 | "BriefDescription": "Vector stalls due to small latency integer ops", | ||
| 512 | "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL", | ||
| 513 | "MetricGroup": "cpi_breakdown", | ||
| 514 | "MetricName": "vfxu_other_stall_cpi" | ||
| 515 | }, | ||
| 516 | { | ||
| 517 | "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes", | ||
| 518 | "MetricExpr": "PM_CMPLU_STALL_VFXU/PM_RUN_INST_CMPL", | ||
| 519 | "MetricGroup": "cpi_breakdown", | ||
| 520 | "MetricName": "vfxu_stall_cpi" | ||
| 521 | }, | ||
| 522 | { | ||
| 523 | "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Modified) per Inst", | ||
| 524 | "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 525 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 526 | "MetricName": "dl1_reload_from_dl2l3_mod_rate_percent" | ||
| 527 | }, | ||
| 528 | { | ||
| 529 | "BriefDescription": "% of DL1 Reloads from Distant L2 or L3 (Shared) per Inst", | ||
| 530 | "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 531 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 532 | "MetricName": "dl1_reload_from_dl2l3_shr_rate_percent" | ||
| 533 | }, | ||
| 534 | { | ||
| 535 | "BriefDescription": "% of DL1 Reloads from Distant Memory per Inst", | ||
| 536 | "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_RUN_INST_CMPL", | ||
| 537 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 538 | "MetricName": "dl1_reload_from_dmem_rate_percent" | ||
| 539 | }, | ||
| 540 | { | ||
| 541 | "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", | ||
| 542 | "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 543 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 544 | "MetricName": "dl1_reload_from_l21_mod_rate_percent" | ||
| 545 | }, | ||
| 546 | { | ||
| 547 | "BriefDescription": "% of DL1 reloads from Private L2, other core per Inst", | ||
| 548 | "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 549 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 550 | "MetricName": "dl1_reload_from_l21_shr_rate_percent" | ||
| 551 | }, | ||
| 552 | { | ||
| 553 | "BriefDescription": "% of DL1 reloads from L2 per Inst", | ||
| 554 | "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 555 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 556 | "MetricName": "dl1_reload_from_l2_miss_rate_percent" | ||
| 557 | }, | ||
| 558 | { | ||
| 559 | "BriefDescription": "% of DL1 reloads from L2 per Inst", | ||
| 560 | "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 561 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 562 | "MetricName": "dl1_reload_from_l2_rate_percent" | ||
| 563 | }, | ||
| 564 | { | ||
| 565 | "BriefDescription": "% of DL1 reloads from Private L3 M state, other core per Inst", | ||
| 566 | "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 567 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 568 | "MetricName": "dl1_reload_from_l31_mod_rate_percent" | ||
| 569 | }, | ||
| 570 | { | ||
| 571 | "BriefDescription": "% of DL1 reloads from Private L3 S state, other core per Inst", | ||
| 572 | "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 573 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 574 | "MetricName": "dl1_reload_from_l31_shr_rate_percent" | ||
| 575 | }, | ||
| 576 | { | ||
| 577 | "BriefDescription": "% of DL1 Reloads that came from the L3 and were brought into the L3 by a prefetch, per instruction completed", | ||
| 578 | "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_RUN_INST_CMPL", | ||
| 579 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 580 | "MetricName": "dl1_reload_from_l3_mepf_rate_percent" | ||
| 581 | }, | ||
| 582 | { | ||
| 583 | "BriefDescription": "% of DL1 reloads from L3 per Inst", | ||
| 584 | "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 585 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 586 | "MetricName": "dl1_reload_from_l3_miss_rate_percent" | ||
| 587 | }, | ||
| 588 | { | ||
| 589 | "BriefDescription": "% of DL1 Reloads from L3 per Inst", | ||
| 590 | "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 591 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 592 | "MetricName": "dl1_reload_from_l3_rate_percent" | ||
| 593 | }, | ||
| 594 | { | ||
| 595 | "BriefDescription": "% of DL1 Reloads from Local Memory per Inst", | ||
| 596 | "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 597 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 598 | "MetricName": "dl1_reload_from_lmem_rate_percent" | ||
| 599 | }, | ||
| 600 | { | ||
| 601 | "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", | ||
| 602 | "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 603 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 604 | "MetricName": "dl1_reload_from_rl2l3_mod_rate_percent" | ||
| 605 | }, | ||
| 606 | { | ||
| 607 | "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", | ||
| 608 | "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 609 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 610 | "MetricName": "dl1_reload_from_rl2l3_shr_rate_percent" | ||
| 611 | }, | ||
| 612 | { | ||
| 613 | "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", | ||
| 614 | "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_RUN_INST_CMPL", | ||
| 615 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 616 | "MetricName": "dl1_reload_from_rmem_rate_percent" | ||
| 617 | }, | ||
| 618 | { | ||
| 619 | "BriefDescription": "Percentage of L1 demand load misses per run instruction", | ||
| 620 | "MetricExpr": "PM_LD_MISS_L1 * 100 / PM_RUN_INST_CMPL", | ||
| 621 | "MetricGroup": "dl1_reloads_percent_per_inst", | ||
| 622 | "MetricName": "l1_ld_miss_rate_percent" | ||
| 623 | }, | ||
| 624 | { | ||
| 625 | "BriefDescription": "% of DL1 misses that result in a cache reload", | ||
| 626 | "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID * 100 / PM_LD_MISS_L1", | ||
| 627 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 628 | "MetricName": "dl1_miss_reloads_percent" | ||
| 629 | }, | ||
| 630 | { | ||
| 631 | "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Modified)", | ||
| 632 | "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 633 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 634 | "MetricName": "dl1_reload_from_dl2l3_mod_percent" | ||
| 635 | }, | ||
| 636 | { | ||
| 637 | "BriefDescription": "% of DL1 dL1_Reloads from Distant L2 or L3 (Shared)", | ||
| 638 | "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 639 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 640 | "MetricName": "dl1_reload_from_dl2l3_shr_percent" | ||
| 641 | }, | ||
| 642 | { | ||
| 643 | "BriefDescription": "% of DL1 dL1_Reloads from Distant Memory", | ||
| 644 | "MetricExpr": "PM_DATA_FROM_DMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 645 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 646 | "MetricName": "dl1_reload_from_dmem_percent" | ||
| 647 | }, | ||
| 648 | { | ||
| 649 | "BriefDescription": "% of DL1 reloads from Private L2, other core", | ||
| 650 | "MetricExpr": "PM_DATA_FROM_L21_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 651 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 652 | "MetricName": "dl1_reload_from_l21_mod_percent" | ||
| 653 | }, | ||
| 654 | { | ||
| 655 | "BriefDescription": "% of DL1 reloads from Private L2, other core", | ||
| 656 | "MetricExpr": "PM_DATA_FROM_L21_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 657 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 658 | "MetricName": "dl1_reload_from_l21_shr_percent" | ||
| 659 | }, | ||
| 660 | { | ||
| 661 | "BriefDescription": "% of DL1 Reloads from sources beyond the local L2", | ||
| 662 | "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 663 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 664 | "MetricName": "dl1_reload_from_l2_miss_percent" | ||
| 665 | }, | ||
| 666 | { | ||
| 667 | "BriefDescription": "% of DL1 reloads from L2", | ||
| 668 | "MetricExpr": "PM_DATA_FROM_L2 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 669 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 670 | "MetricName": "dl1_reload_from_l2_percent" | ||
| 671 | }, | ||
| 672 | { | ||
| 673 | "BriefDescription": "% of DL1 reloads from Private L3, other core", | ||
| 674 | "MetricExpr": "PM_DATA_FROM_L31_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 675 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 676 | "MetricName": "dl1_reload_from_l31_mod_percent" | ||
| 677 | }, | ||
| 678 | { | ||
| 679 | "BriefDescription": "% of DL1 reloads from Private L3, other core", | ||
| 680 | "MetricExpr": "PM_DATA_FROM_L31_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 681 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 682 | "MetricName": "dl1_reload_from_l31_shr_percent" | ||
| 683 | }, | ||
| 684 | { | ||
| 685 | "BriefDescription": "% of DL1 Reloads that came from L3 and were brought into the L3 by a prefetch", | ||
| 686 | "MetricExpr": "PM_DATA_FROM_L3_MEPF * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 687 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 688 | "MetricName": "dl1_reload_from_l3_mepf_percent" | ||
| 689 | }, | ||
| 690 | { | ||
| 691 | "BriefDescription": "% of DL1 Reloads from sources beyond the local L3", | ||
| 692 | "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 693 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 694 | "MetricName": "dl1_reload_from_l3_miss_percent" | ||
| 695 | }, | ||
| 696 | { | ||
| 697 | "BriefDescription": "% of DL1 Reloads from L3", | ||
| 698 | "MetricExpr": "PM_DATA_FROM_L3 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 699 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 700 | "MetricName": "dl1_reload_from_l3_percent" | ||
| 701 | }, | ||
| 702 | { | ||
| 703 | "BriefDescription": "% of DL1 dL1_Reloads from Local Memory", | ||
| 704 | "MetricExpr": "PM_DATA_FROM_LMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 705 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 706 | "MetricName": "dl1_reload_from_lmem_percent" | ||
| 707 | }, | ||
| 708 | { | ||
| 709 | "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Modified)", | ||
| 710 | "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 711 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 712 | "MetricName": "dl1_reload_from_rl2l3_mod_percent" | ||
| 713 | }, | ||
| 714 | { | ||
| 715 | "BriefDescription": "% of DL1 dL1_Reloads from Remote L2 or L3 (Shared)", | ||
| 716 | "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 717 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 718 | "MetricName": "dl1_reload_from_rl2l3_shr_percent" | ||
| 719 | }, | ||
| 720 | { | ||
| 721 | "BriefDescription": "% of DL1 dL1_Reloads from Remote Memory", | ||
| 722 | "MetricExpr": "PM_DATA_FROM_RMEM * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 723 | "MetricGroup": "dl1_reloads_percent_per_ref", | ||
| 724 | "MetricName": "dl1_reload_from_rmem_percent" | ||
| 725 | }, | ||
| 726 | { | ||
| 727 | "BriefDescription": "estimate of dl2l3 distant MOD miss rates with measured DL2L3 MOD latency as a %of dcache miss cpi", | ||
| 728 | "MetricExpr": "PM_DATA_FROM_DL2L3_MOD * PM_MRK_DATA_FROM_DL2L3_MOD_CYC / PM_MRK_DATA_FROM_DL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 729 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 730 | "MetricName": "dl2l3_mod_cpi_percent" | ||
| 731 | }, | ||
| 732 | { | ||
| 733 | "BriefDescription": "estimate of dl2l3 distant SHR miss rates with measured DL2L3 SHR latency as a %of dcache miss cpi", | ||
| 734 | "MetricExpr": "PM_DATA_FROM_DL2L3_SHR * PM_MRK_DATA_FROM_DL2L3_SHR_CYC / PM_MRK_DATA_FROM_DL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 735 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 736 | "MetricName": "dl2l3_shr_cpi_percent" | ||
| 737 | }, | ||
| 738 | { | ||
| 739 | "BriefDescription": "estimate of distant L4 miss rates with measured DL4 latency as a %of dcache miss cpi", | ||
| 740 | "MetricExpr": "PM_DATA_FROM_DL4 * PM_MRK_DATA_FROM_DL4_CYC / PM_MRK_DATA_FROM_DL4 / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 741 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 742 | "MetricName": "dl4_cpi_percent" | ||
| 743 | }, | ||
| 744 | { | ||
| 745 | "BriefDescription": "estimate of distant memory miss rates with measured DMEM latency as a %of dcache miss cpi", | ||
| 746 | "MetricExpr": "PM_DATA_FROM_DMEM * PM_MRK_DATA_FROM_DMEM_CYC / PM_MRK_DATA_FROM_DMEM / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 747 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 748 | "MetricName": "dmem_cpi_percent" | ||
| 749 | }, | ||
| 750 | { | ||
| 751 | "BriefDescription": "estimate of dl21 MOD miss rates with measured L21 MOD latency as a %of dcache miss cpi", | ||
| 752 | "MetricExpr": "PM_DATA_FROM_L21_MOD * PM_MRK_DATA_FROM_L21_MOD_CYC / PM_MRK_DATA_FROM_L21_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 753 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 754 | "MetricName": "l21_mod_cpi_percent" | ||
| 755 | }, | ||
| 756 | { | ||
| 757 | "BriefDescription": "estimate of dl21 SHR miss rates with measured L21 SHR latency as a %of dcache miss cpi", | ||
| 758 | "MetricExpr": "PM_DATA_FROM_L21_SHR * PM_MRK_DATA_FROM_L21_SHR_CYC / PM_MRK_DATA_FROM_L21_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 759 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 760 | "MetricName": "l21_shr_cpi_percent" | ||
| 761 | }, | ||
| 762 | { | ||
| 763 | "BriefDescription": "estimate of dl2 miss rates with measured L2 latency as a %of dcache miss cpi", | ||
| 764 | "MetricExpr": "PM_DATA_FROM_L2 * PM_MRK_DATA_FROM_L2_CYC / PM_MRK_DATA_FROM_L2 / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 765 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 766 | "MetricName": "l2_cpi_percent" | ||
| 767 | }, | ||
| 768 | { | ||
| 769 | "BriefDescription": "estimate of dl31 MOD miss rates with measured L31 MOD latency as a %of dcache miss cpi", | ||
| 770 | "MetricExpr": "PM_DATA_FROM_L31_MOD * PM_MRK_DATA_FROM_L31_MOD_CYC / PM_MRK_DATA_FROM_L31_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 771 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 772 | "MetricName": "l31_mod_cpi_percent" | ||
| 773 | }, | ||
| 774 | { | ||
| 775 | "BriefDescription": "estimate of dl31 SHR miss rates with measured L31 SHR latency as a %of dcache miss cpi", | ||
| 776 | "MetricExpr": "PM_DATA_FROM_L31_SHR * PM_MRK_DATA_FROM_L31_SHR_CYC / PM_MRK_DATA_FROM_L31_SHR / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 777 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 778 | "MetricName": "l31_shr_cpi_percent" | ||
| 779 | }, | ||
| 780 | { | ||
| 781 | "BriefDescription": "estimate of dl3 miss rates with measured L3 latency as a % of dcache miss cpi", | ||
| 782 | "MetricExpr": "PM_DATA_FROM_L3 * PM_MRK_DATA_FROM_L3_CYC / PM_MRK_DATA_FROM_L3 / PM_CMPLU_STALL_DCACHE_MISS * 100", | ||
| 783 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 784 | "MetricName": "l3_cpi_percent" | ||
| 785 | }, | ||
| 786 | { | ||
| 787 | "BriefDescription": "estimate of Local memory miss rates with measured LMEM latency as a %of dcache miss cpi", | ||
| 788 | "MetricExpr": "PM_DATA_FROM_LMEM * PM_MRK_DATA_FROM_LMEM_CYC / PM_MRK_DATA_FROM_LMEM / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 789 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 790 | "MetricName": "lmem_cpi_percent" | ||
| 791 | }, | ||
| 792 | { | ||
| 793 | "BriefDescription": "estimate of dl2l3 remote MOD miss rates with measured RL2L3 MOD latency as a %of dcache miss cpi", | ||
| 794 | "MetricExpr": "PM_DATA_FROM_RL2L3_MOD * PM_MRK_DATA_FROM_RL2L3_MOD_CYC / PM_MRK_DATA_FROM_RL2L3_MOD / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 795 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 796 | "MetricName": "rl2l3_mod_cpi_percent" | ||
| 797 | }, | ||
| 798 | { | ||
| 799 | "BriefDescription": "estimate of dl2l3 shared miss rates with measured RL2L3 SHR latency as a %of dcache miss cpi", | ||
| 800 | "MetricExpr": "PM_DATA_FROM_RL2L3_SHR * PM_MRK_DATA_FROM_RL2L3_SHR_CYC / PM_MRK_DATA_FROM_RL2L3_SHR / PM_CMPLU_STALL_DCACHE_MISS * 100", | ||
| 801 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 802 | "MetricName": "rl2l3_shr_cpi_percent" | ||
| 803 | }, | ||
| 804 | { | ||
| 805 | "BriefDescription": "estimate of remote L4 miss rates with measured RL4 latency as a %of dcache miss cpi", | ||
| 806 | "MetricExpr": "PM_DATA_FROM_RL4 * PM_MRK_DATA_FROM_RL4_CYC / PM_MRK_DATA_FROM_RL4 / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 807 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 808 | "MetricName": "rl4_cpi_percent" | ||
| 809 | }, | ||
| 810 | { | ||
| 811 | "BriefDescription": "estimate of remote memory miss rates with measured RMEM latency as a %of dcache miss cpi", | ||
| 812 | "MetricExpr": "PM_DATA_FROM_RMEM * PM_MRK_DATA_FROM_RMEM_CYC / PM_MRK_DATA_FROM_RMEM / PM_CMPLU_STALL_DCACHE_MISS *100", | ||
| 813 | "MetricGroup": "estimated_dcache_miss_cpi", | ||
| 814 | "MetricName": "rmem_cpi_percent" | ||
| 815 | }, | ||
| 816 | { | ||
| 817 | "BriefDescription": "Branch Mispredict flushes per instruction", | ||
| 818 | "MetricExpr": "PM_FLUSH_MPRED / PM_RUN_INST_CMPL * 100", | ||
| 819 | "MetricGroup": "general", | ||
| 820 | "MetricName": "br_mpred_flush_rate_percent" | ||
| 821 | }, | ||
| 822 | { | ||
| 823 | "BriefDescription": "Cycles per instruction", | ||
| 824 | "MetricExpr": "PM_CYC / PM_INST_CMPL", | ||
| 825 | "MetricGroup": "general", | ||
| 826 | "MetricName": "cpi" | ||
| 827 | }, | ||
| 828 | { | ||
| 829 | "BriefDescription": "% of dispatch flushes per run instruction", | ||
| 830 | "MetricExpr": "(PM_FLUSH_DISP / PM_RUN_INST_CMPL) * 100", | ||
| 831 | "MetricGroup": "general", | ||
| 832 | "MetricName": "disp_flush_rate_percent" | ||
| 833 | }, | ||
| 834 | { | ||
| 835 | "BriefDescription": "% DTLB miss rate per inst", | ||
| 836 | "MetricExpr": "PM_DTLB_MISS / PM_RUN_INST_CMPL *100", | ||
| 837 | "MetricGroup": "general", | ||
| 838 | "MetricName": "dtlb_miss_rate_percent" | ||
| 839 | }, | ||
| 840 | { | ||
| 841 | "BriefDescription": "Flush rate (%)", | ||
| 842 | "MetricExpr": "PM_FLUSH * 100 / PM_RUN_INST_CMPL", | ||
| 843 | "MetricGroup": "general", | ||
| 844 | "MetricName": "flush_rate_percent" | ||
| 845 | }, | ||
| 846 | { | ||
| 847 | "BriefDescription": "Instructions per cycles", | ||
| 848 | "MetricExpr": "PM_INST_CMPL / PM_CYC", | ||
| 849 | "MetricGroup": "general", | ||
| 850 | "MetricName": "ipc" | ||
| 851 | }, | ||
| 852 | { | ||
| 853 | "BriefDescription": "% ITLB miss rate per inst", | ||
| 854 | "MetricExpr": "PM_ITLB_MISS / PM_RUN_INST_CMPL *100", | ||
| 855 | "MetricGroup": "general", | ||
| 856 | "MetricName": "itlb_miss_rate_percent" | ||
| 857 | }, | ||
| 858 | { | ||
| 859 | "BriefDescription": "Percentage of L1 load misses per L1 load ref", | ||
| 860 | "MetricExpr": "PM_LD_MISS_L1 / PM_LD_REF_L1 * 100", | ||
| 861 | "MetricGroup": "general", | ||
| 862 | "MetricName": "l1_ld_miss_ratio_percent" | ||
| 863 | }, | ||
| 864 | { | ||
| 865 | "BriefDescription": "Percentage of L1 store misses per run instruction", | ||
| 866 | "MetricExpr": "PM_ST_MISS_L1 * 100 / PM_RUN_INST_CMPL", | ||
| 867 | "MetricGroup": "general", | ||
| 868 | "MetricName": "l1_st_miss_rate_percent" | ||
| 869 | }, | ||
| 870 | { | ||
| 871 | "BriefDescription": "Percentage of L1 store misses per L1 store ref", | ||
| 872 | "MetricExpr": "PM_ST_MISS_L1 / PM_ST_FIN * 100", | ||
| 873 | "MetricGroup": "general", | ||
| 874 | "MetricName": "l1_st_miss_ratio_percent" | ||
| 875 | }, | ||
| 876 | { | ||
| 877 | "BriefDescription": "L2 Instruction Miss Rate (per instruction)(%)", | ||
| 878 | "MetricExpr": "PM_INST_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 879 | "MetricGroup": "general", | ||
| 880 | "MetricName": "l2_inst_miss_rate_percent" | ||
| 881 | }, | ||
| 882 | { | ||
| 883 | "BriefDescription": "L2 demand Load Miss Rate (per run instruction)(%)", | ||
| 884 | "MetricExpr": "PM_DATA_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 885 | "MetricGroup": "general", | ||
| 886 | "MetricName": "l2_ld_miss_rate_percent" | ||
| 887 | }, | ||
| 888 | { | ||
| 889 | "BriefDescription": "L2 PTEG Miss Rate (per run instruction)(%)", | ||
| 890 | "MetricExpr": "PM_DPTEG_FROM_L2MISS * 100 / PM_RUN_INST_CMPL", | ||
| 891 | "MetricGroup": "general", | ||
| 892 | "MetricName": "l2_pteg_miss_rate_percent" | ||
| 893 | }, | ||
| 894 | { | ||
| 895 | "BriefDescription": "L3 Instruction Miss Rate (per instruction)(%)", | ||
| 896 | "MetricExpr": "PM_INST_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 897 | "MetricGroup": "general", | ||
| 898 | "MetricName": "l3_inst_miss_rate_percent" | ||
| 899 | }, | ||
| 900 | { | ||
| 901 | "BriefDescription": "L3 demand Load Miss Rate (per run instruction)(%)", | ||
| 902 | "MetricExpr": "PM_DATA_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 903 | "MetricGroup": "general", | ||
| 904 | "MetricName": "l3_ld_miss_rate_percent" | ||
| 905 | }, | ||
| 906 | { | ||
| 907 | "BriefDescription": "L3 PTEG Miss Rate (per run instruction)(%)", | ||
| 908 | "MetricExpr": "PM_DPTEG_FROM_L3MISS * 100 / PM_RUN_INST_CMPL", | ||
| 909 | "MetricGroup": "general", | ||
| 910 | "MetricName": "l3_pteg_miss_rate_percent" | ||
| 911 | }, | ||
| 912 | { | ||
| 913 | "BriefDescription": "Run cycles per cycle (%)", | ||
| 914 | "MetricExpr": "PM_RUN_CYC / PM_CYC*100", | ||
| 915 | "MetricGroup": "general", | ||
| 916 | "MetricName": "run_cycles_percent" | ||
| 917 | }, | ||
| 918 | { | ||
| 919 | "BriefDescription": "Instruction dispatch-to-completion ratio", | ||
| 920 | "MetricExpr": "PM_INST_DISP / PM_INST_CMPL", | ||
| 921 | "MetricGroup": "general", | ||
| 922 | "MetricName": "speculation" | ||
| 923 | }, | ||
| 924 | { | ||
| 925 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified) per Inst", | ||
| 926 | "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 927 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 928 | "MetricName": "inst_from_dl2l3_mod_rate_percent" | ||
| 929 | }, | ||
| 930 | { | ||
| 931 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared) per Inst", | ||
| 932 | "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 933 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 934 | "MetricName": "inst_from_dl2l3_shr_rate_percent" | ||
| 935 | }, | ||
| 936 | { | ||
| 937 | "BriefDescription": "% of ICache reloads from Distant L4 per Inst", | ||
| 938 | "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_RUN_INST_CMPL", | ||
| 939 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 940 | "MetricName": "inst_from_dl4_rate_percent" | ||
| 941 | }, | ||
| 942 | { | ||
| 943 | "BriefDescription": "% of ICache reloads from Distant Memory per Inst", | ||
| 944 | "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_RUN_INST_CMPL", | ||
| 945 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 946 | "MetricName": "inst_from_dmem_rate_percent" | ||
| 947 | }, | ||
| 948 | { | ||
| 949 | "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", | ||
| 950 | "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 951 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 952 | "MetricName": "inst_from_l21_mod_rate_percent" | ||
| 953 | }, | ||
| 954 | { | ||
| 955 | "BriefDescription": "% of ICache reloads from Private L2, other core per Inst", | ||
| 956 | "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 957 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 958 | "MetricName": "inst_from_l21_shr_rate_percent" | ||
| 959 | }, | ||
| 960 | { | ||
| 961 | "BriefDescription": "% of ICache reloads from L2 per Inst", | ||
| 962 | "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 963 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 964 | "MetricName": "inst_from_l2_rate_percent" | ||
| 965 | }, | ||
| 966 | { | ||
| 967 | "BriefDescription": "% of ICache reloads from Private L3, other core per Inst", | ||
| 968 | "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 969 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 970 | "MetricName": "inst_from_l31_mod_rate_percent" | ||
| 971 | }, | ||
| 972 | { | ||
| 973 | "BriefDescription": "% of ICache reloads from Private L3 other core per Inst", | ||
| 974 | "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 975 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 976 | "MetricName": "inst_from_l31_shr_rate_percent" | ||
| 977 | }, | ||
| 978 | { | ||
| 979 | "BriefDescription": "% of ICache reloads from L3 per Inst", | ||
| 980 | "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 981 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 982 | "MetricName": "inst_from_l3_rate_percent" | ||
| 983 | }, | ||
| 984 | { | ||
| 985 | "BriefDescription": "% of ICache reloads from Local L4 per Inst", | ||
| 986 | "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 987 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 988 | "MetricName": "inst_from_ll4_rate_percent" | ||
| 989 | }, | ||
| 990 | { | ||
| 991 | "BriefDescription": "% of ICache reloads from Local Memory per Inst", | ||
| 992 | "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 993 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 994 | "MetricName": "inst_from_lmem_rate_percent" | ||
| 995 | }, | ||
| 996 | { | ||
| 997 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified) per Inst", | ||
| 998 | "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 999 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1000 | "MetricName": "inst_from_rl2l3_mod_rate_percent" | ||
| 1001 | }, | ||
| 1002 | { | ||
| 1003 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared) per Inst", | ||
| 1004 | "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1005 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1006 | "MetricName": "inst_from_rl2l3_shr_rate_percent" | ||
| 1007 | }, | ||
| 1008 | { | ||
| 1009 | "BriefDescription": "% of ICache reloads from Remote L4 per Inst", | ||
| 1010 | "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1011 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1012 | "MetricName": "inst_from_rl4_rate_percent" | ||
| 1013 | }, | ||
| 1014 | { | ||
| 1015 | "BriefDescription": "% of ICache reloads from Remote Memory per Inst", | ||
| 1016 | "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1017 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1018 | "MetricName": "inst_from_rmem_rate_percent" | ||
| 1019 | }, | ||
| 1020 | { | ||
| 1021 | "BriefDescription": "Instruction Cache Miss Rate (Per run Instruction)(%)", | ||
| 1022 | "MetricExpr": "PM_L1_ICACHE_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1023 | "MetricGroup": "instruction_misses_percent_per_inst", | ||
| 1024 | "MetricName": "l1_inst_miss_rate_percent" | ||
| 1025 | }, | ||
| 1026 | { | ||
| 1027 | "BriefDescription": "Icache Fetches per Icache Miss", | ||
| 1028 | "MetricExpr": "(PM_L1_ICACHE_MISS - PM_IC_PREF_WRITE) / PM_L1_ICACHE_MISS", | ||
| 1029 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1030 | "MetricName": "icache_miss_reload" | ||
| 1031 | }, | ||
| 1032 | { | ||
| 1033 | "BriefDescription": "% of ICache reloads due to prefetch", | ||
| 1034 | "MetricExpr": "PM_IC_PREF_WRITE * 100 / PM_L1_ICACHE_MISS", | ||
| 1035 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1036 | "MetricName": "icache_pref_percent" | ||
| 1037 | }, | ||
| 1038 | { | ||
| 1039 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Modified)", | ||
| 1040 | "MetricExpr": "PM_INST_FROM_DL2L3_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1041 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1042 | "MetricName": "inst_from_dl2l3_mod_percent" | ||
| 1043 | }, | ||
| 1044 | { | ||
| 1045 | "BriefDescription": "% of ICache reloads from Distant L2 or L3 (Shared)", | ||
| 1046 | "MetricExpr": "PM_INST_FROM_DL2L3_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1047 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1048 | "MetricName": "inst_from_dl2l3_shr_percent" | ||
| 1049 | }, | ||
| 1050 | { | ||
| 1051 | "BriefDescription": "% of ICache reloads from Distant L4", | ||
| 1052 | "MetricExpr": "PM_INST_FROM_DL4 * 100 / PM_L1_ICACHE_MISS", | ||
| 1053 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1054 | "MetricName": "inst_from_dl4_percent" | ||
| 1055 | }, | ||
| 1056 | { | ||
| 1057 | "BriefDescription": "% of ICache reloads from Distant Memory", | ||
| 1058 | "MetricExpr": "PM_INST_FROM_DMEM * 100 / PM_L1_ICACHE_MISS", | ||
| 1059 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1060 | "MetricName": "inst_from_dmem_percent" | ||
| 1061 | }, | ||
| 1062 | { | ||
| 1063 | "BriefDescription": "% of ICache reloads from Private L2, other core", | ||
| 1064 | "MetricExpr": "PM_INST_FROM_L21_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1065 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1066 | "MetricName": "inst_from_l21_mod_percent" | ||
| 1067 | }, | ||
| 1068 | { | ||
| 1069 | "BriefDescription": "% of ICache reloads from Private L2, other core", | ||
| 1070 | "MetricExpr": "PM_INST_FROM_L21_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1071 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1072 | "MetricName": "inst_from_l21_shr_percent" | ||
| 1073 | }, | ||
| 1074 | { | ||
| 1075 | "BriefDescription": "% of ICache reloads from L2", | ||
| 1076 | "MetricExpr": "PM_INST_FROM_L2 * 100 / PM_L1_ICACHE_MISS", | ||
| 1077 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1078 | "MetricName": "inst_from_l2_percent" | ||
| 1079 | }, | ||
| 1080 | { | ||
| 1081 | "BriefDescription": "% of ICache reloads from Private L3, other core", | ||
| 1082 | "MetricExpr": "PM_INST_FROM_L31_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1083 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1084 | "MetricName": "inst_from_l31_mod_percent" | ||
| 1085 | }, | ||
| 1086 | { | ||
| 1087 | "BriefDescription": "% of ICache reloads from Private L3, other core", | ||
| 1088 | "MetricExpr": "PM_INST_FROM_L31_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1089 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1090 | "MetricName": "inst_from_l31_shr_percent" | ||
| 1091 | }, | ||
| 1092 | { | ||
| 1093 | "BriefDescription": "% of ICache reloads from L3", | ||
| 1094 | "MetricExpr": "PM_INST_FROM_L3 * 100 / PM_L1_ICACHE_MISS", | ||
| 1095 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1096 | "MetricName": "inst_from_l3_percent" | ||
| 1097 | }, | ||
| 1098 | { | ||
| 1099 | "BriefDescription": "% of ICache reloads from Local L4", | ||
| 1100 | "MetricExpr": "PM_INST_FROM_LL4 * 100 / PM_L1_ICACHE_MISS", | ||
| 1101 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1102 | "MetricName": "inst_from_ll4_percent" | ||
| 1103 | }, | ||
| 1104 | { | ||
| 1105 | "BriefDescription": "% of ICache reloads from Local Memory", | ||
| 1106 | "MetricExpr": "PM_INST_FROM_LMEM * 100 / PM_L1_ICACHE_MISS", | ||
| 1107 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1108 | "MetricName": "inst_from_lmem_percent" | ||
| 1109 | }, | ||
| 1110 | { | ||
| 1111 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Modified)", | ||
| 1112 | "MetricExpr": "PM_INST_FROM_RL2L3_MOD * 100 / PM_L1_ICACHE_MISS", | ||
| 1113 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1114 | "MetricName": "inst_from_rl2l3_mod_percent" | ||
| 1115 | }, | ||
| 1116 | { | ||
| 1117 | "BriefDescription": "% of ICache reloads from Remote L2 or L3 (Shared)", | ||
| 1118 | "MetricExpr": "PM_INST_FROM_RL2L3_SHR * 100 / PM_L1_ICACHE_MISS", | ||
| 1119 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1120 | "MetricName": "inst_from_rl2l3_shr_percent" | ||
| 1121 | }, | ||
| 1122 | { | ||
| 1123 | "BriefDescription": "% of ICache reloads from Remote L4", | ||
| 1124 | "MetricExpr": "PM_INST_FROM_RL4 * 100 / PM_L1_ICACHE_MISS", | ||
| 1125 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1126 | "MetricName": "inst_from_rl4_percent" | ||
| 1127 | }, | ||
| 1128 | { | ||
| 1129 | "BriefDescription": "% of ICache reloads from Remote Memory", | ||
| 1130 | "MetricExpr": "PM_INST_FROM_RMEM * 100 / PM_L1_ICACHE_MISS", | ||
| 1131 | "MetricGroup": "instruction_stats_percent_per_ref", | ||
| 1132 | "MetricName": "inst_from_rmem_percent" | ||
| 1133 | }, | ||
| 1134 | { | ||
| 1135 | "BriefDescription": "%L2 Modified CO Cache read Utilization (4 pclks per disp attempt)", | ||
| 1136 | "MetricExpr": "((PM_L2_CASTOUT_MOD/2)*4)/ PM_RUN_CYC * 100", | ||
| 1137 | "MetricGroup": "l2_stats", | ||
| 1138 | "MetricName": "l2_co_m_rd_util" | ||
| 1139 | }, | ||
| 1140 | { | ||
| 1141 | "BriefDescription": "L2 dcache invalidates per run inst (per core)", | ||
| 1142 | "MetricExpr": "(PM_L2_DC_INV / 2) / PM_RUN_INST_CMPL * 100", | ||
| 1143 | "MetricGroup": "l2_stats", | ||
| 1144 | "MetricName": "l2_dc_inv_rate_percent" | ||
| 1145 | }, | ||
| 1146 | { | ||
| 1147 | "BriefDescription": "Demand load misses as a % of L2 LD dispatches (per thread)", | ||
| 1148 | "MetricExpr": "PM_L1_DCACHE_RELOAD_VALID / (PM_L2_LD / 2) * 100", | ||
| 1149 | "MetricGroup": "l2_stats", | ||
| 1150 | "MetricName": "l2_dem_ld_disp_percent" | ||
| 1151 | }, | ||
| 1152 | { | ||
| 1153 | "BriefDescription": "L2 Icache invalidates per run inst (per core)", | ||
| 1154 | "MetricExpr": "(PM_L2_IC_INV / 2) / PM_RUN_INST_CMPL * 100", | ||
| 1155 | "MetricGroup": "l2_stats", | ||
| 1156 | "MetricName": "l2_ic_inv_rate_percent" | ||
| 1157 | }, | ||
| 1158 | { | ||
| 1159 | "BriefDescription": "L2 Inst misses as a % of total L2 Inst dispatches (per thread)", | ||
| 1160 | "MetricExpr": "PM_L2_INST_MISS / PM_L2_INST * 100", | ||
| 1161 | "MetricGroup": "l2_stats", | ||
| 1162 | "MetricName": "l2_inst_miss_ratio_percent" | ||
| 1163 | }, | ||
| 1164 | { | ||
| 1165 | "BriefDescription": "Average number of cycles between L2 Load hits", | ||
| 1166 | "MetricExpr": "(PM_L2_LD_HIT / PM_RUN_CYC) / 2", | ||
| 1167 | "MetricGroup": "l2_stats", | ||
| 1168 | "MetricName": "l2_ld_hit_frequency" | ||
| 1169 | }, | ||
| 1170 | { | ||
| 1171 | "BriefDescription": "Average number of cycles between L2 Load misses", | ||
| 1172 | "MetricExpr": "(PM_L2_LD_MISS / PM_RUN_CYC) / 2", | ||
| 1173 | "MetricGroup": "l2_stats", | ||
| 1174 | "MetricName": "l2_ld_miss_frequency" | ||
| 1175 | }, | ||
| 1176 | { | ||
| 1177 | "BriefDescription": "L2 Load misses as a % of total L2 Load dispatches (per thread)", | ||
| 1178 | "MetricExpr": "PM_L2_LD_MISS / PM_L2_LD * 100", | ||
| 1179 | "MetricGroup": "l2_stats", | ||
| 1180 | "MetricName": "l2_ld_miss_ratio_percent" | ||
| 1181 | }, | ||
| 1182 | { | ||
| 1183 | "BriefDescription": "% L2 load disp attempts Cache read Utilization (4 pclks per disp attempt)", | ||
| 1184 | "MetricExpr": "((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100", | ||
| 1185 | "MetricGroup": "l2_stats", | ||
| 1186 | "MetricName": "l2_ld_rd_util" | ||
| 1187 | }, | ||
| 1188 | { | ||
| 1189 | "BriefDescription": "L2 load misses that require a cache write (4 pclks per disp attempt) % of pclks", | ||
| 1190 | "MetricExpr": "((( PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4)/ PM_RUN_CYC * 100", | ||
| 1191 | "MetricGroup": "l2_stats", | ||
| 1192 | "MetricName": "l2_ldmiss_wr_util" | ||
| 1193 | }, | ||
| 1194 | { | ||
| 1195 | "BriefDescription": "L2 local pump prediction success", | ||
| 1196 | "MetricExpr": "PM_L2_LOC_GUESS_CORRECT / (PM_L2_LOC_GUESS_CORRECT + PM_L2_LOC_GUESS_WRONG) * 100", | ||
| 1197 | "MetricGroup": "l2_stats", | ||
| 1198 | "MetricName": "l2_local_pred_correct_percent" | ||
| 1199 | }, | ||
| 1200 | { | ||
| 1201 | "BriefDescription": "L2 COs that were in M,Me,Mu state as a % of all L2 COs", | ||
| 1202 | "MetricExpr": "PM_L2_CASTOUT_MOD / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", | ||
| 1203 | "MetricGroup": "l2_stats", | ||
| 1204 | "MetricName": "l2_mod_co_percent" | ||
| 1205 | }, | ||
| 1206 | { | ||
| 1207 | "BriefDescription": "% of L2 Load RC dispatch attempts that failed because of address collisions and cclass conflicts", | ||
| 1208 | "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR )/ PM_L2_RCLD_DISP * 100", | ||
| 1209 | "MetricGroup": "l2_stats", | ||
| 1210 | "MetricName": "l2_rc_ld_disp_addr_fail_percent" | ||
| 1211 | }, | ||
| 1212 | { | ||
| 1213 | "BriefDescription": "% of L2 Load RC dispatch attempts that failed", | ||
| 1214 | "MetricExpr": "(PM_L2_RCLD_DISP_FAIL_ADDR + PM_L2_RCLD_DISP_FAIL_OTHER)/ PM_L2_RCLD_DISP * 100", | ||
| 1215 | "MetricGroup": "l2_stats", | ||
| 1216 | "MetricName": "l2_rc_ld_disp_fail_percent" | ||
| 1217 | }, | ||
| 1218 | { | ||
| 1219 | "BriefDescription": "% of L2 Store RC dispatch attempts that failed because of address collisions and cclass conflicts", | ||
| 1220 | "MetricExpr": "PM_L2_RCST_DISP_FAIL_ADDR / PM_L2_RCST_DISP * 100", | ||
| 1221 | "MetricGroup": "l2_stats", | ||
| 1222 | "MetricName": "l2_rc_st_disp_addr_fail_percent" | ||
| 1223 | }, | ||
| 1224 | { | ||
| 1225 | "BriefDescription": "% of L2 Store RC dispatch attempts that failed", | ||
| 1226 | "MetricExpr": "(PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/ PM_L2_RCST_DISP * 100", | ||
| 1227 | "MetricGroup": "l2_stats", | ||
| 1228 | "MetricName": "l2_rc_st_disp_fail_percent" | ||
| 1229 | }, | ||
| 1230 | { | ||
| 1231 | "BriefDescription": "L2 Cache Read Utilization (per core)", | ||
| 1232 | "MetricExpr": "(((PM_L2_RCLD_DISP/2)*4)/ PM_RUN_CYC * 100) + (((PM_L2_RCST_DISP/2)*4)/PM_RUN_CYC * 100) + (((PM_L2_CASTOUT_MOD/2)*4)/PM_RUN_CYC * 100)", | ||
| 1233 | "MetricGroup": "l2_stats", | ||
| 1234 | "MetricName": "l2_rd_util_percent" | ||
| 1235 | }, | ||
| 1236 | { | ||
| 1237 | "BriefDescription": "L2 COs that were in T,Te,Si,S state as a % of all L2 COs", | ||
| 1238 | "MetricExpr": "PM_L2_CASTOUT_SHR / (PM_L2_CASTOUT_MOD + PM_L2_CASTOUT_SHR) * 100", | ||
| 1239 | "MetricGroup": "l2_stats", | ||
| 1240 | "MetricName": "l2_shr_co_percent" | ||
| 1241 | }, | ||
| 1242 | { | ||
| 1243 | "BriefDescription": "L2 Store misses as a % of total L2 Store dispatches (per thread)", | ||
| 1244 | "MetricExpr": "PM_L2_ST_MISS / PM_L2_ST * 100", | ||
| 1245 | "MetricGroup": "l2_stats", | ||
| 1246 | "MetricName": "l2_st_miss_ratio_percent" | ||
| 1247 | }, | ||
| 1248 | { | ||
| 1249 | "BriefDescription": "% L2 store disp attempts Cache read Utilization (4 pclks per disp attempt)", | ||
| 1250 | "MetricExpr": "((PM_L2_RCST_DISP/2)*4) / PM_RUN_CYC * 100", | ||
| 1251 | "MetricGroup": "l2_stats", | ||
| 1252 | "MetricName": "l2_st_rd_util" | ||
| 1253 | }, | ||
| 1254 | { | ||
| 1255 | "BriefDescription": "L2 stores that require a cache write (4 pclks per disp attempt) % of pclks", | ||
| 1256 | "MetricExpr": "((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100", | ||
| 1257 | "MetricGroup": "l2_stats", | ||
| 1258 | "MetricName": "l2_st_wr_util" | ||
| 1259 | }, | ||
| 1260 | { | ||
| 1261 | "BriefDescription": "L2 Cache Write Utilization (per core)", | ||
| 1262 | "MetricExpr": "((((PM_L2_LD_DISP - PM_L2_LD_HIT)/2)*4) / PM_RUN_CYC * 100) + (((PM_L2_ST_DISP/2)*4) / PM_RUN_CYC * 100)", | ||
| 1263 | "MetricGroup": "l2_stats", | ||
| 1264 | "MetricName": "l2_wr_util_percent" | ||
| 1265 | }, | ||
| 1266 | { | ||
| 1267 | "BriefDescription": "Average number of cycles between L3 Load hits", | ||
| 1268 | "MetricExpr": "(PM_L3_LD_HIT / PM_RUN_CYC) / 2", | ||
| 1269 | "MetricGroup": "l3_stats", | ||
| 1270 | "MetricName": "l3_ld_hit_frequency" | ||
| 1271 | }, | ||
| 1272 | { | ||
| 1273 | "BriefDescription": "Average number of cycles between L3 Load misses", | ||
| 1274 | "MetricExpr": "(PM_L3_LD_MISS / PM_RUN_CYC) / 2", | ||
| 1275 | "MetricGroup": "l3_stats", | ||
| 1276 | "MetricName": "l3_ld_miss_frequency" | ||
| 1277 | }, | ||
| 1278 | { | ||
| 1279 | "BriefDescription": "Average number of Write-in machines used. 1 of 8 WI machines is sampled every L3 cycle", | ||
| 1280 | "MetricExpr": "(PM_L3_WI_USAGE / PM_RUN_CYC) * 8", | ||
| 1281 | "MetricGroup": "l3_stats", | ||
| 1282 | "MetricName": "l3_wi_usage" | ||
| 1283 | }, | ||
| 1284 | { | ||
| 1285 | "BriefDescription": "Average icache miss latency", | ||
| 1286 | "MetricExpr": "PM_IC_DEMAND_CYC / PM_IC_DEMAND_REQ", | ||
| 1287 | "MetricGroup": "latency", | ||
| 1288 | "MetricName": "average_il1_miss_latency" | ||
| 1289 | }, | ||
| 1290 | { | ||
| 1291 | "BriefDescription": "Marked L2L3 distant Load latency", | ||
| 1292 | "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC/ PM_MRK_DATA_FROM_DL2L3_MOD", | ||
| 1293 | "MetricGroup": "latency", | ||
| 1294 | "MetricName": "dl2l3_mod_latency" | ||
| 1295 | }, | ||
| 1296 | { | ||
| 1297 | "BriefDescription": "Marked L2L3 distant Load latency", | ||
| 1298 | "MetricExpr": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC/ PM_MRK_DATA_FROM_DL2L3_SHR", | ||
| 1299 | "MetricGroup": "latency", | ||
| 1300 | "MetricName": "dl2l3_shr_latency" | ||
| 1301 | }, | ||
| 1302 | { | ||
| 1303 | "BriefDescription": "Distant L4 average load latency", | ||
| 1304 | "MetricExpr": "PM_MRK_DATA_FROM_DL4_CYC/ PM_MRK_DATA_FROM_DL4", | ||
| 1305 | "MetricGroup": "latency", | ||
| 1306 | "MetricName": "dl4_latency" | ||
| 1307 | }, | ||
| 1308 | { | ||
| 1309 | "BriefDescription": "Marked Dmem Load latency", | ||
| 1310 | "MetricExpr": "PM_MRK_DATA_FROM_DMEM_CYC/ PM_MRK_DATA_FROM_DMEM", | ||
| 1311 | "MetricGroup": "latency", | ||
| 1312 | "MetricName": "dmem_latency" | ||
| 1313 | }, | ||
| 1314 | { | ||
| 1315 | "BriefDescription": "average L1 miss latency using marked events", | ||
| 1316 | "MetricExpr": "PM_MRK_LD_MISS_L1_CYC / PM_MRK_LD_MISS_L1", | ||
| 1317 | "MetricGroup": "latency", | ||
| 1318 | "MetricName": "estimated_dl1miss_latency" | ||
| 1319 | }, | ||
| 1320 | { | ||
| 1321 | "BriefDescription": "Marked L21 Load latency", | ||
| 1322 | "MetricExpr": "PM_MRK_DATA_FROM_L21_MOD_CYC/ PM_MRK_DATA_FROM_L21_MOD", | ||
| 1323 | "MetricGroup": "latency", | ||
| 1324 | "MetricName": "l21_mod_latency" | ||
| 1325 | }, | ||
| 1326 | { | ||
| 1327 | "BriefDescription": "Marked L21 Load latency", | ||
| 1328 | "MetricExpr": "PM_MRK_DATA_FROM_L21_SHR_CYC/ PM_MRK_DATA_FROM_L21_SHR", | ||
| 1329 | "MetricGroup": "latency", | ||
| 1330 | "MetricName": "l21_shr_latency" | ||
| 1331 | }, | ||
| 1332 | { | ||
| 1333 | "BriefDescription": "Marked L2 Load latency", | ||
| 1334 | "MetricExpr": "PM_MRK_DATA_FROM_L2_CYC/ PM_MRK_DATA_FROM_L2", | ||
| 1335 | "MetricGroup": "latency", | ||
| 1336 | "MetricName": "l2_latency" | ||
| 1337 | }, | ||
| 1338 | { | ||
| 1339 | "BriefDescription": "Marked L31 Load latency", | ||
| 1340 | "MetricExpr": "PM_MRK_DATA_FROM_L31_MOD_CYC/ PM_MRK_DATA_FROM_L31_MOD", | ||
| 1341 | "MetricGroup": "latency", | ||
| 1342 | "MetricName": "l31_mod_latency" | ||
| 1343 | }, | ||
| 1344 | { | ||
| 1345 | "BriefDescription": "Marked L31 Load latency", | ||
| 1346 | "MetricExpr": "PM_MRK_DATA_FROM_L31_SHR_CYC/ PM_MRK_DATA_FROM_L31_SHR", | ||
| 1347 | "MetricGroup": "latency", | ||
| 1348 | "MetricName": "l31_shr_latency" | ||
| 1349 | }, | ||
| 1350 | { | ||
| 1351 | "BriefDescription": "Marked L3 Load latency", | ||
| 1352 | "MetricExpr": "PM_MRK_DATA_FROM_L3_CYC/ PM_MRK_DATA_FROM_L3", | ||
| 1353 | "MetricGroup": "latency", | ||
| 1354 | "MetricName": "l3_latency" | ||
| 1355 | }, | ||
| 1356 | { | ||
| 1357 | "BriefDescription": "Local L4 average load latency", | ||
| 1358 | "MetricExpr": "PM_MRK_DATA_FROM_LL4_CYC/ PM_MRK_DATA_FROM_LL4", | ||
| 1359 | "MetricGroup": "latency", | ||
| 1360 | "MetricName": "ll4_latency" | ||
| 1361 | }, | ||
| 1362 | { | ||
| 1363 | "BriefDescription": "Marked Lmem Load latency", | ||
| 1364 | "MetricExpr": "PM_MRK_DATA_FROM_LMEM_CYC/ PM_MRK_DATA_FROM_LMEM", | ||
| 1365 | "MetricGroup": "latency", | ||
| 1366 | "MetricName": "lmem_latency" | ||
| 1367 | }, | ||
| 1368 | { | ||
| 1369 | "BriefDescription": "Marked L2L3 remote Load latency", | ||
| 1370 | "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC/ PM_MRK_DATA_FROM_RL2L3_MOD", | ||
| 1371 | "MetricGroup": "latency", | ||
| 1372 | "MetricName": "rl2l3_mod_latency" | ||
| 1373 | }, | ||
| 1374 | { | ||
| 1375 | "BriefDescription": "Marked L2L3 remote Load latency", | ||
| 1376 | "MetricExpr": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC/ PM_MRK_DATA_FROM_RL2L3_SHR", | ||
| 1377 | "MetricGroup": "latency", | ||
| 1378 | "MetricName": "rl2l3_shr_latency" | ||
| 1379 | }, | ||
| 1380 | { | ||
| 1381 | "BriefDescription": "Remote L4 average load latency", | ||
| 1382 | "MetricExpr": "PM_MRK_DATA_FROM_RL4_CYC/ PM_MRK_DATA_FROM_RL4", | ||
| 1383 | "MetricGroup": "latency", | ||
| 1384 | "MetricName": "rl4_latency" | ||
| 1385 | }, | ||
| 1386 | { | ||
| 1387 | "BriefDescription": "Marked Rmem Load latency", | ||
| 1388 | "MetricExpr": "PM_MRK_DATA_FROM_RMEM_CYC/ PM_MRK_DATA_FROM_RMEM", | ||
| 1389 | "MetricGroup": "latency", | ||
| 1390 | "MetricName": "rmem_latency" | ||
| 1391 | }, | ||
| 1392 | { | ||
| 1393 | "BriefDescription": "ERAT miss reject ratio", | ||
| 1394 | "MetricExpr": "PM_LSU_REJECT_ERAT_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1395 | "MetricGroup": "lsu_rejects", | ||
| 1396 | "MetricName": "erat_reject_rate_percent" | ||
| 1397 | }, | ||
| 1398 | { | ||
| 1399 | "BriefDescription": "LHS reject ratio", | ||
| 1400 | "MetricExpr": "PM_LSU_REJECT_LHS *100/ PM_RUN_INST_CMPL", | ||
| 1401 | "MetricGroup": "lsu_rejects", | ||
| 1402 | "MetricName": "lhs_reject_rate_percent" | ||
| 1403 | }, | ||
| 1404 | { | ||
| 1405 | "BriefDescription": "LMQ full reject ratio", | ||
| 1406 | "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_RUN_INST_CMPL", | ||
| 1407 | "MetricGroup": "lsu_rejects", | ||
| 1408 | "MetricName": "lmq_full_reject_rate_percent" | ||
| 1409 | }, | ||
| 1410 | { | ||
| 1411 | "BriefDescription": "LMQ full reject ratio", | ||
| 1412 | "MetricExpr": "PM_LSU_REJECT_LMQ_FULL * 100 / PM_LD_REF_L1", | ||
| 1413 | "MetricGroup": "lsu_rejects", | ||
| 1414 | "MetricName": "lmq_full_reject_ratio_percent" | ||
| 1415 | }, | ||
| 1416 | { | ||
| 1417 | "BriefDescription": "L4 locality(%)", | ||
| 1418 | "MetricExpr": "PM_DATA_FROM_LL4 * 100 / (PM_DATA_FROM_LL4 + PM_DATA_FROM_RL4 + PM_DATA_FROM_DL4)", | ||
| 1419 | "MetricGroup": "memory", | ||
| 1420 | "MetricName": "l4_locality" | ||
| 1421 | }, | ||
| 1422 | { | ||
| 1423 | "BriefDescription": "Ratio of reloads from local L4 to distant L4", | ||
| 1424 | "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_DL4", | ||
| 1425 | "MetricGroup": "memory", | ||
| 1426 | "MetricName": "ld_ll4_per_ld_dmem" | ||
| 1427 | }, | ||
| 1428 | { | ||
| 1429 | "BriefDescription": "Ratio of reloads from local L4 to remote+distant L4", | ||
| 1430 | "MetricExpr": "PM_DATA_FROM_LL4 / (PM_DATA_FROM_DL4 + PM_DATA_FROM_RL4)", | ||
| 1431 | "MetricGroup": "memory", | ||
| 1432 | "MetricName": "ld_ll4_per_ld_mem" | ||
| 1433 | }, | ||
| 1434 | { | ||
| 1435 | "BriefDescription": "Ratio of reloads from local L4 to remote L4", | ||
| 1436 | "MetricExpr": "PM_DATA_FROM_LL4 / PM_DATA_FROM_RL4", | ||
| 1437 | "MetricGroup": "memory", | ||
| 1438 | "MetricName": "ld_ll4_per_ld_rl4" | ||
| 1439 | }, | ||
| 1440 | { | ||
| 1441 | "BriefDescription": "Number of loads from local memory per loads from distant memory", | ||
| 1442 | "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_DMEM", | ||
| 1443 | "MetricGroup": "memory", | ||
| 1444 | "MetricName": "ld_lmem_per_ld_dmem" | ||
| 1445 | }, | ||
| 1446 | { | ||
| 1447 | "BriefDescription": "Number of loads from local memory per loads from remote and distant memory", | ||
| 1448 | "MetricExpr": "PM_DATA_FROM_LMEM / (PM_DATA_FROM_DMEM + PM_DATA_FROM_RMEM)", | ||
| 1449 | "MetricGroup": "memory", | ||
| 1450 | "MetricName": "ld_lmem_per_ld_mem" | ||
| 1451 | }, | ||
| 1452 | { | ||
| 1453 | "BriefDescription": "Number of loads from local memory per loads from remote memory", | ||
| 1454 | "MetricExpr": "PM_DATA_FROM_LMEM / PM_DATA_FROM_RMEM", | ||
| 1455 | "MetricGroup": "memory", | ||
| 1456 | "MetricName": "ld_lmem_per_ld_rmem" | ||
| 1457 | }, | ||
| 1458 | { | ||
| 1459 | "BriefDescription": "Number of loads from remote memory per loads from distant memory", | ||
| 1460 | "MetricExpr": "PM_DATA_FROM_RMEM / PM_DATA_FROM_DMEM", | ||
| 1461 | "MetricGroup": "memory", | ||
| 1462 | "MetricName": "ld_rmem_per_ld_dmem" | ||
| 1463 | }, | ||
| 1464 | { | ||
| 1465 | "BriefDescription": "Memory locality", | ||
| 1466 | "MetricExpr": "PM_DATA_FROM_LMEM * 100/ (PM_DATA_FROM_LMEM + PM_DATA_FROM_RMEM + PM_DATA_FROM_DMEM)", | ||
| 1467 | "MetricGroup": "memory", | ||
| 1468 | "MetricName": "mem_locality_percent" | ||
| 1469 | }, | ||
| 1470 | { | ||
| 1471 | "BriefDescription": "L1 Prefetches issued by the prefetch machine per instruction (per thread)", | ||
| 1472 | "MetricExpr": "PM_L1_PREF / PM_RUN_INST_CMPL * 100", | ||
| 1473 | "MetricGroup": "prefetch", | ||
| 1474 | "MetricName": "l1_prefetch_rate_percent" | ||
| 1475 | }, | ||
| 1476 | { | ||
| 1477 | "BriefDescription": "DERAT Miss Rate (per run instruction)(%)", | ||
| 1478 | "MetricExpr": "PM_LSU_DERAT_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1479 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1480 | "MetricName": "derat_miss_rate_percent" | ||
| 1481 | }, | ||
| 1482 | { | ||
| 1483 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified) per inst", | ||
| 1484 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1485 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1486 | "MetricName": "pteg_from_dl2l3_mod_rate_percent" | ||
| 1487 | }, | ||
| 1488 | { | ||
| 1489 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared) per inst", | ||
| 1490 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1491 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1492 | "MetricName": "pteg_from_dl2l3_shr_rate_percent" | ||
| 1493 | }, | ||
| 1494 | { | ||
| 1495 | "BriefDescription": "% of DERAT reloads from Distant L4 per inst", | ||
| 1496 | "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1497 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1498 | "MetricName": "pteg_from_dl4_rate_percent" | ||
| 1499 | }, | ||
| 1500 | { | ||
| 1501 | "BriefDescription": "% of DERAT reloads from Distant Memory per inst", | ||
| 1502 | "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1503 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1504 | "MetricName": "pteg_from_dmem_rate_percent" | ||
| 1505 | }, | ||
| 1506 | { | ||
| 1507 | "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", | ||
| 1508 | "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1509 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1510 | "MetricName": "pteg_from_l21_mod_rate_percent" | ||
| 1511 | }, | ||
| 1512 | { | ||
| 1513 | "BriefDescription": "% of DERAT reloads from Private L2, other core per inst", | ||
| 1514 | "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1515 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1516 | "MetricName": "pteg_from_l21_shr_rate_percent" | ||
| 1517 | }, | ||
| 1518 | { | ||
| 1519 | "BriefDescription": "% of DERAT reloads from L2 per inst", | ||
| 1520 | "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 1521 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1522 | "MetricName": "pteg_from_l2_rate_percent" | ||
| 1523 | }, | ||
| 1524 | { | ||
| 1525 | "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", | ||
| 1526 | "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1527 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1528 | "MetricName": "pteg_from_l31_mod_rate_percent" | ||
| 1529 | }, | ||
| 1530 | { | ||
| 1531 | "BriefDescription": "% of DERAT reloads from Private L3, other core per inst", | ||
| 1532 | "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1533 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1534 | "MetricName": "pteg_from_l31_shr_rate_percent" | ||
| 1535 | }, | ||
| 1536 | { | ||
| 1537 | "BriefDescription": "% of DERAT reloads from L3 per inst", | ||
| 1538 | "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 1539 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1540 | "MetricName": "pteg_from_l3_rate_percent" | ||
| 1541 | }, | ||
| 1542 | { | ||
| 1543 | "BriefDescription": "% of DERAT reloads from Local L4 per inst", | ||
| 1544 | "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1545 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1546 | "MetricName": "pteg_from_ll4_rate_percent" | ||
| 1547 | }, | ||
| 1548 | { | ||
| 1549 | "BriefDescription": "% of DERAT reloads from Local Memory per inst", | ||
| 1550 | "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1551 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1552 | "MetricName": "pteg_from_lmem_rate_percent" | ||
| 1553 | }, | ||
| 1554 | { | ||
| 1555 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified) per inst", | ||
| 1556 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_RUN_INST_CMPL", | ||
| 1557 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1558 | "MetricName": "pteg_from_rl2l3_mod_rate_percent" | ||
| 1559 | }, | ||
| 1560 | { | ||
| 1561 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared) per inst", | ||
| 1562 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_RUN_INST_CMPL", | ||
| 1563 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1564 | "MetricName": "pteg_from_rl2l3_shr_rate_percent" | ||
| 1565 | }, | ||
| 1566 | { | ||
| 1567 | "BriefDescription": "% of DERAT reloads from Remote L4 per inst", | ||
| 1568 | "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1569 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1570 | "MetricName": "pteg_from_rl4_rate_percent" | ||
| 1571 | }, | ||
| 1572 | { | ||
| 1573 | "BriefDescription": "% of DERAT reloads from Remote Memory per inst", | ||
| 1574 | "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1575 | "MetricGroup": "pteg_reloads_percent_per_inst", | ||
| 1576 | "MetricName": "pteg_from_rmem_rate_percent" | ||
| 1577 | }, | ||
| 1578 | { | ||
| 1579 | "BriefDescription": "% of DERAT misses that result in an ERAT reload", | ||
| 1580 | "MetricExpr": "PM_DTLB_MISS * 100 / PM_LSU_DERAT_MISS", | ||
| 1581 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1582 | "MetricName": "derat_miss_reload_percent" | ||
| 1583 | }, | ||
| 1584 | { | ||
| 1585 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Modified)", | ||
| 1586 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_MOD * 100 / PM_DTLB_MISS", | ||
| 1587 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1588 | "MetricName": "pteg_from_dl2l3_mod_percent" | ||
| 1589 | }, | ||
| 1590 | { | ||
| 1591 | "BriefDescription": "% of DERAT reloads from Distant L2 or L3 (Shared)", | ||
| 1592 | "MetricExpr": "PM_DPTEG_FROM_DL2L3_SHR * 100 / PM_DTLB_MISS", | ||
| 1593 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1594 | "MetricName": "pteg_from_dl2l3_shr_percent" | ||
| 1595 | }, | ||
| 1596 | { | ||
| 1597 | "BriefDescription": "% of DERAT reloads from Distant L4", | ||
| 1598 | "MetricExpr": "PM_DPTEG_FROM_DL4 * 100 / PM_DTLB_MISS", | ||
| 1599 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1600 | "MetricName": "pteg_from_dl4_percent" | ||
| 1601 | }, | ||
| 1602 | { | ||
| 1603 | "BriefDescription": "% of DERAT reloads from Distant Memory", | ||
| 1604 | "MetricExpr": "PM_DPTEG_FROM_DMEM * 100 / PM_DTLB_MISS", | ||
| 1605 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1606 | "MetricName": "pteg_from_dmem_percent" | ||
| 1607 | }, | ||
| 1608 | { | ||
| 1609 | "BriefDescription": "% of DERAT reloads from Private L2, other core", | ||
| 1610 | "MetricExpr": "PM_DPTEG_FROM_L21_MOD * 100 / PM_DTLB_MISS", | ||
| 1611 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1612 | "MetricName": "pteg_from_l21_mod_percent" | ||
| 1613 | }, | ||
| 1614 | { | ||
| 1615 | "BriefDescription": "% of DERAT reloads from Private L2, other core", | ||
| 1616 | "MetricExpr": "PM_DPTEG_FROM_L21_SHR * 100 / PM_DTLB_MISS", | ||
| 1617 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1618 | "MetricName": "pteg_from_l21_shr_percent" | ||
| 1619 | }, | ||
| 1620 | { | ||
| 1621 | "BriefDescription": "% of DERAT reloads from L2", | ||
| 1622 | "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_DTLB_MISS", | ||
| 1623 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1624 | "MetricName": "pteg_from_l2_percent" | ||
| 1625 | }, | ||
| 1626 | { | ||
| 1627 | "BriefDescription": "% of DERAT reloads from Private L3, other core", | ||
| 1628 | "MetricExpr": "PM_DPTEG_FROM_L31_MOD * 100 / PM_DTLB_MISS", | ||
| 1629 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1630 | "MetricName": "pteg_from_l31_mod_percent" | ||
| 1631 | }, | ||
| 1632 | { | ||
| 1633 | "BriefDescription": "% of DERAT reloads from Private L3, other core", | ||
| 1634 | "MetricExpr": "PM_DPTEG_FROM_L31_SHR * 100 / PM_DTLB_MISS", | ||
| 1635 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1636 | "MetricName": "pteg_from_l31_shr_percent" | ||
| 1637 | }, | ||
| 1638 | { | ||
| 1639 | "BriefDescription": "% of DERAT reloads from L3", | ||
| 1640 | "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_DTLB_MISS", | ||
| 1641 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1642 | "MetricName": "pteg_from_l3_percent" | ||
| 1643 | }, | ||
| 1644 | { | ||
| 1645 | "BriefDescription": "% of DERAT reloads from Local L4", | ||
| 1646 | "MetricExpr": "PM_DPTEG_FROM_LL4 * 100 / PM_DTLB_MISS", | ||
| 1647 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1648 | "MetricName": "pteg_from_ll4_percent" | ||
| 1649 | }, | ||
| 1650 | { | ||
| 1651 | "BriefDescription": "% of DERAT reloads from Local Memory", | ||
| 1652 | "MetricExpr": "PM_DPTEG_FROM_LMEM * 100 / PM_DTLB_MISS", | ||
| 1653 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1654 | "MetricName": "pteg_from_lmem_percent" | ||
| 1655 | }, | ||
| 1656 | { | ||
| 1657 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Modified)", | ||
| 1658 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_MOD * 100 / PM_DTLB_MISS", | ||
| 1659 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1660 | "MetricName": "pteg_from_rl2l3_mod_percent" | ||
| 1661 | }, | ||
| 1662 | { | ||
| 1663 | "BriefDescription": "% of DERAT reloads from Remote L2 or L3 (Shared)", | ||
| 1664 | "MetricExpr": "PM_DPTEG_FROM_RL2L3_SHR * 100 / PM_DTLB_MISS", | ||
| 1665 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1666 | "MetricName": "pteg_from_rl2l3_shr_percent" | ||
| 1667 | }, | ||
| 1668 | { | ||
| 1669 | "BriefDescription": "% of DERAT reloads from Remote L4", | ||
| 1670 | "MetricExpr": "PM_DPTEG_FROM_RL4 * 100 / PM_DTLB_MISS", | ||
| 1671 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1672 | "MetricName": "pteg_from_rl4_percent" | ||
| 1673 | }, | ||
| 1674 | { | ||
| 1675 | "BriefDescription": "% of DERAT reloads from Remote Memory", | ||
| 1676 | "MetricExpr": "PM_DPTEG_FROM_RMEM * 100 / PM_DTLB_MISS", | ||
| 1677 | "MetricGroup": "pteg_reloads_percent_per_ref", | ||
| 1678 | "MetricName": "pteg_from_rmem_percent" | ||
| 1679 | }, | ||
| 1680 | { | ||
| 1681 | "BriefDescription": "% DERAT miss rate for 4K page per inst", | ||
| 1682 | "MetricExpr": "PM_DERAT_MISS_4K * 100 / PM_RUN_INST_CMPL", | ||
| 1683 | "MetricGroup": "translation", | ||
| 1684 | "MetricName": "derat_4k_miss_rate_percent" | ||
| 1685 | }, | ||
| 1686 | { | ||
| 1687 | "BriefDescription": "DERAT miss ratio for 4K page", | ||
| 1688 | "MetricExpr": "PM_DERAT_MISS_4K / PM_LSU_DERAT_MISS", | ||
| 1689 | "MetricGroup": "translation", | ||
| 1690 | "MetricName": "derat_4k_miss_ratio" | ||
| 1691 | }, | ||
| 1692 | { | ||
| 1693 | "BriefDescription": "% DERAT miss rate for 64K page per inst", | ||
| 1694 | "MetricExpr": "PM_DERAT_MISS_64K * 100 / PM_RUN_INST_CMPL", | ||
| 1695 | "MetricGroup": "translation", | ||
| 1696 | "MetricName": "derat_64k_miss_rate_percent" | ||
| 1697 | }, | ||
| 1698 | { | ||
| 1699 | "BriefDescription": "DERAT miss ratio for 64K page", | ||
| 1700 | "MetricExpr": "PM_DERAT_MISS_64K / PM_LSU_DERAT_MISS", | ||
| 1701 | "MetricGroup": "translation", | ||
| 1702 | "MetricName": "derat_64k_miss_ratio" | ||
| 1703 | }, | ||
| 1704 | { | ||
| 1705 | "BriefDescription": "DERAT miss ratio", | ||
| 1706 | "MetricExpr": "PM_LSU_DERAT_MISS / PM_LSU_DERAT_MISS", | ||
| 1707 | "MetricGroup": "translation", | ||
| 1708 | "MetricName": "derat_miss_ratio" | ||
| 1709 | }, | ||
| 1710 | { | ||
| 1711 | "BriefDescription": "% DSLB_Miss_Rate per inst", | ||
| 1712 | "MetricExpr": "PM_DSLB_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1713 | "MetricGroup": "translation", | ||
| 1714 | "MetricName": "dslb_miss_rate_percent" | ||
| 1715 | }, | ||
| 1716 | { | ||
| 1717 | "BriefDescription": "% ISLB miss rate per inst", | ||
| 1718 | "MetricExpr": "PM_ISLB_MISS * 100 / PM_RUN_INST_CMPL", | ||
| 1719 | "MetricGroup": "translation", | ||
| 1720 | "MetricName": "islb_miss_rate_percent" | ||
| 1721 | }, | ||
| 1722 | { | ||
| 1723 | "BriefDescription": "ANY_SYNC_STALL_CPI", | ||
| 1724 | "MetricExpr": "PM_CMPLU_STALL_ANY_SYNC / PM_RUN_INST_CMPL", | ||
| 1725 | "MetricName": "any_sync_stall_cpi" | ||
| 1726 | }, | ||
| 1727 | { | ||
| 1728 | "BriefDescription": "Avg. more than 1 instructions completed", | ||
| 1729 | "MetricExpr": "PM_INST_CMPL / PM_1PLUS_PPC_CMPL", | ||
| 1730 | "MetricName": "average_completed_instruction_set_size" | ||
| 1731 | }, | ||
| 1732 | { | ||
| 1733 | "BriefDescription": "% Branches per instruction", | ||
| 1734 | "MetricExpr": "PM_BRU_FIN / PM_RUN_INST_CMPL", | ||
| 1735 | "MetricName": "branches_per_inst" | ||
| 1736 | }, | ||
| 1737 | { | ||
| 1738 | "BriefDescription": "Cycles in which at least one instruction completes in this thread", | ||
| 1739 | "MetricExpr": "PM_1PLUS_PPC_CMPL/PM_RUN_INST_CMPL", | ||
| 1740 | "MetricName": "completion_cpi" | ||
| 1741 | }, | ||
| 1742 | { | ||
| 1743 | "BriefDescription": "cycles", | ||
| 1744 | "MetricExpr": "PM_RUN_CYC", | ||
| 1745 | "MetricName": "custom_secs" | ||
| 1746 | }, | ||
| 1747 | { | ||
| 1748 | "BriefDescription": "Percentage Cycles at least one instruction dispatched", | ||
| 1749 | "MetricExpr": "PM_1PLUS_PPC_DISP / PM_CYC * 100", | ||
| 1750 | "MetricName": "cycles_atleast_one_inst_dispatched_percent" | ||
| 1751 | }, | ||
| 1752 | { | ||
| 1753 | "BriefDescription": "Cycles per instruction group", | ||
| 1754 | "MetricExpr": "PM_CYC / PM_1PLUS_PPC_CMPL", | ||
| 1755 | "MetricName": "cycles_per_completed_instructions_set" | ||
| 1756 | }, | ||
| 1757 | { | ||
| 1758 | "BriefDescription": "% of DL1 dL1_Reloads from Distant L4", | ||
| 1759 | "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 1760 | "MetricName": "dl1_reload_from_dl4_percent" | ||
| 1761 | }, | ||
| 1762 | { | ||
| 1763 | "BriefDescription": "% of DL1 Reloads from Distant L4 per Inst", | ||
| 1764 | "MetricExpr": "PM_DATA_FROM_DL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1765 | "MetricName": "dl1_reload_from_dl4_rate_percent" | ||
| 1766 | }, | ||
| 1767 | { | ||
| 1768 | "BriefDescription": "% of DL1 reloads from Private L3, other core per Inst", | ||
| 1769 | "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL", | ||
| 1770 | "MetricName": "dl1_reload_from_l31_rate_percent" | ||
| 1771 | }, | ||
| 1772 | { | ||
| 1773 | "BriefDescription": "% of DL1 dL1_Reloads from Local L4", | ||
| 1774 | "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 1775 | "MetricName": "dl1_reload_from_ll4_percent" | ||
| 1776 | }, | ||
| 1777 | { | ||
| 1778 | "BriefDescription": "% of DL1 Reloads from Local L4 per Inst", | ||
| 1779 | "MetricExpr": "PM_DATA_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1780 | "MetricName": "dl1_reload_from_ll4_rate_percent" | ||
| 1781 | }, | ||
| 1782 | { | ||
| 1783 | "BriefDescription": "% of DL1 dL1_Reloads from Remote L4", | ||
| 1784 | "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_L1_DCACHE_RELOAD_VALID", | ||
| 1785 | "MetricName": "dl1_reload_from_rl4_percent" | ||
| 1786 | }, | ||
| 1787 | { | ||
| 1788 | "BriefDescription": "% of DL1 Reloads from Remote Memory per Inst", | ||
| 1789 | "MetricExpr": "PM_DATA_FROM_RL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1790 | "MetricName": "dl1_reload_from_rl4_rate_percent" | ||
| 1791 | }, | ||
| 1792 | { | ||
| 1793 | "BriefDescription": "Rate of DERAT reloads from L2", | ||
| 1794 | "MetricExpr": "PM_DPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 1795 | "MetricName": "dpteg_from_l2_rate_percent" | ||
| 1796 | }, | ||
| 1797 | { | ||
| 1798 | "BriefDescription": "Rate of DERAT reloads from L3", | ||
| 1799 | "MetricExpr": "PM_DPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 1800 | "MetricName": "dpteg_from_l3_rate_percent" | ||
| 1801 | }, | ||
| 1802 | { | ||
| 1803 | "BriefDescription": "Cycles in which the oldest instruction is finished and ready to complete for waiting to get through the completion pipe", | ||
| 1804 | "MetricExpr": "PM_NTC_ALL_FIN / PM_RUN_INST_CMPL", | ||
| 1805 | "MetricName": "finish_to_cmpl_cpi" | ||
| 1806 | }, | ||
| 1807 | { | ||
| 1808 | "BriefDescription": "Total Fixed point operations", | ||
| 1809 | "MetricExpr": "PM_FXU_FIN/PM_RUN_INST_CMPL", | ||
| 1810 | "MetricName": "fixed_per_inst" | ||
| 1811 | }, | ||
| 1812 | { | ||
| 1813 | "BriefDescription": "All FXU Busy", | ||
| 1814 | "MetricExpr": "PM_FXU_BUSY / PM_CYC", | ||
| 1815 | "MetricName": "fxu_all_busy" | ||
| 1816 | }, | ||
| 1817 | { | ||
| 1818 | "BriefDescription": "All FXU Idle", | ||
| 1819 | "MetricExpr": "PM_FXU_IDLE / PM_CYC", | ||
| 1820 | "MetricName": "fxu_all_idle" | ||
| 1821 | }, | ||
| 1822 | { | ||
| 1823 | "BriefDescription": "Ict empty for this thread due to branch mispred", | ||
| 1824 | "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL", | ||
| 1825 | "MetricName": "ict_noslot_br_mpred_cpi" | ||
| 1826 | }, | ||
| 1827 | { | ||
| 1828 | "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred", | ||
| 1829 | "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL", | ||
| 1830 | "MetricName": "ict_noslot_br_mpred_icmiss_cpi" | ||
| 1831 | }, | ||
| 1832 | { | ||
| 1833 | "BriefDescription": "ICT other stalls", | ||
| 1834 | "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL", | ||
| 1835 | "MetricName": "ict_noslot_cyc_other_cpi" | ||
| 1836 | }, | ||
| 1837 | { | ||
| 1838 | "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason", | ||
| 1839 | "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL", | ||
| 1840 | "MetricName": "ict_noslot_disp_held_cpi" | ||
| 1841 | }, | ||
| 1842 | { | ||
| 1843 | "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF", | ||
| 1844 | "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL", | ||
| 1845 | "MetricName": "ict_noslot_disp_held_hb_full_cpi" | ||
| 1846 | }, | ||
| 1847 | { | ||
| 1848 | "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full", | ||
| 1849 | "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL", | ||
| 1850 | "MetricName": "ict_noslot_disp_held_issq_cpi" | ||
| 1851 | }, | ||
| 1852 | { | ||
| 1853 | "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI", | ||
| 1854 | "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL", | ||
| 1855 | "MetricName": "ict_noslot_disp_held_other_cpi" | ||
| 1856 | }, | ||
| 1857 | { | ||
| 1858 | "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch", | ||
| 1859 | "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL", | ||
| 1860 | "MetricName": "ict_noslot_disp_held_sync_cpi" | ||
| 1861 | }, | ||
| 1862 | { | ||
| 1863 | "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch", | ||
| 1864 | "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL", | ||
| 1865 | "MetricName": "ict_noslot_disp_held_tbegin_cpi" | ||
| 1866 | }, | ||
| 1867 | { | ||
| 1868 | "BriefDescription": "ICT_NOSLOT_IC_L2_CPI", | ||
| 1869 | "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL", | ||
| 1870 | "MetricName": "ict_noslot_ic_l2_cpi" | ||
| 1871 | }, | ||
| 1872 | { | ||
| 1873 | "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3", | ||
| 1874 | "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL", | ||
| 1875 | "MetricName": "ict_noslot_ic_l3_cpi" | ||
| 1876 | }, | ||
| 1877 | { | ||
| 1878 | "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache", | ||
| 1879 | "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL", | ||
| 1880 | "MetricName": "ict_noslot_ic_l3miss_cpi" | ||
| 1881 | }, | ||
| 1882 | { | ||
| 1883 | "BriefDescription": "Ict empty for this thread due to Icache Miss", | ||
| 1884 | "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL", | ||
| 1885 | "MetricName": "ict_noslot_ic_miss_cpi" | ||
| 1886 | }, | ||
| 1887 | { | ||
| 1888 | "BriefDescription": "Rate of IERAT reloads from L2", | ||
| 1889 | "MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL", | ||
| 1890 | "MetricName": "ipteg_from_l2_rate_percent" | ||
| 1891 | }, | ||
| 1892 | { | ||
| 1893 | "BriefDescription": "Rate of IERAT reloads from L3", | ||
| 1894 | "MetricExpr": "PM_IPTEG_FROM_L3 * 100 / PM_RUN_INST_CMPL", | ||
| 1895 | "MetricName": "ipteg_from_l3_rate_percent" | ||
| 1896 | }, | ||
| 1897 | { | ||
| 1898 | "BriefDescription": "Rate of IERAT reloads from local memory", | ||
| 1899 | "MetricExpr": "PM_IPTEG_FROM_LL4 * 100 / PM_RUN_INST_CMPL", | ||
| 1900 | "MetricName": "ipteg_from_ll4_rate_percent" | ||
| 1901 | }, | ||
| 1902 | { | ||
| 1903 | "BriefDescription": "Rate of IERAT reloads from local memory", | ||
| 1904 | "MetricExpr": "PM_IPTEG_FROM_LMEM * 100 / PM_RUN_INST_CMPL", | ||
| 1905 | "MetricName": "ipteg_from_lmem_rate_percent" | ||
| 1906 | }, | ||
| 1907 | { | ||
| 1908 | "BriefDescription": "Average number of Castout machines used. 1 of 16 CO machines is sampled every L2 cycle", | ||
| 1909 | "MetricExpr": "PM_CO_USAGE / PM_RUN_CYC * 16", | ||
| 1910 | "MetricName": "l2_co_usage" | ||
| 1911 | }, | ||
| 1912 | { | ||
| 1913 | "BriefDescription": "Percent of instruction reads out of all L2 commands", | ||
| 1914 | "MetricExpr": "PM_ISIDE_DISP * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", | ||
| 1915 | "MetricName": "l2_instr_commands_percent" | ||
| 1916 | }, | ||
| 1917 | { | ||
| 1918 | "BriefDescription": "Percent of loads out of all L2 commands", | ||
| 1919 | "MetricExpr": "PM_L2_LD * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", | ||
| 1920 | "MetricName": "l2_ld_commands_percent" | ||
| 1921 | }, | ||
| 1922 | { | ||
| 1923 | "BriefDescription": "Rate of L2 store dispatches that failed per core", | ||
| 1924 | "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL", | ||
| 1925 | "MetricName": "l2_rc_st_disp_fail_rate_percent" | ||
| 1926 | }, | ||
| 1927 | { | ||
| 1928 | "BriefDescription": "Average number of Read/Claim machines used. 1 of 16 RC machines is sampled every L2 cycle", | ||
| 1929 | "MetricExpr": "PM_RC_USAGE / PM_RUN_CYC * 16", | ||
| 1930 | "MetricName": "l2_rc_usage" | ||
| 1931 | }, | ||
| 1932 | { | ||
| 1933 | "BriefDescription": "Average number of Snoop machines used. 1 of 8 SN machines is sampled every L2 cycle", | ||
| 1934 | "MetricExpr": "PM_SN_USAGE / PM_RUN_CYC * 8", | ||
| 1935 | "MetricName": "l2_sn_usage" | ||
| 1936 | }, | ||
| 1937 | { | ||
| 1938 | "BriefDescription": "Percent of stores out of all L2 commands", | ||
| 1939 | "MetricExpr": "PM_L2_ST * 100 / (PM_L2_ST + PM_L2_LD + PM_ISIDE_DISP)", | ||
| 1940 | "MetricName": "l2_st_commands_percent" | ||
| 1941 | }, | ||
| 1942 | { | ||
| 1943 | "BriefDescription": "Rate of L2 store dispatches that failed per core", | ||
| 1944 | "MetricExpr": "100 * (PM_L2_RCST_DISP_FAIL_ADDR + PM_L2_RCST_DISP_FAIL_OTHER)/2 / PM_RUN_INST_CMPL", | ||
| 1945 | "MetricName": "l2_st_disp_fail_rate_percent" | ||
| 1946 | }, | ||
| 1947 | { | ||
| 1948 | "BriefDescription": "Rate of L2 dispatches per core", | ||
| 1949 | "MetricExpr": "100 * PM_L2_RCST_DISP/2 / PM_RUN_INST_CMPL", | ||
| 1950 | "MetricName": "l2_st_disp_rate_percent" | ||
| 1951 | }, | ||
| 1952 | { | ||
| 1953 | "BriefDescription": "Marked L31 Load latency", | ||
| 1954 | "MetricExpr": "(PM_MRK_DATA_FROM_L31_SHR_CYC + PM_MRK_DATA_FROM_L31_MOD_CYC) / (PM_MRK_DATA_FROM_L31_SHR + PM_MRK_DATA_FROM_L31_MOD)", | ||
| 1955 | "MetricName": "l31_latency" | ||
| 1956 | }, | ||
| 1957 | { | ||
| 1958 | "BriefDescription": "PCT instruction loads", | ||
| 1959 | "MetricExpr": "PM_LD_REF_L1 / PM_RUN_INST_CMPL", | ||
| 1960 | "MetricName": "loads_per_inst" | ||
| 1961 | }, | ||
| 1962 | { | ||
| 1963 | "BriefDescription": "Cycles stalled by D-Cache Misses", | ||
| 1964 | "MetricExpr": "PM_CMPLU_STALL_DCACHE_MISS / PM_RUN_INST_CMPL", | ||
| 1965 | "MetricName": "lsu_stall_dcache_miss_cpi" | ||
| 1966 | }, | ||
| 1967 | { | ||
| 1968 | "BriefDescription": "Completion stall because a different thread was using the completion pipe", | ||
| 1969 | "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL", | ||
| 1970 | "MetricName": "other_thread_cmpl_stall" | ||
| 1971 | }, | ||
| 1972 | { | ||
| 1973 | "BriefDescription": "PCT instruction stores", | ||
| 1974 | "MetricExpr": "PM_ST_FIN / PM_RUN_INST_CMPL", | ||
| 1975 | "MetricName": "stores_per_inst" | ||
| 1976 | }, | ||
| 1977 | { | ||
| 1978 | "BriefDescription": "ANY_SYNC_STALL_CPI", | ||
| 1979 | "MetricExpr": "PM_CMPLU_STALL_SYNC_PMU_INT / PM_RUN_INST_CMPL", | ||
| 1980 | "MetricName": "sync_pmu_int_stall_cpi" | ||
| 1981 | } | ||
| 1982 | ] | ||
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index 36c903faed0b..71e9737f4614 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json | |||
| @@ -73,7 +73,7 @@ | |||
| 73 | }, | 73 | }, |
| 74 | { | 74 | { |
| 75 | "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", | 75 | "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads", |
| 76 | "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS_PS + MEM_LOAD_RETIRED.FB_HIT_PS )", | 76 | "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )", |
| 77 | "MetricGroup": "Memory_Bound;Memory_Lat", | 77 | "MetricGroup": "Memory_Bound;Memory_Lat", |
| 78 | "MetricName": "Load_Miss_Real_Latency" | 78 | "MetricName": "Load_Miss_Real_Latency" |
| 79 | }, | 79 | }, |
diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build index 41efd7e368b3..68d4b54574ad 100644 --- a/tools/perf/scripts/Build +++ b/tools/perf/scripts/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ | 1 | perf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ |
| 2 | libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ | 2 | perf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ |
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build index 34faecf774ae..db0036129307 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Build +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | libperf-y += Context.o | 1 | perf-y += Context.o |
| 2 | 2 | ||
| 3 | CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes | 3 | CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes |
| 4 | CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef | 4 | CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef |
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build index aefc15c9444a..7d0e33ce6aba 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Build +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build | |||
| @@ -1,3 +1,3 @@ | |||
| 1 | libperf-y += Context.o | 1 | perf-y += Context.o |
| 2 | 2 | ||
| 3 | CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs | 3 | CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs |
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 0564dd7377f2..30130213da7e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py | |||
| @@ -478,7 +478,7 @@ if perf_db_export_calls: | |||
| 478 | 'branch_count,' | 478 | 'branch_count,' |
| 479 | 'call_id,' | 479 | 'call_id,' |
| 480 | 'return_id,' | 480 | 'return_id,' |
| 481 | 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' | 481 | 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' |
| 482 | 'parent_call_path_id' | 482 | 'parent_call_path_id' |
| 483 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') | 483 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') |
| 484 | 484 | ||
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 245caf2643ed..ed237f2ed03f 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py | |||
| @@ -320,7 +320,7 @@ if perf_db_export_calls: | |||
| 320 | 'branch_count,' | 320 | 'branch_count,' |
| 321 | 'call_id,' | 321 | 'call_id,' |
| 322 | 'return_id,' | 322 | 'return_id,' |
| 323 | 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' | 323 | 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' |
| 324 | 'parent_call_path_id' | 324 | 'parent_call_path_id' |
| 325 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') | 325 | ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') |
| 326 | 326 | ||
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index f278ce5ebab7..09ce73b07d35 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #!/usr/bin/python2 | 1 | #!/usr/bin/env python2 |
| 2 | # SPDX-License-Identifier: GPL-2.0 | 2 | # SPDX-License-Identifier: GPL-2.0 |
| 3 | # exported-sql-viewer.py: view data from sql database | 3 | # exported-sql-viewer.py: view data from sql database |
| 4 | # Copyright (c) 2014-2018, Intel Corporation. | 4 | # Copyright (c) 2014-2018, Intel Corporation. |
| @@ -1398,18 +1398,28 @@ class BranchModel(TreeModel): | |||
| 1398 | def HasMoreRecords(self): | 1398 | def HasMoreRecords(self): |
| 1399 | return self.more | 1399 | return self.more |
| 1400 | 1400 | ||
| 1401 | # Report Variables | ||
| 1402 | |||
| 1403 | class ReportVars(): | ||
| 1404 | |||
| 1405 | def __init__(self, name = "", where_clause = "", limit = ""): | ||
| 1406 | self.name = name | ||
| 1407 | self.where_clause = where_clause | ||
| 1408 | self.limit = limit | ||
| 1409 | |||
| 1410 | def UniqueId(self): | ||
| 1411 | return str(self.where_clause + ";" + self.limit) | ||
| 1412 | |||
| 1401 | # Branch window | 1413 | # Branch window |
| 1402 | 1414 | ||
| 1403 | class BranchWindow(QMdiSubWindow): | 1415 | class BranchWindow(QMdiSubWindow): |
| 1404 | 1416 | ||
| 1405 | def __init__(self, glb, event_id, name, where_clause, parent=None): | 1417 | def __init__(self, glb, event_id, report_vars, parent=None): |
| 1406 | super(BranchWindow, self).__init__(parent) | 1418 | super(BranchWindow, self).__init__(parent) |
| 1407 | 1419 | ||
| 1408 | model_name = "Branch Events " + str(event_id) | 1420 | model_name = "Branch Events " + str(event_id) + " " + report_vars.UniqueId() |
| 1409 | if len(where_clause): | ||
| 1410 | model_name = where_clause + " " + model_name | ||
| 1411 | 1421 | ||
| 1412 | self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, where_clause)) | 1422 | self.model = LookupCreateModel(model_name, lambda: BranchModel(glb, event_id, report_vars.where_clause)) |
| 1413 | 1423 | ||
| 1414 | self.view = QTreeView() | 1424 | self.view = QTreeView() |
| 1415 | self.view.setUniformRowHeights(True) | 1425 | self.view.setUniformRowHeights(True) |
| @@ -1427,7 +1437,7 @@ class BranchWindow(QMdiSubWindow): | |||
| 1427 | 1437 | ||
| 1428 | self.setWidget(self.vbox.Widget()) | 1438 | self.setWidget(self.vbox.Widget()) |
| 1429 | 1439 | ||
| 1430 | AddSubWindow(glb.mainwindow.mdi_area, self, name + " Branch Events") | 1440 | AddSubWindow(glb.mainwindow.mdi_area, self, report_vars.name + " Branch Events") |
| 1431 | 1441 | ||
| 1432 | def ResizeColumnToContents(self, column, n): | 1442 | def ResizeColumnToContents(self, column, n): |
| 1433 | # Using the view's resizeColumnToContents() here is extrememly slow | 1443 | # Using the view's resizeColumnToContents() here is extrememly slow |
| @@ -1472,47 +1482,134 @@ class BranchWindow(QMdiSubWindow): | |||
| 1472 | else: | 1482 | else: |
| 1473 | self.find_bar.NotFound() | 1483 | self.find_bar.NotFound() |
| 1474 | 1484 | ||
| 1475 | # Dialog data item converted and validated using a SQL table | 1485 | # Line edit data item |
| 1476 | 1486 | ||
| 1477 | class SQLTableDialogDataItem(): | 1487 | class LineEditDataItem(object): |
| 1478 | 1488 | ||
| 1479 | def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): | 1489 | def __init__(self, glb, label, placeholder_text, parent, id = "", default = ""): |
| 1480 | self.glb = glb | 1490 | self.glb = glb |
| 1481 | self.label = label | 1491 | self.label = label |
| 1482 | self.placeholder_text = placeholder_text | 1492 | self.placeholder_text = placeholder_text |
| 1483 | self.table_name = table_name | ||
| 1484 | self.match_column = match_column | ||
| 1485 | self.column_name1 = column_name1 | ||
| 1486 | self.column_name2 = column_name2 | ||
| 1487 | self.parent = parent | 1493 | self.parent = parent |
| 1494 | self.id = id | ||
| 1488 | 1495 | ||
| 1489 | self.value = "" | 1496 | self.value = default |
| 1490 | 1497 | ||
| 1491 | self.widget = QLineEdit() | 1498 | self.widget = QLineEdit(default) |
| 1492 | self.widget.editingFinished.connect(self.Validate) | 1499 | self.widget.editingFinished.connect(self.Validate) |
| 1493 | self.widget.textChanged.connect(self.Invalidate) | 1500 | self.widget.textChanged.connect(self.Invalidate) |
| 1494 | self.red = False | 1501 | self.red = False |
| 1495 | self.error = "" | 1502 | self.error = "" |
| 1496 | self.validated = True | 1503 | self.validated = True |
| 1497 | 1504 | ||
| 1498 | self.last_id = 0 | ||
| 1499 | self.first_time = 0 | ||
| 1500 | self.last_time = 2 ** 64 | ||
| 1501 | if self.table_name == "<timeranges>": | ||
| 1502 | query = QSqlQuery(self.glb.db) | ||
| 1503 | QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") | ||
| 1504 | if query.next(): | ||
| 1505 | self.last_id = int(query.value(0)) | ||
| 1506 | self.last_time = int(query.value(1)) | ||
| 1507 | QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") | ||
| 1508 | if query.next(): | ||
| 1509 | self.first_time = int(query.value(0)) | ||
| 1510 | if placeholder_text: | ||
| 1511 | placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) | ||
| 1512 | |||
| 1513 | if placeholder_text: | 1505 | if placeholder_text: |
| 1514 | self.widget.setPlaceholderText(placeholder_text) | 1506 | self.widget.setPlaceholderText(placeholder_text) |
| 1515 | 1507 | ||
| 1508 | def TurnTextRed(self): | ||
| 1509 | if not self.red: | ||
| 1510 | palette = QPalette() | ||
| 1511 | palette.setColor(QPalette.Text,Qt.red) | ||
| 1512 | self.widget.setPalette(palette) | ||
| 1513 | self.red = True | ||
| 1514 | |||
| 1515 | def TurnTextNormal(self): | ||
| 1516 | if self.red: | ||
| 1517 | palette = QPalette() | ||
| 1518 | self.widget.setPalette(palette) | ||
| 1519 | self.red = False | ||
| 1520 | |||
| 1521 | def InvalidValue(self, value): | ||
| 1522 | self.value = "" | ||
| 1523 | self.TurnTextRed() | ||
| 1524 | self.error = self.label + " invalid value '" + value + "'" | ||
| 1525 | self.parent.ShowMessage(self.error) | ||
| 1526 | |||
| 1527 | def Invalidate(self): | ||
| 1528 | self.validated = False | ||
| 1529 | |||
| 1530 | def DoValidate(self, input_string): | ||
| 1531 | self.value = input_string.strip() | ||
| 1532 | |||
| 1533 | def Validate(self): | ||
| 1534 | self.validated = True | ||
| 1535 | self.error = "" | ||
| 1536 | self.TurnTextNormal() | ||
| 1537 | self.parent.ClearMessage() | ||
| 1538 | input_string = self.widget.text() | ||
| 1539 | if not len(input_string.strip()): | ||
| 1540 | self.value = "" | ||
| 1541 | return | ||
| 1542 | self.DoValidate(input_string) | ||
| 1543 | |||
| 1544 | def IsValid(self): | ||
| 1545 | if not self.validated: | ||
| 1546 | self.Validate() | ||
| 1547 | if len(self.error): | ||
| 1548 | self.parent.ShowMessage(self.error) | ||
| 1549 | return False | ||
| 1550 | return True | ||
| 1551 | |||
| 1552 | def IsNumber(self, value): | ||
| 1553 | try: | ||
| 1554 | x = int(value) | ||
| 1555 | except: | ||
| 1556 | x = 0 | ||
| 1557 | return str(x) == value | ||
| 1558 | |||
| 1559 | # Non-negative integer ranges dialog data item | ||
| 1560 | |||
| 1561 | class NonNegativeIntegerRangesDataItem(LineEditDataItem): | ||
| 1562 | |||
| 1563 | def __init__(self, glb, label, placeholder_text, column_name, parent): | ||
| 1564 | super(NonNegativeIntegerRangesDataItem, self).__init__(glb, label, placeholder_text, parent) | ||
| 1565 | |||
| 1566 | self.column_name = column_name | ||
| 1567 | |||
| 1568 | def DoValidate(self, input_string): | ||
| 1569 | singles = [] | ||
| 1570 | ranges = [] | ||
| 1571 | for value in [x.strip() for x in input_string.split(",")]: | ||
| 1572 | if "-" in value: | ||
| 1573 | vrange = value.split("-") | ||
| 1574 | if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): | ||
| 1575 | return self.InvalidValue(value) | ||
| 1576 | ranges.append(vrange) | ||
| 1577 | else: | ||
| 1578 | if not self.IsNumber(value): | ||
| 1579 | return self.InvalidValue(value) | ||
| 1580 | singles.append(value) | ||
| 1581 | ranges = [("(" + self.column_name + " >= " + r[0] + " AND " + self.column_name + " <= " + r[1] + ")") for r in ranges] | ||
| 1582 | if len(singles): | ||
| 1583 | ranges.append(self.column_name + " IN (" + ",".join(singles) + ")") | ||
| 1584 | self.value = " OR ".join(ranges) | ||
| 1585 | |||
| 1586 | # Positive integer dialog data item | ||
| 1587 | |||
| 1588 | class PositiveIntegerDataItem(LineEditDataItem): | ||
| 1589 | |||
| 1590 | def __init__(self, glb, label, placeholder_text, parent, id = "", default = ""): | ||
| 1591 | super(PositiveIntegerDataItem, self).__init__(glb, label, placeholder_text, parent, id, default) | ||
| 1592 | |||
| 1593 | def DoValidate(self, input_string): | ||
| 1594 | if not self.IsNumber(input_string.strip()): | ||
| 1595 | return self.InvalidValue(input_string) | ||
| 1596 | value = int(input_string.strip()) | ||
| 1597 | if value <= 0: | ||
| 1598 | return self.InvalidValue(input_string) | ||
| 1599 | self.value = str(value) | ||
| 1600 | |||
| 1601 | # Dialog data item converted and validated using a SQL table | ||
| 1602 | |||
| 1603 | class SQLTableDataItem(LineEditDataItem): | ||
| 1604 | |||
| 1605 | def __init__(self, glb, label, placeholder_text, table_name, match_column, column_name1, column_name2, parent): | ||
| 1606 | super(SQLTableDataItem, self).__init__(glb, label, placeholder_text, parent) | ||
| 1607 | |||
| 1608 | self.table_name = table_name | ||
| 1609 | self.match_column = match_column | ||
| 1610 | self.column_name1 = column_name1 | ||
| 1611 | self.column_name2 = column_name2 | ||
| 1612 | |||
| 1516 | def ValueToIds(self, value): | 1613 | def ValueToIds(self, value): |
| 1517 | ids = [] | 1614 | ids = [] |
| 1518 | query = QSqlQuery(self.glb.db) | 1615 | query = QSqlQuery(self.glb.db) |
| @@ -1523,6 +1620,42 @@ class SQLTableDialogDataItem(): | |||
| 1523 | ids.append(str(query.value(0))) | 1620 | ids.append(str(query.value(0))) |
| 1524 | return ids | 1621 | return ids |
| 1525 | 1622 | ||
| 1623 | def DoValidate(self, input_string): | ||
| 1624 | all_ids = [] | ||
| 1625 | for value in [x.strip() for x in input_string.split(",")]: | ||
| 1626 | ids = self.ValueToIds(value) | ||
| 1627 | if len(ids): | ||
| 1628 | all_ids.extend(ids) | ||
| 1629 | else: | ||
| 1630 | return self.InvalidValue(value) | ||
| 1631 | self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")" | ||
| 1632 | if self.column_name2: | ||
| 1633 | self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )" | ||
| 1634 | |||
| 1635 | # Sample time ranges dialog data item converted and validated using 'samples' SQL table | ||
| 1636 | |||
| 1637 | class SampleTimeRangesDataItem(LineEditDataItem): | ||
| 1638 | |||
| 1639 | def __init__(self, glb, label, placeholder_text, column_name, parent): | ||
| 1640 | self.column_name = column_name | ||
| 1641 | |||
| 1642 | self.last_id = 0 | ||
| 1643 | self.first_time = 0 | ||
| 1644 | self.last_time = 2 ** 64 | ||
| 1645 | |||
| 1646 | query = QSqlQuery(glb.db) | ||
| 1647 | QueryExec(query, "SELECT id, time FROM samples ORDER BY id DESC LIMIT 1") | ||
| 1648 | if query.next(): | ||
| 1649 | self.last_id = int(query.value(0)) | ||
| 1650 | self.last_time = int(query.value(1)) | ||
| 1651 | QueryExec(query, "SELECT time FROM samples WHERE time != 0 ORDER BY id LIMIT 1") | ||
| 1652 | if query.next(): | ||
| 1653 | self.first_time = int(query.value(0)) | ||
| 1654 | if placeholder_text: | ||
| 1655 | placeholder_text += ", between " + str(self.first_time) + " and " + str(self.last_time) | ||
| 1656 | |||
| 1657 | super(SampleTimeRangesDataItem, self).__init__(glb, label, placeholder_text, parent) | ||
| 1658 | |||
| 1526 | def IdBetween(self, query, lower_id, higher_id, order): | 1659 | def IdBetween(self, query, lower_id, higher_id, order): |
| 1527 | QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") | 1660 | QueryExec(query, "SELECT id FROM samples WHERE id > " + str(lower_id) + " AND id < " + str(higher_id) + " ORDER BY id " + order + " LIMIT 1") |
| 1528 | if query.next(): | 1661 | if query.next(): |
| @@ -1560,7 +1693,6 @@ class SQLTableDialogDataItem(): | |||
| 1560 | return str(lower_id) | 1693 | return str(lower_id) |
| 1561 | 1694 | ||
| 1562 | def ConvertRelativeTime(self, val): | 1695 | def ConvertRelativeTime(self, val): |
| 1563 | print "val ", val | ||
| 1564 | mult = 1 | 1696 | mult = 1 |
| 1565 | suffix = val[-2:] | 1697 | suffix = val[-2:] |
| 1566 | if suffix == "ms": | 1698 | if suffix == "ms": |
| @@ -1582,29 +1714,23 @@ class SQLTableDialogDataItem(): | |||
| 1582 | return str(val) | 1714 | return str(val) |
| 1583 | 1715 | ||
| 1584 | def ConvertTimeRange(self, vrange): | 1716 | def ConvertTimeRange(self, vrange): |
| 1585 | print "vrange ", vrange | ||
| 1586 | if vrange[0] == "": | 1717 | if vrange[0] == "": |
| 1587 | vrange[0] = str(self.first_time) | 1718 | vrange[0] = str(self.first_time) |
| 1588 | if vrange[1] == "": | 1719 | if vrange[1] == "": |
| 1589 | vrange[1] = str(self.last_time) | 1720 | vrange[1] = str(self.last_time) |
| 1590 | vrange[0] = self.ConvertRelativeTime(vrange[0]) | 1721 | vrange[0] = self.ConvertRelativeTime(vrange[0]) |
| 1591 | vrange[1] = self.ConvertRelativeTime(vrange[1]) | 1722 | vrange[1] = self.ConvertRelativeTime(vrange[1]) |
| 1592 | print "vrange2 ", vrange | ||
| 1593 | if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): | 1723 | if not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): |
| 1594 | return False | 1724 | return False |
| 1595 | print "ok1" | ||
| 1596 | beg_range = max(int(vrange[0]), self.first_time) | 1725 | beg_range = max(int(vrange[0]), self.first_time) |
| 1597 | end_range = min(int(vrange[1]), self.last_time) | 1726 | end_range = min(int(vrange[1]), self.last_time) |
| 1598 | if beg_range > self.last_time or end_range < self.first_time: | 1727 | if beg_range > self.last_time or end_range < self.first_time: |
| 1599 | return False | 1728 | return False |
| 1600 | print "ok2" | ||
| 1601 | vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) | 1729 | vrange[0] = self.BinarySearchTime(0, self.last_id, beg_range, True) |
| 1602 | vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) | 1730 | vrange[1] = self.BinarySearchTime(1, self.last_id + 1, end_range, False) |
| 1603 | print "vrange3 ", vrange | ||
| 1604 | return True | 1731 | return True |
| 1605 | 1732 | ||
| 1606 | def AddTimeRange(self, value, ranges): | 1733 | def AddTimeRange(self, value, ranges): |
| 1607 | print "value ", value | ||
| 1608 | n = value.count("-") | 1734 | n = value.count("-") |
| 1609 | if n == 1: | 1735 | if n == 1: |
| 1610 | pass | 1736 | pass |
| @@ -1622,111 +1748,31 @@ class SQLTableDialogDataItem(): | |||
| 1622 | return True | 1748 | return True |
| 1623 | return False | 1749 | return False |
| 1624 | 1750 | ||
| 1625 | def InvalidValue(self, value): | 1751 | def DoValidate(self, input_string): |
| 1626 | self.value = "" | 1752 | ranges = [] |
| 1627 | palette = QPalette() | 1753 | for value in [x.strip() for x in input_string.split(",")]: |
| 1628 | palette.setColor(QPalette.Text,Qt.red) | 1754 | if not self.AddTimeRange(value, ranges): |
| 1629 | self.widget.setPalette(palette) | 1755 | return self.InvalidValue(value) |
| 1630 | self.red = True | 1756 | ranges = [("(" + self.column_name + " >= " + r[0] + " AND " + self.column_name + " <= " + r[1] + ")") for r in ranges] |
| 1631 | self.error = self.label + " invalid value '" + value + "'" | 1757 | self.value = " OR ".join(ranges) |
| 1632 | self.parent.ShowMessage(self.error) | ||
| 1633 | 1758 | ||
| 1634 | def IsNumber(self, value): | 1759 | # Report Dialog Base |
| 1635 | try: | ||
| 1636 | x = int(value) | ||
| 1637 | except: | ||
| 1638 | x = 0 | ||
| 1639 | return str(x) == value | ||
| 1640 | 1760 | ||
| 1641 | def Invalidate(self): | 1761 | class ReportDialogBase(QDialog): |
| 1642 | self.validated = False | ||
| 1643 | 1762 | ||
| 1644 | def Validate(self): | 1763 | def __init__(self, glb, title, items, partial, parent=None): |
| 1645 | input_string = self.widget.text() | 1764 | super(ReportDialogBase, self).__init__(parent) |
| 1646 | self.validated = True | ||
| 1647 | if self.red: | ||
| 1648 | palette = QPalette() | ||
| 1649 | self.widget.setPalette(palette) | ||
| 1650 | self.red = False | ||
| 1651 | if not len(input_string.strip()): | ||
| 1652 | self.error = "" | ||
| 1653 | self.value = "" | ||
| 1654 | return | ||
| 1655 | if self.table_name == "<timeranges>": | ||
| 1656 | ranges = [] | ||
| 1657 | for value in [x.strip() for x in input_string.split(",")]: | ||
| 1658 | if not self.AddTimeRange(value, ranges): | ||
| 1659 | return self.InvalidValue(value) | ||
| 1660 | ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] | ||
| 1661 | self.value = " OR ".join(ranges) | ||
| 1662 | elif self.table_name == "<ranges>": | ||
| 1663 | singles = [] | ||
| 1664 | ranges = [] | ||
| 1665 | for value in [x.strip() for x in input_string.split(",")]: | ||
| 1666 | if "-" in value: | ||
| 1667 | vrange = value.split("-") | ||
| 1668 | if len(vrange) != 2 or not self.IsNumber(vrange[0]) or not self.IsNumber(vrange[1]): | ||
| 1669 | return self.InvalidValue(value) | ||
| 1670 | ranges.append(vrange) | ||
| 1671 | else: | ||
| 1672 | if not self.IsNumber(value): | ||
| 1673 | return self.InvalidValue(value) | ||
| 1674 | singles.append(value) | ||
| 1675 | ranges = [("(" + self.column_name1 + " >= " + r[0] + " AND " + self.column_name1 + " <= " + r[1] + ")") for r in ranges] | ||
| 1676 | if len(singles): | ||
| 1677 | ranges.append(self.column_name1 + " IN (" + ",".join(singles) + ")") | ||
| 1678 | self.value = " OR ".join(ranges) | ||
| 1679 | elif self.table_name: | ||
| 1680 | all_ids = [] | ||
| 1681 | for value in [x.strip() for x in input_string.split(",")]: | ||
| 1682 | ids = self.ValueToIds(value) | ||
| 1683 | if len(ids): | ||
| 1684 | all_ids.extend(ids) | ||
| 1685 | else: | ||
| 1686 | return self.InvalidValue(value) | ||
| 1687 | self.value = self.column_name1 + " IN (" + ",".join(all_ids) + ")" | ||
| 1688 | if self.column_name2: | ||
| 1689 | self.value = "( " + self.value + " OR " + self.column_name2 + " IN (" + ",".join(all_ids) + ") )" | ||
| 1690 | else: | ||
| 1691 | self.value = input_string.strip() | ||
| 1692 | self.error = "" | ||
| 1693 | self.parent.ClearMessage() | ||
| 1694 | |||
| 1695 | def IsValid(self): | ||
| 1696 | if not self.validated: | ||
| 1697 | self.Validate() | ||
| 1698 | if len(self.error): | ||
| 1699 | self.parent.ShowMessage(self.error) | ||
| 1700 | return False | ||
| 1701 | return True | ||
| 1702 | |||
| 1703 | # Selected branch report creation dialog | ||
| 1704 | |||
| 1705 | class SelectedBranchDialog(QDialog): | ||
| 1706 | |||
| 1707 | def __init__(self, glb, parent=None): | ||
| 1708 | super(SelectedBranchDialog, self).__init__(parent) | ||
| 1709 | 1765 | ||
| 1710 | self.glb = glb | 1766 | self.glb = glb |
| 1711 | 1767 | ||
| 1712 | self.name = "" | 1768 | self.report_vars = ReportVars() |
| 1713 | self.where_clause = "" | ||
| 1714 | 1769 | ||
| 1715 | self.setWindowTitle("Selected Branches") | 1770 | self.setWindowTitle(title) |
| 1716 | self.setMinimumWidth(600) | 1771 | self.setMinimumWidth(600) |
| 1717 | 1772 | ||
| 1718 | items = ( | 1773 | self.data_items = [x(glb, self) for x in items] |
| 1719 | ("Report name:", "Enter a name to appear in the window title bar", "", "", "", ""), | 1774 | |
| 1720 | ("Time ranges:", "Enter time ranges", "<timeranges>", "", "samples.id", ""), | 1775 | self.partial = partial |
| 1721 | ("CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "<ranges>", "", "cpu", ""), | ||
| 1722 | ("Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", ""), | ||
| 1723 | ("PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", ""), | ||
| 1724 | ("TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", ""), | ||
| 1725 | ("DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id"), | ||
| 1726 | ("Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id"), | ||
| 1727 | ("Raw SQL clause: ", "Enter a raw SQL WHERE clause", "", "", "", ""), | ||
| 1728 | ) | ||
| 1729 | self.data_items = [SQLTableDialogDataItem(glb, *x, parent=self) for x in items] | ||
| 1730 | 1776 | ||
| 1731 | self.grid = QGridLayout() | 1777 | self.grid = QGridLayout() |
| 1732 | 1778 | ||
| @@ -1758,23 +1804,28 @@ class SelectedBranchDialog(QDialog): | |||
| 1758 | self.setLayout(self.vbox); | 1804 | self.setLayout(self.vbox); |
| 1759 | 1805 | ||
| 1760 | def Ok(self): | 1806 | def Ok(self): |
| 1761 | self.name = self.data_items[0].value | 1807 | vars = self.report_vars |
| 1762 | if not self.name: | 1808 | for d in self.data_items: |
| 1809 | if d.id == "REPORTNAME": | ||
| 1810 | vars.name = d.value | ||
| 1811 | if not vars.name: | ||
| 1763 | self.ShowMessage("Report name is required") | 1812 | self.ShowMessage("Report name is required") |
| 1764 | return | 1813 | return |
| 1765 | for d in self.data_items: | 1814 | for d in self.data_items: |
| 1766 | if not d.IsValid(): | 1815 | if not d.IsValid(): |
| 1767 | return | 1816 | return |
| 1768 | for d in self.data_items[1:]: | 1817 | for d in self.data_items[1:]: |
| 1769 | if len(d.value): | 1818 | if d.id == "LIMIT": |
| 1770 | if len(self.where_clause): | 1819 | vars.limit = d.value |
| 1771 | self.where_clause += " AND " | 1820 | elif len(d.value): |
| 1772 | self.where_clause += d.value | 1821 | if len(vars.where_clause): |
| 1773 | if len(self.where_clause): | 1822 | vars.where_clause += " AND " |
| 1774 | self.where_clause = " AND ( " + self.where_clause + " ) " | 1823 | vars.where_clause += d.value |
| 1775 | else: | 1824 | if len(vars.where_clause): |
| 1776 | self.ShowMessage("No selection") | 1825 | if self.partial: |
| 1777 | return | 1826 | vars.where_clause = " AND ( " + vars.where_clause + " ) " |
| 1827 | else: | ||
| 1828 | vars.where_clause = " WHERE " + vars.where_clause + " " | ||
| 1778 | self.accept() | 1829 | self.accept() |
| 1779 | 1830 | ||
| 1780 | def ShowMessage(self, msg): | 1831 | def ShowMessage(self, msg): |
| @@ -1783,6 +1834,23 @@ class SelectedBranchDialog(QDialog): | |||
| 1783 | def ClearMessage(self): | 1834 | def ClearMessage(self): |
| 1784 | self.status.setText("") | 1835 | self.status.setText("") |
| 1785 | 1836 | ||
| 1837 | # Selected branch report creation dialog | ||
| 1838 | |||
| 1839 | class SelectedBranchDialog(ReportDialogBase): | ||
| 1840 | |||
| 1841 | def __init__(self, glb, parent=None): | ||
| 1842 | title = "Selected Branches" | ||
| 1843 | items = (lambda g, p: LineEditDataItem(g, "Report name:", "Enter a name to appear in the window title bar", p, "REPORTNAME"), | ||
| 1844 | lambda g, p: SampleTimeRangesDataItem(g, "Time ranges:", "Enter time ranges", "samples.id", p), | ||
| 1845 | lambda g, p: NonNegativeIntegerRangesDataItem(g, "CPUs:", "Enter CPUs or ranges e.g. 0,5-6", "cpu", p), | ||
| 1846 | lambda g, p: SQLTableDataItem(g, "Commands:", "Only branches with these commands will be included", "comms", "comm", "comm_id", "", p), | ||
| 1847 | lambda g, p: SQLTableDataItem(g, "PIDs:", "Only branches with these process IDs will be included", "threads", "pid", "thread_id", "", p), | ||
| 1848 | lambda g, p: SQLTableDataItem(g, "TIDs:", "Only branches with these thread IDs will be included", "threads", "tid", "thread_id", "", p), | ||
| 1849 | lambda g, p: SQLTableDataItem(g, "DSOs:", "Only branches with these DSOs will be included", "dsos", "short_name", "samples.dso_id", "to_dso_id", p), | ||
| 1850 | lambda g, p: SQLTableDataItem(g, "Symbols:", "Only branches with these symbols will be included", "symbols", "name", "symbol_id", "to_symbol_id", p), | ||
| 1851 | lambda g, p: LineEditDataItem(g, "Raw SQL clause: ", "Enter a raw SQL WHERE clause", p)) | ||
| 1852 | super(SelectedBranchDialog, self).__init__(glb, title, items, True, parent) | ||
| 1853 | |||
| 1786 | # Event list | 1854 | # Event list |
| 1787 | 1855 | ||
| 1788 | def GetEventList(db): | 1856 | def GetEventList(db): |
| @@ -1793,6 +1861,16 @@ def GetEventList(db): | |||
| 1793 | events.append(query.value(0)) | 1861 | events.append(query.value(0)) |
| 1794 | return events | 1862 | return events |
| 1795 | 1863 | ||
| 1864 | # Is a table selectable | ||
| 1865 | |||
| 1866 | def IsSelectable(db, table): | ||
| 1867 | query = QSqlQuery(db) | ||
| 1868 | try: | ||
| 1869 | QueryExec(query, "SELECT * FROM " + table + " LIMIT 1") | ||
| 1870 | except: | ||
| 1871 | return False | ||
| 1872 | return True | ||
| 1873 | |||
| 1796 | # SQL data preparation | 1874 | # SQL data preparation |
| 1797 | 1875 | ||
| 1798 | def SQLTableDataPrep(query, count): | 1876 | def SQLTableDataPrep(query, count): |
| @@ -1818,12 +1896,13 @@ class SQLTableModel(TableModel): | |||
| 1818 | 1896 | ||
| 1819 | progress = Signal(object) | 1897 | progress = Signal(object) |
| 1820 | 1898 | ||
| 1821 | def __init__(self, glb, sql, column_count, parent=None): | 1899 | def __init__(self, glb, sql, column_headers, parent=None): |
| 1822 | super(SQLTableModel, self).__init__(parent) | 1900 | super(SQLTableModel, self).__init__(parent) |
| 1823 | self.glb = glb | 1901 | self.glb = glb |
| 1824 | self.more = True | 1902 | self.more = True |
| 1825 | self.populated = 0 | 1903 | self.populated = 0 |
| 1826 | self.fetcher = SQLFetcher(glb, sql, lambda x, y=column_count: SQLTableDataPrep(x, y), self.AddSample) | 1904 | self.column_headers = column_headers |
| 1905 | self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample) | ||
| 1827 | self.fetcher.done.connect(self.Update) | 1906 | self.fetcher.done.connect(self.Update) |
| 1828 | self.fetcher.Fetch(glb_chunk_sz) | 1907 | self.fetcher.Fetch(glb_chunk_sz) |
| 1829 | 1908 | ||
| @@ -1861,6 +1940,12 @@ class SQLTableModel(TableModel): | |||
| 1861 | def HasMoreRecords(self): | 1940 | def HasMoreRecords(self): |
| 1862 | return self.more | 1941 | return self.more |
| 1863 | 1942 | ||
| 1943 | def columnCount(self, parent=None): | ||
| 1944 | return len(self.column_headers) | ||
| 1945 | |||
| 1946 | def columnHeader(self, column): | ||
| 1947 | return self.column_headers[column] | ||
| 1948 | |||
| 1864 | # SQL automatic table data model | 1949 | # SQL automatic table data model |
| 1865 | 1950 | ||
| 1866 | class SQLAutoTableModel(SQLTableModel): | 1951 | class SQLAutoTableModel(SQLTableModel): |
| @@ -1870,12 +1955,12 @@ class SQLAutoTableModel(SQLTableModel): | |||
| 1870 | if table_name == "comm_threads_view": | 1955 | if table_name == "comm_threads_view": |
| 1871 | # For now, comm_threads_view has no id column | 1956 | # For now, comm_threads_view has no id column |
| 1872 | sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz) | 1957 | sql = "SELECT * FROM " + table_name + " WHERE comm_id > $$last_id$$ ORDER BY comm_id LIMIT " + str(glb_chunk_sz) |
| 1873 | self.column_headers = [] | 1958 | column_headers = [] |
| 1874 | query = QSqlQuery(glb.db) | 1959 | query = QSqlQuery(glb.db) |
| 1875 | if glb.dbref.is_sqlite3: | 1960 | if glb.dbref.is_sqlite3: |
| 1876 | QueryExec(query, "PRAGMA table_info(" + table_name + ")") | 1961 | QueryExec(query, "PRAGMA table_info(" + table_name + ")") |
| 1877 | while query.next(): | 1962 | while query.next(): |
| 1878 | self.column_headers.append(query.value(1)) | 1963 | column_headers.append(query.value(1)) |
| 1879 | if table_name == "sqlite_master": | 1964 | if table_name == "sqlite_master": |
| 1880 | sql = "SELECT * FROM " + table_name | 1965 | sql = "SELECT * FROM " + table_name |
| 1881 | else: | 1966 | else: |
| @@ -1888,14 +1973,8 @@ class SQLAutoTableModel(SQLTableModel): | |||
| 1888 | schema = "public" | 1973 | schema = "public" |
| 1889 | QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") | 1974 | QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") |
| 1890 | while query.next(): | 1975 | while query.next(): |
| 1891 | self.column_headers.append(query.value(0)) | 1976 | column_headers.append(query.value(0)) |
| 1892 | super(SQLAutoTableModel, self).__init__(glb, sql, len(self.column_headers), parent) | 1977 | super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent) |
| 1893 | |||
| 1894 | def columnCount(self, parent=None): | ||
| 1895 | return len(self.column_headers) | ||
| 1896 | |||
| 1897 | def columnHeader(self, column): | ||
| 1898 | return self.column_headers[column] | ||
| 1899 | 1978 | ||
| 1900 | # Base class for custom ResizeColumnsToContents | 1979 | # Base class for custom ResizeColumnsToContents |
| 1901 | 1980 | ||
| @@ -1998,6 +2077,103 @@ def GetTableList(glb): | |||
| 1998 | tables.append("information_schema.columns") | 2077 | tables.append("information_schema.columns") |
| 1999 | return tables | 2078 | return tables |
| 2000 | 2079 | ||
| 2080 | # Top Calls data model | ||
| 2081 | |||
| 2082 | class TopCallsModel(SQLTableModel): | ||
| 2083 | |||
| 2084 | def __init__(self, glb, report_vars, parent=None): | ||
| 2085 | text = "" | ||
| 2086 | if not glb.dbref.is_sqlite3: | ||
| 2087 | text = "::text" | ||
| 2088 | limit = "" | ||
| 2089 | if len(report_vars.limit): | ||
| 2090 | limit = " LIMIT " + report_vars.limit | ||
| 2091 | sql = ("SELECT comm, pid, tid, name," | ||
| 2092 | " CASE" | ||
| 2093 | " WHEN (short_name = '[kernel.kallsyms]') THEN '[kernel]'" + text + | ||
| 2094 | " ELSE short_name" | ||
| 2095 | " END AS dso," | ||
| 2096 | " call_time, return_time, (return_time - call_time) AS elapsed_time, branch_count, " | ||
| 2097 | " CASE" | ||
| 2098 | " WHEN (calls.flags = 1) THEN 'no call'" + text + | ||
| 2099 | " WHEN (calls.flags = 2) THEN 'no return'" + text + | ||
| 2100 | " WHEN (calls.flags = 3) THEN 'no call/return'" + text + | ||
| 2101 | " ELSE ''" + text + | ||
| 2102 | " END AS flags" | ||
| 2103 | " FROM calls" | ||
| 2104 | " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" | ||
| 2105 | " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" | ||
| 2106 | " INNER JOIN dsos ON symbols.dso_id = dsos.id" | ||
| 2107 | " INNER JOIN comms ON calls.comm_id = comms.id" | ||
| 2108 | " INNER JOIN threads ON calls.thread_id = threads.id" + | ||
| 2109 | report_vars.where_clause + | ||
| 2110 | " ORDER BY elapsed_time DESC" + | ||
| 2111 | limit | ||
| 2112 | ) | ||
| 2113 | column_headers = ("Command", "PID", "TID", "Symbol", "Object", "Call Time", "Return Time", "Elapsed Time (ns)", "Branch Count", "Flags") | ||
| 2114 | self.alignment = (Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignLeft) | ||
| 2115 | super(TopCallsModel, self).__init__(glb, sql, column_headers, parent) | ||
| 2116 | |||
| 2117 | def columnAlignment(self, column): | ||
| 2118 | return self.alignment[column] | ||
| 2119 | |||
| 2120 | # Top Calls report creation dialog | ||
| 2121 | |||
| 2122 | class TopCallsDialog(ReportDialogBase): | ||
| 2123 | |||
| 2124 | def __init__(self, glb, parent=None): | ||
| 2125 | title = "Top Calls by Elapsed Time" | ||
| 2126 | items = (lambda g, p: LineEditDataItem(g, "Report name:", "Enter a name to appear in the window title bar", p, "REPORTNAME"), | ||
| 2127 | lambda g, p: SQLTableDataItem(g, "Commands:", "Only calls with these commands will be included", "comms", "comm", "comm_id", "", p), | ||
| 2128 | lambda g, p: SQLTableDataItem(g, "PIDs:", "Only calls with these process IDs will be included", "threads", "pid", "thread_id", "", p), | ||
| 2129 | lambda g, p: SQLTableDataItem(g, "TIDs:", "Only calls with these thread IDs will be included", "threads", "tid", "thread_id", "", p), | ||
| 2130 | lambda g, p: SQLTableDataItem(g, "DSOs:", "Only calls with these DSOs will be included", "dsos", "short_name", "dso_id", "", p), | ||
| 2131 | lambda g, p: SQLTableDataItem(g, "Symbols:", "Only calls with these symbols will be included", "symbols", "name", "symbol_id", "", p), | ||
| 2132 | lambda g, p: LineEditDataItem(g, "Raw SQL clause: ", "Enter a raw SQL WHERE clause", p), | ||
| 2133 | lambda g, p: PositiveIntegerDataItem(g, "Record limit:", "Limit selection to this number of records", p, "LIMIT", "100")) | ||
| 2134 | super(TopCallsDialog, self).__init__(glb, title, items, False, parent) | ||
| 2135 | |||
| 2136 | # Top Calls window | ||
| 2137 | |||
| 2138 | class TopCallsWindow(QMdiSubWindow, ResizeColumnsToContentsBase): | ||
| 2139 | |||
| 2140 | def __init__(self, glb, report_vars, parent=None): | ||
| 2141 | super(TopCallsWindow, self).__init__(parent) | ||
| 2142 | |||
| 2143 | self.data_model = LookupCreateModel("Top Calls " + report_vars.UniqueId(), lambda: TopCallsModel(glb, report_vars)) | ||
| 2144 | self.model = self.data_model | ||
| 2145 | |||
| 2146 | self.view = QTableView() | ||
| 2147 | self.view.setModel(self.model) | ||
| 2148 | self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) | ||
| 2149 | self.view.verticalHeader().setVisible(False) | ||
| 2150 | |||
| 2151 | self.ResizeColumnsToContents() | ||
| 2152 | |||
| 2153 | self.find_bar = FindBar(self, self, True) | ||
| 2154 | |||
| 2155 | self.finder = ChildDataItemFinder(self.model) | ||
| 2156 | |||
| 2157 | self.fetch_bar = FetchMoreRecordsBar(self.data_model, self) | ||
| 2158 | |||
| 2159 | self.vbox = VBox(self.view, self.find_bar.Widget(), self.fetch_bar.Widget()) | ||
| 2160 | |||
| 2161 | self.setWidget(self.vbox.Widget()) | ||
| 2162 | |||
| 2163 | AddSubWindow(glb.mainwindow.mdi_area, self, report_vars.name) | ||
| 2164 | |||
| 2165 | def Find(self, value, direction, pattern, context): | ||
| 2166 | self.view.setFocus() | ||
| 2167 | self.find_bar.Busy() | ||
| 2168 | self.finder.Find(value, direction, pattern, context, self.FindDone) | ||
| 2169 | |||
| 2170 | def FindDone(self, row): | ||
| 2171 | self.find_bar.Idle() | ||
| 2172 | if row >= 0: | ||
| 2173 | self.view.setCurrentIndex(self.model.index(row, 0, QModelIndex())) | ||
| 2174 | else: | ||
| 2175 | self.find_bar.NotFound() | ||
| 2176 | |||
| 2001 | # Action Definition | 2177 | # Action Definition |
| 2002 | 2178 | ||
| 2003 | def CreateAction(label, tip, callback, parent=None, shortcut=None): | 2179 | def CreateAction(label, tip, callback, parent=None, shortcut=None): |
| @@ -2101,6 +2277,7 @@ p.c2 { | |||
| 2101 | <p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p> | 2277 | <p class=c2><a href=#callgraph>1.1 Context-Sensitive Call Graph</a></p> |
| 2102 | <p class=c2><a href=#allbranches>1.2 All branches</a></p> | 2278 | <p class=c2><a href=#allbranches>1.2 All branches</a></p> |
| 2103 | <p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p> | 2279 | <p class=c2><a href=#selectedbranches>1.3 Selected branches</a></p> |
| 2280 | <p class=c2><a href=#topcallsbyelapsedtime>1.4 Top calls by elapsed time</a></p> | ||
| 2104 | <p class=c1><a href=#tables>2. Tables</a></p> | 2281 | <p class=c1><a href=#tables>2. Tables</a></p> |
| 2105 | <h1 id=reports>1. Reports</h1> | 2282 | <h1 id=reports>1. Reports</h1> |
| 2106 | <h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> | 2283 | <h2 id=callgraph>1.1 Context-Sensitive Call Graph</h2> |
| @@ -2176,6 +2353,10 @@ ms, us or ns. Also, negative values are relative to the end of trace. Examples: | |||
| 2176 | -10ms- The last 10ms | 2353 | -10ms- The last 10ms |
| 2177 | </pre> | 2354 | </pre> |
| 2178 | N.B. Due to the granularity of timestamps, there could be no branches in any given time range. | 2355 | N.B. Due to the granularity of timestamps, there could be no branches in any given time range. |
| 2356 | <h2 id=topcallsbyelapsedtime>1.4 Top calls by elapsed time</h2> | ||
| 2357 | The Top calls by elapsed time report displays calls in descending order of time elapsed between when the function was called and when it returned. | ||
| 2358 | The data is reduced by various selection criteria. A dialog box displays available criteria which are AND'ed together. | ||
| 2359 | If not all data is fetched, a Fetch bar is provided. Ctrl-F displays a Find bar. | ||
| 2179 | <h1 id=tables>2. Tables</h1> | 2360 | <h1 id=tables>2. Tables</h1> |
| 2180 | The Tables menu shows all tables and views in the database. Most tables have an associated view | 2361 | The Tables menu shows all tables and views in the database. Most tables have an associated view |
| 2181 | which displays the information in a more friendly way. Not all data for large tables is fetched | 2362 | which displays the information in a more friendly way. Not all data for large tables is fetched |
| @@ -2305,10 +2486,14 @@ class MainWindow(QMainWindow): | |||
| 2305 | edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) | 2486 | edit_menu.addAction(CreateAction("&Enlarge Font", "Make text bigger", self.EnlargeFont, self, [QKeySequence("Ctrl++")])) |
| 2306 | 2487 | ||
| 2307 | reports_menu = menu.addMenu("&Reports") | 2488 | reports_menu = menu.addMenu("&Reports") |
| 2308 | reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) | 2489 | if IsSelectable(glb.db, "calls"): |
| 2490 | reports_menu.addAction(CreateAction("Context-Sensitive Call &Graph", "Create a new window containing a context-sensitive call graph", self.NewCallGraph, self)) | ||
| 2309 | 2491 | ||
| 2310 | self.EventMenu(GetEventList(glb.db), reports_menu) | 2492 | self.EventMenu(GetEventList(glb.db), reports_menu) |
| 2311 | 2493 | ||
| 2494 | if IsSelectable(glb.db, "calls"): | ||
| 2495 | reports_menu.addAction(CreateAction("&Top calls by elapsed time", "Create a new window displaying top calls by elapsed time", self.NewTopCalls, self)) | ||
| 2496 | |||
| 2312 | self.TableMenu(GetTableList(glb), menu) | 2497 | self.TableMenu(GetTableList(glb), menu) |
| 2313 | 2498 | ||
| 2314 | self.window_menu = WindowMenu(self.mdi_area, menu) | 2499 | self.window_menu = WindowMenu(self.mdi_area, menu) |
| @@ -2364,14 +2549,20 @@ class MainWindow(QMainWindow): | |||
| 2364 | def NewCallGraph(self): | 2549 | def NewCallGraph(self): |
| 2365 | CallGraphWindow(self.glb, self) | 2550 | CallGraphWindow(self.glb, self) |
| 2366 | 2551 | ||
| 2552 | def NewTopCalls(self): | ||
| 2553 | dialog = TopCallsDialog(self.glb, self) | ||
| 2554 | ret = dialog.exec_() | ||
| 2555 | if ret: | ||
| 2556 | TopCallsWindow(self.glb, dialog.report_vars, self) | ||
| 2557 | |||
| 2367 | def NewBranchView(self, event_id): | 2558 | def NewBranchView(self, event_id): |
| 2368 | BranchWindow(self.glb, event_id, "", "", self) | 2559 | BranchWindow(self.glb, event_id, ReportVars(), self) |
| 2369 | 2560 | ||
| 2370 | def NewSelectedBranchView(self, event_id): | 2561 | def NewSelectedBranchView(self, event_id): |
| 2371 | dialog = SelectedBranchDialog(self.glb, self) | 2562 | dialog = SelectedBranchDialog(self.glb, self) |
| 2372 | ret = dialog.exec_() | 2563 | ret = dialog.exec_() |
| 2373 | if ret: | 2564 | if ret: |
| 2374 | BranchWindow(self.glb, event_id, dialog.name, dialog.where_clause, self) | 2565 | BranchWindow(self.glb, event_id, dialog.report_vars, self) |
| 2375 | 2566 | ||
| 2376 | def NewTableView(self, table_name): | 2567 | def NewTableView(self, table_name): |
| 2377 | TableWindow(self.glb, table_name, self) | 2568 | TableWindow(self.glb, table_name, self) |
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py index cafeff3d74db..3648e8b986ec 100644 --- a/tools/perf/scripts/python/failed-syscalls-by-pid.py +++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | # Displays system-wide failed system call totals, broken down by pid. | 5 | # Displays system-wide failed system call totals, broken down by pid. |
| 6 | # If a [comm] arg is specified, only syscalls called by [comm] are displayed. | 6 | # If a [comm] arg is specified, only syscalls called by [comm] are displayed. |
| 7 | 7 | ||
| 8 | from __future__ import print_function | ||
| 9 | |||
| 8 | import os | 10 | import os |
| 9 | import sys | 11 | import sys |
| 10 | 12 | ||
| @@ -32,7 +34,7 @@ if len(sys.argv) > 1: | |||
| 32 | syscalls = autodict() | 34 | syscalls = autodict() |
| 33 | 35 | ||
| 34 | def trace_begin(): | 36 | def trace_begin(): |
| 35 | print "Press control+C to stop and show the summary" | 37 | print("Press control+C to stop and show the summary") |
| 36 | 38 | ||
| 37 | def trace_end(): | 39 | def trace_end(): |
| 38 | print_error_totals() | 40 | print_error_totals() |
| @@ -57,22 +59,21 @@ def syscalls__sys_exit(event_name, context, common_cpu, | |||
| 57 | 59 | ||
| 58 | def print_error_totals(): | 60 | def print_error_totals(): |
| 59 | if for_comm is not None: | 61 | if for_comm is not None: |
| 60 | print "\nsyscall errors for %s:\n\n" % (for_comm), | 62 | print("\nsyscall errors for %s:\n" % (for_comm)) |
| 61 | else: | 63 | else: |
| 62 | print "\nsyscall errors:\n\n", | 64 | print("\nsyscall errors:\n") |
| 63 | 65 | ||
| 64 | print "%-30s %10s\n" % ("comm [pid]", "count"), | 66 | print("%-30s %10s" % ("comm [pid]", "count")) |
| 65 | print "%-30s %10s\n" % ("------------------------------", \ | 67 | print("%-30s %10s" % ("------------------------------", "----------")) |
| 66 | "----------"), | ||
| 67 | 68 | ||
| 68 | comm_keys = syscalls.keys() | 69 | comm_keys = syscalls.keys() |
| 69 | for comm in comm_keys: | 70 | for comm in comm_keys: |
| 70 | pid_keys = syscalls[comm].keys() | 71 | pid_keys = syscalls[comm].keys() |
| 71 | for pid in pid_keys: | 72 | for pid in pid_keys: |
| 72 | print "\n%s [%d]\n" % (comm, pid), | 73 | print("\n%s [%d]" % (comm, pid)) |
| 73 | id_keys = syscalls[comm][pid].keys() | 74 | id_keys = syscalls[comm][pid].keys() |
| 74 | for id in id_keys: | 75 | for id in id_keys: |
| 75 | print " syscall: %-16s\n" % syscall_name(id), | 76 | print(" syscall: %-16s" % syscall_name(id)) |
| 76 | ret_keys = syscalls[comm][pid][id].keys() | 77 | ret_keys = syscalls[comm][pid][id].keys() |
| 77 | for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True): | 78 | for ret, val in sorted(syscalls[comm][pid][id].items(), key = lambda kv: (kv[1], kv[0]), reverse = True): |
| 78 | print " err = %-20s %10d\n" % (strerror(ret), val), | 79 | print(" err = %-20s %10d" % (strerror(ret), val)) |
diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py index ebee2c5ae496..fb0bbcbfa0f0 100644 --- a/tools/perf/scripts/python/mem-phys-addr.py +++ b/tools/perf/scripts/python/mem-phys-addr.py | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | # Copyright (c) 2018, Intel Corporation. | 4 | # Copyright (c) 2018, Intel Corporation. |
| 5 | 5 | ||
| 6 | from __future__ import division | 6 | from __future__ import division |
| 7 | from __future__ import print_function | ||
| 8 | |||
| 7 | import os | 9 | import os |
| 8 | import sys | 10 | import sys |
| 9 | import struct | 11 | import struct |
| @@ -31,21 +33,23 @@ def parse_iomem(): | |||
| 31 | for i, j in enumerate(f): | 33 | for i, j in enumerate(f): |
| 32 | m = re.split('-|:',j,2) | 34 | m = re.split('-|:',j,2) |
| 33 | if m[2].strip() == 'System RAM': | 35 | if m[2].strip() == 'System RAM': |
| 34 | system_ram.append(long(m[0], 16)) | 36 | system_ram.append(int(m[0], 16)) |
| 35 | system_ram.append(long(m[1], 16)) | 37 | system_ram.append(int(m[1], 16)) |
| 36 | if m[2].strip() == 'Persistent Memory': | 38 | if m[2].strip() == 'Persistent Memory': |
| 37 | pmem.append(long(m[0], 16)) | 39 | pmem.append(int(m[0], 16)) |
| 38 | pmem.append(long(m[1], 16)) | 40 | pmem.append(int(m[1], 16)) |
| 39 | 41 | ||
| 40 | def print_memory_type(): | 42 | def print_memory_type(): |
| 41 | print "Event: %s" % (event_name) | 43 | print("Event: %s" % (event_name)) |
| 42 | print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), | 44 | print("%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), end='') |
| 43 | print "%-40s %10s %10s\n" % ("----------------------------------------", \ | 45 | print("%-40s %10s %10s\n" % ("----------------------------------------", |
| 44 | "-----------", "-----------"), | 46 | "-----------", "-----------"), |
| 47 | end=''); | ||
| 45 | total = sum(load_mem_type_cnt.values()) | 48 | total = sum(load_mem_type_cnt.values()) |
| 46 | for mem_type, count in sorted(load_mem_type_cnt.most_common(), \ | 49 | for mem_type, count in sorted(load_mem_type_cnt.most_common(), \ |
| 47 | key = lambda(k, v): (v, k), reverse = True): | 50 | key = lambda kv: (kv[1], kv[0]), reverse = True): |
| 48 | print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total), | 51 | print("%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total), |
| 52 | end='') | ||
| 49 | 53 | ||
| 50 | def trace_begin(): | 54 | def trace_begin(): |
| 51 | parse_iomem() | 55 | parse_iomem() |
| @@ -80,7 +84,7 @@ def find_memory_type(phys_addr): | |||
| 80 | f.seek(0, 0) | 84 | f.seek(0, 0) |
| 81 | for j in f: | 85 | for j in f: |
| 82 | m = re.split('-|:',j,2) | 86 | m = re.split('-|:',j,2) |
| 83 | if long(m[0], 16) <= phys_addr <= long(m[1], 16): | 87 | if int(m[0], 16) <= phys_addr <= int(m[1], 16): |
| 84 | return m[2] | 88 | return m[2] |
| 85 | return "N/A" | 89 | return "N/A" |
| 86 | 90 | ||
diff --git a/tools/perf/scripts/python/net_dropmonitor.py b/tools/perf/scripts/python/net_dropmonitor.py index a150164b44a3..212557a02c50 100755 --- a/tools/perf/scripts/python/net_dropmonitor.py +++ b/tools/perf/scripts/python/net_dropmonitor.py | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | # Monitor the system for dropped packets and proudce a report of drop locations and counts | 1 | # Monitor the system for dropped packets and proudce a report of drop locations and counts |
| 2 | # SPDX-License-Identifier: GPL-2.0 | 2 | # SPDX-License-Identifier: GPL-2.0 |
| 3 | 3 | ||
| 4 | from __future__ import print_function | ||
| 5 | |||
| 4 | import os | 6 | import os |
| 5 | import sys | 7 | import sys |
| 6 | 8 | ||
| @@ -50,19 +52,19 @@ def get_sym(sloc): | |||
| 50 | return (None, 0) | 52 | return (None, 0) |
| 51 | 53 | ||
| 52 | def print_drop_table(): | 54 | def print_drop_table(): |
| 53 | print "%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT") | 55 | print("%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT")) |
| 54 | for i in drop_log.keys(): | 56 | for i in drop_log.keys(): |
| 55 | (sym, off) = get_sym(i) | 57 | (sym, off) = get_sym(i) |
| 56 | if sym == None: | 58 | if sym == None: |
| 57 | sym = i | 59 | sym = i |
| 58 | print "%25s %25s %25s" % (sym, off, drop_log[i]) | 60 | print("%25s %25s %25s" % (sym, off, drop_log[i])) |
| 59 | 61 | ||
| 60 | 62 | ||
| 61 | def trace_begin(): | 63 | def trace_begin(): |
| 62 | print "Starting trace (Ctrl-C to dump results)" | 64 | print("Starting trace (Ctrl-C to dump results)") |
| 63 | 65 | ||
| 64 | def trace_end(): | 66 | def trace_end(): |
| 65 | print "Gathering kallsyms data" | 67 | print("Gathering kallsyms data") |
| 66 | get_kallsyms_table() | 68 | get_kallsyms_table() |
| 67 | print_drop_table() | 69 | print_drop_table() |
| 68 | 70 | ||
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py index 9b2050f778f1..267bda49325d 100644 --- a/tools/perf/scripts/python/netdev-times.py +++ b/tools/perf/scripts/python/netdev-times.py | |||
| @@ -8,6 +8,8 @@ | |||
| 8 | # dev=: show only thing related to specified device | 8 | # dev=: show only thing related to specified device |
| 9 | # debug: work with debug mode. It shows buffer status. | 9 | # debug: work with debug mode. It shows buffer status. |
| 10 | 10 | ||
| 11 | from __future__ import print_function | ||
| 12 | |||
| 11 | import os | 13 | import os |
| 12 | import sys | 14 | import sys |
| 13 | 15 | ||
| @@ -17,6 +19,7 @@ sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | |||
| 17 | from perf_trace_context import * | 19 | from perf_trace_context import * |
| 18 | from Core import * | 20 | from Core import * |
| 19 | from Util import * | 21 | from Util import * |
| 22 | from functools import cmp_to_key | ||
| 20 | 23 | ||
| 21 | all_event_list = []; # insert all tracepoint event related with this script | 24 | all_event_list = []; # insert all tracepoint event related with this script |
| 22 | irq_dic = {}; # key is cpu and value is a list which stacks irqs | 25 | irq_dic = {}; # key is cpu and value is a list which stacks irqs |
| @@ -61,12 +64,12 @@ def diff_msec(src, dst): | |||
| 61 | def print_transmit(hunk): | 64 | def print_transmit(hunk): |
| 62 | if dev != 0 and hunk['dev'].find(dev) < 0: | 65 | if dev != 0 and hunk['dev'].find(dev) < 0: |
| 63 | return | 66 | return |
| 64 | print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \ | 67 | print("%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % |
| 65 | (hunk['dev'], hunk['len'], | 68 | (hunk['dev'], hunk['len'], |
| 66 | nsecs_secs(hunk['queue_t']), | 69 | nsecs_secs(hunk['queue_t']), |
| 67 | nsecs_nsecs(hunk['queue_t'])/1000, | 70 | nsecs_nsecs(hunk['queue_t'])/1000, |
| 68 | diff_msec(hunk['queue_t'], hunk['xmit_t']), | 71 | diff_msec(hunk['queue_t'], hunk['xmit_t']), |
| 69 | diff_msec(hunk['xmit_t'], hunk['free_t'])) | 72 | diff_msec(hunk['xmit_t'], hunk['free_t']))) |
| 70 | 73 | ||
| 71 | # Format for displaying rx packet processing | 74 | # Format for displaying rx packet processing |
| 72 | PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)" | 75 | PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)" |
| @@ -98,55 +101,55 @@ def print_receive(hunk): | |||
| 98 | if show_hunk == 0: | 101 | if show_hunk == 0: |
| 99 | return | 102 | return |
| 100 | 103 | ||
| 101 | print "%d.%06dsec cpu=%d" % \ | 104 | print("%d.%06dsec cpu=%d" % |
| 102 | (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu) | 105 | (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu)) |
| 103 | for i in range(len(irq_list)): | 106 | for i in range(len(irq_list)): |
| 104 | print PF_IRQ_ENTRY % \ | 107 | print(PF_IRQ_ENTRY % |
| 105 | (diff_msec(base_t, irq_list[i]['irq_ent_t']), | 108 | (diff_msec(base_t, irq_list[i]['irq_ent_t']), |
| 106 | irq_list[i]['irq'], irq_list[i]['name']) | 109 | irq_list[i]['irq'], irq_list[i]['name'])) |
| 107 | print PF_JOINT | 110 | print(PF_JOINT) |
| 108 | irq_event_list = irq_list[i]['event_list'] | 111 | irq_event_list = irq_list[i]['event_list'] |
| 109 | for j in range(len(irq_event_list)): | 112 | for j in range(len(irq_event_list)): |
| 110 | irq_event = irq_event_list[j] | 113 | irq_event = irq_event_list[j] |
| 111 | if irq_event['event'] == 'netif_rx': | 114 | if irq_event['event'] == 'netif_rx': |
| 112 | print PF_NET_RX % \ | 115 | print(PF_NET_RX % |
| 113 | (diff_msec(base_t, irq_event['time']), | 116 | (diff_msec(base_t, irq_event['time']), |
| 114 | irq_event['skbaddr']) | 117 | irq_event['skbaddr'])) |
| 115 | print PF_JOINT | 118 | print(PF_JOINT) |
| 116 | print PF_SOFT_ENTRY % \ | 119 | print(PF_SOFT_ENTRY % |
| 117 | diff_msec(base_t, hunk['sirq_ent_t']) | 120 | diff_msec(base_t, hunk['sirq_ent_t'])) |
| 118 | print PF_JOINT | 121 | print(PF_JOINT) |
| 119 | event_list = hunk['event_list'] | 122 | event_list = hunk['event_list'] |
| 120 | for i in range(len(event_list)): | 123 | for i in range(len(event_list)): |
| 121 | event = event_list[i] | 124 | event = event_list[i] |
| 122 | if event['event_name'] == 'napi_poll': | 125 | if event['event_name'] == 'napi_poll': |
| 123 | print PF_NAPI_POLL % \ | 126 | print(PF_NAPI_POLL % |
| 124 | (diff_msec(base_t, event['event_t']), event['dev']) | 127 | (diff_msec(base_t, event['event_t']), event['dev'])) |
| 125 | if i == len(event_list) - 1: | 128 | if i == len(event_list) - 1: |
| 126 | print "" | 129 | print("") |
| 127 | else: | 130 | else: |
| 128 | print PF_JOINT | 131 | print(PF_JOINT) |
| 129 | else: | 132 | else: |
| 130 | print PF_NET_RECV % \ | 133 | print(PF_NET_RECV % |
| 131 | (diff_msec(base_t, event['event_t']), event['skbaddr'], | 134 | (diff_msec(base_t, event['event_t']), event['skbaddr'], |
| 132 | event['len']) | 135 | event['len'])) |
| 133 | if 'comm' in event.keys(): | 136 | if 'comm' in event.keys(): |
| 134 | print PF_WJOINT | 137 | print(PF_WJOINT) |
| 135 | print PF_CPY_DGRAM % \ | 138 | print(PF_CPY_DGRAM % |
| 136 | (diff_msec(base_t, event['comm_t']), | 139 | (diff_msec(base_t, event['comm_t']), |
| 137 | event['pid'], event['comm']) | 140 | event['pid'], event['comm'])) |
| 138 | elif 'handle' in event.keys(): | 141 | elif 'handle' in event.keys(): |
| 139 | print PF_WJOINT | 142 | print(PF_WJOINT) |
| 140 | if event['handle'] == "kfree_skb": | 143 | if event['handle'] == "kfree_skb": |
| 141 | print PF_KFREE_SKB % \ | 144 | print(PF_KFREE_SKB % |
| 142 | (diff_msec(base_t, | 145 | (diff_msec(base_t, |
| 143 | event['comm_t']), | 146 | event['comm_t']), |
| 144 | event['location']) | 147 | event['location'])) |
| 145 | elif event['handle'] == "consume_skb": | 148 | elif event['handle'] == "consume_skb": |
| 146 | print PF_CONS_SKB % \ | 149 | print(PF_CONS_SKB % |
| 147 | diff_msec(base_t, | 150 | diff_msec(base_t, |
| 148 | event['comm_t']) | 151 | event['comm_t'])) |
| 149 | print PF_JOINT | 152 | print(PF_JOINT) |
| 150 | 153 | ||
| 151 | def trace_begin(): | 154 | def trace_begin(): |
| 152 | global show_tx | 155 | global show_tx |
| @@ -172,8 +175,7 @@ def trace_begin(): | |||
| 172 | 175 | ||
| 173 | def trace_end(): | 176 | def trace_end(): |
| 174 | # order all events in time | 177 | # order all events in time |
| 175 | all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME], | 178 | all_event_list.sort(key=cmp_to_key(lambda a,b :a[EINFO_IDX_TIME] < b[EINFO_IDX_TIME])) |
| 176 | b[EINFO_IDX_TIME])) | ||
| 177 | # process all events | 179 | # process all events |
| 178 | for i in range(len(all_event_list)): | 180 | for i in range(len(all_event_list)): |
| 179 | event_info = all_event_list[i] | 181 | event_info = all_event_list[i] |
| @@ -210,19 +212,19 @@ def trace_end(): | |||
| 210 | print_receive(receive_hunk_list[i]) | 212 | print_receive(receive_hunk_list[i]) |
| 211 | # display transmit hunks | 213 | # display transmit hunks |
| 212 | if show_tx: | 214 | if show_tx: |
| 213 | print " dev len Qdisc " \ | 215 | print(" dev len Qdisc " |
| 214 | " netdevice free" | 216 | " netdevice free") |
| 215 | for i in range(len(tx_free_list)): | 217 | for i in range(len(tx_free_list)): |
| 216 | print_transmit(tx_free_list[i]) | 218 | print_transmit(tx_free_list[i]) |
| 217 | if debug: | 219 | if debug: |
| 218 | print "debug buffer status" | 220 | print("debug buffer status") |
| 219 | print "----------------------------" | 221 | print("----------------------------") |
| 220 | print "xmit Qdisc:remain:%d overflow:%d" % \ | 222 | print("xmit Qdisc:remain:%d overflow:%d" % |
| 221 | (len(tx_queue_list), of_count_tx_queue_list) | 223 | (len(tx_queue_list), of_count_tx_queue_list)) |
| 222 | print "xmit netdevice:remain:%d overflow:%d" % \ | 224 | print("xmit netdevice:remain:%d overflow:%d" % |
| 223 | (len(tx_xmit_list), of_count_tx_xmit_list) | 225 | (len(tx_xmit_list), of_count_tx_xmit_list)) |
| 224 | print "receive:remain:%d overflow:%d" % \ | 226 | print("receive:remain:%d overflow:%d" % |
| 225 | (len(rx_skb_list), of_count_rx_skb_list) | 227 | (len(rx_skb_list), of_count_rx_skb_list)) |
| 226 | 228 | ||
| 227 | # called from perf, when it finds a correspoinding event | 229 | # called from perf, when it finds a correspoinding event |
| 228 | def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec): | 230 | def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec): |
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py index 00e0e7476e55..8b78dc790adb 100644 --- a/tools/perf/scripts/python/powerpc-hcalls.py +++ b/tools/perf/scripts/python/powerpc-hcalls.py | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | # | 4 | # |
| 5 | # Hypervisor call statisics | 5 | # Hypervisor call statisics |
| 6 | 6 | ||
| 7 | from __future__ import print_function | ||
| 8 | |||
| 7 | import os | 9 | import os |
| 8 | import sys | 10 | import sys |
| 9 | 11 | ||
| @@ -149,7 +151,7 @@ hcall_table = { | |||
| 149 | } | 151 | } |
| 150 | 152 | ||
| 151 | def hcall_table_lookup(opcode): | 153 | def hcall_table_lookup(opcode): |
| 152 | if (hcall_table.has_key(opcode)): | 154 | if (opcode in hcall_table): |
| 153 | return hcall_table[opcode] | 155 | return hcall_table[opcode] |
| 154 | else: | 156 | else: |
| 155 | return opcode | 157 | return opcode |
| @@ -157,8 +159,8 @@ def hcall_table_lookup(opcode): | |||
| 157 | print_ptrn = '%-28s%10s%10s%10s%10s' | 159 | print_ptrn = '%-28s%10s%10s%10s%10s' |
| 158 | 160 | ||
| 159 | def trace_end(): | 161 | def trace_end(): |
| 160 | print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)') | 162 | print(print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)')) |
| 161 | print '-' * 68 | 163 | print('-' * 68) |
| 162 | for opcode in output: | 164 | for opcode in output: |
| 163 | h_name = hcall_table_lookup(opcode) | 165 | h_name = hcall_table_lookup(opcode) |
| 164 | time = output[opcode]['time'] | 166 | time = output[opcode]['time'] |
| @@ -166,14 +168,14 @@ def trace_end(): | |||
| 166 | min_t = output[opcode]['min'] | 168 | min_t = output[opcode]['min'] |
| 167 | max_t = output[opcode]['max'] | 169 | max_t = output[opcode]['max'] |
| 168 | 170 | ||
| 169 | print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt) | 171 | print(print_ptrn % (h_name, cnt, min_t, max_t, time//cnt)) |
| 170 | 172 | ||
| 171 | def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain, | 173 | def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain, |
| 172 | opcode, retval): | 174 | opcode, retval): |
| 173 | if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)): | 175 | if (cpu in d_enter and opcode in d_enter[cpu]): |
| 174 | diff = nsecs(sec, nsec) - d_enter[cpu][opcode] | 176 | diff = nsecs(sec, nsec) - d_enter[cpu][opcode] |
| 175 | 177 | ||
| 176 | if (output.has_key(opcode)): | 178 | if (opcode in output): |
| 177 | output[opcode]['time'] += diff | 179 | output[opcode]['time'] += diff |
| 178 | output[opcode]['cnt'] += 1 | 180 | output[opcode]['cnt'] += 1 |
| 179 | if (output[opcode]['min'] > diff): | 181 | if (output[opcode]['min'] > diff): |
| @@ -190,11 +192,11 @@ def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain, | |||
| 190 | 192 | ||
| 191 | del d_enter[cpu][opcode] | 193 | del d_enter[cpu][opcode] |
| 192 | # else: | 194 | # else: |
| 193 | # print "Can't find matching hcall_enter event. Ignoring sample" | 195 | # print("Can't find matching hcall_enter event. Ignoring sample") |
| 194 | 196 | ||
| 195 | def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm, | 197 | def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm, |
| 196 | callchain, opcode): | 198 | callchain, opcode): |
| 197 | if (d_enter.has_key(cpu)): | 199 | if (cpu in d_enter): |
| 198 | d_enter[cpu][opcode] = nsecs(sec, nsec) | 200 | d_enter[cpu][opcode] = nsecs(sec, nsec) |
| 199 | else: | 201 | else: |
| 200 | d_enter[cpu] = {opcode: nsecs(sec, nsec)} | 202 | d_enter[cpu] = {opcode: nsecs(sec, nsec)} |
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py index 3473e7f66081..3984bf51f3c5 100644 --- a/tools/perf/scripts/python/sched-migration.py +++ b/tools/perf/scripts/python/sched-migration.py | |||
| @@ -1,5 +1,3 @@ | |||
| 1 | #!/usr/bin/python | ||
| 2 | # | ||
| 3 | # Cpu task migration overview toy | 1 | # Cpu task migration overview toy |
| 4 | # | 2 | # |
| 5 | # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> | 3 | # Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com> |
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py index 61621b93affb..987ffae7c8ca 100644 --- a/tools/perf/scripts/python/sctop.py +++ b/tools/perf/scripts/python/sctop.py | |||
| @@ -8,7 +8,14 @@ | |||
| 8 | # will be refreshed every [interval] seconds. The default interval is | 8 | # will be refreshed every [interval] seconds. The default interval is |
| 9 | # 3 seconds. | 9 | # 3 seconds. |
| 10 | 10 | ||
| 11 | import os, sys, thread, time | 11 | from __future__ import print_function |
| 12 | |||
| 13 | import os, sys, time | ||
| 14 | |||
| 15 | try: | ||
| 16 | import thread | ||
| 17 | except ImportError: | ||
| 18 | import _thread as thread | ||
| 12 | 19 | ||
| 13 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | 20 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
| 14 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') | 21 | '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') |
| @@ -62,18 +69,19 @@ def print_syscall_totals(interval): | |||
| 62 | while 1: | 69 | while 1: |
| 63 | clear_term() | 70 | clear_term() |
| 64 | if for_comm is not None: | 71 | if for_comm is not None: |
| 65 | print "\nsyscall events for %s:\n\n" % (for_comm), | 72 | print("\nsyscall events for %s:\n" % (for_comm)) |
| 66 | else: | 73 | else: |
| 67 | print "\nsyscall events:\n\n", | 74 | print("\nsyscall events:\n") |
| 68 | 75 | ||
| 69 | print "%-40s %10s\n" % ("event", "count"), | 76 | print("%-40s %10s" % ("event", "count")) |
| 70 | print "%-40s %10s\n" % ("----------------------------------------", \ | 77 | print("%-40s %10s" % |
| 71 | "----------"), | 78 | ("----------------------------------------", |
| 79 | "----------")) | ||
| 72 | 80 | ||
| 73 | for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ | 81 | for id, val in sorted(syscalls.items(), key = lambda kv: (kv[1], kv[0]), \ |
| 74 | reverse = True): | 82 | reverse = True): |
| 75 | try: | 83 | try: |
| 76 | print "%-40s %10d\n" % (syscall_name(id), val), | 84 | print("%-40s %10d" % (syscall_name(id), val)) |
| 77 | except TypeError: | 85 | except TypeError: |
| 78 | pass | 86 | pass |
| 79 | syscalls.clear() | 87 | syscalls.clear() |
diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py index 1697b5e18c96..5e703efaddcc 100755 --- a/tools/perf/scripts/python/stackcollapse.py +++ b/tools/perf/scripts/python/stackcollapse.py | |||
| @@ -19,6 +19,8 @@ | |||
| 19 | # Written by Paolo Bonzini <pbonzini@redhat.com> | 19 | # Written by Paolo Bonzini <pbonzini@redhat.com> |
| 20 | # Based on Brendan Gregg's stackcollapse-perf.pl script. | 20 | # Based on Brendan Gregg's stackcollapse-perf.pl script. |
| 21 | 21 | ||
| 22 | from __future__ import print_function | ||
| 23 | |||
| 22 | import os | 24 | import os |
| 23 | import sys | 25 | import sys |
| 24 | from collections import defaultdict | 26 | from collections import defaultdict |
| @@ -120,7 +122,6 @@ def process_event(param_dict): | |||
| 120 | lines[stack_string] = lines[stack_string] + 1 | 122 | lines[stack_string] = lines[stack_string] + 1 |
| 121 | 123 | ||
| 122 | def trace_end(): | 124 | def trace_end(): |
| 123 | list = lines.keys() | 125 | list = sorted(lines) |
| 124 | list.sort() | ||
| 125 | for stack in list: | 126 | for stack in list: |
| 126 | print "%s %d" % (stack, lines[stack]) | 127 | print("%s %d" % (stack, lines[stack])) |
diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py index 8410672efb8b..01fa933ff3cf 100644 --- a/tools/perf/scripts/python/stat-cpi.py +++ b/tools/perf/scripts/python/stat-cpi.py | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #!/usr/bin/env python | ||
| 2 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 3 | 2 | ||
| 3 | from __future__ import print_function | ||
| 4 | |||
| 4 | data = {} | 5 | data = {} |
| 5 | times = [] | 6 | times = [] |
| 6 | threads = [] | 7 | threads = [] |
| @@ -20,8 +21,8 @@ def store_key(time, cpu, thread): | |||
| 20 | threads.append(thread) | 21 | threads.append(thread) |
| 21 | 22 | ||
| 22 | def store(time, event, cpu, thread, val, ena, run): | 23 | def store(time, event, cpu, thread, val, ena, run): |
| 23 | #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \ | 24 | #print("event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % |
| 24 | # (event, cpu, thread, time, val, ena, run) | 25 | # (event, cpu, thread, time, val, ena, run)) |
| 25 | 26 | ||
| 26 | store_key(time, cpu, thread) | 27 | store_key(time, cpu, thread) |
| 27 | key = get_key(time, event, cpu, thread) | 28 | key = get_key(time, event, cpu, thread) |
| @@ -59,7 +60,7 @@ def stat__interval(time): | |||
| 59 | if ins != 0: | 60 | if ins != 0: |
| 60 | cpi = cyc/float(ins) | 61 | cpi = cyc/float(ins) |
| 61 | 62 | ||
| 62 | print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins) | 63 | print("%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)) |
| 63 | 64 | ||
| 64 | def trace_end(): | 65 | def trace_end(): |
| 65 | pass | 66 | pass |
| @@ -75,4 +76,4 @@ def trace_end(): | |||
| 75 | # if ins != 0: | 76 | # if ins != 0: |
| 76 | # cpi = cyc/float(ins) | 77 | # cpi = cyc/float(ins) |
| 77 | # | 78 | # |
| 78 | # print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi) | 79 | # print("time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)) |
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py index daf314cc5dd3..42782487b0e9 100644 --- a/tools/perf/scripts/python/syscall-counts-by-pid.py +++ b/tools/perf/scripts/python/syscall-counts-by-pid.py | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | # Displays system-wide system call totals, broken down by syscall. | 5 | # Displays system-wide system call totals, broken down by syscall. |
| 6 | # If a [comm] arg is specified, only syscalls called by [comm] are displayed. | 6 | # If a [comm] arg is specified, only syscalls called by [comm] are displayed. |
| 7 | 7 | ||
| 8 | from __future__ import print_function | ||
| 9 | |||
| 8 | import os, sys | 10 | import os, sys |
| 9 | 11 | ||
| 10 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ | 12 | sys.path.append(os.environ['PERF_EXEC_PATH'] + \ |
| @@ -31,7 +33,7 @@ if len(sys.argv) > 1: | |||
| 31 | syscalls = autodict() | 33 | syscalls = autodict() |
| 32 | 34 | ||
| 33 | def trace_begin(): | 35 | def trace_begin(): |
| 34 | print "Press control+C to stop and show the summary" | 36 | print("Press control+C to stop and show the summary") |
| 35 | 37 | ||
| 36 | def trace_end(): | 38 | def trace_end(): |
| 37 | print_syscall_totals() | 39 | print_syscall_totals() |
| @@ -55,20 +57,20 @@ def syscalls__sys_enter(event_name, context, common_cpu, | |||
| 55 | 57 | ||
| 56 | def print_syscall_totals(): | 58 | def print_syscall_totals(): |
| 57 | if for_comm is not None: | 59 | if for_comm is not None: |
| 58 | print "\nsyscall events for %s:\n\n" % (for_comm), | 60 | print("\nsyscall events for %s:\n" % (for_comm)) |
| 59 | else: | 61 | else: |
| 60 | print "\nsyscall events by comm/pid:\n\n", | 62 | print("\nsyscall events by comm/pid:\n") |
| 61 | 63 | ||
| 62 | print "%-40s %10s\n" % ("comm [pid]/syscalls", "count"), | 64 | print("%-40s %10s" % ("comm [pid]/syscalls", "count")) |
| 63 | print "%-40s %10s\n" % ("----------------------------------------", \ | 65 | print("%-40s %10s" % ("----------------------------------------", |
| 64 | "----------"), | 66 | "----------")) |
| 65 | 67 | ||
| 66 | comm_keys = syscalls.keys() | 68 | comm_keys = syscalls.keys() |
| 67 | for comm in comm_keys: | 69 | for comm in comm_keys: |
| 68 | pid_keys = syscalls[comm].keys() | 70 | pid_keys = syscalls[comm].keys() |
| 69 | for pid in pid_keys: | 71 | for pid in pid_keys: |
| 70 | print "\n%s [%d]\n" % (comm, pid), | 72 | print("\n%s [%d]" % (comm, pid)) |
| 71 | id_keys = syscalls[comm][pid].keys() | 73 | id_keys = syscalls[comm][pid].keys() |
| 72 | for id, val in sorted(syscalls[comm][pid].iteritems(), \ | 74 | for id, val in sorted(syscalls[comm][pid].items(), \ |
| 73 | key = lambda(k, v): (v, k), reverse = True): | 75 | key = lambda kv: (kv[1], kv[0]), reverse = True): |
| 74 | print " %-38s %10d\n" % (syscall_name(id), val), | 76 | print(" %-38s %10d" % (syscall_name(id), val)) |
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py index e66a7730aeb5..0ebd89cfd42c 100644 --- a/tools/perf/scripts/python/syscall-counts.py +++ b/tools/perf/scripts/python/syscall-counts.py | |||
| @@ -5,6 +5,8 @@ | |||
| 5 | # Displays system-wide system call totals, broken down by syscall. | 5 | # Displays system-wide system call totals, broken down by syscall. |
| 6 | # If a [comm] arg is specified, only syscalls called by [comm] are displayed. | 6 | # If a [comm] arg is specified, only syscalls called by [comm] are displayed. |
| 7 | 7 | ||
| 8 | from __future__ import print_function | ||
| 9 | |||
| 8 | import os | 10 | import os |
| 9 | import sys | 11 | import sys |
| 10 | 12 | ||
| @@ -28,7 +30,7 @@ if len(sys.argv) > 1: | |||
| 28 | syscalls = autodict() | 30 | syscalls = autodict() |
| 29 | 31 | ||
| 30 | def trace_begin(): | 32 | def trace_begin(): |
| 31 | print "Press control+C to stop and show the summary" | 33 | print("Press control+C to stop and show the summary") |
| 32 | 34 | ||
| 33 | def trace_end(): | 35 | def trace_end(): |
| 34 | print_syscall_totals() | 36 | print_syscall_totals() |
| @@ -51,14 +53,14 @@ def syscalls__sys_enter(event_name, context, common_cpu, | |||
| 51 | 53 | ||
| 52 | def print_syscall_totals(): | 54 | def print_syscall_totals(): |
| 53 | if for_comm is not None: | 55 | if for_comm is not None: |
| 54 | print "\nsyscall events for %s:\n\n" % (for_comm), | 56 | print("\nsyscall events for %s:\n" % (for_comm)) |
| 55 | else: | 57 | else: |
| 56 | print "\nsyscall events:\n\n", | 58 | print("\nsyscall events:\n") |
| 57 | 59 | ||
| 58 | print "%-40s %10s\n" % ("event", "count"), | 60 | print("%-40s %10s" % ("event", "count")) |
| 59 | print "%-40s %10s\n" % ("----------------------------------------", \ | 61 | print("%-40s %10s" % ("----------------------------------------", |
| 60 | "-----------"), | 62 | "-----------")) |
| 61 | 63 | ||
| 62 | for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \ | 64 | for id, val in sorted(syscalls.items(), key = lambda kv: (kv[1], kv[0]), \ |
| 63 | reverse = True): | 65 | reverse = True): |
| 64 | print "%-40s %10d\n" % (syscall_name(id), val), | 66 | print("%-40s %10d" % (syscall_name(id), val)) |
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py index e952127e4fb0..cb39ac46bc73 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/attr.py | |||
| @@ -1,4 +1,3 @@ | |||
| 1 | #! /usr/bin/python | ||
| 2 | # SPDX-License-Identifier: GPL-2.0 | 1 | # SPDX-License-Identifier: GPL-2.0 |
| 3 | 2 | ||
| 4 | from __future__ import print_function | 3 | from __future__ import print_function |
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c index a20cbc445426..57fc544aedb0 100644 --- a/tools/perf/tests/bp_account.c +++ b/tools/perf/tests/bp_account.c | |||
| @@ -15,7 +15,6 @@ | |||
| 15 | #include <sys/mman.h> | 15 | #include <sys/mman.h> |
| 16 | #include <linux/compiler.h> | 16 | #include <linux/compiler.h> |
| 17 | #include <linux/hw_breakpoint.h> | 17 | #include <linux/hw_breakpoint.h> |
| 18 | #include <sys/ioctl.h> | ||
| 19 | 18 | ||
| 20 | #include "tests.h" | 19 | #include "tests.h" |
| 21 | #include "debug.h" | 20 | #include "debug.h" |
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index dbf2c69944d2..4ebd2681e760 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | #include "thread_map.h" | 15 | #include "thread_map.h" |
| 16 | #include "cpumap.h" | 16 | #include "cpumap.h" |
| 17 | #include "machine.h" | 17 | #include "machine.h" |
| 18 | #include "map.h" | ||
| 19 | #include "symbol.h" | ||
| 18 | #include "event.h" | 20 | #include "event.h" |
| 19 | #include "thread.h" | 21 | #include "thread.h" |
| 20 | 22 | ||
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 7c8d2e422401..077c306c1cae 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "../util/unwind.h" | 10 | #include "../util/unwind.h" |
| 11 | #include "perf_regs.h" | 11 | #include "perf_regs.h" |
| 12 | #include "map.h" | 12 | #include "map.h" |
| 13 | #include "symbol.h" | ||
| 13 | #include "thread.h" | 14 | #include "thread.h" |
| 14 | #include "callchain.h" | 15 | #include "callchain.h" |
| 15 | 16 | ||
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index 5cbba70bcdd0..ea7acf403727 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c | |||
| @@ -43,7 +43,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes | |||
| 43 | return -1; | 43 | return -1; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | if (perf_evsel__test_field(evsel, "prev_comm", 16, true)) | 46 | if (perf_evsel__test_field(evsel, "prev_comm", 16, false)) |
| 47 | ret = -1; | 47 | ret = -1; |
| 48 | 48 | ||
| 49 | if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) | 49 | if (perf_evsel__test_field(evsel, "prev_pid", 4, true)) |
| @@ -55,7 +55,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes | |||
| 55 | if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true)) | 55 | if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true)) |
| 56 | ret = -1; | 56 | ret = -1; |
| 57 | 57 | ||
| 58 | if (perf_evsel__test_field(evsel, "next_comm", 16, true)) | 58 | if (perf_evsel__test_field(evsel, "next_comm", 16, false)) |
| 59 | ret = -1; | 59 | ret = -1; |
| 60 | 60 | ||
| 61 | if (perf_evsel__test_field(evsel, "next_pid", 4, true)) | 61 | if (perf_evsel__test_field(evsel, "next_pid", 4, true)) |
| @@ -73,7 +73,7 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes | |||
| 73 | return -1; | 73 | return -1; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | if (perf_evsel__test_field(evsel, "comm", 16, true)) | 76 | if (perf_evsel__test_field(evsel, "comm", 16, false)) |
| 77 | ret = -1; | 77 | ret = -1; |
| 78 | 78 | ||
| 79 | if (perf_evsel__test_field(evsel, "pid", 4, true)) | 79 | if (perf_evsel__test_field(evsel, "pid", 4, true)) |
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index b889a28fd80b..469958cd7fe0 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include <inttypes.h> | 2 | #include <inttypes.h> |
| 3 | #include "perf.h" | 3 | #include "perf.h" |
| 4 | #include "util/debug.h" | 4 | #include "util/debug.h" |
| 5 | #include "util/map.h" | ||
| 5 | #include "util/symbol.h" | 6 | #include "util/symbol.h" |
| 6 | #include "util/sort.h" | 7 | #include "util/sort.h" |
| 7 | #include "util/evsel.h" | 8 | #include "util/evsel.h" |
| @@ -161,7 +162,7 @@ out: | |||
| 161 | void print_hists_in(struct hists *hists) | 162 | void print_hists_in(struct hists *hists) |
| 162 | { | 163 | { |
| 163 | int i = 0; | 164 | int i = 0; |
| 164 | struct rb_root *root; | 165 | struct rb_root_cached *root; |
| 165 | struct rb_node *node; | 166 | struct rb_node *node; |
| 166 | 167 | ||
| 167 | if (hists__has(hists, need_collapse)) | 168 | if (hists__has(hists, need_collapse)) |
| @@ -170,7 +171,7 @@ void print_hists_in(struct hists *hists) | |||
| 170 | root = hists->entries_in; | 171 | root = hists->entries_in; |
| 171 | 172 | ||
| 172 | pr_info("----- %s --------\n", __func__); | 173 | pr_info("----- %s --------\n", __func__); |
| 173 | node = rb_first(root); | 174 | node = rb_first_cached(root); |
| 174 | while (node) { | 175 | while (node) { |
| 175 | struct hist_entry *he; | 176 | struct hist_entry *he; |
| 176 | 177 | ||
| @@ -191,13 +192,13 @@ void print_hists_in(struct hists *hists) | |||
| 191 | void print_hists_out(struct hists *hists) | 192 | void print_hists_out(struct hists *hists) |
| 192 | { | 193 | { |
| 193 | int i = 0; | 194 | int i = 0; |
| 194 | struct rb_root *root; | 195 | struct rb_root_cached *root; |
| 195 | struct rb_node *node; | 196 | struct rb_node *node; |
| 196 | 197 | ||
| 197 | root = &hists->entries; | 198 | root = &hists->entries; |
| 198 | 199 | ||
| 199 | pr_info("----- %s --------\n", __func__); | 200 | pr_info("----- %s --------\n", __func__); |
| 200 | node = rb_first(root); | 201 | node = rb_first_cached(root); |
| 201 | while (node) { | 202 | while (node) { |
| 202 | struct hist_entry *he; | 203 | struct hist_entry *he; |
| 203 | 204 | ||
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 65fe02bebbee..7a2eed6c783e 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include "perf.h" | 2 | #include "perf.h" |
| 3 | #include "util/debug.h" | 3 | #include "util/debug.h" |
| 4 | #include "util/event.h" | 4 | #include "util/event.h" |
| 5 | #include "util/map.h" | ||
| 5 | #include "util/symbol.h" | 6 | #include "util/symbol.h" |
| 6 | #include "util/sort.h" | 7 | #include "util/sort.h" |
| 7 | #include "util/evsel.h" | 8 | #include "util/evsel.h" |
| @@ -125,8 +126,8 @@ out: | |||
| 125 | static void del_hist_entries(struct hists *hists) | 126 | static void del_hist_entries(struct hists *hists) |
| 126 | { | 127 | { |
| 127 | struct hist_entry *he; | 128 | struct hist_entry *he; |
| 128 | struct rb_root *root_in; | 129 | struct rb_root_cached *root_in; |
| 129 | struct rb_root *root_out; | 130 | struct rb_root_cached *root_out; |
| 130 | struct rb_node *node; | 131 | struct rb_node *node; |
| 131 | 132 | ||
| 132 | if (hists__has(hists, need_collapse)) | 133 | if (hists__has(hists, need_collapse)) |
| @@ -136,12 +137,12 @@ static void del_hist_entries(struct hists *hists) | |||
| 136 | 137 | ||
| 137 | root_out = &hists->entries; | 138 | root_out = &hists->entries; |
| 138 | 139 | ||
| 139 | while (!RB_EMPTY_ROOT(root_out)) { | 140 | while (!RB_EMPTY_ROOT(&root_out->rb_root)) { |
| 140 | node = rb_first(root_out); | 141 | node = rb_first_cached(root_out); |
| 141 | 142 | ||
| 142 | he = rb_entry(node, struct hist_entry, rb_node); | 143 | he = rb_entry(node, struct hist_entry, rb_node); |
| 143 | rb_erase(node, root_out); | 144 | rb_erase_cached(node, root_out); |
| 144 | rb_erase(&he->rb_node_in, root_in); | 145 | rb_erase_cached(&he->rb_node_in, root_in); |
| 145 | hist_entry__delete(he); | 146 | hist_entry__delete(he); |
| 146 | } | 147 | } |
| 147 | } | 148 | } |
| @@ -198,7 +199,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec | |||
| 198 | print_hists_out(hists); | 199 | print_hists_out(hists); |
| 199 | } | 200 | } |
| 200 | 201 | ||
| 201 | root = &hists->entries; | 202 | root = &hists->entries.rb_root; |
| 202 | for (node = rb_first(root), i = 0; | 203 | for (node = rb_first(root), i = 0; |
| 203 | node && (he = rb_entry(node, struct hist_entry, rb_node)); | 204 | node && (he = rb_entry(node, struct hist_entry, rb_node)); |
| 204 | node = rb_next(node), i++) { | 205 | node = rb_next(node), i++) { |
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 1c5bedab3c2c..975844807fe2 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "perf.h" | 2 | #include "perf.h" |
| 3 | #include "util/debug.h" | 3 | #include "util/debug.h" |
| 4 | #include "util/map.h" | ||
| 4 | #include "util/symbol.h" | 5 | #include "util/symbol.h" |
| 5 | #include "util/sort.h" | 6 | #include "util/sort.h" |
| 6 | #include "util/evsel.h" | 7 | #include "util/evsel.h" |
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 9a9d06cb0222..af633db63f4d 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c | |||
| @@ -142,7 +142,7 @@ static int find_sample(struct sample *samples, size_t nr_samples, | |||
| 142 | static int __validate_match(struct hists *hists) | 142 | static int __validate_match(struct hists *hists) |
| 143 | { | 143 | { |
| 144 | size_t count = 0; | 144 | size_t count = 0; |
| 145 | struct rb_root *root; | 145 | struct rb_root_cached *root; |
| 146 | struct rb_node *node; | 146 | struct rb_node *node; |
| 147 | 147 | ||
| 148 | /* | 148 | /* |
| @@ -153,7 +153,7 @@ static int __validate_match(struct hists *hists) | |||
| 153 | else | 153 | else |
| 154 | root = hists->entries_in; | 154 | root = hists->entries_in; |
| 155 | 155 | ||
| 156 | node = rb_first(root); | 156 | node = rb_first_cached(root); |
| 157 | while (node) { | 157 | while (node) { |
| 158 | struct hist_entry *he; | 158 | struct hist_entry *he; |
| 159 | 159 | ||
| @@ -192,7 +192,7 @@ static int __validate_link(struct hists *hists, int idx) | |||
| 192 | size_t count = 0; | 192 | size_t count = 0; |
| 193 | size_t count_pair = 0; | 193 | size_t count_pair = 0; |
| 194 | size_t count_dummy = 0; | 194 | size_t count_dummy = 0; |
| 195 | struct rb_root *root; | 195 | struct rb_root_cached *root; |
| 196 | struct rb_node *node; | 196 | struct rb_node *node; |
| 197 | 197 | ||
| 198 | /* | 198 | /* |
| @@ -205,7 +205,7 @@ static int __validate_link(struct hists *hists, int idx) | |||
| 205 | else | 205 | else |
| 206 | root = hists->entries_in; | 206 | root = hists->entries_in; |
| 207 | 207 | ||
| 208 | node = rb_first(root); | 208 | node = rb_first_cached(root); |
| 209 | while (node) { | 209 | while (node) { |
| 210 | struct hist_entry *he; | 210 | struct hist_entry *he; |
| 211 | 211 | ||
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index faacb4f41460..0a510c524a5d 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include "perf.h" | 2 | #include "perf.h" |
| 3 | #include "util/debug.h" | 3 | #include "util/debug.h" |
| 4 | #include "util/event.h" | 4 | #include "util/event.h" |
| 5 | #include "util/map.h" | ||
| 5 | #include "util/symbol.h" | 6 | #include "util/symbol.h" |
| 6 | #include "util/sort.h" | 7 | #include "util/sort.h" |
| 7 | #include "util/evsel.h" | 8 | #include "util/evsel.h" |
| @@ -91,8 +92,8 @@ out: | |||
| 91 | static void del_hist_entries(struct hists *hists) | 92 | static void del_hist_entries(struct hists *hists) |
| 92 | { | 93 | { |
| 93 | struct hist_entry *he; | 94 | struct hist_entry *he; |
| 94 | struct rb_root *root_in; | 95 | struct rb_root_cached *root_in; |
| 95 | struct rb_root *root_out; | 96 | struct rb_root_cached *root_out; |
| 96 | struct rb_node *node; | 97 | struct rb_node *node; |
| 97 | 98 | ||
| 98 | if (hists__has(hists, need_collapse)) | 99 | if (hists__has(hists, need_collapse)) |
| @@ -102,12 +103,12 @@ static void del_hist_entries(struct hists *hists) | |||
| 102 | 103 | ||
| 103 | root_out = &hists->entries; | 104 | root_out = &hists->entries; |
| 104 | 105 | ||
| 105 | while (!RB_EMPTY_ROOT(root_out)) { | 106 | while (!RB_EMPTY_ROOT(&root_out->rb_root)) { |
| 106 | node = rb_first(root_out); | 107 | node = rb_first_cached(root_out); |
| 107 | 108 | ||
| 108 | he = rb_entry(node, struct hist_entry, rb_node); | 109 | he = rb_entry(node, struct hist_entry, rb_node); |
| 109 | rb_erase(node, root_out); | 110 | rb_erase_cached(node, root_out); |
| 110 | rb_erase(&he->rb_node_in, root_in); | 111 | rb_erase_cached(&he->rb_node_in, root_in); |
| 111 | hist_entry__delete(he); | 112 | hist_entry__delete(he); |
| 112 | } | 113 | } |
| 113 | } | 114 | } |
| @@ -126,7 +127,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) | |||
| 126 | int err; | 127 | int err; |
| 127 | struct hists *hists = evsel__hists(evsel); | 128 | struct hists *hists = evsel__hists(evsel); |
| 128 | struct hist_entry *he; | 129 | struct hist_entry *he; |
| 129 | struct rb_root *root; | 130 | struct rb_root_cached *root; |
| 130 | struct rb_node *node; | 131 | struct rb_node *node; |
| 131 | 132 | ||
| 132 | field_order = NULL; | 133 | field_order = NULL; |
| @@ -162,7 +163,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) | |||
| 162 | } | 163 | } |
| 163 | 164 | ||
| 164 | root = &hists->entries; | 165 | root = &hists->entries; |
| 165 | node = rb_first(root); | 166 | node = rb_first_cached(root); |
| 166 | he = rb_entry(node, struct hist_entry, rb_node); | 167 | he = rb_entry(node, struct hist_entry, rb_node); |
| 167 | TEST_ASSERT_VAL("Invalid hist entry", | 168 | TEST_ASSERT_VAL("Invalid hist entry", |
| 168 | !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") && | 169 | !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") && |
| @@ -228,7 +229,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) | |||
| 228 | int err; | 229 | int err; |
| 229 | struct hists *hists = evsel__hists(evsel); | 230 | struct hists *hists = evsel__hists(evsel); |
| 230 | struct hist_entry *he; | 231 | struct hist_entry *he; |
| 231 | struct rb_root *root; | 232 | struct rb_root_cached *root; |
| 232 | struct rb_node *node; | 233 | struct rb_node *node; |
| 233 | 234 | ||
| 234 | field_order = "overhead,cpu"; | 235 | field_order = "overhead,cpu"; |
| @@ -262,7 +263,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) | |||
| 262 | } | 263 | } |
| 263 | 264 | ||
| 264 | root = &hists->entries; | 265 | root = &hists->entries; |
| 265 | node = rb_first(root); | 266 | node = rb_first_cached(root); |
| 266 | he = rb_entry(node, struct hist_entry, rb_node); | 267 | he = rb_entry(node, struct hist_entry, rb_node); |
| 267 | TEST_ASSERT_VAL("Invalid hist entry", | 268 | TEST_ASSERT_VAL("Invalid hist entry", |
| 268 | CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300); | 269 | CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300); |
| @@ -284,7 +285,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) | |||
| 284 | int err; | 285 | int err; |
| 285 | struct hists *hists = evsel__hists(evsel); | 286 | struct hists *hists = evsel__hists(evsel); |
| 286 | struct hist_entry *he; | 287 | struct hist_entry *he; |
| 287 | struct rb_root *root; | 288 | struct rb_root_cached *root; |
| 288 | struct rb_node *node; | 289 | struct rb_node *node; |
| 289 | 290 | ||
| 290 | field_order = "comm,overhead,dso"; | 291 | field_order = "comm,overhead,dso"; |
| @@ -316,7 +317,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) | |||
| 316 | } | 317 | } |
| 317 | 318 | ||
| 318 | root = &hists->entries; | 319 | root = &hists->entries; |
| 319 | node = rb_first(root); | 320 | node = rb_first_cached(root); |
| 320 | he = rb_entry(node, struct hist_entry, rb_node); | 321 | he = rb_entry(node, struct hist_entry, rb_node); |
| 321 | TEST_ASSERT_VAL("Invalid hist entry", | 322 | TEST_ASSERT_VAL("Invalid hist entry", |
| 322 | !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") && | 323 | !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") && |
| @@ -358,7 +359,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) | |||
| 358 | int err; | 359 | int err; |
| 359 | struct hists *hists = evsel__hists(evsel); | 360 | struct hists *hists = evsel__hists(evsel); |
| 360 | struct hist_entry *he; | 361 | struct hist_entry *he; |
| 361 | struct rb_root *root; | 362 | struct rb_root_cached *root; |
| 362 | struct rb_node *node; | 363 | struct rb_node *node; |
| 363 | 364 | ||
| 364 | field_order = "dso,sym,comm,overhead,dso"; | 365 | field_order = "dso,sym,comm,overhead,dso"; |
| @@ -394,7 +395,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) | |||
| 394 | } | 395 | } |
| 395 | 396 | ||
| 396 | root = &hists->entries; | 397 | root = &hists->entries; |
| 397 | node = rb_first(root); | 398 | node = rb_first_cached(root); |
| 398 | he = rb_entry(node, struct hist_entry, rb_node); | 399 | he = rb_entry(node, struct hist_entry, rb_node); |
| 399 | TEST_ASSERT_VAL("Invalid hist entry", | 400 | TEST_ASSERT_VAL("Invalid hist entry", |
| 400 | !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") && | 401 | !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") && |
| @@ -460,7 +461,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) | |||
| 460 | int err; | 461 | int err; |
| 461 | struct hists *hists = evsel__hists(evsel); | 462 | struct hists *hists = evsel__hists(evsel); |
| 462 | struct hist_entry *he; | 463 | struct hist_entry *he; |
| 463 | struct rb_root *root; | 464 | struct rb_root_cached *root; |
| 464 | struct rb_node *node; | 465 | struct rb_node *node; |
| 465 | 466 | ||
| 466 | field_order = "cpu,pid,comm,dso,sym"; | 467 | field_order = "cpu,pid,comm,dso,sym"; |
| @@ -497,7 +498,7 @@ static int test5(struct perf_evsel *evsel, struct machine *machine) | |||
| 497 | } | 498 | } |
| 498 | 499 | ||
| 499 | root = &hists->entries; | 500 | root = &hists->entries; |
| 500 | node = rb_first(root); | 501 | node = rb_first_cached(root); |
| 501 | he = rb_entry(node, struct hist_entry, rb_node); | 502 | he = rb_entry(node, struct hist_entry, rb_node); |
| 502 | 503 | ||
| 503 | TEST_ASSERT_VAL("Invalid hist entry", | 504 | TEST_ASSERT_VAL("Invalid hist entry", |
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 5ede9b561d32..ba87e6e8d18c 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | #include "tests.h" | 11 | #include "tests.h" |
| 12 | #include "machine.h" | 12 | #include "machine.h" |
| 13 | #include "thread_map.h" | 13 | #include "thread_map.h" |
| 14 | #include "map.h" | ||
| 14 | #include "symbol.h" | 15 | #include "symbol.h" |
| 15 | #include "thread.h" | 16 | #include "thread.h" |
| 16 | #include "util.h" | 17 | #include "util.h" |
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 3b97ac018d5a..4a69c07f4101 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c | |||
| @@ -1330,6 +1330,26 @@ static int test__checkevent_complex_name(struct perf_evlist *evlist) | |||
| 1330 | return 0; | 1330 | return 0; |
| 1331 | } | 1331 | } |
| 1332 | 1332 | ||
| 1333 | static int test__sym_event_slash(struct perf_evlist *evlist) | ||
| 1334 | { | ||
| 1335 | struct perf_evsel *evsel = perf_evlist__first(evlist); | ||
| 1336 | |||
| 1337 | TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); | ||
| 1338 | TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); | ||
| 1339 | TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel); | ||
| 1340 | return 0; | ||
| 1341 | } | ||
| 1342 | |||
| 1343 | static int test__sym_event_dc(struct perf_evlist *evlist) | ||
| 1344 | { | ||
| 1345 | struct perf_evsel *evsel = perf_evlist__first(evlist); | ||
| 1346 | |||
| 1347 | TEST_ASSERT_VAL("wrong type", evsel->attr.type == PERF_TYPE_HARDWARE); | ||
| 1348 | TEST_ASSERT_VAL("wrong config", evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES); | ||
| 1349 | TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); | ||
| 1350 | return 0; | ||
| 1351 | } | ||
| 1352 | |||
| 1333 | static int count_tracepoints(void) | 1353 | static int count_tracepoints(void) |
| 1334 | { | 1354 | { |
| 1335 | struct dirent *events_ent; | 1355 | struct dirent *events_ent; |
| @@ -1670,6 +1690,16 @@ static struct evlist_test test__events[] = { | |||
| 1670 | .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", | 1690 | .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk", |
| 1671 | .check = test__checkevent_complex_name, | 1691 | .check = test__checkevent_complex_name, |
| 1672 | .id = 53 | 1692 | .id = 53 |
| 1693 | }, | ||
| 1694 | { | ||
| 1695 | .name = "cycles//u", | ||
| 1696 | .check = test__sym_event_slash, | ||
| 1697 | .id = 54, | ||
| 1698 | }, | ||
| 1699 | { | ||
| 1700 | .name = "cycles:k", | ||
| 1701 | .check = test__sym_event_dc, | ||
| 1702 | .id = 55, | ||
| 1673 | } | 1703 | } |
| 1674 | }; | 1704 | }; |
| 1675 | 1705 | ||
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 7bedf8608fdd..14a78898d79e 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c | |||
| @@ -4,7 +4,9 @@ | |||
| 4 | #include "util.h" | 4 | #include "util.h" |
| 5 | #include "tests.h" | 5 | #include "tests.h" |
| 6 | #include <errno.h> | 6 | #include <errno.h> |
| 7 | #include <stdio.h> | ||
| 7 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
| 9 | #include <linux/limits.h> | ||
| 8 | 10 | ||
| 9 | /* Simulated format definitions. */ | 11 | /* Simulated format definitions. */ |
| 10 | static struct test_format { | 12 | static struct test_format { |
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 0e2d00d69e6e..236ce0d6c826 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c | |||
| @@ -1,9 +1,11 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <stdbool.h> | 2 | #include <stdbool.h> |
| 3 | #include <inttypes.h> | 3 | #include <inttypes.h> |
| 4 | #include <linux/bitops.h> | ||
| 4 | #include <linux/kernel.h> | 5 | #include <linux/kernel.h> |
| 5 | #include <linux/types.h> | 6 | #include <linux/types.h> |
| 6 | 7 | ||
| 8 | #include "branch.h" | ||
| 7 | #include "util.h" | 9 | #include "util.h" |
| 8 | #include "event.h" | 10 | #include "event.h" |
| 9 | #include "evsel.h" | 11 | #include "evsel.h" |
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c index 5059452d27dd..8bfaa630389c 100644 --- a/tools/perf/tests/sdt.c +++ b/tools/perf/tests/sdt.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <stdio.h> | 3 | #include <stdio.h> |
| 4 | #include <sys/epoll.h> | 4 | #include <sys/epoll.h> |
| 5 | #include <util/evlist.h> | 5 | #include <util/evlist.h> |
| 6 | #include <util/symbol.h> | ||
| 6 | #include <linux/filter.h> | 7 | #include <linux/filter.h> |
| 7 | #include "tests.h" | 8 | #include "tests.h" |
| 8 | #include "debug.h" | 9 | #include "debug.h" |
diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh index 6293cc660947..e37787be672b 100644 --- a/tools/perf/tests/shell/lib/probe.sh +++ b/tools/perf/tests/shell/lib/probe.sh | |||
| @@ -4,3 +4,8 @@ skip_if_no_perf_probe() { | |||
| 4 | perf probe 2>&1 | grep -q 'is not a perf-command' && return 2 | 4 | perf probe 2>&1 | grep -q 'is not a perf-command' && return 2 |
| 5 | return 0 | 5 | return 0 |
| 6 | } | 6 | } |
| 7 | |||
| 8 | skip_if_no_perf_trace() { | ||
| 9 | perf trace -h 2>&1 | grep -q -e 'is not a perf-command' -e 'trace command not available' && return 2 | ||
| 10 | return 0 | ||
| 11 | } | ||
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 50109f27ca07..147efeb6b195 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | . $(dirname $0)/lib/probe.sh | 12 | . $(dirname $0)/lib/probe.sh |
| 13 | 13 | ||
| 14 | skip_if_no_perf_probe || exit 2 | 14 | skip_if_no_perf_probe || exit 2 |
| 15 | skip_if_no_perf_trace || exit 2 | ||
| 15 | 16 | ||
| 16 | . $(dirname $0)/lib/probe_vfs_getname.sh | 17 | . $(dirname $0)/lib/probe_vfs_getname.sh |
| 17 | 18 | ||
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 637365099b7d..85f328ddf897 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build | |||
| @@ -1,15 +1,15 @@ | |||
| 1 | libperf-y += clone.o | 1 | perf-y += clone.o |
| 2 | libperf-y += fcntl.o | 2 | perf-y += fcntl.o |
| 3 | libperf-y += flock.o | 3 | perf-y += flock.o |
| 4 | ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) | 4 | ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) |
| 5 | libperf-y += ioctl.o | 5 | perf-y += ioctl.o |
| 6 | endif | 6 | endif |
| 7 | libperf-y += kcmp.o | 7 | perf-y += kcmp.o |
| 8 | libperf-y += mount_flags.o | 8 | perf-y += mount_flags.o |
| 9 | libperf-y += pkey_alloc.o | 9 | perf-y += pkey_alloc.o |
| 10 | libperf-y += arch_prctl.o | 10 | perf-y += arch_prctl.o |
| 11 | libperf-y += prctl.o | 11 | perf-y += prctl.o |
| 12 | libperf-y += renameat.o | 12 | perf-y += renameat.o |
| 13 | libperf-y += sockaddr.o | 13 | perf-y += sockaddr.o |
| 14 | libperf-y += socket.o | 14 | perf-y += socket.o |
| 15 | libperf-y += statx.o | 15 | perf-y += statx.o |
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c index 620350d41209..52242fa4072b 100644 --- a/tools/perf/trace/beauty/ioctl.c +++ b/tools/perf/trace/beauty/ioctl.c | |||
| @@ -175,7 +175,7 @@ static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size, boo | |||
| 175 | size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) | 175 | size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg) |
| 176 | { | 176 | { |
| 177 | unsigned long cmd = arg->val; | 177 | unsigned long cmd = arg->val; |
| 178 | unsigned int fd = syscall_arg__val(arg, 0); | 178 | int fd = syscall_arg__val(arg, 0); |
| 179 | struct file *file = thread__files_entry(arg->thread, fd); | 179 | struct file *file = thread__files_entry(arg->thread, fd); |
| 180 | 180 | ||
| 181 | if (file != NULL) { | 181 | if (file != NULL) { |
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c index 6897fab40dcc..d4d10b33ba0e 100644 --- a/tools/perf/trace/beauty/waitid_options.c +++ b/tools/perf/trace/beauty/waitid_options.c | |||
| @@ -11,7 +11,7 @@ static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size, | |||
| 11 | 11 | ||
| 12 | #define P_OPTION(n) \ | 12 | #define P_OPTION(n) \ |
| 13 | if (options & W##n) { \ | 13 | if (options & W##n) { \ |
| 14 | printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : #n); \ | 14 | printed += scnprintf(bf + printed, size - printed, "%s%s%s", printed ? "|" : "", show_prefix ? prefix : "", #n); \ |
| 15 | options &= ~W##n; \ | 15 | options &= ~W##n; \ |
| 16 | } | 16 | } |
| 17 | 17 | ||
diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build index 0a73538c0441..3aff83c3275f 100644 --- a/tools/perf/ui/Build +++ b/tools/perf/ui/Build | |||
| @@ -1,14 +1,14 @@ | |||
| 1 | libperf-y += setup.o | 1 | perf-y += setup.o |
| 2 | libperf-y += helpline.o | 2 | perf-y += helpline.o |
| 3 | libperf-y += progress.o | 3 | perf-y += progress.o |
| 4 | libperf-y += util.o | 4 | perf-y += util.o |
| 5 | libperf-y += hist.o | 5 | perf-y += hist.o |
| 6 | libperf-y += stdio/hist.o | 6 | perf-y += stdio/hist.o |
| 7 | 7 | ||
| 8 | CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" | 8 | CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" |
| 9 | 9 | ||
| 10 | libperf-$(CONFIG_SLANG) += browser.o | 10 | perf-$(CONFIG_SLANG) += browser.o |
| 11 | libperf-$(CONFIG_SLANG) += browsers/ | 11 | perf-$(CONFIG_SLANG) += browsers/ |
| 12 | libperf-$(CONFIG_SLANG) += tui/ | 12 | perf-$(CONFIG_SLANG) += tui/ |
| 13 | 13 | ||
| 14 | CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST | 14 | CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST |
diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build index de223f5bed58..8fee56b46502 100644 --- a/tools/perf/ui/browsers/Build +++ b/tools/perf/ui/browsers/Build | |||
| @@ -1,8 +1,8 @@ | |||
| 1 | libperf-y += annotate.o | 1 | perf-y += annotate.o |
| 2 | libperf-y += hists.o | 2 | perf-y += hists.o |
| 3 | libperf-y += map.o | 3 | perf-y += map.o |
| 4 | libperf-y += scripts.o | 4 | perf-y += scripts.o |
| 5 | libperf-y += header.o | 5 | perf-y += header.o |
| 6 | 6 | ||
| 7 | CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST | 7 | CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST |
| 8 | CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST | 8 | CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST |
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 82e16bf84466..35bdfd8b1e71 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include "../../util/annotate.h" | 7 | #include "../../util/annotate.h" |
| 8 | #include "../../util/hist.h" | 8 | #include "../../util/hist.h" |
| 9 | #include "../../util/sort.h" | 9 | #include "../../util/sort.h" |
| 10 | #include "../../util/map.h" | ||
| 10 | #include "../../util/symbol.h" | 11 | #include "../../util/symbol.h" |
| 11 | #include "../../util/evsel.h" | 12 | #include "../../util/evsel.h" |
| 12 | #include "../../util/evlist.h" | 13 | #include "../../util/evlist.h" |
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index d75492189acb..5aeb663dd184 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c | |||
| @@ -35,7 +35,7 @@ static int list_menu__run(struct ui_browser *menu) | |||
| 35 | { | 35 | { |
| 36 | int key; | 36 | int key; |
| 37 | unsigned long offset; | 37 | unsigned long offset; |
| 38 | const char help[] = | 38 | static const char help[] = |
| 39 | "h/?/F1 Show this window\n" | 39 | "h/?/F1 Show this window\n" |
| 40 | "UP/DOWN/PGUP\n" | 40 | "UP/DOWN/PGUP\n" |
| 41 | "PGDN/SPACE\n" | 41 | "PGDN/SPACE\n" |
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index ffac1d54a3d4..aef800d97ea1 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c | |||
| @@ -8,9 +8,12 @@ | |||
| 8 | #include <linux/rbtree.h> | 8 | #include <linux/rbtree.h> |
| 9 | #include <sys/ttydefaults.h> | 9 | #include <sys/ttydefaults.h> |
| 10 | 10 | ||
| 11 | #include "../../util/callchain.h" | ||
| 11 | #include "../../util/evsel.h" | 12 | #include "../../util/evsel.h" |
| 12 | #include "../../util/evlist.h" | 13 | #include "../../util/evlist.h" |
| 13 | #include "../../util/hist.h" | 14 | #include "../../util/hist.h" |
| 15 | #include "../../util/map.h" | ||
| 16 | #include "../../util/symbol.h" | ||
| 14 | #include "../../util/pstack.h" | 17 | #include "../../util/pstack.h" |
| 15 | #include "../../util/sort.h" | 18 | #include "../../util/sort.h" |
| 16 | #include "../../util/util.h" | 19 | #include "../../util/util.h" |
| @@ -49,7 +52,7 @@ static int hist_browser__get_folding(struct hist_browser *browser) | |||
| 49 | struct hists *hists = browser->hists; | 52 | struct hists *hists = browser->hists; |
| 50 | int unfolded_rows = 0; | 53 | int unfolded_rows = 0; |
| 51 | 54 | ||
| 52 | for (nd = rb_first(&hists->entries); | 55 | for (nd = rb_first_cached(&hists->entries); |
| 53 | (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; | 56 | (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; |
| 54 | nd = rb_hierarchy_next(nd)) { | 57 | nd = rb_hierarchy_next(nd)) { |
| 55 | struct hist_entry *he = | 58 | struct hist_entry *he = |
| @@ -267,7 +270,7 @@ static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he, | |||
| 267 | if (he->has_no_entry) | 270 | if (he->has_no_entry) |
| 268 | return 1; | 271 | return 1; |
| 269 | 272 | ||
| 270 | node = rb_first(&he->hroot_out); | 273 | node = rb_first_cached(&he->hroot_out); |
| 271 | while (node) { | 274 | while (node) { |
| 272 | float percent; | 275 | float percent; |
| 273 | 276 | ||
| @@ -372,7 +375,7 @@ static void hist_entry__init_have_children(struct hist_entry *he) | |||
| 372 | he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); | 375 | he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); |
| 373 | callchain__init_have_children(&he->sorted_chain); | 376 | callchain__init_have_children(&he->sorted_chain); |
| 374 | } else { | 377 | } else { |
| 375 | he->has_children = !RB_EMPTY_ROOT(&he->hroot_out); | 378 | he->has_children = !RB_EMPTY_ROOT(&he->hroot_out.rb_root); |
| 376 | } | 379 | } |
| 377 | 380 | ||
| 378 | he->init_have_children = true; | 381 | he->init_have_children = true; |
| @@ -508,7 +511,7 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he, | |||
| 508 | struct hist_entry *child; | 511 | struct hist_entry *child; |
| 509 | int n = 0; | 512 | int n = 0; |
| 510 | 513 | ||
| 511 | for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) { | 514 | for (nd = rb_first_cached(&he->hroot_out); nd; nd = rb_next(nd)) { |
| 512 | child = rb_entry(nd, struct hist_entry, rb_node); | 515 | child = rb_entry(nd, struct hist_entry, rb_node); |
| 513 | percent = hist_entry__get_percent_limit(child); | 516 | percent = hist_entry__get_percent_limit(child); |
| 514 | if (!child->filtered && percent >= hb->min_pcnt) | 517 | if (!child->filtered && percent >= hb->min_pcnt) |
| @@ -566,7 +569,7 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold) | |||
| 566 | struct rb_node *nd; | 569 | struct rb_node *nd; |
| 567 | struct hist_entry *he; | 570 | struct hist_entry *he; |
| 568 | 571 | ||
| 569 | nd = rb_first(&browser->hists->entries); | 572 | nd = rb_first_cached(&browser->hists->entries); |
| 570 | while (nd) { | 573 | while (nd) { |
| 571 | he = rb_entry(nd, struct hist_entry, rb_node); | 574 | he = rb_entry(nd, struct hist_entry, rb_node); |
| 572 | 575 | ||
| @@ -1738,7 +1741,7 @@ static void ui_browser__hists_init_top(struct ui_browser *browser) | |||
| 1738 | struct hist_browser *hb; | 1741 | struct hist_browser *hb; |
| 1739 | 1742 | ||
| 1740 | hb = container_of(browser, struct hist_browser, b); | 1743 | hb = container_of(browser, struct hist_browser, b); |
| 1741 | browser->top = rb_first(&hb->hists->entries); | 1744 | browser->top = rb_first_cached(&hb->hists->entries); |
| 1742 | } | 1745 | } |
| 1743 | } | 1746 | } |
| 1744 | 1747 | ||
| @@ -2649,7 +2652,7 @@ add_socket_opt(struct hist_browser *browser, struct popup_action *act, | |||
| 2649 | static void hist_browser__update_nr_entries(struct hist_browser *hb) | 2652 | static void hist_browser__update_nr_entries(struct hist_browser *hb) |
| 2650 | { | 2653 | { |
| 2651 | u64 nr_entries = 0; | 2654 | u64 nr_entries = 0; |
| 2652 | struct rb_node *nd = rb_first(&hb->hists->entries); | 2655 | struct rb_node *nd = rb_first_cached(&hb->hists->entries); |
| 2653 | 2656 | ||
| 2654 | if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { | 2657 | if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) { |
| 2655 | hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; | 2658 | hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries; |
| @@ -2669,7 +2672,7 @@ static void hist_browser__update_percent_limit(struct hist_browser *hb, | |||
| 2669 | double percent) | 2672 | double percent) |
| 2670 | { | 2673 | { |
| 2671 | struct hist_entry *he; | 2674 | struct hist_entry *he; |
| 2672 | struct rb_node *nd = rb_first(&hb->hists->entries); | 2675 | struct rb_node *nd = rb_first_cached(&hb->hists->entries); |
| 2673 | u64 total = hists__total_period(hb->hists); | 2676 | u64 total = hists__total_period(hb->hists); |
| 2674 | u64 min_callchain_hits = total * (percent / 100); | 2677 | u64 min_callchain_hits = total * (percent / 100); |
| 2675 | 2678 | ||
| @@ -2748,7 +2751,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 2748 | "S Zoom into current Processor Socket\n" \ | 2751 | "S Zoom into current Processor Socket\n" \ |
| 2749 | 2752 | ||
| 2750 | /* help messages are sorted by lexical order of the hotkey */ | 2753 | /* help messages are sorted by lexical order of the hotkey */ |
| 2751 | const char report_help[] = HIST_BROWSER_HELP_COMMON | 2754 | static const char report_help[] = HIST_BROWSER_HELP_COMMON |
| 2752 | "i Show header information\n" | 2755 | "i Show header information\n" |
| 2753 | "P Print histograms to perf.hist.N\n" | 2756 | "P Print histograms to perf.hist.N\n" |
| 2754 | "r Run available scripts\n" | 2757 | "r Run available scripts\n" |
| @@ -2756,7 +2759,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
| 2756 | "t Zoom into current Thread\n" | 2759 | "t Zoom into current Thread\n" |
| 2757 | "V Verbose (DSO names in callchains, etc)\n" | 2760 | "V Verbose (DSO names in callchains, etc)\n" |
| 2758 | "/ Filter symbol by name"; | 2761 | "/ Filter symbol by name"; |
| 2759 | const char top_help[] = HIST_BROWSER_HELP_COMMON | 2762 | static const char top_help[] = HIST_BROWSER_HELP_COMMON |
| 2760 | "P Print histograms to perf.hist.N\n" | 2763 | "P Print histograms to perf.hist.N\n" |
| 2761 | "t Zoom into current Thread\n" | 2764 | "t Zoom into current Thread\n" |
| 2762 | "V Verbose (DSO names in callchains, etc)\n" | 2765 | "V Verbose (DSO names in callchains, etc)\n" |
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index 5b8b8c637686..c70d9337405b 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <linux/bitops.h> | 6 | #include <linux/bitops.h> |
| 7 | #include "../../util/util.h" | 7 | #include "../../util/util.h" |
| 8 | #include "../../util/debug.h" | 8 | #include "../../util/debug.h" |
| 9 | #include "../../util/map.h" | ||
| 9 | #include "../../util/symbol.h" | 10 | #include "../../util/symbol.h" |
| 10 | #include "../browser.h" | 11 | #include "../browser.h" |
| 11 | #include "../helpline.h" | 12 | #include "../helpline.h" |
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 48428c9acd89..df49c9ba1785 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c | |||
| @@ -1,8 +1,11 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "gtk.h" | 2 | #include "gtk.h" |
| 3 | #include "util/sort.h" | ||
| 3 | #include "util/debug.h" | 4 | #include "util/debug.h" |
| 4 | #include "util/annotate.h" | 5 | #include "util/annotate.h" |
| 5 | #include "util/evsel.h" | 6 | #include "util/evsel.h" |
| 7 | #include "util/map.h" | ||
| 8 | #include "util/symbol.h" | ||
| 6 | #include "ui/helpline.h" | 9 | #include "ui/helpline.h" |
| 7 | #include <inttypes.h> | 10 | #include <inttypes.h> |
| 8 | #include <signal.h> | 11 | #include <signal.h> |
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c index 4ab663ec3e5e..0c08890f006a 100644 --- a/tools/perf/ui/gtk/hists.c +++ b/tools/perf/ui/gtk/hists.c | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "../evlist.h" | 2 | #include "../evlist.h" |
| 3 | #include "../cache.h" | 3 | #include "../cache.h" |
| 4 | #include "../callchain.h" | ||
| 4 | #include "../evsel.h" | 5 | #include "../evsel.h" |
| 5 | #include "../sort.h" | 6 | #include "../sort.h" |
| 6 | #include "../hist.h" | 7 | #include "../hist.h" |
| @@ -353,7 +354,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, | |||
| 353 | 354 | ||
| 354 | g_object_unref(GTK_TREE_MODEL(store)); | 355 | g_object_unref(GTK_TREE_MODEL(store)); |
| 355 | 356 | ||
| 356 | for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { | 357 | for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) { |
| 357 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 358 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
| 358 | GtkTreeIter iter; | 359 | GtkTreeIter iter; |
| 359 | u64 total = hists__total_period(h->hists); | 360 | u64 total = hists__total_period(h->hists); |
| @@ -401,7 +402,7 @@ static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists, | |||
| 401 | } | 402 | } |
| 402 | 403 | ||
| 403 | static void perf_gtk__add_hierarchy_entries(struct hists *hists, | 404 | static void perf_gtk__add_hierarchy_entries(struct hists *hists, |
| 404 | struct rb_root *root, | 405 | struct rb_root_cached *root, |
| 405 | GtkTreeStore *store, | 406 | GtkTreeStore *store, |
| 406 | GtkTreeIter *parent, | 407 | GtkTreeIter *parent, |
| 407 | struct perf_hpp *hpp, | 408 | struct perf_hpp *hpp, |
| @@ -415,7 +416,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists, | |||
| 415 | u64 total = hists__total_period(hists); | 416 | u64 total = hists__total_period(hists); |
| 416 | int size; | 417 | int size; |
| 417 | 418 | ||
| 418 | for (node = rb_first(root); node; node = rb_next(node)) { | 419 | for (node = rb_first_cached(root); node; node = rb_next(node)) { |
| 419 | GtkTreeIter iter; | 420 | GtkTreeIter iter; |
| 420 | float percent; | 421 | float percent; |
| 421 | char *bf; | 422 | char *bf; |
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c index fe3dfaa64a91..412d6f1626e3 100644 --- a/tools/perf/ui/hist.c +++ b/tools/perf/ui/hist.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <math.h> | 3 | #include <math.h> |
| 4 | #include <linux/compiler.h> | 4 | #include <linux/compiler.h> |
| 5 | 5 | ||
| 6 | #include "../util/callchain.h" | ||
| 6 | #include "../util/hist.h" | 7 | #include "../util/hist.h" |
| 7 | #include "../util/util.h" | 8 | #include "../util/util.h" |
| 8 | #include "../util/sort.h" | 9 | #include "../util/sort.h" |
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index 74c4ae1f0a05..a60f2993d390 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c | |||
| @@ -2,8 +2,12 @@ | |||
| 2 | #include <stdio.h> | 2 | #include <stdio.h> |
| 3 | #include <linux/string.h> | 3 | #include <linux/string.h> |
| 4 | 4 | ||
| 5 | #include "../../util/callchain.h" | ||
| 5 | #include "../../util/util.h" | 6 | #include "../../util/util.h" |
| 6 | #include "../../util/hist.h" | 7 | #include "../../util/hist.h" |
| 8 | #include "../../util/map.h" | ||
| 9 | #include "../../util/map_groups.h" | ||
| 10 | #include "../../util/symbol.h" | ||
| 7 | #include "../../util/sort.h" | 11 | #include "../../util/sort.h" |
| 8 | #include "../../util/evsel.h" | 12 | #include "../../util/evsel.h" |
| 9 | #include "../../util/srcline.h" | 13 | #include "../../util/srcline.h" |
| @@ -788,7 +792,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows, | |||
| 788 | 792 | ||
| 789 | indent = hists__overhead_width(hists) + 4; | 793 | indent = hists__overhead_width(hists) + 4; |
| 790 | 794 | ||
| 791 | for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { | 795 | for (nd = rb_first_cached(&hists->entries); nd; |
| 796 | nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) { | ||
| 792 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 797 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
| 793 | float percent; | 798 | float percent; |
| 794 | 799 | ||
diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build index 9e4c6ca41a9f..f916df33a1a7 100644 --- a/tools/perf/ui/tui/Build +++ b/tools/perf/ui/tui/Build | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | libperf-y += setup.o | 1 | perf-y += setup.o |
| 2 | libperf-y += util.o | 2 | perf-y += util.o |
| 3 | libperf-y += helpline.o | 3 | perf-y += helpline.o |
| 4 | libperf-y += progress.o | 4 | perf-y += progress.o |
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index af72be7f5b3b..8dd3102301ea 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build | |||
| @@ -1,158 +1,164 @@ | |||
| 1 | libperf-y += annotate.o | 1 | perf-y += annotate.o |
| 2 | libperf-y += block-range.o | 2 | perf-y += block-range.o |
| 3 | libperf-y += build-id.o | 3 | perf-y += build-id.o |
| 4 | libperf-y += config.o | 4 | perf-y += config.o |
| 5 | libperf-y += ctype.o | 5 | perf-y += ctype.o |
| 6 | libperf-y += db-export.o | 6 | perf-y += db-export.o |
| 7 | libperf-y += env.o | 7 | perf-y += env.o |
| 8 | libperf-y += event.o | 8 | perf-y += event.o |
| 9 | libperf-y += evlist.o | 9 | perf-y += evlist.o |
| 10 | libperf-y += evsel.o | 10 | perf-y += evsel.o |
| 11 | libperf-y += evsel_fprintf.o | 11 | perf-y += evsel_fprintf.o |
| 12 | libperf-y += find_bit.o | 12 | perf-y += find_bit.o |
| 13 | libperf-y += get_current_dir_name.o | 13 | perf-y += get_current_dir_name.o |
| 14 | libperf-y += kallsyms.o | 14 | perf-y += kallsyms.o |
| 15 | libperf-y += levenshtein.o | 15 | perf-y += levenshtein.o |
| 16 | libperf-y += llvm-utils.o | 16 | perf-y += llvm-utils.o |
| 17 | libperf-y += mmap.o | 17 | perf-y += mmap.o |
| 18 | libperf-y += memswap.o | 18 | perf-y += memswap.o |
| 19 | libperf-y += parse-events.o | 19 | perf-y += parse-events.o |
| 20 | libperf-y += perf_regs.o | 20 | perf-y += perf_regs.o |
| 21 | libperf-y += path.o | 21 | perf-y += path.o |
| 22 | libperf-y += print_binary.o | 22 | perf-y += print_binary.o |
| 23 | libperf-y += rbtree.o | 23 | perf-y += rbtree.o |
| 24 | libperf-y += libstring.o | 24 | perf-y += libstring.o |
| 25 | libperf-y += bitmap.o | 25 | perf-y += bitmap.o |
| 26 | libperf-y += hweight.o | 26 | perf-y += hweight.o |
| 27 | libperf-y += smt.o | 27 | perf-y += smt.o |
| 28 | libperf-y += strbuf.o | 28 | perf-y += strbuf.o |
| 29 | libperf-y += string.o | 29 | perf-y += string.o |
| 30 | libperf-y += strlist.o | 30 | perf-y += strlist.o |
| 31 | libperf-y += strfilter.o | 31 | perf-y += strfilter.o |
| 32 | libperf-y += top.o | 32 | perf-y += top.o |
| 33 | libperf-y += usage.o | 33 | perf-y += usage.o |
| 34 | libperf-y += dso.o | 34 | perf-y += dso.o |
| 35 | libperf-y += symbol.o | 35 | perf-y += symbol.o |
| 36 | libperf-y += symbol_fprintf.o | 36 | perf-y += symbol_fprintf.o |
| 37 | libperf-y += color.o | 37 | perf-y += color.o |
| 38 | libperf-y += metricgroup.o | 38 | perf-y += color_config.o |
| 39 | libperf-y += header.o | 39 | perf-y += metricgroup.o |
| 40 | libperf-y += callchain.o | 40 | perf-y += header.o |
| 41 | libperf-y += values.o | 41 | perf-y += callchain.o |
| 42 | libperf-y += debug.o | 42 | perf-y += values.o |
| 43 | libperf-y += machine.o | 43 | perf-y += debug.o |
| 44 | libperf-y += map.o | 44 | perf-y += machine.o |
| 45 | libperf-y += pstack.o | 45 | perf-y += map.o |
| 46 | libperf-y += session.o | 46 | perf-y += pstack.o |
| 47 | libperf-$(CONFIG_TRACE) += syscalltbl.o | 47 | perf-y += session.o |
| 48 | libperf-y += ordered-events.o | 48 | perf-y += sample-raw.o |
| 49 | libperf-y += namespaces.o | 49 | perf-y += s390-sample-raw.o |
| 50 | libperf-y += comm.o | 50 | perf-$(CONFIG_TRACE) += syscalltbl.o |
| 51 | libperf-y += thread.o | 51 | perf-y += ordered-events.o |
| 52 | libperf-y += thread_map.o | 52 | perf-y += namespaces.o |
| 53 | libperf-y += trace-event-parse.o | 53 | perf-y += comm.o |
| 54 | libperf-y += parse-events-flex.o | 54 | perf-y += thread.o |
| 55 | libperf-y += parse-events-bison.o | 55 | perf-y += thread_map.o |
| 56 | libperf-y += pmu.o | 56 | perf-y += trace-event-parse.o |
| 57 | libperf-y += pmu-flex.o | 57 | perf-y += parse-events-flex.o |
| 58 | libperf-y += pmu-bison.o | 58 | perf-y += parse-events-bison.o |
| 59 | libperf-y += trace-event-read.o | 59 | perf-y += pmu.o |
| 60 | libperf-y += trace-event-info.o | 60 | perf-y += pmu-flex.o |
| 61 | libperf-y += trace-event-scripting.o | 61 | perf-y += pmu-bison.o |
| 62 | libperf-y += trace-event.o | 62 | perf-y += trace-event-read.o |
| 63 | libperf-y += svghelper.o | 63 | perf-y += trace-event-info.o |
| 64 | libperf-y += sort.o | 64 | perf-y += trace-event-scripting.o |
| 65 | libperf-y += hist.o | 65 | perf-y += trace-event.o |
| 66 | libperf-y += util.o | 66 | perf-y += svghelper.o |
| 67 | libperf-y += xyarray.o | 67 | perf-y += sort.o |
| 68 | libperf-y += cpumap.o | 68 | perf-y += hist.o |
| 69 | libperf-y += cgroup.o | 69 | perf-y += util.o |
| 70 | libperf-y += target.o | 70 | perf-y += xyarray.o |
| 71 | libperf-y += rblist.o | 71 | perf-y += cpumap.o |
| 72 | libperf-y += intlist.o | 72 | perf-y += cputopo.o |
| 73 | libperf-y += vdso.o | 73 | perf-y += cgroup.o |
| 74 | libperf-y += counts.o | 74 | perf-y += target.o |
| 75 | libperf-y += stat.o | 75 | perf-y += rblist.o |
| 76 | libperf-y += stat-shadow.o | 76 | perf-y += intlist.o |
| 77 | libperf-y += stat-display.o | 77 | perf-y += vdso.o |
| 78 | libperf-y += record.o | 78 | perf-y += counts.o |
| 79 | libperf-y += srcline.o | 79 | perf-y += stat.o |
| 80 | libperf-y += srccode.o | 80 | perf-y += stat-shadow.o |
| 81 | libperf-y += data.o | 81 | perf-y += stat-display.o |
| 82 | libperf-y += tsc.o | 82 | perf-y += record.o |
| 83 | libperf-y += cloexec.o | 83 | perf-y += srcline.o |
| 84 | libperf-y += call-path.o | 84 | perf-y += srccode.o |
| 85 | libperf-y += rwsem.o | 85 | perf-y += data.o |
| 86 | libperf-y += thread-stack.o | 86 | perf-y += tsc.o |
| 87 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o | 87 | perf-y += cloexec.o |
| 88 | libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ | 88 | perf-y += call-path.o |
| 89 | libperf-$(CONFIG_AUXTRACE) += intel-pt.o | 89 | perf-y += rwsem.o |
| 90 | libperf-$(CONFIG_AUXTRACE) += intel-bts.o | 90 | perf-y += thread-stack.o |
| 91 | libperf-$(CONFIG_AUXTRACE) += arm-spe.o | 91 | perf-$(CONFIG_AUXTRACE) += auxtrace.o |
| 92 | libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o | 92 | perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ |
| 93 | libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o | 93 | perf-$(CONFIG_AUXTRACE) += intel-pt.o |
| 94 | perf-$(CONFIG_AUXTRACE) += intel-bts.o | ||
| 95 | perf-$(CONFIG_AUXTRACE) += arm-spe.o | ||
| 96 | perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o | ||
| 97 | perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o | ||
| 94 | 98 | ||
| 95 | ifdef CONFIG_LIBOPENCSD | 99 | ifdef CONFIG_LIBOPENCSD |
| 96 | libperf-$(CONFIG_AUXTRACE) += cs-etm.o | 100 | perf-$(CONFIG_AUXTRACE) += cs-etm.o |
| 97 | libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ | 101 | perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ |
| 98 | endif | 102 | endif |
| 99 | 103 | ||
| 100 | libperf-y += parse-branch-options.o | 104 | perf-y += parse-branch-options.o |
| 101 | libperf-y += dump-insn.o | 105 | perf-y += dump-insn.o |
| 102 | libperf-y += parse-regs-options.o | 106 | perf-y += parse-regs-options.o |
| 103 | libperf-y += term.o | 107 | perf-y += term.o |
| 104 | libperf-y += help-unknown-cmd.o | 108 | perf-y += help-unknown-cmd.o |
| 105 | libperf-y += mem-events.o | 109 | perf-y += mem-events.o |
| 106 | libperf-y += vsprintf.o | 110 | perf-y += vsprintf.o |
| 107 | libperf-y += drv_configs.o | 111 | perf-y += units.o |
| 108 | libperf-y += units.o | 112 | perf-y += time-utils.o |
| 109 | libperf-y += time-utils.o | 113 | perf-y += expr-bison.o |
| 110 | libperf-y += expr-bison.o | 114 | perf-y += branch.o |
| 111 | libperf-y += branch.o | 115 | perf-y += mem2node.o |
| 112 | libperf-y += mem2node.o | 116 | |
| 113 | 117 | perf-$(CONFIG_LIBBPF) += bpf-loader.o | |
| 114 | libperf-$(CONFIG_LIBBPF) += bpf-loader.o | 118 | perf-$(CONFIG_LIBBPF) += bpf_map.o |
| 115 | libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o | 119 | perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o |
| 116 | libperf-$(CONFIG_LIBELF) += symbol-elf.o | 120 | perf-$(CONFIG_LIBELF) += symbol-elf.o |
| 117 | libperf-$(CONFIG_LIBELF) += probe-file.o | 121 | perf-$(CONFIG_LIBELF) += probe-file.o |
| 118 | libperf-$(CONFIG_LIBELF) += probe-event.o | 122 | perf-$(CONFIG_LIBELF) += probe-event.o |
| 119 | 123 | ||
| 120 | ifndef CONFIG_LIBELF | 124 | ifndef CONFIG_LIBELF |
| 121 | libperf-y += symbol-minimal.o | 125 | perf-y += symbol-minimal.o |
| 122 | endif | 126 | endif |
| 123 | 127 | ||
| 124 | ifndef CONFIG_SETNS | 128 | ifndef CONFIG_SETNS |
| 125 | libperf-y += setns.o | 129 | perf-y += setns.o |
| 126 | endif | 130 | endif |
| 127 | 131 | ||
| 128 | libperf-$(CONFIG_DWARF) += probe-finder.o | 132 | perf-$(CONFIG_DWARF) += probe-finder.o |
| 129 | libperf-$(CONFIG_DWARF) += dwarf-aux.o | 133 | perf-$(CONFIG_DWARF) += dwarf-aux.o |
| 130 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 134 | perf-$(CONFIG_DWARF) += dwarf-regs.o |
| 131 | 135 | ||
| 132 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 136 | perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
| 133 | libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o | 137 | perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o |
| 134 | libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o | 138 | perf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o |
| 135 | libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o | 139 | perf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o |
| 136 | libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o | 140 | perf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o |
| 137 | 141 | ||
| 138 | libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o | 142 | perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o |
| 139 | 143 | ||
| 140 | libperf-y += scripting-engines/ | 144 | perf-y += scripting-engines/ |
| 141 | 145 | ||
| 142 | libperf-$(CONFIG_ZLIB) += zlib.o | 146 | perf-$(CONFIG_ZLIB) += zlib.o |
| 143 | libperf-$(CONFIG_LZMA) += lzma.o | 147 | perf-$(CONFIG_LZMA) += lzma.o |
| 144 | libperf-y += demangle-java.o | 148 | perf-y += demangle-java.o |
| 145 | libperf-y += demangle-rust.o | 149 | perf-y += demangle-rust.o |
| 146 | 150 | ||
| 147 | ifdef CONFIG_JITDUMP | 151 | ifdef CONFIG_JITDUMP |
| 148 | libperf-$(CONFIG_LIBELF) += jitdump.o | 152 | perf-$(CONFIG_LIBELF) += jitdump.o |
| 149 | libperf-$(CONFIG_LIBELF) += genelf.o | 153 | perf-$(CONFIG_LIBELF) += genelf.o |
| 150 | libperf-$(CONFIG_DWARF) += genelf_debug.o | 154 | perf-$(CONFIG_DWARF) += genelf_debug.o |
| 151 | endif | 155 | endif |
| 152 | 156 | ||
| 153 | libperf-y += perf-hooks.o | 157 | perf-y += perf-hooks.o |
| 154 | 158 | ||
| 155 | libperf-$(CONFIG_CXX) += c++/ | 159 | perf-$(CONFIG_LIBBPF) += bpf-event.o |
| 160 | |||
| 161 | perf-$(CONFIG_CXX) += c++/ | ||
| 156 | 162 | ||
| 157 | CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" | 163 | CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" |
| 158 | CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" | 164 | CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" |
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 70de8f6b3aee..11a8a447a3af 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
| @@ -9,6 +9,7 @@ | |||
| 9 | 9 | ||
| 10 | #include <errno.h> | 10 | #include <errno.h> |
| 11 | #include <inttypes.h> | 11 | #include <inttypes.h> |
| 12 | #include <libgen.h> | ||
| 12 | #include "util.h" | 13 | #include "util.h" |
| 13 | #include "ui/ui.h" | 14 | #include "ui/ui.h" |
| 14 | #include "sort.h" | 15 | #include "sort.h" |
| @@ -16,6 +17,7 @@ | |||
| 16 | #include "color.h" | 17 | #include "color.h" |
| 17 | #include "config.h" | 18 | #include "config.h" |
| 18 | #include "cache.h" | 19 | #include "cache.h" |
| 20 | #include "map.h" | ||
| 19 | #include "symbol.h" | 21 | #include "symbol.h" |
| 20 | #include "units.h" | 22 | #include "units.h" |
| 21 | #include "debug.h" | 23 | #include "debug.h" |
| @@ -1889,6 +1891,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, | |||
| 1889 | struct annotation_options *options, | 1891 | struct annotation_options *options, |
| 1890 | struct arch **parch) | 1892 | struct arch **parch) |
| 1891 | { | 1893 | { |
| 1894 | struct annotation *notes = symbol__annotation(sym); | ||
| 1892 | struct annotate_args args = { | 1895 | struct annotate_args args = { |
| 1893 | .privsize = privsize, | 1896 | .privsize = privsize, |
| 1894 | .evsel = evsel, | 1897 | .evsel = evsel, |
| @@ -1919,6 +1922,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, | |||
| 1919 | 1922 | ||
| 1920 | args.ms.map = map; | 1923 | args.ms.map = map; |
| 1921 | args.ms.sym = sym; | 1924 | args.ms.sym = sym; |
| 1925 | notes->start = map__rip_2objdump(map, sym->start); | ||
| 1922 | 1926 | ||
| 1923 | return symbol__disassemble(sym, &args); | 1927 | return symbol__disassemble(sym, &args); |
| 1924 | } | 1928 | } |
| @@ -2794,8 +2798,6 @@ int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *ev | |||
| 2794 | 2798 | ||
| 2795 | symbol__calc_percent(sym, evsel); | 2799 | symbol__calc_percent(sym, evsel); |
| 2796 | 2800 | ||
| 2797 | notes->start = map__rip_2objdump(map, sym->start); | ||
| 2798 | |||
| 2799 | annotation__set_offsets(notes, size); | 2801 | annotation__set_offsets(notes, size); |
| 2800 | annotation__mark_jump_targets(notes, sym); | 2802 | annotation__mark_jump_targets(notes, sym); |
| 2801 | annotation__compute_ipc(notes, size); | 2803 | annotation__compute_ipc(notes, size); |
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index fb6463730ba4..95053cab41fe 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h | |||
| @@ -4,16 +4,24 @@ | |||
| 4 | 4 | ||
| 5 | #include <stdbool.h> | 5 | #include <stdbool.h> |
| 6 | #include <stdint.h> | 6 | #include <stdint.h> |
| 7 | #include <stdio.h> | ||
| 7 | #include <linux/types.h> | 8 | #include <linux/types.h> |
| 8 | #include "symbol.h" | ||
| 9 | #include "hist.h" | ||
| 10 | #include "sort.h" | ||
| 11 | #include <linux/list.h> | 9 | #include <linux/list.h> |
| 12 | #include <linux/rbtree.h> | 10 | #include <linux/rbtree.h> |
| 13 | #include <pthread.h> | 11 | #include <pthread.h> |
| 14 | #include <asm/bug.h> | 12 | #include <asm/bug.h> |
| 13 | #include "symbol_conf.h" | ||
| 15 | 14 | ||
| 15 | struct hist_browser_timer; | ||
| 16 | struct hist_entry; | ||
| 16 | struct ins_ops; | 17 | struct ins_ops; |
| 18 | struct map; | ||
| 19 | struct map_symbol; | ||
| 20 | struct addr_map_symbol; | ||
| 21 | struct option; | ||
| 22 | struct perf_sample; | ||
| 23 | struct perf_evsel; | ||
| 24 | struct symbol; | ||
| 17 | 25 | ||
| 18 | struct ins { | 26 | struct ins { |
| 19 | const char *name; | 27 | const char *name; |
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index f69961c4a4f3..267e54df511b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/bitops.h> | 27 | #include <linux/bitops.h> |
| 28 | #include <linux/log2.h> | 28 | #include <linux/log2.h> |
| 29 | #include <linux/string.h> | 29 | #include <linux/string.h> |
| 30 | #include <linux/time64.h> | ||
| 30 | 31 | ||
| 31 | #include <sys/param.h> | 32 | #include <sys/param.h> |
| 32 | #include <stdlib.h> | 33 | #include <stdlib.h> |
| @@ -41,6 +42,7 @@ | |||
| 41 | #include "pmu.h" | 42 | #include "pmu.h" |
| 42 | #include "evsel.h" | 43 | #include "evsel.h" |
| 43 | #include "cpumap.h" | 44 | #include "cpumap.h" |
| 45 | #include "symbol.h" | ||
| 44 | #include "thread_map.h" | 46 | #include "thread_map.h" |
| 45 | #include "asm/bug.h" | 47 | #include "asm/bug.h" |
| 46 | #include "auxtrace.h" | 48 | #include "auxtrace.h" |
| @@ -857,7 +859,7 @@ void auxtrace_buffer__free(struct auxtrace_buffer *buffer) | |||
| 857 | 859 | ||
| 858 | void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, | 860 | void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, |
| 859 | int code, int cpu, pid_t pid, pid_t tid, u64 ip, | 861 | int code, int cpu, pid_t pid, pid_t tid, u64 ip, |
| 860 | const char *msg) | 862 | const char *msg, u64 timestamp) |
| 861 | { | 863 | { |
| 862 | size_t size; | 864 | size_t size; |
| 863 | 865 | ||
| @@ -869,7 +871,9 @@ void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, | |||
| 869 | auxtrace_error->cpu = cpu; | 871 | auxtrace_error->cpu = cpu; |
| 870 | auxtrace_error->pid = pid; | 872 | auxtrace_error->pid = pid; |
| 871 | auxtrace_error->tid = tid; | 873 | auxtrace_error->tid = tid; |
| 874 | auxtrace_error->fmt = 1; | ||
| 872 | auxtrace_error->ip = ip; | 875 | auxtrace_error->ip = ip; |
| 876 | auxtrace_error->time = timestamp; | ||
| 873 | strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); | 877 | strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); |
| 874 | 878 | ||
| 875 | size = (void *)auxtrace_error->msg - (void *)auxtrace_error + | 879 | size = (void *)auxtrace_error->msg - (void *)auxtrace_error + |
| @@ -1159,12 +1163,27 @@ static const char *auxtrace_error_name(int type) | |||
| 1159 | size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) | 1163 | size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) |
| 1160 | { | 1164 | { |
| 1161 | struct auxtrace_error_event *e = &event->auxtrace_error; | 1165 | struct auxtrace_error_event *e = &event->auxtrace_error; |
| 1166 | unsigned long long nsecs = e->time; | ||
| 1167 | const char *msg = e->msg; | ||
| 1162 | int ret; | 1168 | int ret; |
| 1163 | 1169 | ||
| 1164 | ret = fprintf(fp, " %s error type %u", | 1170 | ret = fprintf(fp, " %s error type %u", |
| 1165 | auxtrace_error_name(e->type), e->type); | 1171 | auxtrace_error_name(e->type), e->type); |
| 1172 | |||
| 1173 | if (e->fmt && nsecs) { | ||
| 1174 | unsigned long secs = nsecs / NSEC_PER_SEC; | ||
| 1175 | |||
| 1176 | nsecs -= secs * NSEC_PER_SEC; | ||
| 1177 | ret += fprintf(fp, " time %lu.%09llu", secs, nsecs); | ||
| 1178 | } else { | ||
| 1179 | ret += fprintf(fp, " time 0"); | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | if (!e->fmt) | ||
| 1183 | msg = (const char *)&e->time; | ||
| 1184 | |||
| 1166 | ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", | 1185 | ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", |
| 1167 | e->cpu, e->pid, e->tid, e->ip, e->code, e->msg); | 1186 | e->cpu, e->pid, e->tid, e->ip, e->code, msg); |
| 1168 | return ret; | 1187 | return ret; |
| 1169 | } | 1188 | } |
| 1170 | 1189 | ||
| @@ -1278,9 +1297,9 @@ static int __auxtrace_mmap__read(struct perf_mmap *map, | |||
| 1278 | } | 1297 | } |
| 1279 | 1298 | ||
| 1280 | /* padding must be written by fn() e.g. record__process_auxtrace() */ | 1299 | /* padding must be written by fn() e.g. record__process_auxtrace() */ |
| 1281 | padding = size & 7; | 1300 | padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1); |
| 1282 | if (padding) | 1301 | if (padding) |
| 1283 | padding = 8 - padding; | 1302 | padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding; |
| 1284 | 1303 | ||
| 1285 | memset(&ev, 0, sizeof(ev)); | 1304 | memset(&ev, 0, sizeof(ev)); |
| 1286 | ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; | 1305 | ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; |
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 8e50f96d4b23..c69bcd9a3091 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h | |||
| @@ -40,6 +40,9 @@ struct record_opts; | |||
| 40 | struct auxtrace_info_event; | 40 | struct auxtrace_info_event; |
| 41 | struct events_stats; | 41 | struct events_stats; |
| 42 | 42 | ||
| 43 | /* Auxtrace records must have the same alignment as perf event records */ | ||
| 44 | #define PERF_AUXTRACE_RECORD_ALIGNMENT 8 | ||
| 45 | |||
| 43 | enum auxtrace_type { | 46 | enum auxtrace_type { |
| 44 | PERF_AUXTRACE_UNKNOWN, | 47 | PERF_AUXTRACE_UNKNOWN, |
| 45 | PERF_AUXTRACE_INTEL_PT, | 48 | PERF_AUXTRACE_INTEL_PT, |
| @@ -516,7 +519,7 @@ void auxtrace_index__free(struct list_head *head); | |||
| 516 | 519 | ||
| 517 | void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, | 520 | void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, |
| 518 | int code, int cpu, pid_t pid, pid_t tid, u64 ip, | 521 | int code, int cpu, pid_t pid, pid_t tid, u64 ip, |
| 519 | const char *msg); | 522 | const char *msg, u64 timestamp); |
| 520 | 523 | ||
| 521 | int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, | 524 | int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, |
| 522 | struct perf_tool *tool, | 525 | struct perf_tool *tool, |
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c index f1451c987eec..1be432657501 100644 --- a/tools/perf/util/block-range.c +++ b/tools/perf/util/block-range.c | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "block-range.h" | 2 | #include "block-range.h" |
| 3 | #include "annotate.h" | 3 | #include "annotate.h" |
| 4 | #include <assert.h> | ||
| 5 | #include <stdlib.h> | ||
| 4 | 6 | ||
| 5 | struct { | 7 | struct { |
| 6 | struct rb_root root; | 8 | struct rb_root root; |
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h index a5ba719d69fb..ec0fb534bf56 100644 --- a/tools/perf/util/block-range.h +++ b/tools/perf/util/block-range.h | |||
| @@ -2,7 +2,11 @@ | |||
| 2 | #ifndef __PERF_BLOCK_RANGE_H | 2 | #ifndef __PERF_BLOCK_RANGE_H |
| 3 | #define __PERF_BLOCK_RANGE_H | 3 | #define __PERF_BLOCK_RANGE_H |
| 4 | 4 | ||
| 5 | #include "symbol.h" | 5 | #include <stdbool.h> |
| 6 | #include <linux/rbtree.h> | ||
| 7 | #include <linux/types.h> | ||
| 8 | |||
| 9 | struct symbol; | ||
| 6 | 10 | ||
| 7 | /* | 11 | /* |
| 8 | * struct block_range - non-overlapping parts of basic blocks | 12 | * struct block_range - non-overlapping parts of basic blocks |
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c new file mode 100644 index 000000000000..028c8ec1f62a --- /dev/null +++ b/tools/perf/util/bpf-event.c | |||
| @@ -0,0 +1,263 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #include <errno.h> | ||
| 3 | #include <stdlib.h> | ||
| 4 | #include <bpf/bpf.h> | ||
| 5 | #include <bpf/btf.h> | ||
| 6 | #include <linux/btf.h> | ||
| 7 | #include "bpf-event.h" | ||
| 8 | #include "debug.h" | ||
| 9 | #include "symbol.h" | ||
| 10 | #include "machine.h" | ||
| 11 | |||
| 12 | #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) | ||
| 13 | |||
| 14 | static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) | ||
| 15 | { | ||
| 16 | int ret = 0; | ||
| 17 | size_t i; | ||
| 18 | |||
| 19 | for (i = 0; i < len; i++) | ||
| 20 | ret += snprintf(buf + ret, size - ret, "%02x", data[i]); | ||
| 21 | return ret; | ||
| 22 | } | ||
| 23 | |||
| 24 | int machine__process_bpf_event(struct machine *machine __maybe_unused, | ||
| 25 | union perf_event *event, | ||
| 26 | struct perf_sample *sample __maybe_unused) | ||
| 27 | { | ||
| 28 | if (dump_trace) | ||
| 29 | perf_event__fprintf_bpf_event(event, stdout); | ||
| 30 | return 0; | ||
| 31 | } | ||
| 32 | |||
| 33 | /* | ||
| 34 | * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf | ||
| 35 | * program. One PERF_RECORD_BPF_EVENT is generated for the program. And | ||
| 36 | * one PERF_RECORD_KSYMBOL is generated for each sub program. | ||
| 37 | * | ||
| 38 | * Returns: | ||
| 39 | * 0 for success; | ||
| 40 | * -1 for failures; | ||
| 41 | * -2 for lack of kernel support. | ||
| 42 | */ | ||
| 43 | static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, | ||
| 44 | perf_event__handler_t process, | ||
| 45 | struct machine *machine, | ||
| 46 | int fd, | ||
| 47 | union perf_event *event, | ||
| 48 | struct record_opts *opts) | ||
| 49 | { | ||
| 50 | struct ksymbol_event *ksymbol_event = &event->ksymbol_event; | ||
| 51 | struct bpf_event *bpf_event = &event->bpf_event; | ||
| 52 | u32 sub_prog_cnt, i, func_info_rec_size = 0; | ||
| 53 | u8 (*prog_tags)[BPF_TAG_SIZE] = NULL; | ||
| 54 | struct bpf_prog_info info = { .type = 0, }; | ||
| 55 | u32 info_len = sizeof(info); | ||
| 56 | void *func_infos = NULL; | ||
| 57 | u64 *prog_addrs = NULL; | ||
| 58 | struct btf *btf = NULL; | ||
| 59 | u32 *prog_lens = NULL; | ||
| 60 | bool has_btf = false; | ||
| 61 | char errbuf[512]; | ||
| 62 | int err = 0; | ||
| 63 | |||
| 64 | /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */ | ||
| 65 | err = bpf_obj_get_info_by_fd(fd, &info, &info_len); | ||
| 66 | |||
| 67 | if (err) { | ||
| 68 | pr_debug("%s: failed to get BPF program info: %s, aborting\n", | ||
| 69 | __func__, str_error_r(errno, errbuf, sizeof(errbuf))); | ||
| 70 | return -1; | ||
| 71 | } | ||
| 72 | if (info_len < offsetof(struct bpf_prog_info, prog_tags)) { | ||
| 73 | pr_debug("%s: the kernel is too old, aborting\n", __func__); | ||
| 74 | return -2; | ||
| 75 | } | ||
| 76 | |||
| 77 | /* number of ksyms, func_lengths, and tags should match */ | ||
| 78 | sub_prog_cnt = info.nr_jited_ksyms; | ||
| 79 | if (sub_prog_cnt != info.nr_prog_tags || | ||
| 80 | sub_prog_cnt != info.nr_jited_func_lens) | ||
| 81 | return -1; | ||
| 82 | |||
| 83 | /* check BTF func info support */ | ||
| 84 | if (info.btf_id && info.nr_func_info && info.func_info_rec_size) { | ||
| 85 | /* btf func info number should be same as sub_prog_cnt */ | ||
| 86 | if (sub_prog_cnt != info.nr_func_info) { | ||
| 87 | pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__); | ||
| 88 | return -1; | ||
| 89 | } | ||
| 90 | if (btf__get_from_id(info.btf_id, &btf)) { | ||
| 91 | pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id); | ||
| 92 | return -1; | ||
| 93 | } | ||
| 94 | func_info_rec_size = info.func_info_rec_size; | ||
| 95 | func_infos = calloc(sub_prog_cnt, func_info_rec_size); | ||
| 96 | if (!func_infos) { | ||
| 97 | pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__); | ||
| 98 | return -1; | ||
| 99 | } | ||
| 100 | has_btf = true; | ||
| 101 | } | ||
| 102 | |||
| 103 | /* | ||
| 104 | * We need address, length, and tag for each sub program. | ||
| 105 | * Allocate memory and call bpf_obj_get_info_by_fd() again | ||
| 106 | */ | ||
| 107 | prog_addrs = calloc(sub_prog_cnt, sizeof(u64)); | ||
| 108 | if (!prog_addrs) { | ||
| 109 | pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__); | ||
| 110 | goto out; | ||
| 111 | } | ||
| 112 | prog_lens = calloc(sub_prog_cnt, sizeof(u32)); | ||
| 113 | if (!prog_lens) { | ||
| 114 | pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__); | ||
| 115 | goto out; | ||
| 116 | } | ||
| 117 | prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE); | ||
| 118 | if (!prog_tags) { | ||
| 119 | pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__); | ||
| 120 | goto out; | ||
| 121 | } | ||
| 122 | |||
| 123 | memset(&info, 0, sizeof(info)); | ||
| 124 | info.nr_jited_ksyms = sub_prog_cnt; | ||
| 125 | info.nr_jited_func_lens = sub_prog_cnt; | ||
| 126 | info.nr_prog_tags = sub_prog_cnt; | ||
| 127 | info.jited_ksyms = ptr_to_u64(prog_addrs); | ||
| 128 | info.jited_func_lens = ptr_to_u64(prog_lens); | ||
| 129 | info.prog_tags = ptr_to_u64(prog_tags); | ||
| 130 | info_len = sizeof(info); | ||
| 131 | if (has_btf) { | ||
| 132 | info.nr_func_info = sub_prog_cnt; | ||
| 133 | info.func_info_rec_size = func_info_rec_size; | ||
| 134 | info.func_info = ptr_to_u64(func_infos); | ||
| 135 | } | ||
| 136 | |||
| 137 | err = bpf_obj_get_info_by_fd(fd, &info, &info_len); | ||
| 138 | if (err) { | ||
| 139 | pr_debug("%s: failed to get BPF program info, aborting\n", __func__); | ||
| 140 | goto out; | ||
| 141 | } | ||
| 142 | |||
| 143 | /* Synthesize PERF_RECORD_KSYMBOL */ | ||
| 144 | for (i = 0; i < sub_prog_cnt; i++) { | ||
| 145 | const struct bpf_func_info *finfo; | ||
| 146 | const char *short_name = NULL; | ||
| 147 | const struct btf_type *t; | ||
| 148 | int name_len; | ||
| 149 | |||
| 150 | *ksymbol_event = (struct ksymbol_event){ | ||
| 151 | .header = { | ||
| 152 | .type = PERF_RECORD_KSYMBOL, | ||
| 153 | .size = offsetof(struct ksymbol_event, name), | ||
| 154 | }, | ||
| 155 | .addr = prog_addrs[i], | ||
| 156 | .len = prog_lens[i], | ||
| 157 | .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, | ||
| 158 | .flags = 0, | ||
| 159 | }; | ||
| 160 | name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN, | ||
| 161 | "bpf_prog_"); | ||
| 162 | name_len += snprintf_hex(ksymbol_event->name + name_len, | ||
| 163 | KSYM_NAME_LEN - name_len, | ||
| 164 | prog_tags[i], BPF_TAG_SIZE); | ||
| 165 | if (has_btf) { | ||
| 166 | finfo = func_infos + i * info.func_info_rec_size; | ||
| 167 | t = btf__type_by_id(btf, finfo->type_id); | ||
| 168 | short_name = btf__name_by_offset(btf, t->name_off); | ||
| 169 | } else if (i == 0 && sub_prog_cnt == 1) { | ||
| 170 | /* no subprog */ | ||
| 171 | if (info.name[0]) | ||
| 172 | short_name = info.name; | ||
| 173 | } else | ||
| 174 | short_name = "F"; | ||
| 175 | if (short_name) | ||
| 176 | name_len += snprintf(ksymbol_event->name + name_len, | ||
| 177 | KSYM_NAME_LEN - name_len, | ||
| 178 | "_%s", short_name); | ||
| 179 | |||
| 180 | ksymbol_event->header.size += PERF_ALIGN(name_len + 1, | ||
| 181 | sizeof(u64)); | ||
| 182 | |||
| 183 | memset((void *)event + event->header.size, 0, machine->id_hdr_size); | ||
| 184 | event->header.size += machine->id_hdr_size; | ||
| 185 | err = perf_tool__process_synth_event(tool, event, | ||
| 186 | machine, process); | ||
| 187 | } | ||
| 188 | |||
| 189 | /* Synthesize PERF_RECORD_BPF_EVENT */ | ||
| 190 | if (opts->bpf_event) { | ||
| 191 | *bpf_event = (struct bpf_event){ | ||
| 192 | .header = { | ||
| 193 | .type = PERF_RECORD_BPF_EVENT, | ||
| 194 | .size = sizeof(struct bpf_event), | ||
| 195 | }, | ||
| 196 | .type = PERF_BPF_EVENT_PROG_LOAD, | ||
| 197 | .flags = 0, | ||
| 198 | .id = info.id, | ||
| 199 | }; | ||
| 200 | memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE); | ||
| 201 | memset((void *)event + event->header.size, 0, machine->id_hdr_size); | ||
| 202 | event->header.size += machine->id_hdr_size; | ||
| 203 | err = perf_tool__process_synth_event(tool, event, | ||
| 204 | machine, process); | ||
| 205 | } | ||
| 206 | |||
| 207 | out: | ||
| 208 | free(prog_tags); | ||
| 209 | free(prog_lens); | ||
| 210 | free(prog_addrs); | ||
| 211 | free(func_infos); | ||
| 212 | free(btf); | ||
| 213 | return err ? -1 : 0; | ||
| 214 | } | ||
| 215 | |||
| 216 | int perf_event__synthesize_bpf_events(struct perf_tool *tool, | ||
| 217 | perf_event__handler_t process, | ||
| 218 | struct machine *machine, | ||
| 219 | struct record_opts *opts) | ||
| 220 | { | ||
| 221 | union perf_event *event; | ||
| 222 | __u32 id = 0; | ||
| 223 | int err; | ||
| 224 | int fd; | ||
| 225 | |||
| 226 | event = malloc(sizeof(event->bpf_event) + KSYM_NAME_LEN + machine->id_hdr_size); | ||
| 227 | if (!event) | ||
| 228 | return -1; | ||
| 229 | while (true) { | ||
| 230 | err = bpf_prog_get_next_id(id, &id); | ||
| 231 | if (err) { | ||
| 232 | if (errno == ENOENT) { | ||
| 233 | err = 0; | ||
| 234 | break; | ||
| 235 | } | ||
| 236 | pr_debug("%s: can't get next program: %s%s\n", | ||
| 237 | __func__, strerror(errno), | ||
| 238 | errno == EINVAL ? " -- kernel too old?" : ""); | ||
| 239 | /* don't report error on old kernel or EPERM */ | ||
| 240 | err = (errno == EINVAL || errno == EPERM) ? 0 : -1; | ||
| 241 | break; | ||
| 242 | } | ||
| 243 | fd = bpf_prog_get_fd_by_id(id); | ||
| 244 | if (fd < 0) { | ||
| 245 | pr_debug("%s: failed to get fd for prog_id %u\n", | ||
| 246 | __func__, id); | ||
| 247 | continue; | ||
| 248 | } | ||
| 249 | |||
| 250 | err = perf_event__synthesize_one_bpf_prog(tool, process, | ||
| 251 | machine, fd, | ||
| 252 | event, opts); | ||
| 253 | close(fd); | ||
| 254 | if (err) { | ||
| 255 | /* do not return error for old kernel */ | ||
| 256 | if (err == -2) | ||
| 257 | err = 0; | ||
| 258 | break; | ||
| 259 | } | ||
| 260 | } | ||
| 261 | free(event); | ||
| 262 | return err; | ||
| 263 | } | ||
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h new file mode 100644 index 000000000000..7890067e1a37 --- /dev/null +++ b/tools/perf/util/bpf-event.h | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __PERF_BPF_EVENT_H | ||
| 3 | #define __PERF_BPF_EVENT_H | ||
| 4 | |||
| 5 | #include <linux/compiler.h> | ||
| 6 | #include "event.h" | ||
| 7 | |||
| 8 | struct machine; | ||
| 9 | union perf_event; | ||
| 10 | struct perf_sample; | ||
| 11 | struct perf_tool; | ||
| 12 | struct record_opts; | ||
| 13 | |||
| 14 | #ifdef HAVE_LIBBPF_SUPPORT | ||
| 15 | int machine__process_bpf_event(struct machine *machine, union perf_event *event, | ||
| 16 | struct perf_sample *sample); | ||
| 17 | |||
| 18 | int perf_event__synthesize_bpf_events(struct perf_tool *tool, | ||
| 19 | perf_event__handler_t process, | ||
| 20 | struct machine *machine, | ||
| 21 | struct record_opts *opts); | ||
| 22 | #else | ||
| 23 | static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, | ||
| 24 | union perf_event *event __maybe_unused, | ||
| 25 | struct perf_sample *sample __maybe_unused) | ||
| 26 | { | ||
| 27 | return 0; | ||
| 28 | } | ||
| 29 | |||
| 30 | static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused, | ||
| 31 | perf_event__handler_t process __maybe_unused, | ||
| 32 | struct machine *machine __maybe_unused, | ||
| 33 | struct record_opts *opts __maybe_unused) | ||
| 34 | { | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | #endif // HAVE_LIBBPF_SUPPORT | ||
| 38 | #endif | ||
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 31b7e5a1453b..251d9ea6252f 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c | |||
| @@ -15,6 +15,7 @@ | |||
| 15 | #include <errno.h> | 15 | #include <errno.h> |
| 16 | #include "perf.h" | 16 | #include "perf.h" |
| 17 | #include "debug.h" | 17 | #include "debug.h" |
| 18 | #include "evlist.h" | ||
| 18 | #include "bpf-loader.h" | 19 | #include "bpf-loader.h" |
| 19 | #include "bpf-prologue.h" | 20 | #include "bpf-prologue.h" |
| 20 | #include "probe-event.h" | 21 | #include "probe-event.h" |
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h index 62d245a90e1d..3f46856e3330 100644 --- a/tools/perf/util/bpf-loader.h +++ b/tools/perf/util/bpf-loader.h | |||
| @@ -8,11 +8,7 @@ | |||
| 8 | 8 | ||
| 9 | #include <linux/compiler.h> | 9 | #include <linux/compiler.h> |
| 10 | #include <linux/err.h> | 10 | #include <linux/err.h> |
| 11 | #include <string.h> | ||
| 12 | #include <bpf/libbpf.h> | 11 | #include <bpf/libbpf.h> |
| 13 | #include "probe-event.h" | ||
| 14 | #include "evlist.h" | ||
| 15 | #include "debug.h" | ||
| 16 | 12 | ||
| 17 | enum bpf_loader_errno { | 13 | enum bpf_loader_errno { |
| 18 | __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, | 14 | __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100, |
| @@ -44,6 +40,7 @@ enum bpf_loader_errno { | |||
| 44 | }; | 40 | }; |
| 45 | 41 | ||
| 46 | struct perf_evsel; | 42 | struct perf_evsel; |
| 43 | struct perf_evlist; | ||
| 47 | struct bpf_object; | 44 | struct bpf_object; |
| 48 | struct parse_events_term; | 45 | struct parse_events_term; |
| 49 | #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" | 46 | #define PERF_BPF_PROBE_GROUP "perf_bpf_probe" |
| @@ -87,6 +84,8 @@ struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const cha | |||
| 87 | int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); | 84 | int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size); |
| 88 | #else | 85 | #else |
| 89 | #include <errno.h> | 86 | #include <errno.h> |
| 87 | #include <string.h> | ||
| 88 | #include "debug.h" | ||
| 90 | 89 | ||
| 91 | static inline struct bpf_object * | 90 | static inline struct bpf_object * |
| 92 | bpf__prepare_load(const char *filename __maybe_unused, | 91 | bpf__prepare_load(const char *filename __maybe_unused, |
diff --git a/tools/perf/util/bpf_map.c b/tools/perf/util/bpf_map.c new file mode 100644 index 000000000000..eb853ca67cf4 --- /dev/null +++ b/tools/perf/util/bpf_map.c | |||
| @@ -0,0 +1,72 @@ | |||
| 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) | ||
| 2 | |||
| 3 | #include "util/bpf_map.h" | ||
| 4 | #include <bpf/bpf.h> | ||
| 5 | #include <bpf/libbpf.h> | ||
| 6 | #include <linux/err.h> | ||
| 7 | #include <linux/kernel.h> | ||
| 8 | #include <stdbool.h> | ||
| 9 | #include <stdlib.h> | ||
| 10 | #include <unistd.h> | ||
| 11 | |||
| 12 | static bool bpf_map_def__is_per_cpu(const struct bpf_map_def *def) | ||
| 13 | { | ||
| 14 | return def->type == BPF_MAP_TYPE_PERCPU_HASH || | ||
| 15 | def->type == BPF_MAP_TYPE_PERCPU_ARRAY || | ||
| 16 | def->type == BPF_MAP_TYPE_LRU_PERCPU_HASH || | ||
| 17 | def->type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE; | ||
| 18 | } | ||
| 19 | |||
| 20 | static void *bpf_map_def__alloc_value(const struct bpf_map_def *def) | ||
| 21 | { | ||
| 22 | if (bpf_map_def__is_per_cpu(def)) | ||
| 23 | return malloc(round_up(def->value_size, 8) * sysconf(_SC_NPROCESSORS_CONF)); | ||
| 24 | |||
| 25 | return malloc(def->value_size); | ||
| 26 | } | ||
| 27 | |||
| 28 | int bpf_map__fprintf(struct bpf_map *map, FILE *fp) | ||
| 29 | { | ||
| 30 | const struct bpf_map_def *def = bpf_map__def(map); | ||
| 31 | void *prev_key = NULL, *key, *value; | ||
| 32 | int fd = bpf_map__fd(map), err; | ||
| 33 | int printed = 0; | ||
| 34 | |||
| 35 | if (fd < 0) | ||
| 36 | return fd; | ||
| 37 | |||
| 38 | if (IS_ERR(def)) | ||
| 39 | return PTR_ERR(def); | ||
| 40 | |||
| 41 | err = -ENOMEM; | ||
| 42 | key = malloc(def->key_size); | ||
| 43 | if (key == NULL) | ||
| 44 | goto out; | ||
| 45 | |||
| 46 | value = bpf_map_def__alloc_value(def); | ||
| 47 | if (value == NULL) | ||
| 48 | goto out_free_key; | ||
| 49 | |||
| 50 | while ((err = bpf_map_get_next_key(fd, prev_key, key) == 0)) { | ||
| 51 | int intkey = *(int *)key; | ||
| 52 | |||
| 53 | if (!bpf_map_lookup_elem(fd, key, value)) { | ||
| 54 | bool boolval = *(bool *)value; | ||
| 55 | if (boolval) | ||
| 56 | printed += fprintf(fp, "[%d] = %d,\n", intkey, boolval); | ||
| 57 | } else { | ||
| 58 | printed += fprintf(fp, "[%d] = ERROR,\n", intkey); | ||
| 59 | } | ||
| 60 | |||
| 61 | prev_key = key; | ||
| 62 | } | ||
| 63 | |||
| 64 | if (err == ENOENT) | ||
| 65 | err = printed; | ||
| 66 | |||
| 67 | free(value); | ||
| 68 | out_free_key: | ||
| 69 | free(key); | ||
| 70 | out: | ||
| 71 | return err; | ||
| 72 | } | ||
diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h new file mode 100644 index 000000000000..d6abd5e47af8 --- /dev/null +++ b/tools/perf/util/bpf_map.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) | ||
| 2 | #ifndef __PERF_BPF_MAP_H | ||
| 3 | #define __PERF_BPF_MAP_H 1 | ||
| 4 | |||
| 5 | #include <stdio.h> | ||
| 6 | #include <linux/compiler.h> | ||
| 7 | struct bpf_map; | ||
| 8 | |||
| 9 | #ifdef HAVE_LIBBPF_SUPPORT | ||
| 10 | |||
| 11 | int bpf_map__fprintf(struct bpf_map *map, FILE *fp); | ||
| 12 | |||
| 13 | #else | ||
| 14 | |||
| 15 | static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused) | ||
| 16 | { | ||
| 17 | return 0; | ||
| 18 | } | ||
| 19 | |||
| 20 | #endif // HAVE_LIBBPF_SUPPORT | ||
| 21 | |||
| 22 | #endif // __PERF_BPF_MAP_H | ||
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index 1e3c7c5cdc63..64f96b79f1d7 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h | |||
| @@ -1,8 +1,31 @@ | |||
| 1 | #ifndef _PERF_BRANCH_H | 1 | #ifndef _PERF_BRANCH_H |
| 2 | #define _PERF_BRANCH_H 1 | 2 | #define _PERF_BRANCH_H 1 |
| 3 | 3 | ||
| 4 | #include <stdio.h> | ||
| 4 | #include <stdint.h> | 5 | #include <stdint.h> |
| 5 | #include "../perf.h" | 6 | #include <linux/perf_event.h> |
| 7 | #include <linux/types.h> | ||
| 8 | |||
| 9 | struct branch_flags { | ||
| 10 | u64 mispred:1; | ||
| 11 | u64 predicted:1; | ||
| 12 | u64 in_tx:1; | ||
| 13 | u64 abort:1; | ||
| 14 | u64 cycles:16; | ||
| 15 | u64 type:4; | ||
| 16 | u64 reserved:40; | ||
| 17 | }; | ||
| 18 | |||
| 19 | struct branch_entry { | ||
| 20 | u64 from; | ||
| 21 | u64 to; | ||
| 22 | struct branch_flags flags; | ||
| 23 | }; | ||
| 24 | |||
| 25 | struct branch_stack { | ||
| 26 | u64 nr; | ||
| 27 | struct branch_entry entries[0]; | ||
| 28 | }; | ||
| 6 | 29 | ||
| 7 | struct branch_type_stat { | 30 | struct branch_type_stat { |
| 8 | bool branch_to; | 31 | bool branch_to; |
| @@ -13,8 +36,6 @@ struct branch_type_stat { | |||
| 13 | u64 cross_2m; | 36 | u64 cross_2m; |
| 14 | }; | 37 | }; |
| 15 | 38 | ||
| 16 | struct branch_flags; | ||
| 17 | |||
| 18 | void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, | 39 | void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags, |
| 19 | u64 from, u64 to); | 40 | u64 from, u64 to); |
| 20 | 41 | ||
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 04b1d53e4bf9..bff0d17920ed 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c | |||
| @@ -15,6 +15,8 @@ | |||
| 15 | #include <sys/types.h> | 15 | #include <sys/types.h> |
| 16 | #include "build-id.h" | 16 | #include "build-id.h" |
| 17 | #include "event.h" | 17 | #include "event.h" |
| 18 | #include "namespaces.h" | ||
| 19 | #include "map.h" | ||
| 18 | #include "symbol.h" | 20 | #include "symbol.h" |
| 19 | #include "thread.h" | 21 | #include "thread.h" |
| 20 | #include <linux/kernel.h> | 22 | #include <linux/kernel.h> |
| @@ -363,7 +365,8 @@ int perf_session__write_buildid_table(struct perf_session *session, | |||
| 363 | if (err) | 365 | if (err) |
| 364 | return err; | 366 | return err; |
| 365 | 367 | ||
| 366 | for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { | 368 | for (nd = rb_first_cached(&session->machines.guests); nd; |
| 369 | nd = rb_next(nd)) { | ||
| 367 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 370 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 368 | err = machine__write_buildid_table(pos, fd); | 371 | err = machine__write_buildid_table(pos, fd); |
| 369 | if (err) | 372 | if (err) |
| @@ -396,7 +399,8 @@ int dsos__hit_all(struct perf_session *session) | |||
| 396 | if (err) | 399 | if (err) |
| 397 | return err; | 400 | return err; |
| 398 | 401 | ||
| 399 | for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { | 402 | for (nd = rb_first_cached(&session->machines.guests); nd; |
| 403 | nd = rb_next(nd)) { | ||
| 400 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 404 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 401 | 405 | ||
| 402 | err = machine__hit_all_dsos(pos); | 406 | err = machine__hit_all_dsos(pos); |
| @@ -849,7 +853,8 @@ int perf_session__cache_build_ids(struct perf_session *session) | |||
| 849 | 853 | ||
| 850 | ret = machine__cache_build_ids(&session->machines.host); | 854 | ret = machine__cache_build_ids(&session->machines.host); |
| 851 | 855 | ||
| 852 | for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { | 856 | for (nd = rb_first_cached(&session->machines.guests); nd; |
| 857 | nd = rb_next(nd)) { | ||
| 853 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 858 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 854 | ret |= machine__cache_build_ids(pos); | 859 | ret |= machine__cache_build_ids(pos); |
| 855 | } | 860 | } |
| @@ -866,7 +871,8 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) | |||
| 866 | struct rb_node *nd; | 871 | struct rb_node *nd; |
| 867 | bool ret = machine__read_build_ids(&session->machines.host, with_hits); | 872 | bool ret = machine__read_build_ids(&session->machines.host, with_hits); |
| 868 | 873 | ||
| 869 | for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { | 874 | for (nd = rb_first_cached(&session->machines.guests); nd; |
| 875 | nd = rb_next(nd)) { | ||
| 870 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 876 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 871 | ret |= machine__read_build_ids(pos, with_hits); | 877 | ret |= machine__read_build_ids(pos, with_hits); |
| 872 | } | 878 | } |
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index f0c565164a97..93668f38f1ed 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h | |||
| @@ -6,9 +6,10 @@ | |||
| 6 | #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) | 6 | #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) |
| 7 | 7 | ||
| 8 | #include "tool.h" | 8 | #include "tool.h" |
| 9 | #include "namespaces.h" | ||
| 10 | #include <linux/types.h> | 9 | #include <linux/types.h> |
| 11 | 10 | ||
| 11 | struct nsinfo; | ||
| 12 | |||
| 12 | extern struct perf_tool build_id__mark_dso_hit_ops; | 13 | extern struct perf_tool build_id__mark_dso_hit_ops; |
| 13 | struct dso; | 14 | struct dso; |
| 14 | struct feat_fd; | 15 | struct feat_fd; |
diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build index 988fef1b11d7..613ecfd76527 100644 --- a/tools/perf/util/c++/Build +++ b/tools/perf/util/c++/Build | |||
| @@ -1,2 +1,2 @@ | |||
| 1 | libperf-$(CONFIG_CLANGLLVM) += clang.o | 1 | perf-$(CONFIG_CLANGLLVM) += clang.o |
| 2 | libperf-$(CONFIG_CLANGLLVM) += clang-test.o | 2 | perf-$(CONFIG_CLANGLLVM) += clang-test.o |
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index dc2212e12184..abb608b09269 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
| @@ -23,8 +23,10 @@ | |||
| 23 | #include "util.h" | 23 | #include "util.h" |
| 24 | #include "sort.h" | 24 | #include "sort.h" |
| 25 | #include "machine.h" | 25 | #include "machine.h" |
| 26 | #include "map.h" | ||
| 26 | #include "callchain.h" | 27 | #include "callchain.h" |
| 27 | #include "branch.h" | 28 | #include "branch.h" |
| 29 | #include "symbol.h" | ||
| 28 | 30 | ||
| 29 | #define CALLCHAIN_PARAM_DEFAULT \ | 31 | #define CALLCHAIN_PARAM_DEFAULT \ |
| 30 | .mode = CHAIN_GRAPH_ABS, \ | 32 | .mode = CHAIN_GRAPH_ABS, \ |
| @@ -1577,3 +1579,18 @@ int callchain_cursor__copy(struct callchain_cursor *dst, | |||
| 1577 | 1579 | ||
| 1578 | return rc; | 1580 | return rc; |
| 1579 | } | 1581 | } |
| 1582 | |||
| 1583 | /* | ||
| 1584 | * Initialize a cursor before adding entries inside, but keep | ||
| 1585 | * the previously allocated entries as a cache. | ||
| 1586 | */ | ||
| 1587 | void callchain_cursor_reset(struct callchain_cursor *cursor) | ||
| 1588 | { | ||
| 1589 | struct callchain_cursor_node *node; | ||
| 1590 | |||
| 1591 | cursor->nr = 0; | ||
| 1592 | cursor->last = &cursor->first; | ||
| 1593 | |||
| 1594 | for (node = cursor->first; node != NULL; node = node->next) | ||
| 1595 | map__zput(node->map); | ||
| 1596 | } | ||
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 99d38ac019b8..80e056a3d882 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
| @@ -2,14 +2,14 @@ | |||
| 2 | #ifndef __PERF_CALLCHAIN_H | 2 | #ifndef __PERF_CALLCHAIN_H |
| 3 | #define __PERF_CALLCHAIN_H | 3 | #define __PERF_CALLCHAIN_H |
| 4 | 4 | ||
| 5 | #include "../perf.h" | ||
| 6 | #include <linux/list.h> | 5 | #include <linux/list.h> |
| 7 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
| 8 | #include "event.h" | 7 | #include "event.h" |
| 9 | #include "map.h" | 8 | #include "map_symbol.h" |
| 10 | #include "symbol.h" | ||
| 11 | #include "branch.h" | 9 | #include "branch.h" |
| 12 | 10 | ||
| 11 | struct map; | ||
| 12 | |||
| 13 | #define HELP_PAD "\t\t\t\t" | 13 | #define HELP_PAD "\t\t\t\t" |
| 14 | 14 | ||
| 15 | #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" | 15 | #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace):\n\n" |
| @@ -188,20 +188,7 @@ int callchain_append(struct callchain_root *root, | |||
| 188 | int callchain_merge(struct callchain_cursor *cursor, | 188 | int callchain_merge(struct callchain_cursor *cursor, |
| 189 | struct callchain_root *dst, struct callchain_root *src); | 189 | struct callchain_root *dst, struct callchain_root *src); |
| 190 | 190 | ||
| 191 | /* | 191 | void callchain_cursor_reset(struct callchain_cursor *cursor); |
| 192 | * Initialize a cursor before adding entries inside, but keep | ||
| 193 | * the previously allocated entries as a cache. | ||
| 194 | */ | ||
| 195 | static inline void callchain_cursor_reset(struct callchain_cursor *cursor) | ||
| 196 | { | ||
| 197 | struct callchain_cursor_node *node; | ||
| 198 | |||
| 199 | cursor->nr = 0; | ||
| 200 | cursor->last = &cursor->first; | ||
| 201 | |||
| 202 | for (node = cursor->first; node != NULL; node = node->next) | ||
| 203 | map__zput(node->map); | ||
| 204 | } | ||
| 205 | 192 | ||
| 206 | int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, | 193 | int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip, |
| 207 | struct map *map, struct symbol *sym, | 194 | struct map *map, struct symbol *sym, |
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 39e628b8938e..39b8c4ec4e2e 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c | |||
| @@ -1,7 +1,6 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
| 3 | #include "cache.h" | 3 | #include "cache.h" |
| 4 | #include "config.h" | ||
| 5 | #include <stdlib.h> | 4 | #include <stdlib.h> |
| 6 | #include <stdio.h> | 5 | #include <stdio.h> |
| 7 | #include "color.h" | 6 | #include "color.h" |
| @@ -10,44 +9,6 @@ | |||
| 10 | 9 | ||
| 11 | int perf_use_color_default = -1; | 10 | int perf_use_color_default = -1; |
| 12 | 11 | ||
| 13 | int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) | ||
| 14 | { | ||
| 15 | if (value) { | ||
| 16 | if (!strcasecmp(value, "never")) | ||
| 17 | return 0; | ||
| 18 | if (!strcasecmp(value, "always")) | ||
| 19 | return 1; | ||
| 20 | if (!strcasecmp(value, "auto")) | ||
| 21 | goto auto_color; | ||
| 22 | } | ||
| 23 | |||
| 24 | /* Missing or explicit false to turn off colorization */ | ||
| 25 | if (!perf_config_bool(var, value)) | ||
| 26 | return 0; | ||
| 27 | |||
| 28 | /* any normal truth value defaults to 'auto' */ | ||
| 29 | auto_color: | ||
| 30 | if (stdout_is_tty < 0) | ||
| 31 | stdout_is_tty = isatty(1); | ||
| 32 | if (stdout_is_tty || pager_in_use()) { | ||
| 33 | char *term = getenv("TERM"); | ||
| 34 | if (term && strcmp(term, "dumb")) | ||
| 35 | return 1; | ||
| 36 | } | ||
| 37 | return 0; | ||
| 38 | } | ||
| 39 | |||
| 40 | int perf_color_default_config(const char *var, const char *value, | ||
| 41 | void *cb __maybe_unused) | ||
| 42 | { | ||
| 43 | if (!strcmp(var, "color.ui")) { | ||
| 44 | perf_use_color_default = perf_config_colorbool(var, value, -1); | ||
| 45 | return 0; | ||
| 46 | } | ||
| 47 | |||
| 48 | return 0; | ||
| 49 | } | ||
| 50 | |||
| 51 | static int __color_vsnprintf(char *bf, size_t size, const char *color, | 12 | static int __color_vsnprintf(char *bf, size_t size, const char *color, |
| 52 | const char *fmt, va_list args, const char *trail) | 13 | const char *fmt, va_list args, const char *trail) |
| 53 | { | 14 | { |
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 22777b1812ee..01f7bed21c9b 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #define __PERF_COLOR_H | 3 | #define __PERF_COLOR_H |
| 4 | 4 | ||
| 5 | #include <stdio.h> | 5 | #include <stdio.h> |
| 6 | #include <stdarg.h> | ||
| 6 | 7 | ||
| 7 | /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ | 8 | /* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ |
| 8 | #define COLOR_MAXLEN 24 | 9 | #define COLOR_MAXLEN 24 |
diff --git a/tools/perf/util/color_config.c b/tools/perf/util/color_config.c new file mode 100644 index 000000000000..817dc56e7e95 --- /dev/null +++ b/tools/perf/util/color_config.c | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #include <linux/kernel.h> | ||
| 3 | #include "cache.h" | ||
| 4 | #include "config.h" | ||
| 5 | #include <stdlib.h> | ||
| 6 | #include <stdio.h> | ||
| 7 | #include "color.h" | ||
| 8 | #include <math.h> | ||
| 9 | #include <unistd.h> | ||
| 10 | |||
| 11 | int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) | ||
| 12 | { | ||
| 13 | if (value) { | ||
| 14 | if (!strcasecmp(value, "never")) | ||
| 15 | return 0; | ||
| 16 | if (!strcasecmp(value, "always")) | ||
| 17 | return 1; | ||
| 18 | if (!strcasecmp(value, "auto")) | ||
| 19 | goto auto_color; | ||
| 20 | } | ||
| 21 | |||
| 22 | /* Missing or explicit false to turn off colorization */ | ||
| 23 | if (!perf_config_bool(var, value)) | ||
| 24 | return 0; | ||
| 25 | |||
| 26 | /* any normal truth value defaults to 'auto' */ | ||
| 27 | auto_color: | ||
| 28 | if (stdout_is_tty < 0) | ||
| 29 | stdout_is_tty = isatty(1); | ||
| 30 | if (stdout_is_tty || pager_in_use()) { | ||
| 31 | char *term = getenv("TERM"); | ||
| 32 | if (term && strcmp(term, "dumb")) | ||
| 33 | return 1; | ||
| 34 | } | ||
| 35 | return 0; | ||
| 36 | } | ||
| 37 | |||
| 38 | int perf_color_default_config(const char *var, const char *value, | ||
| 39 | void *cb __maybe_unused) | ||
| 40 | { | ||
| 41 | if (!strcmp(var, "color.ui")) { | ||
| 42 | perf_use_color_default = perf_config_colorbool(var, value, -1); | ||
| 43 | return 0; | ||
| 44 | } | ||
| 45 | |||
| 46 | return 0; | ||
| 47 | } | ||
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index 31279a7bd919..1066de92af12 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <stdio.h> | 6 | #include <stdio.h> |
| 7 | #include <string.h> | 7 | #include <string.h> |
| 8 | #include <linux/refcount.h> | 8 | #include <linux/refcount.h> |
| 9 | #include <linux/rbtree.h> | ||
| 9 | #include "rwsem.h" | 10 | #include "rwsem.h" |
| 10 | 11 | ||
| 11 | struct comm_str { | 12 | struct comm_str { |
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h index 3e5c438fe85e..f35d8fbfa2dd 100644 --- a/tools/perf/util/comm.h +++ b/tools/perf/util/comm.h | |||
| @@ -2,9 +2,9 @@ | |||
| 2 | #ifndef __PERF_COMM_H | 2 | #ifndef __PERF_COMM_H |
| 3 | #define __PERF_COMM_H | 3 | #define __PERF_COMM_H |
| 4 | 4 | ||
| 5 | #include "../perf.h" | ||
| 6 | #include <linux/rbtree.h> | ||
| 7 | #include <linux/list.h> | 5 | #include <linux/list.h> |
| 6 | #include <linux/types.h> | ||
| 7 | #include <stdbool.h> | ||
| 8 | 8 | ||
| 9 | struct comm_str; | 9 | struct comm_str; |
| 10 | 10 | ||
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 1ea8f898f1a1..fa092511c52b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <sys/param.h> | 13 | #include <sys/param.h> |
| 14 | #include "util.h" | 14 | #include "util.h" |
| 15 | #include "cache.h" | 15 | #include "cache.h" |
| 16 | #include "callchain.h" | ||
| 16 | #include <subcmd/exec-cmd.h> | 17 | #include <subcmd/exec-cmd.h> |
| 17 | #include "util/event.h" /* proc_map_timeout */ | 18 | #include "util/event.h" /* proc_map_timeout */ |
| 18 | #include "util/hist.h" /* perf_hist_config */ | 19 | #include "util/hist.h" /* perf_hist_config */ |
diff --git a/tools/perf/util/cpu-set-sched.h b/tools/perf/util/cpu-set-sched.h new file mode 100644 index 000000000000..8cf4e40d322a --- /dev/null +++ b/tools/perf/util/cpu-set-sched.h | |||
| @@ -0,0 +1,50 @@ | |||
| 1 | // SPDX-License-Identifier: LGPL-2.1 | ||
| 2 | // Definitions taken from glibc for use with older systems, same licensing. | ||
| 3 | #ifndef _CPU_SET_SCHED_PERF_H | ||
| 4 | #define _CPU_SET_SCHED_PERF_H | ||
| 5 | |||
| 6 | #include <features.h> | ||
| 7 | #include <sched.h> | ||
| 8 | |||
| 9 | #ifndef CPU_EQUAL | ||
| 10 | #ifndef __CPU_EQUAL_S | ||
| 11 | #if __GNUC_PREREQ (2, 91) | ||
| 12 | # define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ | ||
| 13 | (__builtin_memcmp (cpusetp1, cpusetp2, setsize) == 0) | ||
| 14 | #else | ||
| 15 | # define __CPU_EQUAL_S(setsize, cpusetp1, cpusetp2) \ | ||
| 16 | (__extension__ \ | ||
| 17 | ({ const __cpu_mask *__arr1 = (cpusetp1)->__bits; \ | ||
| 18 | const __cpu_mask *__arr2 = (cpusetp2)->__bits; \ | ||
| 19 | size_t __imax = (setsize) / sizeof (__cpu_mask); \ | ||
| 20 | size_t __i; \ | ||
| 21 | for (__i = 0; __i < __imax; ++__i) \ | ||
| 22 | if (__arr1[__i] != __arr2[__i]) \ | ||
| 23 | break; \ | ||
| 24 | __i == __imax; })) | ||
| 25 | #endif | ||
| 26 | #endif // __CPU_EQUAL_S | ||
| 27 | |||
| 28 | #define CPU_EQUAL(cpusetp1, cpusetp2) \ | ||
| 29 | __CPU_EQUAL_S (sizeof (cpu_set_t), cpusetp1, cpusetp2) | ||
| 30 | #endif // CPU_EQUAL | ||
| 31 | |||
| 32 | #ifndef CPU_OR | ||
| 33 | #ifndef __CPU_OP_S | ||
| 34 | #define __CPU_OP_S(setsize, destset, srcset1, srcset2, op) \ | ||
| 35 | (__extension__ \ | ||
| 36 | ({ cpu_set_t *__dest = (destset); \ | ||
| 37 | const __cpu_mask *__arr1 = (srcset1)->__bits; \ | ||
| 38 | const __cpu_mask *__arr2 = (srcset2)->__bits; \ | ||
| 39 | size_t __imax = (setsize) / sizeof (__cpu_mask); \ | ||
| 40 | size_t __i; \ | ||
| 41 | for (__i = 0; __i < __imax; ++__i) \ | ||
| 42 | ((__cpu_mask *) __dest->__bits)[__i] = __arr1[__i] op __arr2[__i]; \ | ||
| 43 | __dest; })) | ||
| 44 | #endif // __CPU_OP_S | ||
| 45 | |||
| 46 | #define CPU_OR(destset, srcset1, srcset2) \ | ||
| 47 | __CPU_OP_S (sizeof (cpu_set_t), destset, srcset1, srcset2, |) | ||
| 48 | #endif // CPU_OR | ||
| 49 | |||
| 50 | #endif // _CPU_SET_SCHED_PERF_H | ||
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 383674f448fc..0b599229bc7e 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c | |||
| @@ -681,7 +681,7 @@ size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size) | |||
| 681 | 681 | ||
| 682 | #undef COMMA | 682 | #undef COMMA |
| 683 | 683 | ||
| 684 | pr_debug("cpumask list: %s\n", buf); | 684 | pr_debug2("cpumask list: %s\n", buf); |
| 685 | return ret; | 685 | return ret; |
| 686 | } | 686 | } |
| 687 | 687 | ||
| @@ -730,3 +730,13 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size) | |||
| 730 | buf[size - 1] = '\0'; | 730 | buf[size - 1] = '\0'; |
| 731 | return ptr - buf; | 731 | return ptr - buf; |
| 732 | } | 732 | } |
| 733 | |||
| 734 | const struct cpu_map *cpu_map__online(void) /* thread unsafe */ | ||
| 735 | { | ||
| 736 | static const struct cpu_map *online = NULL; | ||
| 737 | |||
| 738 | if (!online) | ||
| 739 | online = cpu_map__new(NULL); /* from /sys/devices/system/cpu/online */ | ||
| 740 | |||
| 741 | return online; | ||
| 742 | } | ||
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index ed8999d1a640..f00ce624b9f7 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h | |||
| @@ -29,6 +29,7 @@ int cpu_map__get_core_id(int cpu); | |||
| 29 | int cpu_map__get_core(struct cpu_map *map, int idx, void *data); | 29 | int cpu_map__get_core(struct cpu_map *map, int idx, void *data); |
| 30 | int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); | 30 | int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); |
| 31 | int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); | 31 | int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); |
| 32 | const struct cpu_map *cpu_map__online(void); /* thread unsafe */ | ||
| 32 | 33 | ||
| 33 | struct cpu_map *cpu_map__get(struct cpu_map *map); | 34 | struct cpu_map *cpu_map__get(struct cpu_map *map); |
| 34 | void cpu_map__put(struct cpu_map *map); | 35 | void cpu_map__put(struct cpu_map *map); |
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c new file mode 100644 index 000000000000..ece0710249d4 --- /dev/null +++ b/tools/perf/util/cputopo.c | |||
| @@ -0,0 +1,277 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #include <sys/param.h> | ||
| 3 | #include <inttypes.h> | ||
| 4 | #include <api/fs/fs.h> | ||
| 5 | |||
| 6 | #include "cputopo.h" | ||
| 7 | #include "cpumap.h" | ||
| 8 | #include "util.h" | ||
| 9 | #include "env.h" | ||
| 10 | |||
| 11 | |||
| 12 | #define CORE_SIB_FMT \ | ||
| 13 | "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" | ||
| 14 | #define THRD_SIB_FMT \ | ||
| 15 | "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" | ||
| 16 | #define NODE_ONLINE_FMT \ | ||
| 17 | "%s/devices/system/node/online" | ||
| 18 | #define NODE_MEMINFO_FMT \ | ||
| 19 | "%s/devices/system/node/node%d/meminfo" | ||
| 20 | #define NODE_CPULIST_FMT \ | ||
| 21 | "%s/devices/system/node/node%d/cpulist" | ||
| 22 | |||
| 23 | static int build_cpu_topology(struct cpu_topology *tp, int cpu) | ||
| 24 | { | ||
| 25 | FILE *fp; | ||
| 26 | char filename[MAXPATHLEN]; | ||
| 27 | char *buf = NULL, *p; | ||
| 28 | size_t len = 0; | ||
| 29 | ssize_t sret; | ||
| 30 | u32 i = 0; | ||
| 31 | int ret = -1; | ||
| 32 | |||
| 33 | scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT, | ||
| 34 | sysfs__mountpoint(), cpu); | ||
| 35 | fp = fopen(filename, "r"); | ||
| 36 | if (!fp) | ||
| 37 | goto try_threads; | ||
| 38 | |||
| 39 | sret = getline(&buf, &len, fp); | ||
| 40 | fclose(fp); | ||
| 41 | if (sret <= 0) | ||
| 42 | goto try_threads; | ||
| 43 | |||
| 44 | p = strchr(buf, '\n'); | ||
| 45 | if (p) | ||
| 46 | *p = '\0'; | ||
| 47 | |||
| 48 | for (i = 0; i < tp->core_sib; i++) { | ||
| 49 | if (!strcmp(buf, tp->core_siblings[i])) | ||
| 50 | break; | ||
| 51 | } | ||
| 52 | if (i == tp->core_sib) { | ||
| 53 | tp->core_siblings[i] = buf; | ||
| 54 | tp->core_sib++; | ||
| 55 | buf = NULL; | ||
| 56 | len = 0; | ||
| 57 | } | ||
| 58 | ret = 0; | ||
| 59 | |||
| 60 | try_threads: | ||
| 61 | scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, | ||
| 62 | sysfs__mountpoint(), cpu); | ||
| 63 | fp = fopen(filename, "r"); | ||
| 64 | if (!fp) | ||
| 65 | goto done; | ||
| 66 | |||
| 67 | if (getline(&buf, &len, fp) <= 0) | ||
| 68 | goto done; | ||
| 69 | |||
| 70 | p = strchr(buf, '\n'); | ||
| 71 | if (p) | ||
| 72 | *p = '\0'; | ||
| 73 | |||
| 74 | for (i = 0; i < tp->thread_sib; i++) { | ||
| 75 | if (!strcmp(buf, tp->thread_siblings[i])) | ||
| 76 | break; | ||
| 77 | } | ||
| 78 | if (i == tp->thread_sib) { | ||
| 79 | tp->thread_siblings[i] = buf; | ||
| 80 | tp->thread_sib++; | ||
| 81 | buf = NULL; | ||
| 82 | } | ||
| 83 | ret = 0; | ||
| 84 | done: | ||
| 85 | if (fp) | ||
| 86 | fclose(fp); | ||
| 87 | free(buf); | ||
| 88 | return ret; | ||
| 89 | } | ||
| 90 | |||
| 91 | void cpu_topology__delete(struct cpu_topology *tp) | ||
| 92 | { | ||
| 93 | u32 i; | ||
| 94 | |||
| 95 | if (!tp) | ||
| 96 | return; | ||
| 97 | |||
| 98 | for (i = 0 ; i < tp->core_sib; i++) | ||
| 99 | zfree(&tp->core_siblings[i]); | ||
| 100 | |||
| 101 | for (i = 0 ; i < tp->thread_sib; i++) | ||
| 102 | zfree(&tp->thread_siblings[i]); | ||
| 103 | |||
| 104 | free(tp); | ||
| 105 | } | ||
| 106 | |||
| 107 | struct cpu_topology *cpu_topology__new(void) | ||
| 108 | { | ||
| 109 | struct cpu_topology *tp = NULL; | ||
| 110 | void *addr; | ||
| 111 | u32 nr, i; | ||
| 112 | size_t sz; | ||
| 113 | long ncpus; | ||
| 114 | int ret = -1; | ||
| 115 | struct cpu_map *map; | ||
| 116 | |||
| 117 | ncpus = cpu__max_present_cpu(); | ||
| 118 | |||
| 119 | /* build online CPU map */ | ||
| 120 | map = cpu_map__new(NULL); | ||
| 121 | if (map == NULL) { | ||
| 122 | pr_debug("failed to get system cpumap\n"); | ||
| 123 | return NULL; | ||
| 124 | } | ||
| 125 | |||
| 126 | nr = (u32)(ncpus & UINT_MAX); | ||
| 127 | |||
| 128 | sz = nr * sizeof(char *); | ||
| 129 | addr = calloc(1, sizeof(*tp) + 2 * sz); | ||
| 130 | if (!addr) | ||
| 131 | goto out_free; | ||
| 132 | |||
| 133 | tp = addr; | ||
| 134 | addr += sizeof(*tp); | ||
| 135 | tp->core_siblings = addr; | ||
| 136 | addr += sz; | ||
| 137 | tp->thread_siblings = addr; | ||
| 138 | |||
| 139 | for (i = 0; i < nr; i++) { | ||
| 140 | if (!cpu_map__has(map, i)) | ||
| 141 | continue; | ||
| 142 | |||
| 143 | ret = build_cpu_topology(tp, i); | ||
| 144 | if (ret < 0) | ||
| 145 | break; | ||
| 146 | } | ||
| 147 | |||
| 148 | out_free: | ||
| 149 | cpu_map__put(map); | ||
| 150 | if (ret) { | ||
| 151 | cpu_topology__delete(tp); | ||
| 152 | tp = NULL; | ||
| 153 | } | ||
| 154 | return tp; | ||
| 155 | } | ||
| 156 | |||
| 157 | static int load_numa_node(struct numa_topology_node *node, int nr) | ||
| 158 | { | ||
| 159 | char str[MAXPATHLEN]; | ||
| 160 | char field[32]; | ||
| 161 | char *buf = NULL, *p; | ||
| 162 | size_t len = 0; | ||
| 163 | int ret = -1; | ||
| 164 | FILE *fp; | ||
| 165 | u64 mem; | ||
| 166 | |||
| 167 | node->node = (u32) nr; | ||
| 168 | |||
| 169 | scnprintf(str, MAXPATHLEN, NODE_MEMINFO_FMT, | ||
| 170 | sysfs__mountpoint(), nr); | ||
| 171 | fp = fopen(str, "r"); | ||
| 172 | if (!fp) | ||
| 173 | return -1; | ||
| 174 | |||
| 175 | while (getline(&buf, &len, fp) > 0) { | ||
| 176 | /* skip over invalid lines */ | ||
| 177 | if (!strchr(buf, ':')) | ||
| 178 | continue; | ||
| 179 | if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2) | ||
| 180 | goto err; | ||
| 181 | if (!strcmp(field, "MemTotal:")) | ||
| 182 | node->mem_total = mem; | ||
| 183 | if (!strcmp(field, "MemFree:")) | ||
| 184 | node->mem_free = mem; | ||
| 185 | if (node->mem_total && node->mem_free) | ||
| 186 | break; | ||
| 187 | } | ||
| 188 | |||
| 189 | fclose(fp); | ||
| 190 | fp = NULL; | ||
| 191 | |||
| 192 | scnprintf(str, MAXPATHLEN, NODE_CPULIST_FMT, | ||
| 193 | sysfs__mountpoint(), nr); | ||
| 194 | |||
| 195 | fp = fopen(str, "r"); | ||
| 196 | if (!fp) | ||
| 197 | return -1; | ||
| 198 | |||
| 199 | if (getline(&buf, &len, fp) <= 0) | ||
| 200 | goto err; | ||
| 201 | |||
| 202 | p = strchr(buf, '\n'); | ||
| 203 | if (p) | ||
| 204 | *p = '\0'; | ||
| 205 | |||
| 206 | node->cpus = buf; | ||
| 207 | fclose(fp); | ||
| 208 | return 0; | ||
| 209 | |||
| 210 | err: | ||
| 211 | free(buf); | ||
| 212 | if (fp) | ||
| 213 | fclose(fp); | ||
| 214 | return ret; | ||
| 215 | } | ||
| 216 | |||
| 217 | struct numa_topology *numa_topology__new(void) | ||
| 218 | { | ||
| 219 | struct cpu_map *node_map = NULL; | ||
| 220 | struct numa_topology *tp = NULL; | ||
| 221 | char path[MAXPATHLEN]; | ||
| 222 | char *buf = NULL; | ||
| 223 | size_t len = 0; | ||
| 224 | u32 nr, i; | ||
| 225 | FILE *fp; | ||
| 226 | char *c; | ||
| 227 | |||
| 228 | scnprintf(path, MAXPATHLEN, NODE_ONLINE_FMT, | ||
| 229 | sysfs__mountpoint()); | ||
| 230 | |||
| 231 | fp = fopen(path, "r"); | ||
| 232 | if (!fp) | ||
| 233 | return NULL; | ||
| 234 | |||
| 235 | if (getline(&buf, &len, fp) <= 0) | ||
| 236 | goto out; | ||
| 237 | |||
| 238 | c = strchr(buf, '\n'); | ||
| 239 | if (c) | ||
| 240 | *c = '\0'; | ||
| 241 | |||
| 242 | node_map = cpu_map__new(buf); | ||
| 243 | if (!node_map) | ||
| 244 | goto out; | ||
| 245 | |||
| 246 | nr = (u32) node_map->nr; | ||
| 247 | |||
| 248 | tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr); | ||
| 249 | if (!tp) | ||
| 250 | goto out; | ||
| 251 | |||
| 252 | tp->nr = nr; | ||
| 253 | |||
| 254 | for (i = 0; i < nr; i++) { | ||
| 255 | if (load_numa_node(&tp->nodes[i], node_map->map[i])) { | ||
| 256 | numa_topology__delete(tp); | ||
| 257 | tp = NULL; | ||
| 258 | break; | ||
| 259 | } | ||
| 260 | } | ||
| 261 | |||
| 262 | out: | ||
| 263 | free(buf); | ||
| 264 | fclose(fp); | ||
| 265 | cpu_map__put(node_map); | ||
| 266 | return tp; | ||
| 267 | } | ||
| 268 | |||
| 269 | void numa_topology__delete(struct numa_topology *tp) | ||
| 270 | { | ||
| 271 | u32 i; | ||
| 272 | |||
| 273 | for (i = 0; i < tp->nr; i++) | ||
| 274 | free(tp->nodes[i].cpus); | ||
| 275 | |||
| 276 | free(tp); | ||
| 277 | } | ||
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h new file mode 100644 index 000000000000..47a97e71acdf --- /dev/null +++ b/tools/perf/util/cputopo.h | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __PERF_CPUTOPO_H | ||
| 3 | #define __PERF_CPUTOPO_H | ||
| 4 | |||
| 5 | #include <linux/types.h> | ||
| 6 | #include "env.h" | ||
| 7 | |||
| 8 | struct cpu_topology { | ||
| 9 | u32 core_sib; | ||
| 10 | u32 thread_sib; | ||
| 11 | char **core_siblings; | ||
| 12 | char **thread_siblings; | ||
| 13 | }; | ||
| 14 | |||
| 15 | struct numa_topology_node { | ||
| 16 | char *cpus; | ||
| 17 | u32 node; | ||
| 18 | u64 mem_total; | ||
| 19 | u64 mem_free; | ||
| 20 | }; | ||
| 21 | |||
| 22 | struct numa_topology { | ||
| 23 | u32 nr; | ||
| 24 | struct numa_topology_node nodes[0]; | ||
| 25 | }; | ||
| 26 | |||
| 27 | struct cpu_topology *cpu_topology__new(void); | ||
| 28 | void cpu_topology__delete(struct cpu_topology *tp); | ||
| 29 | |||
| 30 | struct numa_topology *numa_topology__new(void); | ||
| 31 | void numa_topology__delete(struct numa_topology *tp); | ||
| 32 | |||
| 33 | #endif /* __PERF_CPUTOPO_H */ | ||
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build index bc22c39c727f..216cb17a3322 100644 --- a/tools/perf/util/cs-etm-decoder/Build +++ b/tools/perf/util/cs-etm-decoder/Build | |||
| @@ -1 +1 @@ | |||
| libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o | perf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o | ||
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 8c155575c6c5..ba4c623cd8de 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | |||
| @@ -290,6 +290,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) | |||
| 290 | decoder->packet_buffer[i].instr_count = 0; | 290 | decoder->packet_buffer[i].instr_count = 0; |
| 291 | decoder->packet_buffer[i].last_instr_taken_branch = false; | 291 | decoder->packet_buffer[i].last_instr_taken_branch = false; |
| 292 | decoder->packet_buffer[i].last_instr_size = 0; | 292 | decoder->packet_buffer[i].last_instr_size = 0; |
| 293 | decoder->packet_buffer[i].last_instr_type = 0; | ||
| 294 | decoder->packet_buffer[i].last_instr_subtype = 0; | ||
| 295 | decoder->packet_buffer[i].last_instr_cond = 0; | ||
| 296 | decoder->packet_buffer[i].flags = 0; | ||
| 297 | decoder->packet_buffer[i].exception_number = UINT32_MAX; | ||
| 298 | decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; | ||
| 293 | decoder->packet_buffer[i].cpu = INT_MIN; | 299 | decoder->packet_buffer[i].cpu = INT_MIN; |
| 294 | } | 300 | } |
| 295 | } | 301 | } |
| @@ -300,14 +306,12 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, | |||
| 300 | enum cs_etm_sample_type sample_type) | 306 | enum cs_etm_sample_type sample_type) |
| 301 | { | 307 | { |
| 302 | u32 et = 0; | 308 | u32 et = 0; |
| 303 | struct int_node *inode = NULL; | 309 | int cpu; |
| 304 | 310 | ||
| 305 | if (decoder->packet_count >= MAX_BUFFER - 1) | 311 | if (decoder->packet_count >= MAX_BUFFER - 1) |
| 306 | return OCSD_RESP_FATAL_SYS_ERR; | 312 | return OCSD_RESP_FATAL_SYS_ERR; |
| 307 | 313 | ||
| 308 | /* Search the RB tree for the cpu associated with this traceID */ | 314 | if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) |
| 309 | inode = intlist__find(traceid_list, trace_chan_id); | ||
| 310 | if (!inode) | ||
| 311 | return OCSD_RESP_FATAL_SYS_ERR; | 315 | return OCSD_RESP_FATAL_SYS_ERR; |
| 312 | 316 | ||
| 313 | et = decoder->tail; | 317 | et = decoder->tail; |
| @@ -317,12 +321,18 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, | |||
| 317 | 321 | ||
| 318 | decoder->packet_buffer[et].sample_type = sample_type; | 322 | decoder->packet_buffer[et].sample_type = sample_type; |
| 319 | decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; | 323 | decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; |
| 320 | decoder->packet_buffer[et].cpu = *((int *)inode->priv); | 324 | decoder->packet_buffer[et].cpu = cpu; |
| 321 | decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; | 325 | decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; |
| 322 | decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; | 326 | decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; |
| 323 | decoder->packet_buffer[et].instr_count = 0; | 327 | decoder->packet_buffer[et].instr_count = 0; |
| 324 | decoder->packet_buffer[et].last_instr_taken_branch = false; | 328 | decoder->packet_buffer[et].last_instr_taken_branch = false; |
| 325 | decoder->packet_buffer[et].last_instr_size = 0; | 329 | decoder->packet_buffer[et].last_instr_size = 0; |
| 330 | decoder->packet_buffer[et].last_instr_type = 0; | ||
| 331 | decoder->packet_buffer[et].last_instr_subtype = 0; | ||
| 332 | decoder->packet_buffer[et].last_instr_cond = 0; | ||
| 333 | decoder->packet_buffer[et].flags = 0; | ||
| 334 | decoder->packet_buffer[et].exception_number = UINT32_MAX; | ||
| 335 | decoder->packet_buffer[et].trace_chan_id = trace_chan_id; | ||
| 326 | 336 | ||
| 327 | if (decoder->packet_count == MAX_BUFFER - 1) | 337 | if (decoder->packet_count == MAX_BUFFER - 1) |
| 328 | return OCSD_RESP_WAIT; | 338 | return OCSD_RESP_WAIT; |
| @@ -366,6 +376,9 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, | |||
| 366 | packet->start_addr = elem->st_addr; | 376 | packet->start_addr = elem->st_addr; |
| 367 | packet->end_addr = elem->en_addr; | 377 | packet->end_addr = elem->en_addr; |
| 368 | packet->instr_count = elem->num_instr_range; | 378 | packet->instr_count = elem->num_instr_range; |
| 379 | packet->last_instr_type = elem->last_i_type; | ||
| 380 | packet->last_instr_subtype = elem->last_i_subtype; | ||
| 381 | packet->last_instr_cond = elem->last_instr_cond; | ||
| 369 | 382 | ||
| 370 | switch (elem->last_i_type) { | 383 | switch (elem->last_i_type) { |
| 371 | case OCSD_INSTR_BR: | 384 | case OCSD_INSTR_BR: |
| @@ -395,10 +408,20 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, | |||
| 395 | 408 | ||
| 396 | static ocsd_datapath_resp_t | 409 | static ocsd_datapath_resp_t |
| 397 | cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, | 410 | cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, |
| 411 | const ocsd_generic_trace_elem *elem, | ||
| 398 | const uint8_t trace_chan_id) | 412 | const uint8_t trace_chan_id) |
| 399 | { | 413 | { int ret = 0; |
| 400 | return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, | 414 | struct cs_etm_packet *packet; |
| 401 | CS_ETM_EXCEPTION); | 415 | |
| 416 | ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, | ||
| 417 | CS_ETM_EXCEPTION); | ||
| 418 | if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) | ||
| 419 | return ret; | ||
| 420 | |||
| 421 | packet = &decoder->packet_buffer[decoder->tail]; | ||
| 422 | packet->exception_number = elem->exception_number; | ||
| 423 | |||
| 424 | return ret; | ||
| 402 | } | 425 | } |
| 403 | 426 | ||
| 404 | static ocsd_datapath_resp_t | 427 | static ocsd_datapath_resp_t |
| @@ -432,7 +455,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( | |||
| 432 | trace_chan_id); | 455 | trace_chan_id); |
| 433 | break; | 456 | break; |
| 434 | case OCSD_GEN_TRC_ELEM_EXCEPTION: | 457 | case OCSD_GEN_TRC_ELEM_EXCEPTION: |
| 435 | resp = cs_etm_decoder__buffer_exception(decoder, | 458 | resp = cs_etm_decoder__buffer_exception(decoder, elem, |
| 436 | trace_chan_id); | 459 | trace_chan_id); |
| 437 | break; | 460 | break; |
| 438 | case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: | 461 | case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: |
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index a6407d41598f..3ab11dfa92ae 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | |||
| @@ -15,13 +15,6 @@ | |||
| 15 | 15 | ||
| 16 | struct cs_etm_decoder; | 16 | struct cs_etm_decoder; |
| 17 | 17 | ||
| 18 | struct cs_etm_buffer { | ||
| 19 | const unsigned char *buf; | ||
| 20 | size_t len; | ||
| 21 | u64 offset; | ||
| 22 | u64 ref_timestamp; | ||
| 23 | }; | ||
| 24 | |||
| 25 | enum cs_etm_sample_type { | 18 | enum cs_etm_sample_type { |
| 26 | CS_ETM_EMPTY, | 19 | CS_ETM_EMPTY, |
| 27 | CS_ETM_RANGE, | 20 | CS_ETM_RANGE, |
| @@ -43,8 +36,14 @@ struct cs_etm_packet { | |||
| 43 | u64 start_addr; | 36 | u64 start_addr; |
| 44 | u64 end_addr; | 37 | u64 end_addr; |
| 45 | u32 instr_count; | 38 | u32 instr_count; |
| 39 | u32 last_instr_type; | ||
| 40 | u32 last_instr_subtype; | ||
| 41 | u32 flags; | ||
| 42 | u32 exception_number; | ||
| 43 | u8 last_instr_cond; | ||
| 46 | u8 last_instr_taken_branch; | 44 | u8 last_instr_taken_branch; |
| 47 | u8 last_instr_size; | 45 | u8 last_instr_size; |
| 46 | u8 trace_chan_id; | ||
| 48 | int cpu; | 47 | int cpu; |
| 49 | }; | 48 | }; |
| 50 | 49 | ||
| @@ -99,9 +98,10 @@ enum { | |||
| 99 | CS_ETM_PROTO_PTM, | 98 | CS_ETM_PROTO_PTM, |
| 100 | }; | 99 | }; |
| 101 | 100 | ||
| 102 | enum { | 101 | enum cs_etm_decoder_operation { |
| 103 | CS_ETM_OPERATION_PRINT = 1, | 102 | CS_ETM_OPERATION_PRINT = 1, |
| 104 | CS_ETM_OPERATION_DECODE, | 103 | CS_ETM_OPERATION_DECODE, |
| 104 | CS_ETM_OPERATION_MAX, | ||
| 105 | }; | 105 | }; |
| 106 | 106 | ||
| 107 | int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, | 107 | int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, |
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 27a374ddf661..110804936fc3 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include <linux/log2.h> | 12 | #include <linux/log2.h> |
| 13 | #include <linux/types.h> | 13 | #include <linux/types.h> |
| 14 | 14 | ||
| 15 | #include <opencsd/ocsd_if_types.h> | ||
| 15 | #include <stdlib.h> | 16 | #include <stdlib.h> |
| 16 | 17 | ||
| 17 | #include "auxtrace.h" | 18 | #include "auxtrace.h" |
| @@ -24,6 +25,7 @@ | |||
| 24 | #include "machine.h" | 25 | #include "machine.h" |
| 25 | #include "map.h" | 26 | #include "map.h" |
| 26 | #include "perf.h" | 27 | #include "perf.h" |
| 28 | #include "symbol.h" | ||
| 27 | #include "thread.h" | 29 | #include "thread.h" |
| 28 | #include "thread_map.h" | 30 | #include "thread_map.h" |
| 29 | #include "thread-stack.h" | 31 | #include "thread-stack.h" |
| @@ -63,13 +65,10 @@ struct cs_etm_queue { | |||
| 63 | struct thread *thread; | 65 | struct thread *thread; |
| 64 | struct cs_etm_decoder *decoder; | 66 | struct cs_etm_decoder *decoder; |
| 65 | struct auxtrace_buffer *buffer; | 67 | struct auxtrace_buffer *buffer; |
| 66 | const struct cs_etm_state *state; | ||
| 67 | union perf_event *event_buf; | 68 | union perf_event *event_buf; |
| 68 | unsigned int queue_nr; | 69 | unsigned int queue_nr; |
| 69 | pid_t pid, tid; | 70 | pid_t pid, tid; |
| 70 | int cpu; | 71 | int cpu; |
| 71 | u64 time; | ||
| 72 | u64 timestamp; | ||
| 73 | u64 offset; | 72 | u64 offset; |
| 74 | u64 period_instructions; | 73 | u64 period_instructions; |
| 75 | struct branch_stack *last_branch; | 74 | struct branch_stack *last_branch; |
| @@ -77,11 +76,13 @@ struct cs_etm_queue { | |||
| 77 | size_t last_branch_pos; | 76 | size_t last_branch_pos; |
| 78 | struct cs_etm_packet *prev_packet; | 77 | struct cs_etm_packet *prev_packet; |
| 79 | struct cs_etm_packet *packet; | 78 | struct cs_etm_packet *packet; |
| 79 | const unsigned char *buf; | ||
| 80 | size_t buf_len, buf_used; | ||
| 80 | }; | 81 | }; |
| 81 | 82 | ||
| 82 | static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); | 83 | static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); |
| 83 | static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, | 84 | static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, |
| 84 | pid_t tid, u64 time_); | 85 | pid_t tid); |
| 85 | 86 | ||
| 86 | /* PTMs ETMIDR [11:8] set to b0011 */ | 87 | /* PTMs ETMIDR [11:8] set to b0011 */ |
| 87 | #define ETMIDR_PTM_VERSION 0x00000300 | 88 | #define ETMIDR_PTM_VERSION 0x00000300 |
| @@ -96,6 +97,34 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr) | |||
| 96 | return CS_ETM_PROTO_ETMV3; | 97 | return CS_ETM_PROTO_ETMV3; |
| 97 | } | 98 | } |
| 98 | 99 | ||
| 100 | static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic) | ||
| 101 | { | ||
| 102 | struct int_node *inode; | ||
| 103 | u64 *metadata; | ||
| 104 | |||
| 105 | inode = intlist__find(traceid_list, trace_chan_id); | ||
| 106 | if (!inode) | ||
| 107 | return -EINVAL; | ||
| 108 | |||
| 109 | metadata = inode->priv; | ||
| 110 | *magic = metadata[CS_ETM_MAGIC]; | ||
| 111 | return 0; | ||
| 112 | } | ||
| 113 | |||
| 114 | int cs_etm__get_cpu(u8 trace_chan_id, int *cpu) | ||
| 115 | { | ||
| 116 | struct int_node *inode; | ||
| 117 | u64 *metadata; | ||
| 118 | |||
| 119 | inode = intlist__find(traceid_list, trace_chan_id); | ||
| 120 | if (!inode) | ||
| 121 | return -EINVAL; | ||
| 122 | |||
| 123 | metadata = inode->priv; | ||
| 124 | *cpu = (int)metadata[CS_ETM_CPU]; | ||
| 125 | return 0; | ||
| 126 | } | ||
| 127 | |||
| 99 | static void cs_etm__packet_dump(const char *pkt_string) | 128 | static void cs_etm__packet_dump(const char *pkt_string) |
| 100 | { | 129 | { |
| 101 | const char *color = PERF_COLOR_BLUE; | 130 | const char *color = PERF_COLOR_BLUE; |
| @@ -109,10 +138,83 @@ static void cs_etm__packet_dump(const char *pkt_string) | |||
| 109 | fflush(stdout); | 138 | fflush(stdout); |
| 110 | } | 139 | } |
| 111 | 140 | ||
| 141 | static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params, | ||
| 142 | struct cs_etm_auxtrace *etm, int idx, | ||
| 143 | u32 etmidr) | ||
| 144 | { | ||
| 145 | u64 **metadata = etm->metadata; | ||
| 146 | |||
| 147 | t_params[idx].protocol = cs_etm__get_v7_protocol_version(etmidr); | ||
| 148 | t_params[idx].etmv3.reg_ctrl = metadata[idx][CS_ETM_ETMCR]; | ||
| 149 | t_params[idx].etmv3.reg_trc_id = metadata[idx][CS_ETM_ETMTRACEIDR]; | ||
| 150 | } | ||
| 151 | |||
| 152 | static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params, | ||
| 153 | struct cs_etm_auxtrace *etm, int idx) | ||
| 154 | { | ||
| 155 | u64 **metadata = etm->metadata; | ||
| 156 | |||
| 157 | t_params[idx].protocol = CS_ETM_PROTO_ETMV4i; | ||
| 158 | t_params[idx].etmv4.reg_idr0 = metadata[idx][CS_ETMV4_TRCIDR0]; | ||
| 159 | t_params[idx].etmv4.reg_idr1 = metadata[idx][CS_ETMV4_TRCIDR1]; | ||
| 160 | t_params[idx].etmv4.reg_idr2 = metadata[idx][CS_ETMV4_TRCIDR2]; | ||
| 161 | t_params[idx].etmv4.reg_idr8 = metadata[idx][CS_ETMV4_TRCIDR8]; | ||
| 162 | t_params[idx].etmv4.reg_configr = metadata[idx][CS_ETMV4_TRCCONFIGR]; | ||
| 163 | t_params[idx].etmv4.reg_traceidr = metadata[idx][CS_ETMV4_TRCTRACEIDR]; | ||
| 164 | } | ||
| 165 | |||
| 166 | static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params, | ||
| 167 | struct cs_etm_auxtrace *etm) | ||
| 168 | { | ||
| 169 | int i; | ||
| 170 | u32 etmidr; | ||
| 171 | u64 architecture; | ||
| 172 | |||
| 173 | for (i = 0; i < etm->num_cpu; i++) { | ||
| 174 | architecture = etm->metadata[i][CS_ETM_MAGIC]; | ||
| 175 | |||
| 176 | switch (architecture) { | ||
| 177 | case __perf_cs_etmv3_magic: | ||
| 178 | etmidr = etm->metadata[i][CS_ETM_ETMIDR]; | ||
| 179 | cs_etm__set_trace_param_etmv3(t_params, etm, i, etmidr); | ||
| 180 | break; | ||
| 181 | case __perf_cs_etmv4_magic: | ||
| 182 | cs_etm__set_trace_param_etmv4(t_params, etm, i); | ||
| 183 | break; | ||
| 184 | default: | ||
| 185 | return -EINVAL; | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 189 | return 0; | ||
| 190 | } | ||
| 191 | |||
| 192 | static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params, | ||
| 193 | struct cs_etm_queue *etmq, | ||
| 194 | enum cs_etm_decoder_operation mode) | ||
| 195 | { | ||
| 196 | int ret = -EINVAL; | ||
| 197 | |||
| 198 | if (!(mode < CS_ETM_OPERATION_MAX)) | ||
| 199 | goto out; | ||
| 200 | |||
| 201 | d_params->packet_printer = cs_etm__packet_dump; | ||
| 202 | d_params->operation = mode; | ||
| 203 | d_params->data = etmq; | ||
| 204 | d_params->formatted = true; | ||
| 205 | d_params->fsyncs = false; | ||
| 206 | d_params->hsyncs = false; | ||
| 207 | d_params->frame_aligned = true; | ||
| 208 | |||
| 209 | ret = 0; | ||
| 210 | out: | ||
| 211 | return ret; | ||
| 212 | } | ||
| 213 | |||
| 112 | static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, | 214 | static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, |
| 113 | struct auxtrace_buffer *buffer) | 215 | struct auxtrace_buffer *buffer) |
| 114 | { | 216 | { |
| 115 | int i, ret; | 217 | int ret; |
| 116 | const char *color = PERF_COLOR_BLUE; | 218 | const char *color = PERF_COLOR_BLUE; |
| 117 | struct cs_etm_decoder_params d_params; | 219 | struct cs_etm_decoder_params d_params; |
| 118 | struct cs_etm_trace_params *t_params; | 220 | struct cs_etm_trace_params *t_params; |
| @@ -126,48 +228,22 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, | |||
| 126 | 228 | ||
| 127 | /* Use metadata to fill in trace parameters for trace decoder */ | 229 | /* Use metadata to fill in trace parameters for trace decoder */ |
| 128 | t_params = zalloc(sizeof(*t_params) * etm->num_cpu); | 230 | t_params = zalloc(sizeof(*t_params) * etm->num_cpu); |
| 129 | for (i = 0; i < etm->num_cpu; i++) { | 231 | |
| 130 | if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { | 232 | if (!t_params) |
| 131 | u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; | 233 | return; |
| 132 | 234 | ||
| 133 | t_params[i].protocol = | 235 | if (cs_etm__init_trace_params(t_params, etm)) |
| 134 | cs_etm__get_v7_protocol_version(etmidr); | 236 | goto out_free; |
| 135 | t_params[i].etmv3.reg_ctrl = | ||
| 136 | etm->metadata[i][CS_ETM_ETMCR]; | ||
| 137 | t_params[i].etmv3.reg_trc_id = | ||
| 138 | etm->metadata[i][CS_ETM_ETMTRACEIDR]; | ||
| 139 | } else if (etm->metadata[i][CS_ETM_MAGIC] == | ||
| 140 | __perf_cs_etmv4_magic) { | ||
| 141 | t_params[i].protocol = CS_ETM_PROTO_ETMV4i; | ||
| 142 | t_params[i].etmv4.reg_idr0 = | ||
| 143 | etm->metadata[i][CS_ETMV4_TRCIDR0]; | ||
| 144 | t_params[i].etmv4.reg_idr1 = | ||
| 145 | etm->metadata[i][CS_ETMV4_TRCIDR1]; | ||
| 146 | t_params[i].etmv4.reg_idr2 = | ||
| 147 | etm->metadata[i][CS_ETMV4_TRCIDR2]; | ||
| 148 | t_params[i].etmv4.reg_idr8 = | ||
| 149 | etm->metadata[i][CS_ETMV4_TRCIDR8]; | ||
| 150 | t_params[i].etmv4.reg_configr = | ||
| 151 | etm->metadata[i][CS_ETMV4_TRCCONFIGR]; | ||
| 152 | t_params[i].etmv4.reg_traceidr = | ||
| 153 | etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | 237 | ||
| 157 | /* Set decoder parameters to simply print the trace packets */ | 238 | /* Set decoder parameters to simply print the trace packets */ |
| 158 | d_params.packet_printer = cs_etm__packet_dump; | 239 | if (cs_etm__init_decoder_params(&d_params, NULL, |
| 159 | d_params.operation = CS_ETM_OPERATION_PRINT; | 240 | CS_ETM_OPERATION_PRINT)) |
| 160 | d_params.formatted = true; | 241 | goto out_free; |
| 161 | d_params.fsyncs = false; | ||
| 162 | d_params.hsyncs = false; | ||
| 163 | d_params.frame_aligned = true; | ||
| 164 | 242 | ||
| 165 | decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); | 243 | decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); |
| 166 | 244 | ||
| 167 | zfree(&t_params); | ||
| 168 | |||
| 169 | if (!decoder) | 245 | if (!decoder) |
| 170 | return; | 246 | goto out_free; |
| 171 | do { | 247 | do { |
| 172 | size_t consumed; | 248 | size_t consumed; |
| 173 | 249 | ||
| @@ -182,6 +258,9 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, | |||
| 182 | } while (buffer_used < buffer->size); | 258 | } while (buffer_used < buffer->size); |
| 183 | 259 | ||
| 184 | cs_etm_decoder__free(decoder); | 260 | cs_etm_decoder__free(decoder); |
| 261 | |||
| 262 | out_free: | ||
| 263 | zfree(&t_params); | ||
| 185 | } | 264 | } |
| 186 | 265 | ||
| 187 | static int cs_etm__flush_events(struct perf_session *session, | 266 | static int cs_etm__flush_events(struct perf_session *session, |
| @@ -205,7 +284,7 @@ static int cs_etm__flush_events(struct perf_session *session, | |||
| 205 | if (ret < 0) | 284 | if (ret < 0) |
| 206 | return ret; | 285 | return ret; |
| 207 | 286 | ||
| 208 | return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); | 287 | return cs_etm__process_timeless_queues(etm, -1); |
| 209 | } | 288 | } |
| 210 | 289 | ||
| 211 | static void cs_etm__free_queue(void *priv) | 290 | static void cs_etm__free_queue(void *priv) |
| @@ -251,7 +330,7 @@ static void cs_etm__free(struct perf_session *session) | |||
| 251 | cs_etm__free_events(session); | 330 | cs_etm__free_events(session); |
| 252 | session->auxtrace = NULL; | 331 | session->auxtrace = NULL; |
| 253 | 332 | ||
| 254 | /* First remove all traceID/CPU# nodes for the RB tree */ | 333 | /* First remove all traceID/metadata nodes for the RB tree */ |
| 255 | intlist__for_each_entry_safe(inode, tmp, traceid_list) | 334 | intlist__for_each_entry_safe(inode, tmp, traceid_list) |
| 256 | intlist__remove(traceid_list, inode); | 335 | intlist__remove(traceid_list, inode); |
| 257 | /* Then the RB tree itself */ | 336 | /* Then the RB tree itself */ |
| @@ -297,7 +376,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, | |||
| 297 | struct addr_location al; | 376 | struct addr_location al; |
| 298 | 377 | ||
| 299 | if (!etmq) | 378 | if (!etmq) |
| 300 | return -1; | 379 | return 0; |
| 301 | 380 | ||
| 302 | machine = etmq->etm->machine; | 381 | machine = etmq->etm->machine; |
| 303 | cpumode = cs_etm__cpu_mode(etmq, address); | 382 | cpumode = cs_etm__cpu_mode(etmq, address); |
| @@ -305,7 +384,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, | |||
| 305 | thread = etmq->thread; | 384 | thread = etmq->thread; |
| 306 | if (!thread) { | 385 | if (!thread) { |
| 307 | if (cpumode != PERF_RECORD_MISC_KERNEL) | 386 | if (cpumode != PERF_RECORD_MISC_KERNEL) |
| 308 | return -EINVAL; | 387 | return 0; |
| 309 | thread = etmq->etm->unknown_thread; | 388 | thread = etmq->etm->unknown_thread; |
| 310 | } | 389 | } |
| 311 | 390 | ||
| @@ -328,12 +407,10 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, | |||
| 328 | return len; | 407 | return len; |
| 329 | } | 408 | } |
| 330 | 409 | ||
| 331 | static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, | 410 | static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm) |
| 332 | unsigned int queue_nr) | ||
| 333 | { | 411 | { |
| 334 | int i; | ||
| 335 | struct cs_etm_decoder_params d_params; | 412 | struct cs_etm_decoder_params d_params; |
| 336 | struct cs_etm_trace_params *t_params; | 413 | struct cs_etm_trace_params *t_params = NULL; |
| 337 | struct cs_etm_queue *etmq; | 414 | struct cs_etm_queue *etmq; |
| 338 | size_t szp = sizeof(struct cs_etm_packet); | 415 | size_t szp = sizeof(struct cs_etm_packet); |
| 339 | 416 | ||
| @@ -368,59 +445,22 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, | |||
| 368 | if (!etmq->event_buf) | 445 | if (!etmq->event_buf) |
| 369 | goto out_free; | 446 | goto out_free; |
| 370 | 447 | ||
| 371 | etmq->etm = etm; | ||
| 372 | etmq->queue_nr = queue_nr; | ||
| 373 | etmq->pid = -1; | ||
| 374 | etmq->tid = -1; | ||
| 375 | etmq->cpu = -1; | ||
| 376 | |||
| 377 | /* Use metadata to fill in trace parameters for trace decoder */ | 448 | /* Use metadata to fill in trace parameters for trace decoder */ |
| 378 | t_params = zalloc(sizeof(*t_params) * etm->num_cpu); | 449 | t_params = zalloc(sizeof(*t_params) * etm->num_cpu); |
| 379 | 450 | ||
| 380 | if (!t_params) | 451 | if (!t_params) |
| 381 | goto out_free; | 452 | goto out_free; |
| 382 | 453 | ||
| 383 | for (i = 0; i < etm->num_cpu; i++) { | 454 | if (cs_etm__init_trace_params(t_params, etm)) |
| 384 | if (etm->metadata[i][CS_ETM_MAGIC] == __perf_cs_etmv3_magic) { | 455 | goto out_free; |
| 385 | u32 etmidr = etm->metadata[i][CS_ETM_ETMIDR]; | ||
| 386 | |||
| 387 | t_params[i].protocol = | ||
| 388 | cs_etm__get_v7_protocol_version(etmidr); | ||
| 389 | t_params[i].etmv3.reg_ctrl = | ||
| 390 | etm->metadata[i][CS_ETM_ETMCR]; | ||
| 391 | t_params[i].etmv3.reg_trc_id = | ||
| 392 | etm->metadata[i][CS_ETM_ETMTRACEIDR]; | ||
| 393 | } else if (etm->metadata[i][CS_ETM_MAGIC] == | ||
| 394 | __perf_cs_etmv4_magic) { | ||
| 395 | t_params[i].protocol = CS_ETM_PROTO_ETMV4i; | ||
| 396 | t_params[i].etmv4.reg_idr0 = | ||
| 397 | etm->metadata[i][CS_ETMV4_TRCIDR0]; | ||
| 398 | t_params[i].etmv4.reg_idr1 = | ||
| 399 | etm->metadata[i][CS_ETMV4_TRCIDR1]; | ||
| 400 | t_params[i].etmv4.reg_idr2 = | ||
| 401 | etm->metadata[i][CS_ETMV4_TRCIDR2]; | ||
| 402 | t_params[i].etmv4.reg_idr8 = | ||
| 403 | etm->metadata[i][CS_ETMV4_TRCIDR8]; | ||
| 404 | t_params[i].etmv4.reg_configr = | ||
| 405 | etm->metadata[i][CS_ETMV4_TRCCONFIGR]; | ||
| 406 | t_params[i].etmv4.reg_traceidr = | ||
| 407 | etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; | ||
| 408 | } | ||
| 409 | } | ||
| 410 | 456 | ||
| 411 | /* Set decoder parameters to simply print the trace packets */ | 457 | /* Set decoder parameters to decode trace packets */ |
| 412 | d_params.packet_printer = cs_etm__packet_dump; | 458 | if (cs_etm__init_decoder_params(&d_params, etmq, |
| 413 | d_params.operation = CS_ETM_OPERATION_DECODE; | 459 | CS_ETM_OPERATION_DECODE)) |
| 414 | d_params.formatted = true; | 460 | goto out_free; |
| 415 | d_params.fsyncs = false; | ||
| 416 | d_params.hsyncs = false; | ||
| 417 | d_params.frame_aligned = true; | ||
| 418 | d_params.data = etmq; | ||
| 419 | 461 | ||
| 420 | etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); | 462 | etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); |
| 421 | 463 | ||
| 422 | zfree(&t_params); | ||
| 423 | |||
| 424 | if (!etmq->decoder) | 464 | if (!etmq->decoder) |
| 425 | goto out_free; | 465 | goto out_free; |
| 426 | 466 | ||
| @@ -433,14 +473,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, | |||
| 433 | cs_etm__mem_access)) | 473 | cs_etm__mem_access)) |
| 434 | goto out_free_decoder; | 474 | goto out_free_decoder; |
| 435 | 475 | ||
| 436 | etmq->offset = 0; | 476 | zfree(&t_params); |
| 437 | etmq->period_instructions = 0; | ||
| 438 | |||
| 439 | return etmq; | 477 | return etmq; |
| 440 | 478 | ||
| 441 | out_free_decoder: | 479 | out_free_decoder: |
| 442 | cs_etm_decoder__free(etmq->decoder); | 480 | cs_etm_decoder__free(etmq->decoder); |
| 443 | out_free: | 481 | out_free: |
| 482 | zfree(&t_params); | ||
| 444 | zfree(&etmq->event_buf); | 483 | zfree(&etmq->event_buf); |
| 445 | zfree(&etmq->last_branch); | 484 | zfree(&etmq->last_branch); |
| 446 | zfree(&etmq->last_branch_rb); | 485 | zfree(&etmq->last_branch_rb); |
| @@ -455,24 +494,30 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, | |||
| 455 | struct auxtrace_queue *queue, | 494 | struct auxtrace_queue *queue, |
| 456 | unsigned int queue_nr) | 495 | unsigned int queue_nr) |
| 457 | { | 496 | { |
| 497 | int ret = 0; | ||
| 458 | struct cs_etm_queue *etmq = queue->priv; | 498 | struct cs_etm_queue *etmq = queue->priv; |
| 459 | 499 | ||
| 460 | if (list_empty(&queue->head) || etmq) | 500 | if (list_empty(&queue->head) || etmq) |
| 461 | return 0; | 501 | goto out; |
| 462 | 502 | ||
| 463 | etmq = cs_etm__alloc_queue(etm, queue_nr); | 503 | etmq = cs_etm__alloc_queue(etm); |
| 464 | 504 | ||
| 465 | if (!etmq) | 505 | if (!etmq) { |
| 466 | return -ENOMEM; | 506 | ret = -ENOMEM; |
| 507 | goto out; | ||
| 508 | } | ||
| 467 | 509 | ||
| 468 | queue->priv = etmq; | 510 | queue->priv = etmq; |
| 469 | 511 | etmq->etm = etm; | |
| 470 | if (queue->cpu != -1) | 512 | etmq->queue_nr = queue_nr; |
| 471 | etmq->cpu = queue->cpu; | 513 | etmq->cpu = queue->cpu; |
| 472 | |||
| 473 | etmq->tid = queue->tid; | 514 | etmq->tid = queue->tid; |
| 515 | etmq->pid = -1; | ||
| 516 | etmq->offset = 0; | ||
| 517 | etmq->period_instructions = 0; | ||
| 474 | 518 | ||
| 475 | return 0; | 519 | out: |
| 520 | return ret; | ||
| 476 | } | 521 | } |
| 477 | 522 | ||
| 478 | static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) | 523 | static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) |
| @@ -480,6 +525,9 @@ static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) | |||
| 480 | unsigned int i; | 525 | unsigned int i; |
| 481 | int ret; | 526 | int ret; |
| 482 | 527 | ||
| 528 | if (!etm->kernel_start) | ||
| 529 | etm->kernel_start = machine__kernel_start(etm->machine); | ||
| 530 | |||
| 483 | for (i = 0; i < etm->queues.nr_queues; i++) { | 531 | for (i = 0; i < etm->queues.nr_queues; i++) { |
| 484 | ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); | 532 | ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); |
| 485 | if (ret) | 533 | if (ret) |
| @@ -637,7 +685,7 @@ static int cs_etm__inject_event(union perf_event *event, | |||
| 637 | 685 | ||
| 638 | 686 | ||
| 639 | static int | 687 | static int |
| 640 | cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) | 688 | cs_etm__get_trace(struct cs_etm_queue *etmq) |
| 641 | { | 689 | { |
| 642 | struct auxtrace_buffer *aux_buffer = etmq->buffer; | 690 | struct auxtrace_buffer *aux_buffer = etmq->buffer; |
| 643 | struct auxtrace_buffer *old_buffer = aux_buffer; | 691 | struct auxtrace_buffer *old_buffer = aux_buffer; |
| @@ -651,7 +699,7 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) | |||
| 651 | if (!aux_buffer) { | 699 | if (!aux_buffer) { |
| 652 | if (old_buffer) | 700 | if (old_buffer) |
| 653 | auxtrace_buffer__drop_data(old_buffer); | 701 | auxtrace_buffer__drop_data(old_buffer); |
| 654 | buff->len = 0; | 702 | etmq->buf_len = 0; |
| 655 | return 0; | 703 | return 0; |
| 656 | } | 704 | } |
| 657 | 705 | ||
| @@ -671,13 +719,11 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) | |||
| 671 | if (old_buffer) | 719 | if (old_buffer) |
| 672 | auxtrace_buffer__drop_data(old_buffer); | 720 | auxtrace_buffer__drop_data(old_buffer); |
| 673 | 721 | ||
| 674 | buff->offset = aux_buffer->offset; | 722 | etmq->buf_used = 0; |
| 675 | buff->len = aux_buffer->size; | 723 | etmq->buf_len = aux_buffer->size; |
| 676 | buff->buf = aux_buffer->data; | 724 | etmq->buf = aux_buffer->data; |
| 677 | |||
| 678 | buff->ref_timestamp = aux_buffer->reference; | ||
| 679 | 725 | ||
| 680 | return buff->len; | 726 | return etmq->buf_len; |
| 681 | } | 727 | } |
| 682 | 728 | ||
| 683 | static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, | 729 | static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, |
| @@ -719,7 +765,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, | |||
| 719 | sample.stream_id = etmq->etm->instructions_id; | 765 | sample.stream_id = etmq->etm->instructions_id; |
| 720 | sample.period = period; | 766 | sample.period = period; |
| 721 | sample.cpu = etmq->packet->cpu; | 767 | sample.cpu = etmq->packet->cpu; |
| 722 | sample.flags = 0; | 768 | sample.flags = etmq->prev_packet->flags; |
| 723 | sample.insn_len = 1; | 769 | sample.insn_len = 1; |
| 724 | sample.cpumode = event->sample.header.misc; | 770 | sample.cpumode = event->sample.header.misc; |
| 725 | 771 | ||
| @@ -778,7 +824,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) | |||
| 778 | sample.stream_id = etmq->etm->branches_id; | 824 | sample.stream_id = etmq->etm->branches_id; |
| 779 | sample.period = 1; | 825 | sample.period = 1; |
| 780 | sample.cpu = etmq->packet->cpu; | 826 | sample.cpu = etmq->packet->cpu; |
| 781 | sample.flags = 0; | 827 | sample.flags = etmq->prev_packet->flags; |
| 782 | sample.cpumode = event->sample.header.misc; | 828 | sample.cpumode = event->sample.header.misc; |
| 783 | 829 | ||
| 784 | /* | 830 | /* |
| @@ -1106,95 +1152,489 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq) | |||
| 1106 | 1152 | ||
| 1107 | return 0; | 1153 | return 0; |
| 1108 | } | 1154 | } |
| 1155 | /* | ||
| 1156 | * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue | ||
| 1157 | * if need be. | ||
| 1158 | * Returns: < 0 if error | ||
| 1159 | * = 0 if no more auxtrace_buffer to read | ||
| 1160 | * > 0 if the current buffer isn't empty yet | ||
| 1161 | */ | ||
| 1162 | static int cs_etm__get_data_block(struct cs_etm_queue *etmq) | ||
| 1163 | { | ||
| 1164 | int ret; | ||
| 1165 | |||
| 1166 | if (!etmq->buf_len) { | ||
| 1167 | ret = cs_etm__get_trace(etmq); | ||
| 1168 | if (ret <= 0) | ||
| 1169 | return ret; | ||
| 1170 | /* | ||
| 1171 | * We cannot assume consecutive blocks in the data file | ||
| 1172 | * are contiguous, reset the decoder to force re-sync. | ||
| 1173 | */ | ||
| 1174 | ret = cs_etm_decoder__reset(etmq->decoder); | ||
| 1175 | if (ret) | ||
| 1176 | return ret; | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | return etmq->buf_len; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, | ||
| 1183 | struct cs_etm_packet *packet, | ||
| 1184 | u64 end_addr) | ||
| 1185 | { | ||
| 1186 | u16 instr16; | ||
| 1187 | u32 instr32; | ||
| 1188 | u64 addr; | ||
| 1189 | |||
| 1190 | switch (packet->isa) { | ||
| 1191 | case CS_ETM_ISA_T32: | ||
| 1192 | /* | ||
| 1193 | * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: | ||
| 1194 | * | ||
| 1195 | * b'15 b'8 | ||
| 1196 | * +-----------------+--------+ | ||
| 1197 | * | 1 1 0 1 1 1 1 1 | imm8 | | ||
| 1198 | * +-----------------+--------+ | ||
| 1199 | * | ||
| 1200 | * According to the specifiction, it only defines SVC for T32 | ||
| 1201 | * with 16 bits instruction and has no definition for 32bits; | ||
| 1202 | * so below only read 2 bytes as instruction size for T32. | ||
| 1203 | */ | ||
| 1204 | addr = end_addr - 2; | ||
| 1205 | cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); | ||
| 1206 | if ((instr16 & 0xFF00) == 0xDF00) | ||
| 1207 | return true; | ||
| 1208 | |||
| 1209 | break; | ||
| 1210 | case CS_ETM_ISA_A32: | ||
| 1211 | /* | ||
| 1212 | * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: | ||
| 1213 | * | ||
| 1214 | * b'31 b'28 b'27 b'24 | ||
| 1215 | * +---------+---------+-------------------------+ | ||
| 1216 | * | !1111 | 1 1 1 1 | imm24 | | ||
| 1217 | * +---------+---------+-------------------------+ | ||
| 1218 | */ | ||
| 1219 | addr = end_addr - 4; | ||
| 1220 | cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); | ||
| 1221 | if ((instr32 & 0x0F000000) == 0x0F000000 && | ||
| 1222 | (instr32 & 0xF0000000) != 0xF0000000) | ||
| 1223 | return true; | ||
| 1224 | |||
| 1225 | break; | ||
| 1226 | case CS_ETM_ISA_A64: | ||
| 1227 | /* | ||
| 1228 | * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: | ||
| 1229 | * | ||
| 1230 | * b'31 b'21 b'4 b'0 | ||
| 1231 | * +-----------------------+---------+-----------+ | ||
| 1232 | * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | | ||
| 1233 | * +-----------------------+---------+-----------+ | ||
| 1234 | */ | ||
| 1235 | addr = end_addr - 4; | ||
| 1236 | cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); | ||
| 1237 | if ((instr32 & 0xFFE0001F) == 0xd4000001) | ||
| 1238 | return true; | ||
| 1239 | |||
| 1240 | break; | ||
| 1241 | case CS_ETM_ISA_UNKNOWN: | ||
| 1242 | default: | ||
| 1243 | break; | ||
| 1244 | } | ||
| 1245 | |||
| 1246 | return false; | ||
| 1247 | } | ||
| 1248 | |||
| 1249 | static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) | ||
| 1250 | { | ||
| 1251 | struct cs_etm_packet *packet = etmq->packet; | ||
| 1252 | struct cs_etm_packet *prev_packet = etmq->prev_packet; | ||
| 1253 | |||
| 1254 | if (magic == __perf_cs_etmv3_magic) | ||
| 1255 | if (packet->exception_number == CS_ETMV3_EXC_SVC) | ||
| 1256 | return true; | ||
| 1257 | |||
| 1258 | /* | ||
| 1259 | * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and | ||
| 1260 | * HVC cases; need to check if it's SVC instruction based on | ||
| 1261 | * packet address. | ||
| 1262 | */ | ||
| 1263 | if (magic == __perf_cs_etmv4_magic) { | ||
| 1264 | if (packet->exception_number == CS_ETMV4_EXC_CALL && | ||
| 1265 | cs_etm__is_svc_instr(etmq, prev_packet, | ||
| 1266 | prev_packet->end_addr)) | ||
| 1267 | return true; | ||
| 1268 | } | ||
| 1269 | |||
| 1270 | return false; | ||
| 1271 | } | ||
| 1272 | |||
| 1273 | static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) | ||
| 1274 | { | ||
| 1275 | struct cs_etm_packet *packet = etmq->packet; | ||
| 1276 | |||
| 1277 | if (magic == __perf_cs_etmv3_magic) | ||
| 1278 | if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || | ||
| 1279 | packet->exception_number == CS_ETMV3_EXC_ASYNC_DATA_ABORT || | ||
| 1280 | packet->exception_number == CS_ETMV3_EXC_PE_RESET || | ||
| 1281 | packet->exception_number == CS_ETMV3_EXC_IRQ || | ||
| 1282 | packet->exception_number == CS_ETMV3_EXC_FIQ) | ||
| 1283 | return true; | ||
| 1284 | |||
| 1285 | if (magic == __perf_cs_etmv4_magic) | ||
| 1286 | if (packet->exception_number == CS_ETMV4_EXC_RESET || | ||
| 1287 | packet->exception_number == CS_ETMV4_EXC_DEBUG_HALT || | ||
| 1288 | packet->exception_number == CS_ETMV4_EXC_SYSTEM_ERROR || | ||
| 1289 | packet->exception_number == CS_ETMV4_EXC_INST_DEBUG || | ||
| 1290 | packet->exception_number == CS_ETMV4_EXC_DATA_DEBUG || | ||
| 1291 | packet->exception_number == CS_ETMV4_EXC_IRQ || | ||
| 1292 | packet->exception_number == CS_ETMV4_EXC_FIQ) | ||
| 1293 | return true; | ||
| 1294 | |||
| 1295 | return false; | ||
| 1296 | } | ||
| 1297 | |||
| 1298 | static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) | ||
| 1299 | { | ||
| 1300 | struct cs_etm_packet *packet = etmq->packet; | ||
| 1301 | struct cs_etm_packet *prev_packet = etmq->prev_packet; | ||
| 1302 | |||
| 1303 | if (magic == __perf_cs_etmv3_magic) | ||
| 1304 | if (packet->exception_number == CS_ETMV3_EXC_SMC || | ||
| 1305 | packet->exception_number == CS_ETMV3_EXC_HYP || | ||
| 1306 | packet->exception_number == CS_ETMV3_EXC_JAZELLE_THUMBEE || | ||
| 1307 | packet->exception_number == CS_ETMV3_EXC_UNDEFINED_INSTR || | ||
| 1308 | packet->exception_number == CS_ETMV3_EXC_PREFETCH_ABORT || | ||
| 1309 | packet->exception_number == CS_ETMV3_EXC_DATA_FAULT || | ||
| 1310 | packet->exception_number == CS_ETMV3_EXC_GENERIC) | ||
| 1311 | return true; | ||
| 1312 | |||
| 1313 | if (magic == __perf_cs_etmv4_magic) { | ||
| 1314 | if (packet->exception_number == CS_ETMV4_EXC_TRAP || | ||
| 1315 | packet->exception_number == CS_ETMV4_EXC_ALIGNMENT || | ||
| 1316 | packet->exception_number == CS_ETMV4_EXC_INST_FAULT || | ||
| 1317 | packet->exception_number == CS_ETMV4_EXC_DATA_FAULT) | ||
| 1318 | return true; | ||
| 1319 | |||
| 1320 | /* | ||
| 1321 | * For CS_ETMV4_EXC_CALL, except SVC other instructions | ||
| 1322 | * (SMC, HVC) are taken as sync exceptions. | ||
| 1323 | */ | ||
| 1324 | if (packet->exception_number == CS_ETMV4_EXC_CALL && | ||
| 1325 | !cs_etm__is_svc_instr(etmq, prev_packet, | ||
| 1326 | prev_packet->end_addr)) | ||
| 1327 | return true; | ||
| 1328 | |||
| 1329 | /* | ||
| 1330 | * ETMv4 has 5 bits for exception number; if the numbers | ||
| 1331 | * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] | ||
| 1332 | * they are implementation defined exceptions. | ||
| 1333 | * | ||
| 1334 | * For this case, simply take it as sync exception. | ||
| 1335 | */ | ||
| 1336 | if (packet->exception_number > CS_ETMV4_EXC_FIQ && | ||
| 1337 | packet->exception_number <= CS_ETMV4_EXC_END) | ||
| 1338 | return true; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | return false; | ||
| 1342 | } | ||
| 1343 | |||
| 1344 | static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) | ||
| 1345 | { | ||
| 1346 | struct cs_etm_packet *packet = etmq->packet; | ||
| 1347 | struct cs_etm_packet *prev_packet = etmq->prev_packet; | ||
| 1348 | u64 magic; | ||
| 1349 | int ret; | ||
| 1350 | |||
| 1351 | switch (packet->sample_type) { | ||
| 1352 | case CS_ETM_RANGE: | ||
| 1353 | /* | ||
| 1354 | * Immediate branch instruction without neither link nor | ||
| 1355 | * return flag, it's normal branch instruction within | ||
| 1356 | * the function. | ||
| 1357 | */ | ||
| 1358 | if (packet->last_instr_type == OCSD_INSTR_BR && | ||
| 1359 | packet->last_instr_subtype == OCSD_S_INSTR_NONE) { | ||
| 1360 | packet->flags = PERF_IP_FLAG_BRANCH; | ||
| 1361 | |||
| 1362 | if (packet->last_instr_cond) | ||
| 1363 | packet->flags |= PERF_IP_FLAG_CONDITIONAL; | ||
| 1364 | } | ||
| 1365 | |||
| 1366 | /* | ||
| 1367 | * Immediate branch instruction with link (e.g. BL), this is | ||
| 1368 | * branch instruction for function call. | ||
| 1369 | */ | ||
| 1370 | if (packet->last_instr_type == OCSD_INSTR_BR && | ||
| 1371 | packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) | ||
| 1372 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1373 | PERF_IP_FLAG_CALL; | ||
| 1374 | |||
| 1375 | /* | ||
| 1376 | * Indirect branch instruction with link (e.g. BLR), this is | ||
| 1377 | * branch instruction for function call. | ||
| 1378 | */ | ||
| 1379 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && | ||
| 1380 | packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK) | ||
| 1381 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1382 | PERF_IP_FLAG_CALL; | ||
| 1383 | |||
| 1384 | /* | ||
| 1385 | * Indirect branch instruction with subtype of | ||
| 1386 | * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for | ||
| 1387 | * function return for A32/T32. | ||
| 1388 | */ | ||
| 1389 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && | ||
| 1390 | packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET) | ||
| 1391 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1392 | PERF_IP_FLAG_RETURN; | ||
| 1393 | |||
| 1394 | /* | ||
| 1395 | * Indirect branch instruction without link (e.g. BR), usually | ||
| 1396 | * this is used for function return, especially for functions | ||
| 1397 | * within dynamic link lib. | ||
| 1398 | */ | ||
| 1399 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && | ||
| 1400 | packet->last_instr_subtype == OCSD_S_INSTR_NONE) | ||
| 1401 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1402 | PERF_IP_FLAG_RETURN; | ||
| 1403 | |||
| 1404 | /* Return instruction for function return. */ | ||
| 1405 | if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT && | ||
| 1406 | packet->last_instr_subtype == OCSD_S_INSTR_V8_RET) | ||
| 1407 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1408 | PERF_IP_FLAG_RETURN; | ||
| 1409 | |||
| 1410 | /* | ||
| 1411 | * Decoder might insert a discontinuity in the middle of | ||
| 1412 | * instruction packets, fixup prev_packet with flag | ||
| 1413 | * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace. | ||
| 1414 | */ | ||
| 1415 | if (prev_packet->sample_type == CS_ETM_DISCONTINUITY) | ||
| 1416 | prev_packet->flags |= PERF_IP_FLAG_BRANCH | | ||
| 1417 | PERF_IP_FLAG_TRACE_BEGIN; | ||
| 1418 | |||
| 1419 | /* | ||
| 1420 | * If the previous packet is an exception return packet | ||
| 1421 | * and the return address just follows SVC instuction, | ||
| 1422 | * it needs to calibrate the previous packet sample flags | ||
| 1423 | * as PERF_IP_FLAG_SYSCALLRET. | ||
| 1424 | */ | ||
| 1425 | if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | | ||
| 1426 | PERF_IP_FLAG_RETURN | | ||
| 1427 | PERF_IP_FLAG_INTERRUPT) && | ||
| 1428 | cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) | ||
| 1429 | prev_packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1430 | PERF_IP_FLAG_RETURN | | ||
| 1431 | PERF_IP_FLAG_SYSCALLRET; | ||
| 1432 | break; | ||
| 1433 | case CS_ETM_DISCONTINUITY: | ||
| 1434 | /* | ||
| 1435 | * The trace is discontinuous, if the previous packet is | ||
| 1436 | * instruction packet, set flag PERF_IP_FLAG_TRACE_END | ||
| 1437 | * for previous packet. | ||
| 1438 | */ | ||
| 1439 | if (prev_packet->sample_type == CS_ETM_RANGE) | ||
| 1440 | prev_packet->flags |= PERF_IP_FLAG_BRANCH | | ||
| 1441 | PERF_IP_FLAG_TRACE_END; | ||
| 1442 | break; | ||
| 1443 | case CS_ETM_EXCEPTION: | ||
| 1444 | ret = cs_etm__get_magic(packet->trace_chan_id, &magic); | ||
| 1445 | if (ret) | ||
| 1446 | return ret; | ||
| 1447 | |||
| 1448 | /* The exception is for system call. */ | ||
| 1449 | if (cs_etm__is_syscall(etmq, magic)) | ||
| 1450 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1451 | PERF_IP_FLAG_CALL | | ||
| 1452 | PERF_IP_FLAG_SYSCALLRET; | ||
| 1453 | /* | ||
| 1454 | * The exceptions are triggered by external signals from bus, | ||
| 1455 | * interrupt controller, debug module, PE reset or halt. | ||
| 1456 | */ | ||
| 1457 | else if (cs_etm__is_async_exception(etmq, magic)) | ||
| 1458 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1459 | PERF_IP_FLAG_CALL | | ||
| 1460 | PERF_IP_FLAG_ASYNC | | ||
| 1461 | PERF_IP_FLAG_INTERRUPT; | ||
| 1462 | /* | ||
| 1463 | * Otherwise, exception is caused by trap, instruction & | ||
| 1464 | * data fault, or alignment errors. | ||
| 1465 | */ | ||
| 1466 | else if (cs_etm__is_sync_exception(etmq, magic)) | ||
| 1467 | packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1468 | PERF_IP_FLAG_CALL | | ||
| 1469 | PERF_IP_FLAG_INTERRUPT; | ||
| 1470 | |||
| 1471 | /* | ||
| 1472 | * When the exception packet is inserted, since exception | ||
| 1473 | * packet is not used standalone for generating samples | ||
| 1474 | * and it's affiliation to the previous instruction range | ||
| 1475 | * packet; so set previous range packet flags to tell perf | ||
| 1476 | * it is an exception taken branch. | ||
| 1477 | */ | ||
| 1478 | if (prev_packet->sample_type == CS_ETM_RANGE) | ||
| 1479 | prev_packet->flags = packet->flags; | ||
| 1480 | break; | ||
| 1481 | case CS_ETM_EXCEPTION_RET: | ||
| 1482 | /* | ||
| 1483 | * When the exception return packet is inserted, since | ||
| 1484 | * exception return packet is not used standalone for | ||
| 1485 | * generating samples and it's affiliation to the previous | ||
| 1486 | * instruction range packet; so set previous range packet | ||
| 1487 | * flags to tell perf it is an exception return branch. | ||
| 1488 | * | ||
| 1489 | * The exception return can be for either system call or | ||
| 1490 | * other exception types; unfortunately the packet doesn't | ||
| 1491 | * contain exception type related info so we cannot decide | ||
| 1492 | * the exception type purely based on exception return packet. | ||
| 1493 | * If we record the exception number from exception packet and | ||
| 1494 | * reuse it for excpetion return packet, this is not reliable | ||
| 1495 | * due the trace can be discontinuity or the interrupt can | ||
| 1496 | * be nested, thus the recorded exception number cannot be | ||
| 1497 | * used for exception return packet for these two cases. | ||
| 1498 | * | ||
| 1499 | * For exception return packet, we only need to distinguish the | ||
| 1500 | * packet is for system call or for other types. Thus the | ||
| 1501 | * decision can be deferred when receive the next packet which | ||
| 1502 | * contains the return address, based on the return address we | ||
| 1503 | * can read out the previous instruction and check if it's a | ||
| 1504 | * system call instruction and then calibrate the sample flag | ||
| 1505 | * as needed. | ||
| 1506 | */ | ||
| 1507 | if (prev_packet->sample_type == CS_ETM_RANGE) | ||
| 1508 | prev_packet->flags = PERF_IP_FLAG_BRANCH | | ||
| 1509 | PERF_IP_FLAG_RETURN | | ||
| 1510 | PERF_IP_FLAG_INTERRUPT; | ||
| 1511 | break; | ||
| 1512 | case CS_ETM_EMPTY: | ||
| 1513 | default: | ||
| 1514 | break; | ||
| 1515 | } | ||
| 1516 | |||
| 1517 | return 0; | ||
| 1518 | } | ||
| 1519 | |||
| 1520 | static int cs_etm__decode_data_block(struct cs_etm_queue *etmq) | ||
| 1521 | { | ||
| 1522 | int ret = 0; | ||
| 1523 | size_t processed = 0; | ||
| 1524 | |||
| 1525 | /* | ||
| 1526 | * Packets are decoded and added to the decoder's packet queue | ||
| 1527 | * until the decoder packet processing callback has requested that | ||
| 1528 | * processing stops or there is nothing left in the buffer. Normal | ||
| 1529 | * operations that stop processing are a timestamp packet or a full | ||
| 1530 | * decoder buffer queue. | ||
| 1531 | */ | ||
| 1532 | ret = cs_etm_decoder__process_data_block(etmq->decoder, | ||
| 1533 | etmq->offset, | ||
| 1534 | &etmq->buf[etmq->buf_used], | ||
| 1535 | etmq->buf_len, | ||
| 1536 | &processed); | ||
| 1537 | if (ret) | ||
| 1538 | goto out; | ||
| 1539 | |||
| 1540 | etmq->offset += processed; | ||
| 1541 | etmq->buf_used += processed; | ||
| 1542 | etmq->buf_len -= processed; | ||
| 1543 | |||
| 1544 | out: | ||
| 1545 | return ret; | ||
| 1546 | } | ||
| 1547 | |||
| 1548 | static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) | ||
| 1549 | { | ||
| 1550 | int ret; | ||
| 1551 | |||
| 1552 | /* Process each packet in this chunk */ | ||
| 1553 | while (1) { | ||
| 1554 | ret = cs_etm_decoder__get_packet(etmq->decoder, | ||
| 1555 | etmq->packet); | ||
| 1556 | if (ret <= 0) | ||
| 1557 | /* | ||
| 1558 | * Stop processing this chunk on | ||
| 1559 | * end of data or error | ||
| 1560 | */ | ||
| 1561 | break; | ||
| 1562 | |||
| 1563 | /* | ||
| 1564 | * Since packet addresses are swapped in packet | ||
| 1565 | * handling within below switch() statements, | ||
| 1566 | * thus setting sample flags must be called | ||
| 1567 | * prior to switch() statement to use address | ||
| 1568 | * information before packets swapping. | ||
| 1569 | */ | ||
| 1570 | ret = cs_etm__set_sample_flags(etmq); | ||
| 1571 | if (ret < 0) | ||
| 1572 | break; | ||
| 1573 | |||
| 1574 | switch (etmq->packet->sample_type) { | ||
| 1575 | case CS_ETM_RANGE: | ||
| 1576 | /* | ||
| 1577 | * If the packet contains an instruction | ||
| 1578 | * range, generate instruction sequence | ||
| 1579 | * events. | ||
| 1580 | */ | ||
| 1581 | cs_etm__sample(etmq); | ||
| 1582 | break; | ||
| 1583 | case CS_ETM_EXCEPTION: | ||
| 1584 | case CS_ETM_EXCEPTION_RET: | ||
| 1585 | /* | ||
| 1586 | * If the exception packet is coming, | ||
| 1587 | * make sure the previous instruction | ||
| 1588 | * range packet to be handled properly. | ||
| 1589 | */ | ||
| 1590 | cs_etm__exception(etmq); | ||
| 1591 | break; | ||
| 1592 | case CS_ETM_DISCONTINUITY: | ||
| 1593 | /* | ||
| 1594 | * Discontinuity in trace, flush | ||
| 1595 | * previous branch stack | ||
| 1596 | */ | ||
| 1597 | cs_etm__flush(etmq); | ||
| 1598 | break; | ||
| 1599 | case CS_ETM_EMPTY: | ||
| 1600 | /* | ||
| 1601 | * Should not receive empty packet, | ||
| 1602 | * report error. | ||
| 1603 | */ | ||
| 1604 | pr_err("CS ETM Trace: empty packet\n"); | ||
| 1605 | return -EINVAL; | ||
| 1606 | default: | ||
| 1607 | break; | ||
| 1608 | } | ||
| 1609 | } | ||
| 1610 | |||
| 1611 | return ret; | ||
| 1612 | } | ||
| 1109 | 1613 | ||
| 1110 | static int cs_etm__run_decoder(struct cs_etm_queue *etmq) | 1614 | static int cs_etm__run_decoder(struct cs_etm_queue *etmq) |
| 1111 | { | 1615 | { |
| 1112 | struct cs_etm_auxtrace *etm = etmq->etm; | ||
| 1113 | struct cs_etm_buffer buffer; | ||
| 1114 | size_t buffer_used, processed; | ||
| 1115 | int err = 0; | 1616 | int err = 0; |
| 1116 | 1617 | ||
| 1117 | if (!etm->kernel_start) | ||
| 1118 | etm->kernel_start = machine__kernel_start(etm->machine); | ||
| 1119 | |||
| 1120 | /* Go through each buffer in the queue and decode them one by one */ | 1618 | /* Go through each buffer in the queue and decode them one by one */ |
| 1121 | while (1) { | 1619 | while (1) { |
| 1122 | buffer_used = 0; | 1620 | err = cs_etm__get_data_block(etmq); |
| 1123 | memset(&buffer, 0, sizeof(buffer)); | ||
| 1124 | err = cs_etm__get_trace(&buffer, etmq); | ||
| 1125 | if (err <= 0) | 1621 | if (err <= 0) |
| 1126 | return err; | 1622 | return err; |
| 1127 | /* | ||
| 1128 | * We cannot assume consecutive blocks in the data file are | ||
| 1129 | * contiguous, reset the decoder to force re-sync. | ||
| 1130 | */ | ||
| 1131 | err = cs_etm_decoder__reset(etmq->decoder); | ||
| 1132 | if (err != 0) | ||
| 1133 | return err; | ||
| 1134 | 1623 | ||
| 1135 | /* Run trace decoder until buffer consumed or end of trace */ | 1624 | /* Run trace decoder until buffer consumed or end of trace */ |
| 1136 | do { | 1625 | do { |
| 1137 | processed = 0; | 1626 | err = cs_etm__decode_data_block(etmq); |
| 1138 | err = cs_etm_decoder__process_data_block( | ||
| 1139 | etmq->decoder, | ||
| 1140 | etmq->offset, | ||
| 1141 | &buffer.buf[buffer_used], | ||
| 1142 | buffer.len - buffer_used, | ||
| 1143 | &processed); | ||
| 1144 | if (err) | 1627 | if (err) |
| 1145 | return err; | 1628 | return err; |
| 1146 | 1629 | ||
| 1147 | etmq->offset += processed; | 1630 | /* |
| 1148 | buffer_used += processed; | 1631 | * Process each packet in this chunk, nothing to do if |
| 1149 | 1632 | * an error occurs other than hoping the next one will | |
| 1150 | /* Process each packet in this chunk */ | 1633 | * be better. |
| 1151 | while (1) { | 1634 | */ |
| 1152 | err = cs_etm_decoder__get_packet(etmq->decoder, | 1635 | err = cs_etm__process_decoder_queue(etmq); |
| 1153 | etmq->packet); | 1636 | |
| 1154 | if (err <= 0) | 1637 | } while (etmq->buf_len); |
| 1155 | /* | ||
| 1156 | * Stop processing this chunk on | ||
| 1157 | * end of data or error | ||
| 1158 | */ | ||
| 1159 | break; | ||
| 1160 | |||
| 1161 | switch (etmq->packet->sample_type) { | ||
| 1162 | case CS_ETM_RANGE: | ||
| 1163 | /* | ||
| 1164 | * If the packet contains an instruction | ||
| 1165 | * range, generate instruction sequence | ||
| 1166 | * events. | ||
| 1167 | */ | ||
| 1168 | cs_etm__sample(etmq); | ||
| 1169 | break; | ||
| 1170 | case CS_ETM_EXCEPTION: | ||
| 1171 | case CS_ETM_EXCEPTION_RET: | ||
| 1172 | /* | ||
| 1173 | * If the exception packet is coming, | ||
| 1174 | * make sure the previous instruction | ||
| 1175 | * range packet to be handled properly. | ||
| 1176 | */ | ||
| 1177 | cs_etm__exception(etmq); | ||
| 1178 | break; | ||
| 1179 | case CS_ETM_DISCONTINUITY: | ||
| 1180 | /* | ||
| 1181 | * Discontinuity in trace, flush | ||
| 1182 | * previous branch stack | ||
| 1183 | */ | ||
| 1184 | cs_etm__flush(etmq); | ||
| 1185 | break; | ||
| 1186 | case CS_ETM_EMPTY: | ||
| 1187 | /* | ||
| 1188 | * Should not receive empty packet, | ||
| 1189 | * report error. | ||
| 1190 | */ | ||
| 1191 | pr_err("CS ETM Trace: empty packet\n"); | ||
| 1192 | return -EINVAL; | ||
| 1193 | default: | ||
| 1194 | break; | ||
| 1195 | } | ||
| 1196 | } | ||
| 1197 | } while (buffer.len > buffer_used); | ||
| 1198 | 1638 | ||
| 1199 | if (err == 0) | 1639 | if (err == 0) |
| 1200 | /* Flush any remaining branch stack entries */ | 1640 | /* Flush any remaining branch stack entries */ |
| @@ -1205,7 +1645,7 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq) | |||
| 1205 | } | 1645 | } |
| 1206 | 1646 | ||
| 1207 | static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, | 1647 | static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, |
| 1208 | pid_t tid, u64 time_) | 1648 | pid_t tid) |
| 1209 | { | 1649 | { |
| 1210 | unsigned int i; | 1650 | unsigned int i; |
| 1211 | struct auxtrace_queues *queues = &etm->queues; | 1651 | struct auxtrace_queues *queues = &etm->queues; |
| @@ -1215,7 +1655,6 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, | |||
| 1215 | struct cs_etm_queue *etmq = queue->priv; | 1655 | struct cs_etm_queue *etmq = queue->priv; |
| 1216 | 1656 | ||
| 1217 | if (etmq && ((tid == -1) || (etmq->tid == tid))) { | 1657 | if (etmq && ((tid == -1) || (etmq->tid == tid))) { |
| 1218 | etmq->time = time_; | ||
| 1219 | cs_etm__set_pid_tid_cpu(etm, queue); | 1658 | cs_etm__set_pid_tid_cpu(etm, queue); |
| 1220 | cs_etm__run_decoder(etmq); | 1659 | cs_etm__run_decoder(etmq); |
| 1221 | } | 1660 | } |
| @@ -1259,8 +1698,7 @@ static int cs_etm__process_event(struct perf_session *session, | |||
| 1259 | 1698 | ||
| 1260 | if (event->header.type == PERF_RECORD_EXIT) | 1699 | if (event->header.type == PERF_RECORD_EXIT) |
| 1261 | return cs_etm__process_timeless_queues(etm, | 1700 | return cs_etm__process_timeless_queues(etm, |
| 1262 | event->fork.tid, | 1701 | event->fork.tid); |
| 1263 | sample->time); | ||
| 1264 | 1702 | ||
| 1265 | return 0; | 1703 | return 0; |
| 1266 | } | 1704 | } |
| @@ -1414,9 +1852,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event, | |||
| 1414 | 0xffffffff); | 1852 | 0xffffffff); |
| 1415 | 1853 | ||
| 1416 | /* | 1854 | /* |
| 1417 | * Create an RB tree for traceID-CPU# tuple. Since the conversion has | 1855 | * Create an RB tree for traceID-metadata tuple. Since the conversion |
| 1418 | * to be made for each packet that gets decoded, optimizing access in | 1856 | * has to be made for each packet that gets decoded, optimizing access |
| 1419 | * anything other than a sequential array is worth doing. | 1857 | * in anything other than a sequential array is worth doing. |
| 1420 | */ | 1858 | */ |
| 1421 | traceid_list = intlist__new(NULL); | 1859 | traceid_list = intlist__new(NULL); |
| 1422 | if (!traceid_list) { | 1860 | if (!traceid_list) { |
| @@ -1482,8 +1920,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event, | |||
| 1482 | err = -EINVAL; | 1920 | err = -EINVAL; |
| 1483 | goto err_free_metadata; | 1921 | goto err_free_metadata; |
| 1484 | } | 1922 | } |
| 1485 | /* All good, associate the traceID with the CPU# */ | 1923 | /* All good, associate the traceID with the metadata pointer */ |
| 1486 | inode->priv = &metadata[j][CS_ETM_CPU]; | 1924 | inode->priv = metadata[j]; |
| 1487 | } | 1925 | } |
| 1488 | 1926 | ||
| 1489 | /* | 1927 | /* |
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 37f8d48179ca..0e97c196147a 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h | |||
| @@ -53,7 +53,51 @@ enum { | |||
| 53 | CS_ETMV4_PRIV_MAX, | 53 | CS_ETMV4_PRIV_MAX, |
| 54 | }; | 54 | }; |
| 55 | 55 | ||
| 56 | /* RB tree for quick conversion between traceID and CPUs */ | 56 | /* |
| 57 | * ETMv3 exception encoding number: | ||
| 58 | * See Embedded Trace Macrocell spcification (ARM IHI 0014Q) | ||
| 59 | * table 7-12 Encoding of Exception[3:0] for non-ARMv7-M processors. | ||
| 60 | */ | ||
| 61 | enum { | ||
| 62 | CS_ETMV3_EXC_NONE = 0, | ||
| 63 | CS_ETMV3_EXC_DEBUG_HALT = 1, | ||
| 64 | CS_ETMV3_EXC_SMC = 2, | ||
| 65 | CS_ETMV3_EXC_HYP = 3, | ||
| 66 | CS_ETMV3_EXC_ASYNC_DATA_ABORT = 4, | ||
| 67 | CS_ETMV3_EXC_JAZELLE_THUMBEE = 5, | ||
| 68 | CS_ETMV3_EXC_PE_RESET = 8, | ||
| 69 | CS_ETMV3_EXC_UNDEFINED_INSTR = 9, | ||
| 70 | CS_ETMV3_EXC_SVC = 10, | ||
| 71 | CS_ETMV3_EXC_PREFETCH_ABORT = 11, | ||
| 72 | CS_ETMV3_EXC_DATA_FAULT = 12, | ||
| 73 | CS_ETMV3_EXC_GENERIC = 13, | ||
| 74 | CS_ETMV3_EXC_IRQ = 14, | ||
| 75 | CS_ETMV3_EXC_FIQ = 15, | ||
| 76 | }; | ||
| 77 | |||
| 78 | /* | ||
| 79 | * ETMv4 exception encoding number: | ||
| 80 | * See ARM Embedded Trace Macrocell Architecture Specification (ARM IHI 0064D) | ||
| 81 | * table 6-12 Possible values for the TYPE field in an Exception instruction | ||
| 82 | * trace packet, for ARMv7-A/R and ARMv8-A/R PEs. | ||
| 83 | */ | ||
| 84 | enum { | ||
| 85 | CS_ETMV4_EXC_RESET = 0, | ||
| 86 | CS_ETMV4_EXC_DEBUG_HALT = 1, | ||
| 87 | CS_ETMV4_EXC_CALL = 2, | ||
| 88 | CS_ETMV4_EXC_TRAP = 3, | ||
| 89 | CS_ETMV4_EXC_SYSTEM_ERROR = 4, | ||
| 90 | CS_ETMV4_EXC_INST_DEBUG = 6, | ||
| 91 | CS_ETMV4_EXC_DATA_DEBUG = 7, | ||
| 92 | CS_ETMV4_EXC_ALIGNMENT = 10, | ||
| 93 | CS_ETMV4_EXC_INST_FAULT = 11, | ||
| 94 | CS_ETMV4_EXC_DATA_FAULT = 12, | ||
| 95 | CS_ETMV4_EXC_IRQ = 14, | ||
| 96 | CS_ETMV4_EXC_FIQ = 15, | ||
| 97 | CS_ETMV4_EXC_END = 31, | ||
| 98 | }; | ||
| 99 | |||
| 100 | /* RB tree for quick conversion between traceID and metadata pointers */ | ||
| 57 | struct intlist *traceid_list; | 101 | struct intlist *traceid_list; |
| 58 | 102 | ||
| 59 | #define KiB(x) ((x) * 1024) | 103 | #define KiB(x) ((x) * 1024) |
| @@ -61,14 +105,15 @@ struct intlist *traceid_list; | |||
| 61 | 105 | ||
| 62 | #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) | 106 | #define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) |
| 63 | 107 | ||
| 64 | static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL; | 108 | #define __perf_cs_etmv3_magic 0x3030303030303030ULL |
| 65 | static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; | 109 | #define __perf_cs_etmv4_magic 0x4040404040404040ULL |
| 66 | #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) | 110 | #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) |
| 67 | #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) | 111 | #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) |
| 68 | 112 | ||
| 69 | #ifdef HAVE_CSTRACE_SUPPORT | 113 | #ifdef HAVE_CSTRACE_SUPPORT |
| 70 | int cs_etm__process_auxtrace_info(union perf_event *event, | 114 | int cs_etm__process_auxtrace_info(union perf_event *event, |
| 71 | struct perf_session *session); | 115 | struct perf_session *session); |
| 116 | int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); | ||
| 72 | #else | 117 | #else |
| 73 | static inline int | 118 | static inline int |
| 74 | cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, | 119 | cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, |
| @@ -76,6 +121,12 @@ cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, | |||
| 76 | { | 121 | { |
| 77 | return -1; | 122 | return -1; |
| 78 | } | 123 | } |
| 124 | |||
| 125 | static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused, | ||
| 126 | int *cpu __maybe_unused) | ||
| 127 | { | ||
| 128 | return -1; | ||
| 129 | } | ||
| 79 | #endif | 130 | #endif |
| 80 | 131 | ||
| 81 | #endif | 132 | #endif |
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 2a36fab76994..26af43ad9ddd 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c | |||
| @@ -1578,7 +1578,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, | |||
| 1578 | { | 1578 | { |
| 1579 | struct perf_session *session; | 1579 | struct perf_session *session; |
| 1580 | struct perf_data data = { | 1580 | struct perf_data data = { |
| 1581 | .file = { .path = input, .fd = -1 }, | 1581 | .path = input, |
| 1582 | .mode = PERF_DATA_MODE_READ, | 1582 | .mode = PERF_DATA_MODE_READ, |
| 1583 | .force = opts->force, | 1583 | .force = opts->force, |
| 1584 | }; | 1584 | }; |
| @@ -1650,7 +1650,7 @@ int bt_convert__perf2ctf(const char *input, const char *path, | |||
| 1650 | 1650 | ||
| 1651 | fprintf(stderr, | 1651 | fprintf(stderr, |
| 1652 | "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", | 1652 | "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", |
| 1653 | data.file.path, path); | 1653 | data.path, path); |
| 1654 | 1654 | ||
| 1655 | fprintf(stderr, | 1655 | fprintf(stderr, |
| 1656 | "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", | 1656 | "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples", |
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index d8cfc19ddb10..7bd5ddeb7a41 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c | |||
| @@ -7,11 +7,117 @@ | |||
| 7 | #include <fcntl.h> | 7 | #include <fcntl.h> |
| 8 | #include <unistd.h> | 8 | #include <unistd.h> |
| 9 | #include <string.h> | 9 | #include <string.h> |
| 10 | #include <asm/bug.h> | ||
| 11 | #include <sys/types.h> | ||
| 12 | #include <dirent.h> | ||
| 10 | 13 | ||
| 11 | #include "data.h" | 14 | #include "data.h" |
| 12 | #include "util.h" | 15 | #include "util.h" |
| 13 | #include "debug.h" | 16 | #include "debug.h" |
| 14 | 17 | ||
| 18 | static void close_dir(struct perf_data_file *files, int nr) | ||
| 19 | { | ||
| 20 | while (--nr >= 1) { | ||
| 21 | close(files[nr].fd); | ||
| 22 | free(files[nr].path); | ||
| 23 | } | ||
| 24 | free(files); | ||
| 25 | } | ||
| 26 | |||
| 27 | void perf_data__close_dir(struct perf_data *data) | ||
| 28 | { | ||
| 29 | close_dir(data->dir.files, data->dir.nr); | ||
| 30 | } | ||
| 31 | |||
| 32 | int perf_data__create_dir(struct perf_data *data, int nr) | ||
| 33 | { | ||
| 34 | struct perf_data_file *files = NULL; | ||
| 35 | int i, ret = -1; | ||
| 36 | |||
| 37 | files = zalloc(nr * sizeof(*files)); | ||
| 38 | if (!files) | ||
| 39 | return -ENOMEM; | ||
| 40 | |||
| 41 | data->dir.files = files; | ||
| 42 | data->dir.nr = nr; | ||
| 43 | |||
| 44 | for (i = 0; i < nr; i++) { | ||
| 45 | struct perf_data_file *file = &files[i]; | ||
| 46 | |||
| 47 | if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) | ||
| 48 | goto out_err; | ||
| 49 | |||
| 50 | ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR); | ||
| 51 | if (ret < 0) | ||
| 52 | goto out_err; | ||
| 53 | |||
| 54 | file->fd = ret; | ||
| 55 | } | ||
| 56 | |||
| 57 | return 0; | ||
| 58 | |||
| 59 | out_err: | ||
| 60 | close_dir(files, i); | ||
| 61 | return ret; | ||
| 62 | } | ||
| 63 | |||
| 64 | int perf_data__open_dir(struct perf_data *data) | ||
| 65 | { | ||
| 66 | struct perf_data_file *files = NULL; | ||
| 67 | struct dirent *dent; | ||
| 68 | int ret = -1; | ||
| 69 | DIR *dir; | ||
| 70 | int nr = 0; | ||
| 71 | |||
| 72 | dir = opendir(data->path); | ||
| 73 | if (!dir) | ||
| 74 | return -EINVAL; | ||
| 75 | |||
| 76 | while ((dent = readdir(dir)) != NULL) { | ||
| 77 | struct perf_data_file *file; | ||
| 78 | char path[PATH_MAX]; | ||
| 79 | struct stat st; | ||
| 80 | |||
| 81 | snprintf(path, sizeof(path), "%s/%s", data->path, dent->d_name); | ||
| 82 | if (stat(path, &st)) | ||
| 83 | continue; | ||
| 84 | |||
| 85 | if (!S_ISREG(st.st_mode) || strncmp(dent->d_name, "data", 4)) | ||
| 86 | continue; | ||
| 87 | |||
| 88 | ret = -ENOMEM; | ||
| 89 | |||
| 90 | file = realloc(files, (nr + 1) * sizeof(*files)); | ||
| 91 | if (!file) | ||
| 92 | goto out_err; | ||
| 93 | |||
| 94 | files = file; | ||
| 95 | file = &files[nr++]; | ||
| 96 | |||
| 97 | file->path = strdup(path); | ||
| 98 | if (!file->path) | ||
| 99 | goto out_err; | ||
| 100 | |||
| 101 | ret = open(file->path, O_RDONLY); | ||
| 102 | if (ret < 0) | ||
| 103 | goto out_err; | ||
| 104 | |||
| 105 | file->fd = ret; | ||
| 106 | file->size = st.st_size; | ||
| 107 | } | ||
| 108 | |||
| 109 | if (!files) | ||
| 110 | return -EINVAL; | ||
| 111 | |||
| 112 | data->dir.files = files; | ||
| 113 | data->dir.nr = nr; | ||
| 114 | return 0; | ||
| 115 | |||
| 116 | out_err: | ||
| 117 | close_dir(files, nr); | ||
| 118 | return ret; | ||
| 119 | } | ||
| 120 | |||
| 15 | static bool check_pipe(struct perf_data *data) | 121 | static bool check_pipe(struct perf_data *data) |
| 16 | { | 122 | { |
| 17 | struct stat st; | 123 | struct stat st; |
| @@ -19,11 +125,11 @@ static bool check_pipe(struct perf_data *data) | |||
| 19 | int fd = perf_data__is_read(data) ? | 125 | int fd = perf_data__is_read(data) ? |
| 20 | STDIN_FILENO : STDOUT_FILENO; | 126 | STDIN_FILENO : STDOUT_FILENO; |
| 21 | 127 | ||
| 22 | if (!data->file.path) { | 128 | if (!data->path) { |
| 23 | if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) | 129 | if (!fstat(fd, &st) && S_ISFIFO(st.st_mode)) |
| 24 | is_pipe = true; | 130 | is_pipe = true; |
| 25 | } else { | 131 | } else { |
| 26 | if (!strcmp(data->file.path, "-")) | 132 | if (!strcmp(data->path, "-")) |
| 27 | is_pipe = true; | 133 | is_pipe = true; |
| 28 | } | 134 | } |
| 29 | 135 | ||
| @@ -37,13 +143,31 @@ static int check_backup(struct perf_data *data) | |||
| 37 | { | 143 | { |
| 38 | struct stat st; | 144 | struct stat st; |
| 39 | 145 | ||
| 40 | if (!stat(data->file.path, &st) && st.st_size) { | 146 | if (perf_data__is_read(data)) |
| 41 | /* TODO check errors properly */ | 147 | return 0; |
| 148 | |||
| 149 | if (!stat(data->path, &st) && st.st_size) { | ||
| 42 | char oldname[PATH_MAX]; | 150 | char oldname[PATH_MAX]; |
| 151 | int ret; | ||
| 152 | |||
| 43 | snprintf(oldname, sizeof(oldname), "%s.old", | 153 | snprintf(oldname, sizeof(oldname), "%s.old", |
| 44 | data->file.path); | 154 | data->path); |
| 45 | unlink(oldname); | 155 | |
| 46 | rename(data->file.path, oldname); | 156 | ret = rm_rf_perf_data(oldname); |
| 157 | if (ret) { | ||
| 158 | pr_err("Can't remove old data: %s (%s)\n", | ||
| 159 | ret == -2 ? | ||
| 160 | "Unknown file found" : strerror(errno), | ||
| 161 | oldname); | ||
| 162 | return -1; | ||
| 163 | } | ||
| 164 | |||
| 165 | if (rename(data->path, oldname)) { | ||
| 166 | pr_err("Can't move data: %s (%s to %s)\n", | ||
| 167 | strerror(errno), | ||
| 168 | data->path, oldname); | ||
| 169 | return -1; | ||
| 170 | } | ||
| 47 | } | 171 | } |
| 48 | 172 | ||
| 49 | return 0; | 173 | return 0; |
| @@ -82,7 +206,7 @@ static int open_file_read(struct perf_data *data) | |||
| 82 | goto out_close; | 206 | goto out_close; |
| 83 | } | 207 | } |
| 84 | 208 | ||
| 85 | data->size = st.st_size; | 209 | data->file.size = st.st_size; |
| 86 | return fd; | 210 | return fd; |
| 87 | 211 | ||
| 88 | out_close: | 212 | out_close: |
| @@ -95,9 +219,6 @@ static int open_file_write(struct perf_data *data) | |||
| 95 | int fd; | 219 | int fd; |
| 96 | char sbuf[STRERR_BUFSIZE]; | 220 | char sbuf[STRERR_BUFSIZE]; |
| 97 | 221 | ||
| 98 | if (check_backup(data)) | ||
| 99 | return -1; | ||
| 100 | |||
| 101 | fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, | 222 | fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC, |
| 102 | S_IRUSR|S_IWUSR); | 223 | S_IRUSR|S_IWUSR); |
| 103 | 224 | ||
| @@ -115,8 +236,22 @@ static int open_file(struct perf_data *data) | |||
| 115 | fd = perf_data__is_read(data) ? | 236 | fd = perf_data__is_read(data) ? |
| 116 | open_file_read(data) : open_file_write(data); | 237 | open_file_read(data) : open_file_write(data); |
| 117 | 238 | ||
| 239 | if (fd < 0) { | ||
| 240 | free(data->file.path); | ||
| 241 | return -1; | ||
| 242 | } | ||
| 243 | |||
| 118 | data->file.fd = fd; | 244 | data->file.fd = fd; |
| 119 | return fd < 0 ? -1 : 0; | 245 | return 0; |
| 246 | } | ||
| 247 | |||
| 248 | static int open_file_dup(struct perf_data *data) | ||
| 249 | { | ||
| 250 | data->file.path = strdup(data->path); | ||
| 251 | if (!data->file.path) | ||
| 252 | return -ENOMEM; | ||
| 253 | |||
| 254 | return open_file(data); | ||
| 120 | } | 255 | } |
| 121 | 256 | ||
| 122 | int perf_data__open(struct perf_data *data) | 257 | int perf_data__open(struct perf_data *data) |
| @@ -124,14 +259,18 @@ int perf_data__open(struct perf_data *data) | |||
| 124 | if (check_pipe(data)) | 259 | if (check_pipe(data)) |
| 125 | return 0; | 260 | return 0; |
| 126 | 261 | ||
| 127 | if (!data->file.path) | 262 | if (!data->path) |
| 128 | data->file.path = "perf.data"; | 263 | data->path = "perf.data"; |
| 129 | 264 | ||
| 130 | return open_file(data); | 265 | if (check_backup(data)) |
| 266 | return -1; | ||
| 267 | |||
| 268 | return open_file_dup(data); | ||
| 131 | } | 269 | } |
| 132 | 270 | ||
| 133 | void perf_data__close(struct perf_data *data) | 271 | void perf_data__close(struct perf_data *data) |
| 134 | { | 272 | { |
| 273 | free(data->file.path); | ||
| 135 | close(data->file.fd); | 274 | close(data->file.fd); |
| 136 | } | 275 | } |
| 137 | 276 | ||
| @@ -159,15 +298,15 @@ int perf_data__switch(struct perf_data *data, | |||
| 159 | if (perf_data__is_read(data)) | 298 | if (perf_data__is_read(data)) |
| 160 | return -EINVAL; | 299 | return -EINVAL; |
| 161 | 300 | ||
| 162 | if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0) | 301 | if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) |
| 163 | return -ENOMEM; | 302 | return -ENOMEM; |
| 164 | 303 | ||
| 165 | /* | 304 | /* |
| 166 | * Only fire a warning, don't return error, continue fill | 305 | * Only fire a warning, don't return error, continue fill |
| 167 | * original file. | 306 | * original file. |
| 168 | */ | 307 | */ |
| 169 | if (rename(data->file.path, new_filepath)) | 308 | if (rename(data->path, new_filepath)) |
| 170 | pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath); | 309 | pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); |
| 171 | 310 | ||
| 172 | if (!at_exit) { | 311 | if (!at_exit) { |
| 173 | close(data->file.fd); | 312 | close(data->file.fd); |
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 4828f7feea89..14b47be2bd69 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h | |||
| @@ -10,16 +10,22 @@ enum perf_data_mode { | |||
| 10 | }; | 10 | }; |
| 11 | 11 | ||
| 12 | struct perf_data_file { | 12 | struct perf_data_file { |
| 13 | const char *path; | 13 | char *path; |
| 14 | int fd; | 14 | int fd; |
| 15 | unsigned long size; | ||
| 15 | }; | 16 | }; |
| 16 | 17 | ||
| 17 | struct perf_data { | 18 | struct perf_data { |
| 19 | const char *path; | ||
| 18 | struct perf_data_file file; | 20 | struct perf_data_file file; |
| 19 | bool is_pipe; | 21 | bool is_pipe; |
| 20 | bool force; | 22 | bool force; |
| 21 | unsigned long size; | ||
| 22 | enum perf_data_mode mode; | 23 | enum perf_data_mode mode; |
| 24 | |||
| 25 | struct { | ||
| 26 | struct perf_data_file *files; | ||
| 27 | int nr; | ||
| 28 | } dir; | ||
| 23 | }; | 29 | }; |
| 24 | 30 | ||
| 25 | static inline bool perf_data__is_read(struct perf_data *data) | 31 | static inline bool perf_data__is_read(struct perf_data *data) |
| @@ -44,7 +50,7 @@ static inline int perf_data__fd(struct perf_data *data) | |||
| 44 | 50 | ||
| 45 | static inline unsigned long perf_data__size(struct perf_data *data) | 51 | static inline unsigned long perf_data__size(struct perf_data *data) |
| 46 | { | 52 | { |
| 47 | return data->size; | 53 | return data->file.size; |
| 48 | } | 54 | } |
| 49 | 55 | ||
| 50 | int perf_data__open(struct perf_data *data); | 56 | int perf_data__open(struct perf_data *data); |
| @@ -63,4 +69,8 @@ ssize_t perf_data_file__write(struct perf_data_file *file, | |||
| 63 | int perf_data__switch(struct perf_data *data, | 69 | int perf_data__switch(struct perf_data *data, |
| 64 | const char *postfix, | 70 | const char *postfix, |
| 65 | size_t pos, bool at_exit); | 71 | size_t pos, bool at_exit); |
| 72 | |||
| 73 | int perf_data__create_dir(struct perf_data *data, int nr); | ||
| 74 | int perf_data__open_dir(struct perf_data *data); | ||
| 75 | void perf_data__close_dir(struct perf_data *data); | ||
| 66 | #endif /* __PERF_DATA_H */ | 76 | #endif /* __PERF_DATA_H */ |
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index 69fbb0a72d0c..de9b4769d06c 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "thread.h" | 20 | #include "thread.h" |
| 21 | #include "comm.h" | 21 | #include "comm.h" |
| 22 | #include "symbol.h" | 22 | #include "symbol.h" |
| 23 | #include "map.h" | ||
| 23 | #include "event.h" | 24 | #include "event.h" |
| 24 | #include "util.h" | 25 | #include "util.h" |
| 25 | #include "thread-stack.h" | 26 | #include "thread-stack.h" |
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c deleted file mode 100644 index eec754243f4d..000000000000 --- a/tools/perf/util/drv_configs.c +++ /dev/null | |||
| @@ -1,78 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * drv_configs.h: Interface to apply PMU specific configuration | ||
| 3 | * Copyright (c) 2016-2018, Linaro Ltd. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | * | ||
| 14 | */ | ||
| 15 | |||
| 16 | #include "drv_configs.h" | ||
| 17 | #include "evlist.h" | ||
| 18 | #include "evsel.h" | ||
| 19 | #include "pmu.h" | ||
| 20 | #include <errno.h> | ||
| 21 | |||
| 22 | static int | ||
| 23 | perf_evsel__apply_drv_configs(struct perf_evsel *evsel, | ||
| 24 | struct perf_evsel_config_term **err_term) | ||
| 25 | { | ||
| 26 | bool found = false; | ||
| 27 | int err = 0; | ||
| 28 | struct perf_evsel_config_term *term; | ||
| 29 | struct perf_pmu *pmu = NULL; | ||
| 30 | |||
| 31 | while ((pmu = perf_pmu__scan(pmu)) != NULL) | ||
| 32 | if (pmu->type == evsel->attr.type) { | ||
| 33 | found = true; | ||
| 34 | break; | ||
| 35 | } | ||
| 36 | |||
| 37 | list_for_each_entry(term, &evsel->config_terms, list) { | ||
| 38 | if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG) | ||
| 39 | continue; | ||
| 40 | |||
| 41 | /* | ||
| 42 | * We have a configuration term, report an error if we | ||
| 43 | * can't find the PMU or if the PMU driver doesn't support | ||
| 44 | * cmd line driver configuration. | ||
| 45 | */ | ||
| 46 | if (!found || !pmu->set_drv_config) { | ||
| 47 | err = -EINVAL; | ||
| 48 | *err_term = term; | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | |||
| 52 | err = pmu->set_drv_config(term); | ||
| 53 | if (err) { | ||
| 54 | *err_term = term; | ||
| 55 | break; | ||
| 56 | } | ||
| 57 | } | ||
| 58 | |||
| 59 | return err; | ||
| 60 | } | ||
| 61 | |||
| 62 | int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, | ||
| 63 | struct perf_evsel **err_evsel, | ||
| 64 | struct perf_evsel_config_term **err_term) | ||
| 65 | { | ||
| 66 | struct perf_evsel *evsel; | ||
| 67 | int err = 0; | ||
| 68 | |||
| 69 | evlist__for_each_entry(evlist, evsel) { | ||
| 70 | err = perf_evsel__apply_drv_configs(evsel, err_term); | ||
| 71 | if (err) { | ||
| 72 | *err_evsel = evsel; | ||
| 73 | break; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | return err; | ||
| 78 | } | ||
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h deleted file mode 100644 index 32bc9babc2e0..000000000000 --- a/tools/perf/util/drv_configs.h +++ /dev/null | |||
| @@ -1,26 +0,0 @@ | |||
| 1 | /* | ||
| 2 | * drv_configs.h: Interface to apply PMU specific configuration | ||
| 3 | * Copyright (c) 2016-2018, Linaro Ltd. | ||
| 4 | * | ||
| 5 | * This program is free software; you can redistribute it and/or modify it | ||
| 6 | * under the terms and conditions of the GNU General Public License, | ||
| 7 | * version 2, as published by the Free Software Foundation. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 12 | * more details. | ||
| 13 | * | ||
| 14 | */ | ||
| 15 | |||
| 16 | #ifndef __PERF_DRV_CONFIGS_H | ||
| 17 | #define __PERF_DRV_CONFIGS_H | ||
| 18 | |||
| 19 | #include "drv_configs.h" | ||
| 20 | #include "evlist.h" | ||
| 21 | #include "evsel.h" | ||
| 22 | |||
| 23 | int perf_evlist__apply_drv_configs(struct perf_evlist *evlist, | ||
| 24 | struct perf_evsel **err_evsel, | ||
| 25 | struct perf_evsel_config_term **term); | ||
| 26 | #endif | ||
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 62c8cf622607..ba58ba603b69 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c | |||
| @@ -8,8 +8,11 @@ | |||
| 8 | #include <unistd.h> | 8 | #include <unistd.h> |
| 9 | #include <errno.h> | 9 | #include <errno.h> |
| 10 | #include <fcntl.h> | 10 | #include <fcntl.h> |
| 11 | #include <libgen.h> | ||
| 11 | #include "compress.h" | 12 | #include "compress.h" |
| 13 | #include "namespaces.h" | ||
| 12 | #include "path.h" | 14 | #include "path.h" |
| 15 | #include "map.h" | ||
| 13 | #include "symbol.h" | 16 | #include "symbol.h" |
| 14 | #include "srcline.h" | 17 | #include "srcline.h" |
| 15 | #include "dso.h" | 18 | #include "dso.h" |
| @@ -1195,10 +1198,10 @@ struct dso *dso__new(const char *name) | |||
| 1195 | strcpy(dso->name, name); | 1198 | strcpy(dso->name, name); |
| 1196 | dso__set_long_name(dso, dso->name, false); | 1199 | dso__set_long_name(dso, dso->name, false); |
| 1197 | dso__set_short_name(dso, dso->name, false); | 1200 | dso__set_short_name(dso, dso->name, false); |
| 1198 | dso->symbols = dso->symbol_names = RB_ROOT; | 1201 | dso->symbols = dso->symbol_names = RB_ROOT_CACHED; |
| 1199 | dso->data.cache = RB_ROOT; | 1202 | dso->data.cache = RB_ROOT; |
| 1200 | dso->inlined_nodes = RB_ROOT; | 1203 | dso->inlined_nodes = RB_ROOT_CACHED; |
| 1201 | dso->srclines = RB_ROOT; | 1204 | dso->srclines = RB_ROOT_CACHED; |
| 1202 | dso->data.fd = -1; | 1205 | dso->data.fd = -1; |
| 1203 | dso->data.status = DSO_DATA_STATUS_UNKNOWN; | 1206 | dso->data.status = DSO_DATA_STATUS_UNKNOWN; |
| 1204 | dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; | 1207 | dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND; |
| @@ -1467,7 +1470,7 @@ size_t dso__fprintf(struct dso *dso, FILE *fp) | |||
| 1467 | ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT "); | 1470 | ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT "); |
| 1468 | ret += dso__fprintf_buildid(dso, fp); | 1471 | ret += dso__fprintf_buildid(dso, fp); |
| 1469 | ret += fprintf(fp, ")\n"); | 1472 | ret += fprintf(fp, ")\n"); |
| 1470 | for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) { | 1473 | for (nd = rb_first_cached(&dso->symbols); nd; nd = rb_next(nd)) { |
| 1471 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); | 1474 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); |
| 1472 | ret += symbol__fprintf(pos, fp); | 1475 | ret += symbol__fprintf(pos, fp); |
| 1473 | } | 1476 | } |
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 8c8a7abe809d..bb417c54c25a 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h | |||
| @@ -7,13 +7,14 @@ | |||
| 7 | #include <linux/rbtree.h> | 7 | #include <linux/rbtree.h> |
| 8 | #include <sys/types.h> | 8 | #include <sys/types.h> |
| 9 | #include <stdbool.h> | 9 | #include <stdbool.h> |
| 10 | #include <stdio.h> | ||
| 10 | #include "rwsem.h" | 11 | #include "rwsem.h" |
| 11 | #include <linux/types.h> | ||
| 12 | #include <linux/bitops.h> | 12 | #include <linux/bitops.h> |
| 13 | #include "map.h" | ||
| 14 | #include "namespaces.h" | ||
| 15 | #include "build-id.h" | 13 | #include "build-id.h" |
| 16 | 14 | ||
| 15 | struct machine; | ||
| 16 | struct map; | ||
| 17 | |||
| 17 | enum dso_binary_type { | 18 | enum dso_binary_type { |
| 18 | DSO_BINARY_TYPE__KALLSYMS = 0, | 19 | DSO_BINARY_TYPE__KALLSYMS = 0, |
| 19 | DSO_BINARY_TYPE__GUEST_KALLSYMS, | 20 | DSO_BINARY_TYPE__GUEST_KALLSYMS, |
| @@ -140,10 +141,10 @@ struct dso { | |||
| 140 | struct list_head node; | 141 | struct list_head node; |
| 141 | struct rb_node rb_node; /* rbtree node sorted by long name */ | 142 | struct rb_node rb_node; /* rbtree node sorted by long name */ |
| 142 | struct rb_root *root; /* root of rbtree that rb_node is in */ | 143 | struct rb_root *root; /* root of rbtree that rb_node is in */ |
| 143 | struct rb_root symbols; | 144 | struct rb_root_cached symbols; |
| 144 | struct rb_root symbol_names; | 145 | struct rb_root_cached symbol_names; |
| 145 | struct rb_root inlined_nodes; | 146 | struct rb_root_cached inlined_nodes; |
| 146 | struct rb_root srclines; | 147 | struct rb_root_cached srclines; |
| 147 | struct { | 148 | struct { |
| 148 | u64 addr; | 149 | u64 addr; |
| 149 | struct symbol *symbol; | 150 | struct symbol *symbol; |
| @@ -235,7 +236,7 @@ bool dso__loaded(const struct dso *dso); | |||
| 235 | 236 | ||
| 236 | static inline bool dso__has_symbols(const struct dso *dso) | 237 | static inline bool dso__has_symbols(const struct dso *dso) |
| 237 | { | 238 | { |
| 238 | return !RB_EMPTY_ROOT(&dso->symbols); | 239 | return !RB_EMPTY_ROOT(&dso->symbols.rb_root); |
| 239 | } | 240 | } |
| 240 | 241 | ||
| 241 | bool dso__sorted_by_name(const struct dso *dso); | 242 | bool dso__sorted_by_name(const struct dso *dso); |
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 937a5a4f71cc..ba7be74fad6e 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c | |||
| @@ -21,9 +21,13 @@ | |||
| 21 | #include "thread.h" | 21 | #include "thread.h" |
| 22 | #include "thread_map.h" | 22 | #include "thread_map.h" |
| 23 | #include "sane_ctype.h" | 23 | #include "sane_ctype.h" |
| 24 | #include "map.h" | ||
| 25 | #include "symbol.h" | ||
| 24 | #include "symbol/kallsyms.h" | 26 | #include "symbol/kallsyms.h" |
| 25 | #include "asm/bug.h" | 27 | #include "asm/bug.h" |
| 26 | #include "stat.h" | 28 | #include "stat.h" |
| 29 | #include "session.h" | ||
| 30 | #include "bpf-event.h" | ||
| 27 | 31 | ||
| 28 | #define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 | 32 | #define DEFAULT_PROC_MAP_PARSE_TIMEOUT 500 |
| 29 | 33 | ||
| @@ -45,6 +49,8 @@ static const char *perf_event__names[] = { | |||
| 45 | [PERF_RECORD_SWITCH] = "SWITCH", | 49 | [PERF_RECORD_SWITCH] = "SWITCH", |
| 46 | [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", | 50 | [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", |
| 47 | [PERF_RECORD_NAMESPACES] = "NAMESPACES", | 51 | [PERF_RECORD_NAMESPACES] = "NAMESPACES", |
| 52 | [PERF_RECORD_KSYMBOL] = "KSYMBOL", | ||
| 53 | [PERF_RECORD_BPF_EVENT] = "BPF_EVENT", | ||
| 48 | [PERF_RECORD_HEADER_ATTR] = "ATTR", | 54 | [PERF_RECORD_HEADER_ATTR] = "ATTR", |
| 49 | [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", | 55 | [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", |
| 50 | [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", | 56 | [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", |
| @@ -1329,6 +1335,22 @@ int perf_event__process_switch(struct perf_tool *tool __maybe_unused, | |||
| 1329 | return machine__process_switch_event(machine, event); | 1335 | return machine__process_switch_event(machine, event); |
| 1330 | } | 1336 | } |
| 1331 | 1337 | ||
| 1338 | int perf_event__process_ksymbol(struct perf_tool *tool __maybe_unused, | ||
| 1339 | union perf_event *event, | ||
| 1340 | struct perf_sample *sample __maybe_unused, | ||
| 1341 | struct machine *machine) | ||
| 1342 | { | ||
| 1343 | return machine__process_ksymbol(machine, event, sample); | ||
| 1344 | } | ||
| 1345 | |||
| 1346 | int perf_event__process_bpf_event(struct perf_tool *tool __maybe_unused, | ||
| 1347 | union perf_event *event, | ||
| 1348 | struct perf_sample *sample __maybe_unused, | ||
| 1349 | struct machine *machine) | ||
| 1350 | { | ||
| 1351 | return machine__process_bpf_event(machine, event, sample); | ||
| 1352 | } | ||
| 1353 | |||
| 1332 | size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) | 1354 | size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) |
| 1333 | { | 1355 | { |
| 1334 | return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", | 1356 | return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", |
| @@ -1461,6 +1483,21 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) | |||
| 1461 | return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); | 1483 | return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); |
| 1462 | } | 1484 | } |
| 1463 | 1485 | ||
| 1486 | size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) | ||
| 1487 | { | ||
| 1488 | return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", | ||
| 1489 | event->ksymbol_event.addr, event->ksymbol_event.len, | ||
| 1490 | event->ksymbol_event.ksym_type, | ||
| 1491 | event->ksymbol_event.flags, event->ksymbol_event.name); | ||
| 1492 | } | ||
| 1493 | |||
| 1494 | size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp) | ||
| 1495 | { | ||
| 1496 | return fprintf(fp, " bpf event with type %u, flags %u, id %u\n", | ||
| 1497 | event->bpf_event.type, event->bpf_event.flags, | ||
| 1498 | event->bpf_event.id); | ||
| 1499 | } | ||
| 1500 | |||
| 1464 | size_t perf_event__fprintf(union perf_event *event, FILE *fp) | 1501 | size_t perf_event__fprintf(union perf_event *event, FILE *fp) |
| 1465 | { | 1502 | { |
| 1466 | size_t ret = fprintf(fp, "PERF_RECORD_%s", | 1503 | size_t ret = fprintf(fp, "PERF_RECORD_%s", |
| @@ -1496,6 +1533,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) | |||
| 1496 | case PERF_RECORD_LOST: | 1533 | case PERF_RECORD_LOST: |
| 1497 | ret += perf_event__fprintf_lost(event, fp); | 1534 | ret += perf_event__fprintf_lost(event, fp); |
| 1498 | break; | 1535 | break; |
| 1536 | case PERF_RECORD_KSYMBOL: | ||
| 1537 | ret += perf_event__fprintf_ksymbol(event, fp); | ||
| 1538 | break; | ||
| 1539 | case PERF_RECORD_BPF_EVENT: | ||
| 1540 | ret += perf_event__fprintf_bpf_event(event, fp); | ||
| 1541 | break; | ||
| 1499 | default: | 1542 | default: |
| 1500 | ret += fprintf(fp, "\n"); | 1543 | ret += fprintf(fp, "\n"); |
| 1501 | } | 1544 | } |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index eb95f3384958..36ae7e92dab1 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <limits.h> | 5 | #include <limits.h> |
| 6 | #include <stdio.h> | 6 | #include <stdio.h> |
| 7 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
| 8 | #include <linux/bpf.h> | ||
| 8 | 9 | ||
| 9 | #include "../perf.h" | 10 | #include "../perf.h" |
| 10 | #include "build-id.h" | 11 | #include "build-id.h" |
| @@ -84,6 +85,29 @@ struct throttle_event { | |||
| 84 | u64 stream_id; | 85 | u64 stream_id; |
| 85 | }; | 86 | }; |
| 86 | 87 | ||
| 88 | #ifndef KSYM_NAME_LEN | ||
| 89 | #define KSYM_NAME_LEN 256 | ||
| 90 | #endif | ||
| 91 | |||
| 92 | struct ksymbol_event { | ||
| 93 | struct perf_event_header header; | ||
| 94 | u64 addr; | ||
| 95 | u32 len; | ||
| 96 | u16 ksym_type; | ||
| 97 | u16 flags; | ||
| 98 | char name[KSYM_NAME_LEN]; | ||
| 99 | }; | ||
| 100 | |||
| 101 | struct bpf_event { | ||
| 102 | struct perf_event_header header; | ||
| 103 | u16 type; | ||
| 104 | u16 flags; | ||
| 105 | u32 id; | ||
| 106 | |||
| 107 | /* for bpf_prog types */ | ||
| 108 | u8 tag[BPF_TAG_SIZE]; // prog tag | ||
| 109 | }; | ||
| 110 | |||
| 87 | #define PERF_SAMPLE_MASK \ | 111 | #define PERF_SAMPLE_MASK \ |
| 88 | (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ | 112 | (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \ |
| 89 | PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ | 113 | PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \ |
| @@ -137,26 +161,7 @@ struct ip_callchain { | |||
| 137 | u64 ips[0]; | 161 | u64 ips[0]; |
| 138 | }; | 162 | }; |
| 139 | 163 | ||
| 140 | struct branch_flags { | 164 | struct branch_stack; |
| 141 | u64 mispred:1; | ||
| 142 | u64 predicted:1; | ||
| 143 | u64 in_tx:1; | ||
| 144 | u64 abort:1; | ||
| 145 | u64 cycles:16; | ||
| 146 | u64 type:4; | ||
| 147 | u64 reserved:40; | ||
| 148 | }; | ||
| 149 | |||
| 150 | struct branch_entry { | ||
| 151 | u64 from; | ||
| 152 | u64 to; | ||
| 153 | struct branch_flags flags; | ||
| 154 | }; | ||
| 155 | |||
| 156 | struct branch_stack { | ||
| 157 | u64 nr; | ||
| 158 | struct branch_entry entries[0]; | ||
| 159 | }; | ||
| 160 | 165 | ||
| 161 | enum { | 166 | enum { |
| 162 | PERF_IP_FLAG_BRANCH = 1ULL << 0, | 167 | PERF_IP_FLAG_BRANCH = 1ULL << 0, |
| @@ -527,8 +532,9 @@ struct auxtrace_error_event { | |||
| 527 | u32 cpu; | 532 | u32 cpu; |
| 528 | u32 pid; | 533 | u32 pid; |
| 529 | u32 tid; | 534 | u32 tid; |
| 530 | u32 reserved__; /* For alignment */ | 535 | u32 fmt; |
| 531 | u64 ip; | 536 | u64 ip; |
| 537 | u64 time; | ||
| 532 | char msg[MAX_AUXTRACE_ERROR_MSG]; | 538 | char msg[MAX_AUXTRACE_ERROR_MSG]; |
| 533 | }; | 539 | }; |
| 534 | 540 | ||
| @@ -651,6 +657,8 @@ union perf_event { | |||
| 651 | struct stat_round_event stat_round; | 657 | struct stat_round_event stat_round; |
| 652 | struct time_conv_event time_conv; | 658 | struct time_conv_event time_conv; |
| 653 | struct feature_event feat; | 659 | struct feature_event feat; |
| 660 | struct ksymbol_event ksymbol_event; | ||
| 661 | struct bpf_event bpf_event; | ||
| 654 | }; | 662 | }; |
| 655 | 663 | ||
| 656 | void perf_event__print_totals(void); | 664 | void perf_event__print_totals(void); |
| @@ -748,6 +756,14 @@ int perf_event__process_exit(struct perf_tool *tool, | |||
| 748 | union perf_event *event, | 756 | union perf_event *event, |
| 749 | struct perf_sample *sample, | 757 | struct perf_sample *sample, |
| 750 | struct machine *machine); | 758 | struct machine *machine); |
| 759 | int perf_event__process_ksymbol(struct perf_tool *tool, | ||
| 760 | union perf_event *event, | ||
| 761 | struct perf_sample *sample, | ||
| 762 | struct machine *machine); | ||
| 763 | int perf_event__process_bpf_event(struct perf_tool *tool, | ||
| 764 | union perf_event *event, | ||
| 765 | struct perf_sample *sample, | ||
| 766 | struct machine *machine); | ||
| 751 | int perf_tool__process_synth_event(struct perf_tool *tool, | 767 | int perf_tool__process_synth_event(struct perf_tool *tool, |
| 752 | union perf_event *event, | 768 | union perf_event *event, |
| 753 | struct machine *machine, | 769 | struct machine *machine, |
| @@ -811,6 +827,8 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); | |||
| 811 | size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); | 827 | size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp); |
| 812 | size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); | 828 | size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp); |
| 813 | size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); | 829 | size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp); |
| 830 | size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp); | ||
| 831 | size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp); | ||
| 814 | size_t perf_event__fprintf(union perf_event *event, FILE *fp); | 832 | size_t perf_event__fprintf(union perf_event *event, FILE *fp); |
| 815 | 833 | ||
| 816 | int kallsyms__get_function_start(const char *kallsyms_filename, | 834 | int kallsyms__get_function_start(const char *kallsyms_filename, |
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 8c902276d4b4..08cedb643ea6 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
| @@ -1022,7 +1022,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, | |||
| 1022 | */ | 1022 | */ |
| 1023 | int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, | 1023 | int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, |
| 1024 | unsigned int auxtrace_pages, | 1024 | unsigned int auxtrace_pages, |
| 1025 | bool auxtrace_overwrite, int nr_cblocks) | 1025 | bool auxtrace_overwrite, int nr_cblocks, int affinity) |
| 1026 | { | 1026 | { |
| 1027 | struct perf_evsel *evsel; | 1027 | struct perf_evsel *evsel; |
| 1028 | const struct cpu_map *cpus = evlist->cpus; | 1028 | const struct cpu_map *cpus = evlist->cpus; |
| @@ -1032,7 +1032,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, | |||
| 1032 | * Its value is decided by evsel's write_backward. | 1032 | * Its value is decided by evsel's write_backward. |
| 1033 | * So &mp should not be passed through const pointer. | 1033 | * So &mp should not be passed through const pointer. |
| 1034 | */ | 1034 | */ |
| 1035 | struct mmap_params mp = { .nr_cblocks = nr_cblocks }; | 1035 | struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity }; |
| 1036 | 1036 | ||
| 1037 | if (!evlist->mmap) | 1037 | if (!evlist->mmap) |
| 1038 | evlist->mmap = perf_evlist__alloc_mmap(evlist, false); | 1038 | evlist->mmap = perf_evlist__alloc_mmap(evlist, false); |
| @@ -1064,7 +1064,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, | |||
| 1064 | 1064 | ||
| 1065 | int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) | 1065 | int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) |
| 1066 | { | 1066 | { |
| 1067 | return perf_evlist__mmap_ex(evlist, pages, 0, false, 0); | 1067 | return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS); |
| 1068 | } | 1068 | } |
| 1069 | 1069 | ||
| 1070 | int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) | 1070 | int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) |
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 868294491194..744906dd4887 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h | |||
| @@ -49,6 +49,9 @@ struct perf_evlist { | |||
| 49 | struct perf_evsel *selected; | 49 | struct perf_evsel *selected; |
| 50 | struct events_stats stats; | 50 | struct events_stats stats; |
| 51 | struct perf_env *env; | 51 | struct perf_env *env; |
| 52 | void (*trace_event_sample_raw)(struct perf_evlist *evlist, | ||
| 53 | union perf_event *event, | ||
| 54 | struct perf_sample *sample); | ||
| 52 | u64 first_sample_time; | 55 | u64 first_sample_time; |
| 53 | u64 last_sample_time; | 56 | u64 last_sample_time; |
| 54 | }; | 57 | }; |
| @@ -162,7 +165,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); | |||
| 162 | 165 | ||
| 163 | int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, | 166 | int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, |
| 164 | unsigned int auxtrace_pages, | 167 | unsigned int auxtrace_pages, |
| 165 | bool auxtrace_overwrite, int nr_cblocks); | 168 | bool auxtrace_overwrite, int nr_cblocks, int affinity); |
| 166 | int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); | 169 | int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); |
| 167 | void perf_evlist__munmap(struct perf_evlist *evlist); | 170 | void perf_evlist__munmap(struct perf_evlist *evlist); |
| 168 | 171 | ||
| @@ -314,5 +317,4 @@ void perf_evlist__force_leader(struct perf_evlist *evlist); | |||
| 314 | 317 | ||
| 315 | struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, | 318 | struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evlist, |
| 316 | struct perf_evsel *evsel); | 319 | struct perf_evsel *evsel); |
| 317 | |||
| 318 | #endif /* __PERF_EVLIST_H */ | 320 | #endif /* __PERF_EVLIST_H */ |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbc0466db368..dfe2958e6287 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
| @@ -956,6 +956,14 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, | |||
| 956 | attr->sample_freq = 0; | 956 | attr->sample_freq = 0; |
| 957 | attr->sample_period = 0; | 957 | attr->sample_period = 0; |
| 958 | attr->write_backward = 0; | 958 | attr->write_backward = 0; |
| 959 | |||
| 960 | /* | ||
| 961 | * We don't get sample for slave events, we make them | ||
| 962 | * when delivering group leader sample. Set the slave | ||
| 963 | * event to follow the master sample_type to ease up | ||
| 964 | * report. | ||
| 965 | */ | ||
| 966 | attr->sample_type = leader->attr.sample_type; | ||
| 959 | } | 967 | } |
| 960 | 968 | ||
| 961 | if (opts->no_samples) | 969 | if (opts->no_samples) |
| @@ -1035,6 +1043,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, | |||
| 1035 | attr->mmap = track; | 1043 | attr->mmap = track; |
| 1036 | attr->mmap2 = track && !perf_missing_features.mmap2; | 1044 | attr->mmap2 = track && !perf_missing_features.mmap2; |
| 1037 | attr->comm = track; | 1045 | attr->comm = track; |
| 1046 | attr->ksymbol = track && !perf_missing_features.ksymbol; | ||
| 1047 | attr->bpf_event = track && opts->bpf_event && | ||
| 1048 | !perf_missing_features.bpf_event; | ||
| 1038 | 1049 | ||
| 1039 | if (opts->record_namespaces) | 1050 | if (opts->record_namespaces) |
| 1040 | attr->namespaces = track; | 1051 | attr->namespaces = track; |
| @@ -1652,6 +1663,8 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, | |||
| 1652 | PRINT_ATTRf(context_switch, p_unsigned); | 1663 | PRINT_ATTRf(context_switch, p_unsigned); |
| 1653 | PRINT_ATTRf(write_backward, p_unsigned); | 1664 | PRINT_ATTRf(write_backward, p_unsigned); |
| 1654 | PRINT_ATTRf(namespaces, p_unsigned); | 1665 | PRINT_ATTRf(namespaces, p_unsigned); |
| 1666 | PRINT_ATTRf(ksymbol, p_unsigned); | ||
| 1667 | PRINT_ATTRf(bpf_event, p_unsigned); | ||
| 1655 | 1668 | ||
| 1656 | PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); | 1669 | PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); |
| 1657 | PRINT_ATTRf(bp_type, p_unsigned); | 1670 | PRINT_ATTRf(bp_type, p_unsigned); |
| @@ -1811,6 +1824,10 @@ fallback_missing_features: | |||
| 1811 | PERF_SAMPLE_BRANCH_NO_CYCLES); | 1824 | PERF_SAMPLE_BRANCH_NO_CYCLES); |
| 1812 | if (perf_missing_features.group_read && evsel->attr.inherit) | 1825 | if (perf_missing_features.group_read && evsel->attr.inherit) |
| 1813 | evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); | 1826 | evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID); |
| 1827 | if (perf_missing_features.ksymbol) | ||
| 1828 | evsel->attr.ksymbol = 0; | ||
| 1829 | if (perf_missing_features.bpf_event) | ||
| 1830 | evsel->attr.bpf_event = 0; | ||
| 1814 | retry_sample_id: | 1831 | retry_sample_id: |
| 1815 | if (perf_missing_features.sample_id_all) | 1832 | if (perf_missing_features.sample_id_all) |
| 1816 | evsel->attr.sample_id_all = 0; | 1833 | evsel->attr.sample_id_all = 0; |
| @@ -1930,7 +1947,15 @@ try_fallback: | |||
| 1930 | * Must probe features in the order they were added to the | 1947 | * Must probe features in the order they were added to the |
| 1931 | * perf_event_attr interface. | 1948 | * perf_event_attr interface. |
| 1932 | */ | 1949 | */ |
| 1933 | if (!perf_missing_features.write_backward && evsel->attr.write_backward) { | 1950 | if (!perf_missing_features.bpf_event && evsel->attr.bpf_event) { |
| 1951 | perf_missing_features.bpf_event = true; | ||
| 1952 | pr_debug2("switching off bpf_event\n"); | ||
| 1953 | goto fallback_missing_features; | ||
| 1954 | } else if (!perf_missing_features.ksymbol && evsel->attr.ksymbol) { | ||
| 1955 | perf_missing_features.ksymbol = true; | ||
| 1956 | pr_debug2("switching off ksymbol\n"); | ||
| 1957 | goto fallback_missing_features; | ||
| 1958 | } else if (!perf_missing_features.write_backward && evsel->attr.write_backward) { | ||
| 1934 | perf_missing_features.write_backward = true; | 1959 | perf_missing_features.write_backward = true; |
| 1935 | pr_debug2("switching off write_backward\n"); | 1960 | pr_debug2("switching off write_backward\n"); |
| 1936 | goto out_close; | 1961 | goto out_close; |
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 82a289ce8b0c..cc578e02e08f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | #include <linux/perf_event.h> | 8 | #include <linux/perf_event.h> |
| 9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
| 10 | #include "xyarray.h" | 10 | #include "xyarray.h" |
| 11 | #include "symbol.h" | 11 | #include "symbol_conf.h" |
| 12 | #include "cpumap.h" | 12 | #include "cpumap.h" |
| 13 | #include "counts.h" | 13 | #include "counts.h" |
| 14 | 14 | ||
| @@ -168,6 +168,8 @@ struct perf_missing_features { | |||
| 168 | bool lbr_flags; | 168 | bool lbr_flags; |
| 169 | bool write_backward; | 169 | bool write_backward; |
| 170 | bool group_read; | 170 | bool group_read; |
| 171 | bool ksymbol; | ||
| 172 | bool bpf_event; | ||
| 171 | }; | 173 | }; |
| 172 | 174 | ||
| 173 | extern struct perf_missing_features perf_missing_features; | 175 | extern struct perf_missing_features perf_missing_features; |
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index dec6d218c31c..01b324c275b9 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include "tool.h" | 39 | #include "tool.h" |
| 40 | #include "time-utils.h" | 40 | #include "time-utils.h" |
| 41 | #include "units.h" | 41 | #include "units.h" |
| 42 | #include "cputopo.h" | ||
| 42 | 43 | ||
| 43 | #include "sane_ctype.h" | 44 | #include "sane_ctype.h" |
| 44 | 45 | ||
| @@ -526,17 +527,11 @@ static int write_event_desc(struct feat_fd *ff, | |||
| 526 | static int write_cmdline(struct feat_fd *ff, | 527 | static int write_cmdline(struct feat_fd *ff, |
| 527 | struct perf_evlist *evlist __maybe_unused) | 528 | struct perf_evlist *evlist __maybe_unused) |
| 528 | { | 529 | { |
| 529 | char buf[MAXPATHLEN]; | 530 | char pbuf[MAXPATHLEN], *buf; |
| 530 | u32 n; | 531 | int i, ret, n; |
| 531 | int i, ret; | ||
| 532 | 532 | ||
| 533 | /* actual path to perf binary */ | 533 | /* actual path to perf binary */ |
| 534 | ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1); | 534 | buf = perf_exe(pbuf, MAXPATHLEN); |
| 535 | if (ret <= 0) | ||
| 536 | return -1; | ||
| 537 | |||
| 538 | /* readlink() does not add null termination */ | ||
| 539 | buf[ret] = '\0'; | ||
| 540 | 535 | ||
| 541 | /* account for binary path */ | 536 | /* account for binary path */ |
| 542 | n = perf_env.nr_cmdline + 1; | 537 | n = perf_env.nr_cmdline + 1; |
| @@ -557,160 +552,15 @@ static int write_cmdline(struct feat_fd *ff, | |||
| 557 | return 0; | 552 | return 0; |
| 558 | } | 553 | } |
| 559 | 554 | ||
| 560 | #define CORE_SIB_FMT \ | ||
| 561 | "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list" | ||
| 562 | #define THRD_SIB_FMT \ | ||
| 563 | "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list" | ||
| 564 | |||
| 565 | struct cpu_topo { | ||
| 566 | u32 cpu_nr; | ||
| 567 | u32 core_sib; | ||
| 568 | u32 thread_sib; | ||
| 569 | char **core_siblings; | ||
| 570 | char **thread_siblings; | ||
| 571 | }; | ||
| 572 | |||
| 573 | static int build_cpu_topo(struct cpu_topo *tp, int cpu) | ||
| 574 | { | ||
| 575 | FILE *fp; | ||
| 576 | char filename[MAXPATHLEN]; | ||
| 577 | char *buf = NULL, *p; | ||
| 578 | size_t len = 0; | ||
| 579 | ssize_t sret; | ||
| 580 | u32 i = 0; | ||
| 581 | int ret = -1; | ||
| 582 | |||
| 583 | sprintf(filename, CORE_SIB_FMT, cpu); | ||
| 584 | fp = fopen(filename, "r"); | ||
| 585 | if (!fp) | ||
| 586 | goto try_threads; | ||
| 587 | |||
| 588 | sret = getline(&buf, &len, fp); | ||
| 589 | fclose(fp); | ||
| 590 | if (sret <= 0) | ||
| 591 | goto try_threads; | ||
| 592 | |||
| 593 | p = strchr(buf, '\n'); | ||
| 594 | if (p) | ||
| 595 | *p = '\0'; | ||
| 596 | |||
| 597 | for (i = 0; i < tp->core_sib; i++) { | ||
| 598 | if (!strcmp(buf, tp->core_siblings[i])) | ||
| 599 | break; | ||
| 600 | } | ||
| 601 | if (i == tp->core_sib) { | ||
| 602 | tp->core_siblings[i] = buf; | ||
| 603 | tp->core_sib++; | ||
| 604 | buf = NULL; | ||
| 605 | len = 0; | ||
| 606 | } | ||
| 607 | ret = 0; | ||
| 608 | |||
| 609 | try_threads: | ||
| 610 | sprintf(filename, THRD_SIB_FMT, cpu); | ||
| 611 | fp = fopen(filename, "r"); | ||
| 612 | if (!fp) | ||
| 613 | goto done; | ||
| 614 | |||
| 615 | if (getline(&buf, &len, fp) <= 0) | ||
| 616 | goto done; | ||
| 617 | |||
| 618 | p = strchr(buf, '\n'); | ||
| 619 | if (p) | ||
| 620 | *p = '\0'; | ||
| 621 | |||
| 622 | for (i = 0; i < tp->thread_sib; i++) { | ||
| 623 | if (!strcmp(buf, tp->thread_siblings[i])) | ||
| 624 | break; | ||
| 625 | } | ||
| 626 | if (i == tp->thread_sib) { | ||
| 627 | tp->thread_siblings[i] = buf; | ||
| 628 | tp->thread_sib++; | ||
| 629 | buf = NULL; | ||
| 630 | } | ||
| 631 | ret = 0; | ||
| 632 | done: | ||
| 633 | if(fp) | ||
| 634 | fclose(fp); | ||
| 635 | free(buf); | ||
| 636 | return ret; | ||
| 637 | } | ||
| 638 | |||
| 639 | static void free_cpu_topo(struct cpu_topo *tp) | ||
| 640 | { | ||
| 641 | u32 i; | ||
| 642 | |||
| 643 | if (!tp) | ||
| 644 | return; | ||
| 645 | |||
| 646 | for (i = 0 ; i < tp->core_sib; i++) | ||
| 647 | zfree(&tp->core_siblings[i]); | ||
| 648 | |||
| 649 | for (i = 0 ; i < tp->thread_sib; i++) | ||
| 650 | zfree(&tp->thread_siblings[i]); | ||
| 651 | |||
| 652 | free(tp); | ||
| 653 | } | ||
| 654 | |||
| 655 | static struct cpu_topo *build_cpu_topology(void) | ||
| 656 | { | ||
| 657 | struct cpu_topo *tp = NULL; | ||
| 658 | void *addr; | ||
| 659 | u32 nr, i; | ||
| 660 | size_t sz; | ||
| 661 | long ncpus; | ||
| 662 | int ret = -1; | ||
| 663 | struct cpu_map *map; | ||
| 664 | |||
| 665 | ncpus = cpu__max_present_cpu(); | ||
| 666 | |||
| 667 | /* build online CPU map */ | ||
| 668 | map = cpu_map__new(NULL); | ||
| 669 | if (map == NULL) { | ||
| 670 | pr_debug("failed to get system cpumap\n"); | ||
| 671 | return NULL; | ||
| 672 | } | ||
| 673 | |||
| 674 | nr = (u32)(ncpus & UINT_MAX); | ||
| 675 | |||
| 676 | sz = nr * sizeof(char *); | ||
| 677 | addr = calloc(1, sizeof(*tp) + 2 * sz); | ||
| 678 | if (!addr) | ||
| 679 | goto out_free; | ||
| 680 | |||
| 681 | tp = addr; | ||
| 682 | tp->cpu_nr = nr; | ||
| 683 | addr += sizeof(*tp); | ||
| 684 | tp->core_siblings = addr; | ||
| 685 | addr += sz; | ||
| 686 | tp->thread_siblings = addr; | ||
| 687 | |||
| 688 | for (i = 0; i < nr; i++) { | ||
| 689 | if (!cpu_map__has(map, i)) | ||
| 690 | continue; | ||
| 691 | |||
| 692 | ret = build_cpu_topo(tp, i); | ||
| 693 | if (ret < 0) | ||
| 694 | break; | ||
| 695 | } | ||
| 696 | |||
| 697 | out_free: | ||
| 698 | cpu_map__put(map); | ||
| 699 | if (ret) { | ||
| 700 | free_cpu_topo(tp); | ||
| 701 | tp = NULL; | ||
| 702 | } | ||
| 703 | return tp; | ||
| 704 | } | ||
| 705 | 555 | ||
| 706 | static int write_cpu_topology(struct feat_fd *ff, | 556 | static int write_cpu_topology(struct feat_fd *ff, |
| 707 | struct perf_evlist *evlist __maybe_unused) | 557 | struct perf_evlist *evlist __maybe_unused) |
| 708 | { | 558 | { |
| 709 | struct cpu_topo *tp; | 559 | struct cpu_topology *tp; |
| 710 | u32 i; | 560 | u32 i; |
| 711 | int ret, j; | 561 | int ret, j; |
| 712 | 562 | ||
| 713 | tp = build_cpu_topology(); | 563 | tp = cpu_topology__new(); |
| 714 | if (!tp) | 564 | if (!tp) |
| 715 | return -1; | 565 | return -1; |
| 716 | 566 | ||
| @@ -748,7 +598,7 @@ static int write_cpu_topology(struct feat_fd *ff, | |||
| 748 | return ret; | 598 | return ret; |
| 749 | } | 599 | } |
| 750 | done: | 600 | done: |
| 751 | free_cpu_topo(tp); | 601 | cpu_topology__delete(tp); |
| 752 | return ret; | 602 | return ret; |
| 753 | } | 603 | } |
| 754 | 604 | ||
| @@ -783,112 +633,45 @@ static int write_total_mem(struct feat_fd *ff, | |||
| 783 | return ret; | 633 | return ret; |
| 784 | } | 634 | } |
| 785 | 635 | ||
| 786 | static int write_topo_node(struct feat_fd *ff, int node) | ||
| 787 | { | ||
| 788 | char str[MAXPATHLEN]; | ||
| 789 | char field[32]; | ||
| 790 | char *buf = NULL, *p; | ||
| 791 | size_t len = 0; | ||
| 792 | FILE *fp; | ||
| 793 | u64 mem_total, mem_free, mem; | ||
| 794 | int ret = -1; | ||
| 795 | |||
| 796 | sprintf(str, "/sys/devices/system/node/node%d/meminfo", node); | ||
| 797 | fp = fopen(str, "r"); | ||
| 798 | if (!fp) | ||
| 799 | return -1; | ||
| 800 | |||
| 801 | while (getline(&buf, &len, fp) > 0) { | ||
| 802 | /* skip over invalid lines */ | ||
| 803 | if (!strchr(buf, ':')) | ||
| 804 | continue; | ||
| 805 | if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2) | ||
| 806 | goto done; | ||
| 807 | if (!strcmp(field, "MemTotal:")) | ||
| 808 | mem_total = mem; | ||
| 809 | if (!strcmp(field, "MemFree:")) | ||
| 810 | mem_free = mem; | ||
| 811 | } | ||
| 812 | |||
| 813 | fclose(fp); | ||
| 814 | fp = NULL; | ||
| 815 | |||
| 816 | ret = do_write(ff, &mem_total, sizeof(u64)); | ||
| 817 | if (ret) | ||
| 818 | goto done; | ||
| 819 | |||
| 820 | ret = do_write(ff, &mem_free, sizeof(u64)); | ||
| 821 | if (ret) | ||
| 822 | goto done; | ||
| 823 | |||
| 824 | ret = -1; | ||
| 825 | sprintf(str, "/sys/devices/system/node/node%d/cpulist", node); | ||
| 826 | |||
| 827 | fp = fopen(str, "r"); | ||
| 828 | if (!fp) | ||
| 829 | goto done; | ||
| 830 | |||
| 831 | if (getline(&buf, &len, fp) <= 0) | ||
| 832 | goto done; | ||
| 833 | |||
| 834 | p = strchr(buf, '\n'); | ||
| 835 | if (p) | ||
| 836 | *p = '\0'; | ||
| 837 | |||
| 838 | ret = do_write_string(ff, buf); | ||
| 839 | done: | ||
| 840 | free(buf); | ||
| 841 | if (fp) | ||
| 842 | fclose(fp); | ||
| 843 | return ret; | ||
| 844 | } | ||
| 845 | |||
| 846 | static int write_numa_topology(struct feat_fd *ff, | 636 | static int write_numa_topology(struct feat_fd *ff, |
| 847 | struct perf_evlist *evlist __maybe_unused) | 637 | struct perf_evlist *evlist __maybe_unused) |
| 848 | { | 638 | { |
| 849 | char *buf = NULL; | 639 | struct numa_topology *tp; |
| 850 | size_t len = 0; | ||
| 851 | FILE *fp; | ||
| 852 | struct cpu_map *node_map = NULL; | ||
| 853 | char *c; | ||
| 854 | u32 nr, i, j; | ||
| 855 | int ret = -1; | 640 | int ret = -1; |
| 641 | u32 i; | ||
| 856 | 642 | ||
| 857 | fp = fopen("/sys/devices/system/node/online", "r"); | 643 | tp = numa_topology__new(); |
| 858 | if (!fp) | 644 | if (!tp) |
| 859 | return -1; | 645 | return -ENOMEM; |
| 860 | |||
| 861 | if (getline(&buf, &len, fp) <= 0) | ||
| 862 | goto done; | ||
| 863 | 646 | ||
| 864 | c = strchr(buf, '\n'); | 647 | ret = do_write(ff, &tp->nr, sizeof(u32)); |
| 865 | if (c) | 648 | if (ret < 0) |
| 866 | *c = '\0'; | 649 | goto err; |
| 867 | 650 | ||
| 868 | node_map = cpu_map__new(buf); | 651 | for (i = 0; i < tp->nr; i++) { |
| 869 | if (!node_map) | 652 | struct numa_topology_node *n = &tp->nodes[i]; |
| 870 | goto done; | ||
| 871 | 653 | ||
| 872 | nr = (u32)node_map->nr; | 654 | ret = do_write(ff, &n->node, sizeof(u32)); |
| 655 | if (ret < 0) | ||
| 656 | goto err; | ||
| 873 | 657 | ||
| 874 | ret = do_write(ff, &nr, sizeof(nr)); | 658 | ret = do_write(ff, &n->mem_total, sizeof(u64)); |
| 875 | if (ret < 0) | 659 | if (ret) |
| 876 | goto done; | 660 | goto err; |
| 877 | 661 | ||
| 878 | for (i = 0; i < nr; i++) { | 662 | ret = do_write(ff, &n->mem_free, sizeof(u64)); |
| 879 | j = (u32)node_map->map[i]; | 663 | if (ret) |
| 880 | ret = do_write(ff, &j, sizeof(j)); | 664 | goto err; |
| 881 | if (ret < 0) | ||
| 882 | break; | ||
| 883 | 665 | ||
| 884 | ret = write_topo_node(ff, i); | 666 | ret = do_write_string(ff, n->cpus); |
| 885 | if (ret < 0) | 667 | if (ret < 0) |
| 886 | break; | 668 | goto err; |
| 887 | } | 669 | } |
| 888 | done: | 670 | |
| 889 | free(buf); | 671 | ret = 0; |
| 890 | fclose(fp); | 672 | |
| 891 | cpu_map__put(node_map); | 673 | err: |
| 674 | numa_topology__delete(tp); | ||
| 892 | return ret; | 675 | return ret; |
| 893 | } | 676 | } |
| 894 | 677 | ||
| @@ -1042,11 +825,9 @@ static int write_cpuid(struct feat_fd *ff, | |||
| 1042 | int ret; | 825 | int ret; |
| 1043 | 826 | ||
| 1044 | ret = get_cpuid(buffer, sizeof(buffer)); | 827 | ret = get_cpuid(buffer, sizeof(buffer)); |
| 1045 | if (!ret) | 828 | if (ret) |
| 1046 | goto write_it; | 829 | return -1; |
| 1047 | 830 | ||
| 1048 | return -1; | ||
| 1049 | write_it: | ||
| 1050 | return do_write_string(ff, buffer); | 831 | return do_write_string(ff, buffer); |
| 1051 | } | 832 | } |
| 1052 | 833 | ||
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 8aad8330e392..669f961316f0 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
| @@ -1,4 +1,5 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "callchain.h" | ||
| 2 | #include "util.h" | 3 | #include "util.h" |
| 3 | #include "build-id.h" | 4 | #include "build-id.h" |
| 4 | #include "hist.h" | 5 | #include "hist.h" |
| @@ -11,6 +12,7 @@ | |||
| 11 | #include "evsel.h" | 12 | #include "evsel.h" |
| 12 | #include "annotate.h" | 13 | #include "annotate.h" |
| 13 | #include "srcline.h" | 14 | #include "srcline.h" |
| 15 | #include "symbol.h" | ||
| 14 | #include "thread.h" | 16 | #include "thread.h" |
| 15 | #include "ui/progress.h" | 17 | #include "ui/progress.h" |
| 16 | #include <errno.h> | 18 | #include <errno.h> |
| @@ -209,7 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
| 209 | 211 | ||
| 210 | void hists__output_recalc_col_len(struct hists *hists, int max_rows) | 212 | void hists__output_recalc_col_len(struct hists *hists, int max_rows) |
| 211 | { | 213 | { |
| 212 | struct rb_node *next = rb_first(&hists->entries); | 214 | struct rb_node *next = rb_first_cached(&hists->entries); |
| 213 | struct hist_entry *n; | 215 | struct hist_entry *n; |
| 214 | int row = 0; | 216 | int row = 0; |
| 215 | 217 | ||
| @@ -296,7 +298,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) | |||
| 296 | 298 | ||
| 297 | if (!he->leaf) { | 299 | if (!he->leaf) { |
| 298 | struct hist_entry *child; | 300 | struct hist_entry *child; |
| 299 | struct rb_node *node = rb_first(&he->hroot_out); | 301 | struct rb_node *node = rb_first_cached(&he->hroot_out); |
| 300 | while (node) { | 302 | while (node) { |
| 301 | child = rb_entry(node, struct hist_entry, rb_node); | 303 | child = rb_entry(node, struct hist_entry, rb_node); |
| 302 | node = rb_next(node); | 304 | node = rb_next(node); |
| @@ -311,8 +313,8 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) | |||
| 311 | 313 | ||
| 312 | static void hists__delete_entry(struct hists *hists, struct hist_entry *he) | 314 | static void hists__delete_entry(struct hists *hists, struct hist_entry *he) |
| 313 | { | 315 | { |
| 314 | struct rb_root *root_in; | 316 | struct rb_root_cached *root_in; |
| 315 | struct rb_root *root_out; | 317 | struct rb_root_cached *root_out; |
| 316 | 318 | ||
| 317 | if (he->parent_he) { | 319 | if (he->parent_he) { |
| 318 | root_in = &he->parent_he->hroot_in; | 320 | root_in = &he->parent_he->hroot_in; |
| @@ -325,8 +327,8 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) | |||
| 325 | root_out = &hists->entries; | 327 | root_out = &hists->entries; |
| 326 | } | 328 | } |
| 327 | 329 | ||
| 328 | rb_erase(&he->rb_node_in, root_in); | 330 | rb_erase_cached(&he->rb_node_in, root_in); |
| 329 | rb_erase(&he->rb_node, root_out); | 331 | rb_erase_cached(&he->rb_node, root_out); |
| 330 | 332 | ||
| 331 | --hists->nr_entries; | 333 | --hists->nr_entries; |
| 332 | if (!he->filtered) | 334 | if (!he->filtered) |
| @@ -337,7 +339,7 @@ static void hists__delete_entry(struct hists *hists, struct hist_entry *he) | |||
| 337 | 339 | ||
| 338 | void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) | 340 | void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) |
| 339 | { | 341 | { |
| 340 | struct rb_node *next = rb_first(&hists->entries); | 342 | struct rb_node *next = rb_first_cached(&hists->entries); |
| 341 | struct hist_entry *n; | 343 | struct hist_entry *n; |
| 342 | 344 | ||
| 343 | while (next) { | 345 | while (next) { |
| @@ -353,7 +355,7 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) | |||
| 353 | 355 | ||
| 354 | void hists__delete_entries(struct hists *hists) | 356 | void hists__delete_entries(struct hists *hists) |
| 355 | { | 357 | { |
| 356 | struct rb_node *next = rb_first(&hists->entries); | 358 | struct rb_node *next = rb_first_cached(&hists->entries); |
| 357 | struct hist_entry *n; | 359 | struct hist_entry *n; |
| 358 | 360 | ||
| 359 | while (next) { | 361 | while (next) { |
| @@ -435,8 +437,8 @@ static int hist_entry__init(struct hist_entry *he, | |||
| 435 | } | 437 | } |
| 436 | INIT_LIST_HEAD(&he->pairs.node); | 438 | INIT_LIST_HEAD(&he->pairs.node); |
| 437 | thread__get(he->thread); | 439 | thread__get(he->thread); |
| 438 | he->hroot_in = RB_ROOT; | 440 | he->hroot_in = RB_ROOT_CACHED; |
| 439 | he->hroot_out = RB_ROOT; | 441 | he->hroot_out = RB_ROOT_CACHED; |
| 440 | 442 | ||
| 441 | if (!symbol_conf.report_hierarchy) | 443 | if (!symbol_conf.report_hierarchy) |
| 442 | he->leaf = true; | 444 | he->leaf = true; |
| @@ -513,8 +515,9 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, | |||
| 513 | int64_t cmp; | 515 | int64_t cmp; |
| 514 | u64 period = entry->stat.period; | 516 | u64 period = entry->stat.period; |
| 515 | u64 weight = entry->stat.weight; | 517 | u64 weight = entry->stat.weight; |
| 518 | bool leftmost = true; | ||
| 516 | 519 | ||
| 517 | p = &hists->entries_in->rb_node; | 520 | p = &hists->entries_in->rb_root.rb_node; |
| 518 | 521 | ||
| 519 | while (*p != NULL) { | 522 | while (*p != NULL) { |
| 520 | parent = *p; | 523 | parent = *p; |
| @@ -557,8 +560,10 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, | |||
| 557 | 560 | ||
| 558 | if (cmp < 0) | 561 | if (cmp < 0) |
| 559 | p = &(*p)->rb_left; | 562 | p = &(*p)->rb_left; |
| 560 | else | 563 | else { |
| 561 | p = &(*p)->rb_right; | 564 | p = &(*p)->rb_right; |
| 565 | leftmost = false; | ||
| 566 | } | ||
| 562 | } | 567 | } |
| 563 | 568 | ||
| 564 | he = hist_entry__new(entry, sample_self); | 569 | he = hist_entry__new(entry, sample_self); |
| @@ -570,7 +575,7 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists, | |||
| 570 | hists->nr_entries++; | 575 | hists->nr_entries++; |
| 571 | 576 | ||
| 572 | rb_link_node(&he->rb_node_in, parent, p); | 577 | rb_link_node(&he->rb_node_in, parent, p); |
| 573 | rb_insert_color(&he->rb_node_in, hists->entries_in); | 578 | rb_insert_color_cached(&he->rb_node_in, hists->entries_in, leftmost); |
| 574 | out: | 579 | out: |
| 575 | if (sample_self) | 580 | if (sample_self) |
| 576 | he_stat__add_cpumode_period(&he->stat, al->cpumode, period); | 581 | he_stat__add_cpumode_period(&he->stat, al->cpumode, period); |
| @@ -1279,16 +1284,17 @@ static void hist_entry__apply_hierarchy_filters(struct hist_entry *he) | |||
| 1279 | } | 1284 | } |
| 1280 | 1285 | ||
| 1281 | static struct hist_entry *hierarchy_insert_entry(struct hists *hists, | 1286 | static struct hist_entry *hierarchy_insert_entry(struct hists *hists, |
| 1282 | struct rb_root *root, | 1287 | struct rb_root_cached *root, |
| 1283 | struct hist_entry *he, | 1288 | struct hist_entry *he, |
| 1284 | struct hist_entry *parent_he, | 1289 | struct hist_entry *parent_he, |
| 1285 | struct perf_hpp_list *hpp_list) | 1290 | struct perf_hpp_list *hpp_list) |
| 1286 | { | 1291 | { |
| 1287 | struct rb_node **p = &root->rb_node; | 1292 | struct rb_node **p = &root->rb_root.rb_node; |
| 1288 | struct rb_node *parent = NULL; | 1293 | struct rb_node *parent = NULL; |
| 1289 | struct hist_entry *iter, *new; | 1294 | struct hist_entry *iter, *new; |
| 1290 | struct perf_hpp_fmt *fmt; | 1295 | struct perf_hpp_fmt *fmt; |
| 1291 | int64_t cmp; | 1296 | int64_t cmp; |
| 1297 | bool leftmost = true; | ||
| 1292 | 1298 | ||
| 1293 | while (*p != NULL) { | 1299 | while (*p != NULL) { |
| 1294 | parent = *p; | 1300 | parent = *p; |
| @@ -1308,8 +1314,10 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, | |||
| 1308 | 1314 | ||
| 1309 | if (cmp < 0) | 1315 | if (cmp < 0) |
| 1310 | p = &parent->rb_left; | 1316 | p = &parent->rb_left; |
| 1311 | else | 1317 | else { |
| 1312 | p = &parent->rb_right; | 1318 | p = &parent->rb_right; |
| 1319 | leftmost = false; | ||
| 1320 | } | ||
| 1313 | } | 1321 | } |
| 1314 | 1322 | ||
| 1315 | new = hist_entry__new(he, true); | 1323 | new = hist_entry__new(he, true); |
| @@ -1343,12 +1351,12 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, | |||
| 1343 | } | 1351 | } |
| 1344 | 1352 | ||
| 1345 | rb_link_node(&new->rb_node_in, parent, p); | 1353 | rb_link_node(&new->rb_node_in, parent, p); |
| 1346 | rb_insert_color(&new->rb_node_in, root); | 1354 | rb_insert_color_cached(&new->rb_node_in, root, leftmost); |
| 1347 | return new; | 1355 | return new; |
| 1348 | } | 1356 | } |
| 1349 | 1357 | ||
| 1350 | static int hists__hierarchy_insert_entry(struct hists *hists, | 1358 | static int hists__hierarchy_insert_entry(struct hists *hists, |
| 1351 | struct rb_root *root, | 1359 | struct rb_root_cached *root, |
| 1352 | struct hist_entry *he) | 1360 | struct hist_entry *he) |
| 1353 | { | 1361 | { |
| 1354 | struct perf_hpp_list_node *node; | 1362 | struct perf_hpp_list_node *node; |
| @@ -1395,13 +1403,14 @@ static int hists__hierarchy_insert_entry(struct hists *hists, | |||
| 1395 | } | 1403 | } |
| 1396 | 1404 | ||
| 1397 | static int hists__collapse_insert_entry(struct hists *hists, | 1405 | static int hists__collapse_insert_entry(struct hists *hists, |
| 1398 | struct rb_root *root, | 1406 | struct rb_root_cached *root, |
| 1399 | struct hist_entry *he) | 1407 | struct hist_entry *he) |
| 1400 | { | 1408 | { |
| 1401 | struct rb_node **p = &root->rb_node; | 1409 | struct rb_node **p = &root->rb_root.rb_node; |
| 1402 | struct rb_node *parent = NULL; | 1410 | struct rb_node *parent = NULL; |
| 1403 | struct hist_entry *iter; | 1411 | struct hist_entry *iter; |
| 1404 | int64_t cmp; | 1412 | int64_t cmp; |
| 1413 | bool leftmost = true; | ||
| 1405 | 1414 | ||
| 1406 | if (symbol_conf.report_hierarchy) | 1415 | if (symbol_conf.report_hierarchy) |
| 1407 | return hists__hierarchy_insert_entry(hists, root, he); | 1416 | return hists__hierarchy_insert_entry(hists, root, he); |
| @@ -1432,19 +1441,21 @@ static int hists__collapse_insert_entry(struct hists *hists, | |||
| 1432 | 1441 | ||
| 1433 | if (cmp < 0) | 1442 | if (cmp < 0) |
| 1434 | p = &(*p)->rb_left; | 1443 | p = &(*p)->rb_left; |
| 1435 | else | 1444 | else { |
| 1436 | p = &(*p)->rb_right; | 1445 | p = &(*p)->rb_right; |
| 1446 | leftmost = false; | ||
| 1447 | } | ||
| 1437 | } | 1448 | } |
| 1438 | hists->nr_entries++; | 1449 | hists->nr_entries++; |
| 1439 | 1450 | ||
| 1440 | rb_link_node(&he->rb_node_in, parent, p); | 1451 | rb_link_node(&he->rb_node_in, parent, p); |
| 1441 | rb_insert_color(&he->rb_node_in, root); | 1452 | rb_insert_color_cached(&he->rb_node_in, root, leftmost); |
| 1442 | return 1; | 1453 | return 1; |
| 1443 | } | 1454 | } |
| 1444 | 1455 | ||
| 1445 | struct rb_root *hists__get_rotate_entries_in(struct hists *hists) | 1456 | struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists) |
| 1446 | { | 1457 | { |
| 1447 | struct rb_root *root; | 1458 | struct rb_root_cached *root; |
| 1448 | 1459 | ||
| 1449 | pthread_mutex_lock(&hists->lock); | 1460 | pthread_mutex_lock(&hists->lock); |
| 1450 | 1461 | ||
| @@ -1467,7 +1478,7 @@ static void hists__apply_filters(struct hists *hists, struct hist_entry *he) | |||
| 1467 | 1478 | ||
| 1468 | int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) | 1479 | int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) |
| 1469 | { | 1480 | { |
| 1470 | struct rb_root *root; | 1481 | struct rb_root_cached *root; |
| 1471 | struct rb_node *next; | 1482 | struct rb_node *next; |
| 1472 | struct hist_entry *n; | 1483 | struct hist_entry *n; |
| 1473 | int ret; | 1484 | int ret; |
| @@ -1479,7 +1490,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) | |||
| 1479 | 1490 | ||
| 1480 | root = hists__get_rotate_entries_in(hists); | 1491 | root = hists__get_rotate_entries_in(hists); |
| 1481 | 1492 | ||
| 1482 | next = rb_first(root); | 1493 | next = rb_first_cached(root); |
| 1483 | 1494 | ||
| 1484 | while (next) { | 1495 | while (next) { |
| 1485 | if (session_done()) | 1496 | if (session_done()) |
| @@ -1487,7 +1498,7 @@ int hists__collapse_resort(struct hists *hists, struct ui_progress *prog) | |||
| 1487 | n = rb_entry(next, struct hist_entry, rb_node_in); | 1498 | n = rb_entry(next, struct hist_entry, rb_node_in); |
| 1488 | next = rb_next(&n->rb_node_in); | 1499 | next = rb_next(&n->rb_node_in); |
| 1489 | 1500 | ||
| 1490 | rb_erase(&n->rb_node_in, root); | 1501 | rb_erase_cached(&n->rb_node_in, root); |
| 1491 | ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); | 1502 | ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n); |
| 1492 | if (ret < 0) | 1503 | if (ret < 0) |
| 1493 | return -1; | 1504 | return -1; |
| @@ -1558,7 +1569,7 @@ static void hierarchy_recalc_total_periods(struct hists *hists) | |||
| 1558 | struct rb_node *node; | 1569 | struct rb_node *node; |
| 1559 | struct hist_entry *he; | 1570 | struct hist_entry *he; |
| 1560 | 1571 | ||
| 1561 | node = rb_first(&hists->entries); | 1572 | node = rb_first_cached(&hists->entries); |
| 1562 | 1573 | ||
| 1563 | hists->stats.total_period = 0; | 1574 | hists->stats.total_period = 0; |
| 1564 | hists->stats.total_non_filtered_period = 0; | 1575 | hists->stats.total_non_filtered_period = 0; |
| @@ -1578,13 +1589,14 @@ static void hierarchy_recalc_total_periods(struct hists *hists) | |||
| 1578 | } | 1589 | } |
| 1579 | } | 1590 | } |
| 1580 | 1591 | ||
| 1581 | static void hierarchy_insert_output_entry(struct rb_root *root, | 1592 | static void hierarchy_insert_output_entry(struct rb_root_cached *root, |
| 1582 | struct hist_entry *he) | 1593 | struct hist_entry *he) |
| 1583 | { | 1594 | { |
| 1584 | struct rb_node **p = &root->rb_node; | 1595 | struct rb_node **p = &root->rb_root.rb_node; |
| 1585 | struct rb_node *parent = NULL; | 1596 | struct rb_node *parent = NULL; |
| 1586 | struct hist_entry *iter; | 1597 | struct hist_entry *iter; |
| 1587 | struct perf_hpp_fmt *fmt; | 1598 | struct perf_hpp_fmt *fmt; |
| 1599 | bool leftmost = true; | ||
| 1588 | 1600 | ||
| 1589 | while (*p != NULL) { | 1601 | while (*p != NULL) { |
| 1590 | parent = *p; | 1602 | parent = *p; |
| @@ -1592,12 +1604,14 @@ static void hierarchy_insert_output_entry(struct rb_root *root, | |||
| 1592 | 1604 | ||
| 1593 | if (hist_entry__sort(he, iter) > 0) | 1605 | if (hist_entry__sort(he, iter) > 0) |
| 1594 | p = &parent->rb_left; | 1606 | p = &parent->rb_left; |
| 1595 | else | 1607 | else { |
| 1596 | p = &parent->rb_right; | 1608 | p = &parent->rb_right; |
| 1609 | leftmost = false; | ||
| 1610 | } | ||
| 1597 | } | 1611 | } |
| 1598 | 1612 | ||
| 1599 | rb_link_node(&he->rb_node, parent, p); | 1613 | rb_link_node(&he->rb_node, parent, p); |
| 1600 | rb_insert_color(&he->rb_node, root); | 1614 | rb_insert_color_cached(&he->rb_node, root, leftmost); |
| 1601 | 1615 | ||
| 1602 | /* update column width of dynamic entry */ | 1616 | /* update column width of dynamic entry */ |
| 1603 | perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { | 1617 | perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) { |
| @@ -1608,16 +1622,16 @@ static void hierarchy_insert_output_entry(struct rb_root *root, | |||
| 1608 | 1622 | ||
| 1609 | static void hists__hierarchy_output_resort(struct hists *hists, | 1623 | static void hists__hierarchy_output_resort(struct hists *hists, |
| 1610 | struct ui_progress *prog, | 1624 | struct ui_progress *prog, |
| 1611 | struct rb_root *root_in, | 1625 | struct rb_root_cached *root_in, |
| 1612 | struct rb_root *root_out, | 1626 | struct rb_root_cached *root_out, |
| 1613 | u64 min_callchain_hits, | 1627 | u64 min_callchain_hits, |
| 1614 | bool use_callchain) | 1628 | bool use_callchain) |
| 1615 | { | 1629 | { |
| 1616 | struct rb_node *node; | 1630 | struct rb_node *node; |
| 1617 | struct hist_entry *he; | 1631 | struct hist_entry *he; |
| 1618 | 1632 | ||
| 1619 | *root_out = RB_ROOT; | 1633 | *root_out = RB_ROOT_CACHED; |
| 1620 | node = rb_first(root_in); | 1634 | node = rb_first_cached(root_in); |
| 1621 | 1635 | ||
| 1622 | while (node) { | 1636 | while (node) { |
| 1623 | he = rb_entry(node, struct hist_entry, rb_node_in); | 1637 | he = rb_entry(node, struct hist_entry, rb_node_in); |
| @@ -1660,15 +1674,16 @@ static void hists__hierarchy_output_resort(struct hists *hists, | |||
| 1660 | } | 1674 | } |
| 1661 | } | 1675 | } |
| 1662 | 1676 | ||
| 1663 | static void __hists__insert_output_entry(struct rb_root *entries, | 1677 | static void __hists__insert_output_entry(struct rb_root_cached *entries, |
| 1664 | struct hist_entry *he, | 1678 | struct hist_entry *he, |
| 1665 | u64 min_callchain_hits, | 1679 | u64 min_callchain_hits, |
| 1666 | bool use_callchain) | 1680 | bool use_callchain) |
| 1667 | { | 1681 | { |
| 1668 | struct rb_node **p = &entries->rb_node; | 1682 | struct rb_node **p = &entries->rb_root.rb_node; |
| 1669 | struct rb_node *parent = NULL; | 1683 | struct rb_node *parent = NULL; |
| 1670 | struct hist_entry *iter; | 1684 | struct hist_entry *iter; |
| 1671 | struct perf_hpp_fmt *fmt; | 1685 | struct perf_hpp_fmt *fmt; |
| 1686 | bool leftmost = true; | ||
| 1672 | 1687 | ||
| 1673 | if (use_callchain) { | 1688 | if (use_callchain) { |
| 1674 | if (callchain_param.mode == CHAIN_GRAPH_REL) { | 1689 | if (callchain_param.mode == CHAIN_GRAPH_REL) { |
| @@ -1689,12 +1704,14 @@ static void __hists__insert_output_entry(struct rb_root *entries, | |||
| 1689 | 1704 | ||
| 1690 | if (hist_entry__sort(he, iter) > 0) | 1705 | if (hist_entry__sort(he, iter) > 0) |
| 1691 | p = &(*p)->rb_left; | 1706 | p = &(*p)->rb_left; |
| 1692 | else | 1707 | else { |
| 1693 | p = &(*p)->rb_right; | 1708 | p = &(*p)->rb_right; |
| 1709 | leftmost = false; | ||
| 1710 | } | ||
| 1694 | } | 1711 | } |
| 1695 | 1712 | ||
| 1696 | rb_link_node(&he->rb_node, parent, p); | 1713 | rb_link_node(&he->rb_node, parent, p); |
| 1697 | rb_insert_color(&he->rb_node, entries); | 1714 | rb_insert_color_cached(&he->rb_node, entries, leftmost); |
| 1698 | 1715 | ||
| 1699 | perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { | 1716 | perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) { |
| 1700 | if (perf_hpp__is_dynamic_entry(fmt) && | 1717 | if (perf_hpp__is_dynamic_entry(fmt) && |
| @@ -1704,9 +1721,10 @@ static void __hists__insert_output_entry(struct rb_root *entries, | |||
| 1704 | } | 1721 | } |
| 1705 | 1722 | ||
| 1706 | static void output_resort(struct hists *hists, struct ui_progress *prog, | 1723 | static void output_resort(struct hists *hists, struct ui_progress *prog, |
| 1707 | bool use_callchain, hists__resort_cb_t cb) | 1724 | bool use_callchain, hists__resort_cb_t cb, |
| 1725 | void *cb_arg) | ||
| 1708 | { | 1726 | { |
| 1709 | struct rb_root *root; | 1727 | struct rb_root_cached *root; |
| 1710 | struct rb_node *next; | 1728 | struct rb_node *next; |
| 1711 | struct hist_entry *n; | 1729 | struct hist_entry *n; |
| 1712 | u64 callchain_total; | 1730 | u64 callchain_total; |
| @@ -1736,14 +1754,14 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, | |||
| 1736 | else | 1754 | else |
| 1737 | root = hists->entries_in; | 1755 | root = hists->entries_in; |
| 1738 | 1756 | ||
| 1739 | next = rb_first(root); | 1757 | next = rb_first_cached(root); |
| 1740 | hists->entries = RB_ROOT; | 1758 | hists->entries = RB_ROOT_CACHED; |
| 1741 | 1759 | ||
| 1742 | while (next) { | 1760 | while (next) { |
| 1743 | n = rb_entry(next, struct hist_entry, rb_node_in); | 1761 | n = rb_entry(next, struct hist_entry, rb_node_in); |
| 1744 | next = rb_next(&n->rb_node_in); | 1762 | next = rb_next(&n->rb_node_in); |
| 1745 | 1763 | ||
| 1746 | if (cb && cb(n)) | 1764 | if (cb && cb(n, cb_arg)) |
| 1747 | continue; | 1765 | continue; |
| 1748 | 1766 | ||
| 1749 | __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); | 1767 | __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); |
| @@ -1757,7 +1775,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog, | |||
| 1757 | } | 1775 | } |
| 1758 | } | 1776 | } |
| 1759 | 1777 | ||
| 1760 | void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) | 1778 | void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, |
| 1779 | hists__resort_cb_t cb, void *cb_arg) | ||
| 1761 | { | 1780 | { |
| 1762 | bool use_callchain; | 1781 | bool use_callchain; |
| 1763 | 1782 | ||
| @@ -1768,18 +1787,23 @@ void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *pro | |||
| 1768 | 1787 | ||
| 1769 | use_callchain |= symbol_conf.show_branchflag_count; | 1788 | use_callchain |= symbol_conf.show_branchflag_count; |
| 1770 | 1789 | ||
| 1771 | output_resort(evsel__hists(evsel), prog, use_callchain, NULL); | 1790 | output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg); |
| 1791 | } | ||
| 1792 | |||
| 1793 | void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog) | ||
| 1794 | { | ||
| 1795 | return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL); | ||
| 1772 | } | 1796 | } |
| 1773 | 1797 | ||
| 1774 | void hists__output_resort(struct hists *hists, struct ui_progress *prog) | 1798 | void hists__output_resort(struct hists *hists, struct ui_progress *prog) |
| 1775 | { | 1799 | { |
| 1776 | output_resort(hists, prog, symbol_conf.use_callchain, NULL); | 1800 | output_resort(hists, prog, symbol_conf.use_callchain, NULL, NULL); |
| 1777 | } | 1801 | } |
| 1778 | 1802 | ||
| 1779 | void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, | 1803 | void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, |
| 1780 | hists__resort_cb_t cb) | 1804 | hists__resort_cb_t cb) |
| 1781 | { | 1805 | { |
| 1782 | output_resort(hists, prog, symbol_conf.use_callchain, cb); | 1806 | output_resort(hists, prog, symbol_conf.use_callchain, cb, NULL); |
| 1783 | } | 1807 | } |
| 1784 | 1808 | ||
| 1785 | static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) | 1809 | static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd) |
| @@ -1798,7 +1822,7 @@ struct rb_node *rb_hierarchy_last(struct rb_node *node) | |||
| 1798 | struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); | 1822 | struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); |
| 1799 | 1823 | ||
| 1800 | while (can_goto_child(he, HMD_NORMAL)) { | 1824 | while (can_goto_child(he, HMD_NORMAL)) { |
| 1801 | node = rb_last(&he->hroot_out); | 1825 | node = rb_last(&he->hroot_out.rb_root); |
| 1802 | he = rb_entry(node, struct hist_entry, rb_node); | 1826 | he = rb_entry(node, struct hist_entry, rb_node); |
| 1803 | } | 1827 | } |
| 1804 | return node; | 1828 | return node; |
| @@ -1809,7 +1833,7 @@ struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_di | |||
| 1809 | struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); | 1833 | struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node); |
| 1810 | 1834 | ||
| 1811 | if (can_goto_child(he, hmd)) | 1835 | if (can_goto_child(he, hmd)) |
| 1812 | node = rb_first(&he->hroot_out); | 1836 | node = rb_first_cached(&he->hroot_out); |
| 1813 | else | 1837 | else |
| 1814 | node = rb_next(node); | 1838 | node = rb_next(node); |
| 1815 | 1839 | ||
| @@ -1847,7 +1871,7 @@ bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit) | |||
| 1847 | if (he->leaf) | 1871 | if (he->leaf) |
| 1848 | return false; | 1872 | return false; |
| 1849 | 1873 | ||
| 1850 | node = rb_first(&he->hroot_out); | 1874 | node = rb_first_cached(&he->hroot_out); |
| 1851 | child = rb_entry(node, struct hist_entry, rb_node); | 1875 | child = rb_entry(node, struct hist_entry, rb_node); |
| 1852 | 1876 | ||
| 1853 | while (node && child->filtered) { | 1877 | while (node && child->filtered) { |
| @@ -1965,7 +1989,7 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil | |||
| 1965 | hists__reset_filter_stats(hists); | 1989 | hists__reset_filter_stats(hists); |
| 1966 | hists__reset_col_len(hists); | 1990 | hists__reset_col_len(hists); |
| 1967 | 1991 | ||
| 1968 | for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) { | 1992 | for (nd = rb_first_cached(&hists->entries); nd; nd = rb_next(nd)) { |
| 1969 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 1993 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
| 1970 | 1994 | ||
| 1971 | if (filter(hists, h)) | 1995 | if (filter(hists, h)) |
| @@ -1975,13 +1999,15 @@ static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t fil | |||
| 1975 | } | 1999 | } |
| 1976 | } | 2000 | } |
| 1977 | 2001 | ||
| 1978 | static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) | 2002 | static void resort_filtered_entry(struct rb_root_cached *root, |
| 2003 | struct hist_entry *he) | ||
| 1979 | { | 2004 | { |
| 1980 | struct rb_node **p = &root->rb_node; | 2005 | struct rb_node **p = &root->rb_root.rb_node; |
| 1981 | struct rb_node *parent = NULL; | 2006 | struct rb_node *parent = NULL; |
| 1982 | struct hist_entry *iter; | 2007 | struct hist_entry *iter; |
| 1983 | struct rb_root new_root = RB_ROOT; | 2008 | struct rb_root_cached new_root = RB_ROOT_CACHED; |
| 1984 | struct rb_node *nd; | 2009 | struct rb_node *nd; |
| 2010 | bool leftmost = true; | ||
| 1985 | 2011 | ||
| 1986 | while (*p != NULL) { | 2012 | while (*p != NULL) { |
| 1987 | parent = *p; | 2013 | parent = *p; |
| @@ -1989,22 +2015,24 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) | |||
| 1989 | 2015 | ||
| 1990 | if (hist_entry__sort(he, iter) > 0) | 2016 | if (hist_entry__sort(he, iter) > 0) |
| 1991 | p = &(*p)->rb_left; | 2017 | p = &(*p)->rb_left; |
| 1992 | else | 2018 | else { |
| 1993 | p = &(*p)->rb_right; | 2019 | p = &(*p)->rb_right; |
| 2020 | leftmost = false; | ||
| 2021 | } | ||
| 1994 | } | 2022 | } |
| 1995 | 2023 | ||
| 1996 | rb_link_node(&he->rb_node, parent, p); | 2024 | rb_link_node(&he->rb_node, parent, p); |
| 1997 | rb_insert_color(&he->rb_node, root); | 2025 | rb_insert_color_cached(&he->rb_node, root, leftmost); |
| 1998 | 2026 | ||
| 1999 | if (he->leaf || he->filtered) | 2027 | if (he->leaf || he->filtered) |
| 2000 | return; | 2028 | return; |
| 2001 | 2029 | ||
| 2002 | nd = rb_first(&he->hroot_out); | 2030 | nd = rb_first_cached(&he->hroot_out); |
| 2003 | while (nd) { | 2031 | while (nd) { |
| 2004 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 2032 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
| 2005 | 2033 | ||
| 2006 | nd = rb_next(nd); | 2034 | nd = rb_next(nd); |
| 2007 | rb_erase(&h->rb_node, &he->hroot_out); | 2035 | rb_erase_cached(&h->rb_node, &he->hroot_out); |
| 2008 | 2036 | ||
| 2009 | resort_filtered_entry(&new_root, h); | 2037 | resort_filtered_entry(&new_root, h); |
| 2010 | } | 2038 | } |
| @@ -2015,14 +2043,14 @@ static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he) | |||
| 2015 | static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) | 2043 | static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg) |
| 2016 | { | 2044 | { |
| 2017 | struct rb_node *nd; | 2045 | struct rb_node *nd; |
| 2018 | struct rb_root new_root = RB_ROOT; | 2046 | struct rb_root_cached new_root = RB_ROOT_CACHED; |
| 2019 | 2047 | ||
| 2020 | hists->stats.nr_non_filtered_samples = 0; | 2048 | hists->stats.nr_non_filtered_samples = 0; |
| 2021 | 2049 | ||
| 2022 | hists__reset_filter_stats(hists); | 2050 | hists__reset_filter_stats(hists); |
| 2023 | hists__reset_col_len(hists); | 2051 | hists__reset_col_len(hists); |
| 2024 | 2052 | ||
| 2025 | nd = rb_first(&hists->entries); | 2053 | nd = rb_first_cached(&hists->entries); |
| 2026 | while (nd) { | 2054 | while (nd) { |
| 2027 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 2055 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
| 2028 | int ret; | 2056 | int ret; |
| @@ -2066,12 +2094,12 @@ static void hists__filter_hierarchy(struct hists *hists, int type, const void *a | |||
| 2066 | * resort output after applying a new filter since filter in a lower | 2094 | * resort output after applying a new filter since filter in a lower |
| 2067 | * hierarchy can change periods in a upper hierarchy. | 2095 | * hierarchy can change periods in a upper hierarchy. |
| 2068 | */ | 2096 | */ |
| 2069 | nd = rb_first(&hists->entries); | 2097 | nd = rb_first_cached(&hists->entries); |
| 2070 | while (nd) { | 2098 | while (nd) { |
| 2071 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); | 2099 | struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node); |
| 2072 | 2100 | ||
| 2073 | nd = rb_next(nd); | 2101 | nd = rb_next(nd); |
| 2074 | rb_erase(&h->rb_node, &hists->entries); | 2102 | rb_erase_cached(&h->rb_node, &hists->entries); |
| 2075 | 2103 | ||
| 2076 | resort_filtered_entry(&new_root, h); | 2104 | resort_filtered_entry(&new_root, h); |
| 2077 | } | 2105 | } |
| @@ -2140,18 +2168,19 @@ void hists__inc_nr_samples(struct hists *hists, bool filtered) | |||
| 2140 | static struct hist_entry *hists__add_dummy_entry(struct hists *hists, | 2168 | static struct hist_entry *hists__add_dummy_entry(struct hists *hists, |
| 2141 | struct hist_entry *pair) | 2169 | struct hist_entry *pair) |
| 2142 | { | 2170 | { |
| 2143 | struct rb_root *root; | 2171 | struct rb_root_cached *root; |
| 2144 | struct rb_node **p; | 2172 | struct rb_node **p; |
| 2145 | struct rb_node *parent = NULL; | 2173 | struct rb_node *parent = NULL; |
| 2146 | struct hist_entry *he; | 2174 | struct hist_entry *he; |
| 2147 | int64_t cmp; | 2175 | int64_t cmp; |
| 2176 | bool leftmost = true; | ||
| 2148 | 2177 | ||
| 2149 | if (hists__has(hists, need_collapse)) | 2178 | if (hists__has(hists, need_collapse)) |
| 2150 | root = &hists->entries_collapsed; | 2179 | root = &hists->entries_collapsed; |
| 2151 | else | 2180 | else |
| 2152 | root = hists->entries_in; | 2181 | root = hists->entries_in; |
| 2153 | 2182 | ||
| 2154 | p = &root->rb_node; | 2183 | p = &root->rb_root.rb_node; |
| 2155 | 2184 | ||
| 2156 | while (*p != NULL) { | 2185 | while (*p != NULL) { |
| 2157 | parent = *p; | 2186 | parent = *p; |
| @@ -2164,8 +2193,10 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, | |||
| 2164 | 2193 | ||
| 2165 | if (cmp < 0) | 2194 | if (cmp < 0) |
| 2166 | p = &(*p)->rb_left; | 2195 | p = &(*p)->rb_left; |
| 2167 | else | 2196 | else { |
| 2168 | p = &(*p)->rb_right; | 2197 | p = &(*p)->rb_right; |
| 2198 | leftmost = false; | ||
| 2199 | } | ||
| 2169 | } | 2200 | } |
| 2170 | 2201 | ||
| 2171 | he = hist_entry__new(pair, true); | 2202 | he = hist_entry__new(pair, true); |
| @@ -2175,7 +2206,7 @@ static struct hist_entry *hists__add_dummy_entry(struct hists *hists, | |||
| 2175 | if (symbol_conf.cumulate_callchain) | 2206 | if (symbol_conf.cumulate_callchain) |
| 2176 | memset(he->stat_acc, 0, sizeof(he->stat)); | 2207 | memset(he->stat_acc, 0, sizeof(he->stat)); |
| 2177 | rb_link_node(&he->rb_node_in, parent, p); | 2208 | rb_link_node(&he->rb_node_in, parent, p); |
| 2178 | rb_insert_color(&he->rb_node_in, root); | 2209 | rb_insert_color_cached(&he->rb_node_in, root, leftmost); |
| 2179 | hists__inc_stats(hists, he); | 2210 | hists__inc_stats(hists, he); |
| 2180 | he->dummy = true; | 2211 | he->dummy = true; |
| 2181 | } | 2212 | } |
| @@ -2184,15 +2215,16 @@ out: | |||
| 2184 | } | 2215 | } |
| 2185 | 2216 | ||
| 2186 | static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, | 2217 | static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, |
| 2187 | struct rb_root *root, | 2218 | struct rb_root_cached *root, |
| 2188 | struct hist_entry *pair) | 2219 | struct hist_entry *pair) |
| 2189 | { | 2220 | { |
| 2190 | struct rb_node **p; | 2221 | struct rb_node **p; |
| 2191 | struct rb_node *parent = NULL; | 2222 | struct rb_node *parent = NULL; |
| 2192 | struct hist_entry *he; | 2223 | struct hist_entry *he; |
| 2193 | struct perf_hpp_fmt *fmt; | 2224 | struct perf_hpp_fmt *fmt; |
| 2225 | bool leftmost = true; | ||
| 2194 | 2226 | ||
| 2195 | p = &root->rb_node; | 2227 | p = &root->rb_root.rb_node; |
| 2196 | while (*p != NULL) { | 2228 | while (*p != NULL) { |
| 2197 | int64_t cmp = 0; | 2229 | int64_t cmp = 0; |
| 2198 | 2230 | ||
| @@ -2209,14 +2241,16 @@ static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists, | |||
| 2209 | 2241 | ||
| 2210 | if (cmp < 0) | 2242 | if (cmp < 0) |
| 2211 | p = &parent->rb_left; | 2243 | p = &parent->rb_left; |
| 2212 | else | 2244 | else { |
| 2213 | p = &parent->rb_right; | 2245 | p = &parent->rb_right; |
| 2246 | leftmost = false; | ||
| 2247 | } | ||
| 2214 | } | 2248 | } |
| 2215 | 2249 | ||
| 2216 | he = hist_entry__new(pair, true); | 2250 | he = hist_entry__new(pair, true); |
| 2217 | if (he) { | 2251 | if (he) { |
| 2218 | rb_link_node(&he->rb_node_in, parent, p); | 2252 | rb_link_node(&he->rb_node_in, parent, p); |
| 2219 | rb_insert_color(&he->rb_node_in, root); | 2253 | rb_insert_color_cached(&he->rb_node_in, root, leftmost); |
| 2220 | 2254 | ||
| 2221 | he->dummy = true; | 2255 | he->dummy = true; |
| 2222 | he->hists = hists; | 2256 | he->hists = hists; |
| @@ -2233,9 +2267,9 @@ static struct hist_entry *hists__find_entry(struct hists *hists, | |||
| 2233 | struct rb_node *n; | 2267 | struct rb_node *n; |
| 2234 | 2268 | ||
| 2235 | if (hists__has(hists, need_collapse)) | 2269 | if (hists__has(hists, need_collapse)) |
| 2236 | n = hists->entries_collapsed.rb_node; | 2270 | n = hists->entries_collapsed.rb_root.rb_node; |
| 2237 | else | 2271 | else |
| 2238 | n = hists->entries_in->rb_node; | 2272 | n = hists->entries_in->rb_root.rb_node; |
| 2239 | 2273 | ||
| 2240 | while (n) { | 2274 | while (n) { |
| 2241 | struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in); | 2275 | struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in); |
| @@ -2252,10 +2286,10 @@ static struct hist_entry *hists__find_entry(struct hists *hists, | |||
| 2252 | return NULL; | 2286 | return NULL; |
| 2253 | } | 2287 | } |
| 2254 | 2288 | ||
| 2255 | static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, | 2289 | static struct hist_entry *hists__find_hierarchy_entry(struct rb_root_cached *root, |
| 2256 | struct hist_entry *he) | 2290 | struct hist_entry *he) |
| 2257 | { | 2291 | { |
| 2258 | struct rb_node *n = root->rb_node; | 2292 | struct rb_node *n = root->rb_root.rb_node; |
| 2259 | 2293 | ||
| 2260 | while (n) { | 2294 | while (n) { |
| 2261 | struct hist_entry *iter; | 2295 | struct hist_entry *iter; |
| @@ -2280,13 +2314,13 @@ static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root, | |||
| 2280 | return NULL; | 2314 | return NULL; |
| 2281 | } | 2315 | } |
| 2282 | 2316 | ||
| 2283 | static void hists__match_hierarchy(struct rb_root *leader_root, | 2317 | static void hists__match_hierarchy(struct rb_root_cached *leader_root, |
| 2284 | struct rb_root *other_root) | 2318 | struct rb_root_cached *other_root) |
| 2285 | { | 2319 | { |
| 2286 | struct rb_node *nd; | 2320 | struct rb_node *nd; |
| 2287 | struct hist_entry *pos, *pair; | 2321 | struct hist_entry *pos, *pair; |
| 2288 | 2322 | ||
| 2289 | for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) { | 2323 | for (nd = rb_first_cached(leader_root); nd; nd = rb_next(nd)) { |
| 2290 | pos = rb_entry(nd, struct hist_entry, rb_node_in); | 2324 | pos = rb_entry(nd, struct hist_entry, rb_node_in); |
| 2291 | pair = hists__find_hierarchy_entry(other_root, pos); | 2325 | pair = hists__find_hierarchy_entry(other_root, pos); |
| 2292 | 2326 | ||
| @@ -2302,7 +2336,7 @@ static void hists__match_hierarchy(struct rb_root *leader_root, | |||
| 2302 | */ | 2336 | */ |
| 2303 | void hists__match(struct hists *leader, struct hists *other) | 2337 | void hists__match(struct hists *leader, struct hists *other) |
| 2304 | { | 2338 | { |
| 2305 | struct rb_root *root; | 2339 | struct rb_root_cached *root; |
| 2306 | struct rb_node *nd; | 2340 | struct rb_node *nd; |
| 2307 | struct hist_entry *pos, *pair; | 2341 | struct hist_entry *pos, *pair; |
| 2308 | 2342 | ||
| @@ -2317,7 +2351,7 @@ void hists__match(struct hists *leader, struct hists *other) | |||
| 2317 | else | 2351 | else |
| 2318 | root = leader->entries_in; | 2352 | root = leader->entries_in; |
| 2319 | 2353 | ||
| 2320 | for (nd = rb_first(root); nd; nd = rb_next(nd)) { | 2354 | for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) { |
| 2321 | pos = rb_entry(nd, struct hist_entry, rb_node_in); | 2355 | pos = rb_entry(nd, struct hist_entry, rb_node_in); |
| 2322 | pair = hists__find_entry(other, pos); | 2356 | pair = hists__find_entry(other, pos); |
| 2323 | 2357 | ||
| @@ -2328,13 +2362,13 @@ void hists__match(struct hists *leader, struct hists *other) | |||
| 2328 | 2362 | ||
| 2329 | static int hists__link_hierarchy(struct hists *leader_hists, | 2363 | static int hists__link_hierarchy(struct hists *leader_hists, |
| 2330 | struct hist_entry *parent, | 2364 | struct hist_entry *parent, |
| 2331 | struct rb_root *leader_root, | 2365 | struct rb_root_cached *leader_root, |
| 2332 | struct rb_root *other_root) | 2366 | struct rb_root_cached *other_root) |
| 2333 | { | 2367 | { |
| 2334 | struct rb_node *nd; | 2368 | struct rb_node *nd; |
| 2335 | struct hist_entry *pos, *leader; | 2369 | struct hist_entry *pos, *leader; |
| 2336 | 2370 | ||
| 2337 | for (nd = rb_first(other_root); nd; nd = rb_next(nd)) { | 2371 | for (nd = rb_first_cached(other_root); nd; nd = rb_next(nd)) { |
| 2338 | pos = rb_entry(nd, struct hist_entry, rb_node_in); | 2372 | pos = rb_entry(nd, struct hist_entry, rb_node_in); |
| 2339 | 2373 | ||
| 2340 | if (hist_entry__has_pairs(pos)) { | 2374 | if (hist_entry__has_pairs(pos)) { |
| @@ -2377,7 +2411,7 @@ static int hists__link_hierarchy(struct hists *leader_hists, | |||
| 2377 | */ | 2411 | */ |
| 2378 | int hists__link(struct hists *leader, struct hists *other) | 2412 | int hists__link(struct hists *leader, struct hists *other) |
| 2379 | { | 2413 | { |
| 2380 | struct rb_root *root; | 2414 | struct rb_root_cached *root; |
| 2381 | struct rb_node *nd; | 2415 | struct rb_node *nd; |
| 2382 | struct hist_entry *pos, *pair; | 2416 | struct hist_entry *pos, *pair; |
| 2383 | 2417 | ||
| @@ -2393,7 +2427,7 @@ int hists__link(struct hists *leader, struct hists *other) | |||
| 2393 | else | 2427 | else |
| 2394 | root = other->entries_in; | 2428 | root = other->entries_in; |
| 2395 | 2429 | ||
| 2396 | for (nd = rb_first(root); nd; nd = rb_next(nd)) { | 2430 | for (nd = rb_first_cached(root); nd; nd = rb_next(nd)) { |
| 2397 | pos = rb_entry(nd, struct hist_entry, rb_node_in); | 2431 | pos = rb_entry(nd, struct hist_entry, rb_node_in); |
| 2398 | 2432 | ||
| 2399 | if (!hist_entry__has_pairs(pos)) { | 2433 | if (!hist_entry__has_pairs(pos)) { |
| @@ -2566,10 +2600,10 @@ int perf_hist_config(const char *var, const char *value) | |||
| 2566 | int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) | 2600 | int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) |
| 2567 | { | 2601 | { |
| 2568 | memset(hists, 0, sizeof(*hists)); | 2602 | memset(hists, 0, sizeof(*hists)); |
| 2569 | hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT; | 2603 | hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT_CACHED; |
| 2570 | hists->entries_in = &hists->entries_in_array[0]; | 2604 | hists->entries_in = &hists->entries_in_array[0]; |
| 2571 | hists->entries_collapsed = RB_ROOT; | 2605 | hists->entries_collapsed = RB_ROOT_CACHED; |
| 2572 | hists->entries = RB_ROOT; | 2606 | hists->entries = RB_ROOT_CACHED; |
| 2573 | pthread_mutex_init(&hists->lock, NULL); | 2607 | pthread_mutex_init(&hists->lock, NULL); |
| 2574 | hists->socket_filter = -1; | 2608 | hists->socket_filter = -1; |
| 2575 | hists->hpp_list = hpp_list; | 2609 | hists->hpp_list = hpp_list; |
| @@ -2577,14 +2611,14 @@ int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list) | |||
| 2577 | return 0; | 2611 | return 0; |
| 2578 | } | 2612 | } |
| 2579 | 2613 | ||
| 2580 | static void hists__delete_remaining_entries(struct rb_root *root) | 2614 | static void hists__delete_remaining_entries(struct rb_root_cached *root) |
| 2581 | { | 2615 | { |
| 2582 | struct rb_node *node; | 2616 | struct rb_node *node; |
| 2583 | struct hist_entry *he; | 2617 | struct hist_entry *he; |
| 2584 | 2618 | ||
| 2585 | while (!RB_EMPTY_ROOT(root)) { | 2619 | while (!RB_EMPTY_ROOT(&root->rb_root)) { |
| 2586 | node = rb_first(root); | 2620 | node = rb_first_cached(root); |
| 2587 | rb_erase(node, root); | 2621 | rb_erase_cached(node, root); |
| 2588 | 2622 | ||
| 2589 | he = rb_entry(node, struct hist_entry, rb_node_in); | 2623 | he = rb_entry(node, struct hist_entry, rb_node_in); |
| 2590 | hist_entry__delete(he); | 2624 | hist_entry__delete(he); |
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 664b5eda8d51..4af27fbab24f 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
| @@ -2,9 +2,9 @@ | |||
| 2 | #ifndef __PERF_HIST_H | 2 | #ifndef __PERF_HIST_H |
| 3 | #define __PERF_HIST_H | 3 | #define __PERF_HIST_H |
| 4 | 4 | ||
| 5 | #include <linux/rbtree.h> | ||
| 5 | #include <linux/types.h> | 6 | #include <linux/types.h> |
| 6 | #include <pthread.h> | 7 | #include <pthread.h> |
| 7 | #include "callchain.h" | ||
| 8 | #include "evsel.h" | 8 | #include "evsel.h" |
| 9 | #include "header.h" | 9 | #include "header.h" |
| 10 | #include "color.h" | 10 | #include "color.h" |
| @@ -13,6 +13,9 @@ | |||
| 13 | struct hist_entry; | 13 | struct hist_entry; |
| 14 | struct hist_entry_ops; | 14 | struct hist_entry_ops; |
| 15 | struct addr_location; | 15 | struct addr_location; |
| 16 | struct map_symbol; | ||
| 17 | struct mem_info; | ||
| 18 | struct branch_info; | ||
| 16 | struct symbol; | 19 | struct symbol; |
| 17 | 20 | ||
| 18 | enum hist_filter { | 21 | enum hist_filter { |
| @@ -70,10 +73,10 @@ struct thread; | |||
| 70 | struct dso; | 73 | struct dso; |
| 71 | 74 | ||
| 72 | struct hists { | 75 | struct hists { |
| 73 | struct rb_root entries_in_array[2]; | 76 | struct rb_root_cached entries_in_array[2]; |
| 74 | struct rb_root *entries_in; | 77 | struct rb_root_cached *entries_in; |
| 75 | struct rb_root entries; | 78 | struct rb_root_cached entries; |
| 76 | struct rb_root entries_collapsed; | 79 | struct rb_root_cached entries_collapsed; |
| 77 | u64 nr_entries; | 80 | u64 nr_entries; |
| 78 | u64 nr_non_filtered_entries; | 81 | u64 nr_non_filtered_entries; |
| 79 | u64 callchain_period; | 82 | u64 callchain_period; |
| @@ -160,8 +163,10 @@ int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp, | |||
| 160 | struct perf_hpp_fmt *fmt, int printed); | 163 | struct perf_hpp_fmt *fmt, int printed); |
| 161 | void hist_entry__delete(struct hist_entry *he); | 164 | void hist_entry__delete(struct hist_entry *he); |
| 162 | 165 | ||
| 163 | typedef int (*hists__resort_cb_t)(struct hist_entry *he); | 166 | typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg); |
| 164 | 167 | ||
| 168 | void perf_evsel__output_resort_cb(struct perf_evsel *evsel, struct ui_progress *prog, | ||
| 169 | hists__resort_cb_t cb, void *cb_arg); | ||
| 165 | void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); | 170 | void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog); |
| 166 | void hists__output_resort(struct hists *hists, struct ui_progress *prog); | 171 | void hists__output_resort(struct hists *hists, struct ui_progress *prog); |
| 167 | void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, | 172 | void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog, |
| @@ -230,7 +235,7 @@ static __pure inline bool hists__has_callchains(struct hists *hists) | |||
| 230 | int hists__init(void); | 235 | int hists__init(void); |
| 231 | int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); | 236 | int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list); |
| 232 | 237 | ||
| 233 | struct rb_root *hists__get_rotate_entries_in(struct hists *hists); | 238 | struct rb_root_cached *hists__get_rotate_entries_in(struct hists *hists); |
| 234 | 239 | ||
| 235 | struct perf_hpp { | 240 | struct perf_hpp { |
| 236 | char *buf; | 241 | char *buf; |
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index ee6ca65f81f4..0c0180c67574 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c | |||
| @@ -27,6 +27,8 @@ | |||
| 27 | #include "evsel.h" | 27 | #include "evsel.h" |
| 28 | #include "evlist.h" | 28 | #include "evlist.h" |
| 29 | #include "machine.h" | 29 | #include "machine.h" |
| 30 | #include "map.h" | ||
| 31 | #include "symbol.h" | ||
| 30 | #include "session.h" | 32 | #include "session.h" |
| 31 | #include "util.h" | 33 | #include "util.h" |
| 32 | #include "thread.h" | 34 | #include "thread.h" |
| @@ -142,7 +144,7 @@ static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) | |||
| 142 | 144 | ||
| 143 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | 145 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, |
| 144 | INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, | 146 | INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, |
| 145 | sample->tid, 0, "Lost trace data"); | 147 | sample->tid, 0, "Lost trace data", sample->time); |
| 146 | 148 | ||
| 147 | err = perf_session__deliver_synth_event(bts->session, &event, NULL); | 149 | err = perf_session__deliver_synth_event(bts->session, &event, NULL); |
| 148 | if (err) | 150 | if (err) |
| @@ -372,7 +374,7 @@ static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, | |||
| 372 | 374 | ||
| 373 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | 375 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, |
| 374 | INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, | 376 | INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, |
| 375 | "Failed to get instruction"); | 377 | "Failed to get instruction", 0); |
| 376 | 378 | ||
| 377 | err = perf_session__deliver_synth_event(bts->session, &event, NULL); | 379 | err = perf_session__deliver_synth_event(bts->session, &event, NULL); |
| 378 | if (err) | 380 | if (err) |
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 1b704fbea9de..23bf788f84b9 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o | 1 | perf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o |
| 2 | 2 | ||
| 3 | inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk | 3 | inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk |
| 4 | inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt | 4 | inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt |
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 4503f3ca45ab..6e03db142091 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | 26 | ||
| 27 | #include "../cache.h" | 27 | #include "../cache.h" |
| 28 | #include "../util.h" | 28 | #include "../util.h" |
| 29 | #include "../auxtrace.h" | ||
| 29 | 30 | ||
| 30 | #include "intel-pt-insn-decoder.h" | 31 | #include "intel-pt-insn-decoder.h" |
| 31 | #include "intel-pt-pkt-decoder.h" | 32 | #include "intel-pt-pkt-decoder.h" |
| @@ -867,7 +868,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) | |||
| 867 | 868 | ||
| 868 | ret = intel_pt_get_packet(decoder->buf, decoder->len, | 869 | ret = intel_pt_get_packet(decoder->buf, decoder->len, |
| 869 | &decoder->packet); | 870 | &decoder->packet); |
| 870 | if (ret == INTEL_PT_NEED_MORE_BYTES && | 871 | if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && |
| 871 | decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { | 872 | decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { |
| 872 | ret = intel_pt_get_split_packet(decoder); | 873 | ret = intel_pt_get_split_packet(decoder); |
| 873 | if (ret < 0) | 874 | if (ret < 0) |
| @@ -1394,7 +1395,6 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder) | |||
| 1394 | { | 1395 | { |
| 1395 | intel_pt_log("ERROR: Buffer overflow\n"); | 1396 | intel_pt_log("ERROR: Buffer overflow\n"); |
| 1396 | intel_pt_clear_tx_flags(decoder); | 1397 | intel_pt_clear_tx_flags(decoder); |
| 1397 | decoder->cbr = 0; | ||
| 1398 | decoder->timestamp_insn_cnt = 0; | 1398 | decoder->timestamp_insn_cnt = 0; |
| 1399 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | 1399 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; |
| 1400 | decoder->overflow = true; | 1400 | decoder->overflow = true; |
| @@ -2575,6 +2575,34 @@ static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) | |||
| 2575 | } | 2575 | } |
| 2576 | } | 2576 | } |
| 2577 | 2577 | ||
| 2578 | #define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1) | ||
| 2579 | |||
| 2580 | /** | ||
| 2581 | * adj_for_padding - adjust overlap to account for padding. | ||
| 2582 | * @buf_b: second buffer | ||
| 2583 | * @buf_a: first buffer | ||
| 2584 | * @len_a: size of first buffer | ||
| 2585 | * | ||
| 2586 | * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap | ||
| 2587 | * accordingly. | ||
| 2588 | * | ||
| 2589 | * Return: A pointer into @buf_b from where non-overlapped data starts | ||
| 2590 | */ | ||
| 2591 | static unsigned char *adj_for_padding(unsigned char *buf_b, | ||
| 2592 | unsigned char *buf_a, size_t len_a) | ||
| 2593 | { | ||
| 2594 | unsigned char *p = buf_b - MAX_PADDING; | ||
| 2595 | unsigned char *q = buf_a + len_a - MAX_PADDING; | ||
| 2596 | int i; | ||
| 2597 | |||
| 2598 | for (i = MAX_PADDING; i; i--, p++, q++) { | ||
| 2599 | if (*p != *q) | ||
| 2600 | break; | ||
| 2601 | } | ||
| 2602 | |||
| 2603 | return p; | ||
| 2604 | } | ||
| 2605 | |||
| 2578 | /** | 2606 | /** |
| 2579 | * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data | 2607 | * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data |
| 2580 | * using TSC. | 2608 | * using TSC. |
| @@ -2625,8 +2653,11 @@ static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, | |||
| 2625 | 2653 | ||
| 2626 | /* Same TSC, so buffers are consecutive */ | 2654 | /* Same TSC, so buffers are consecutive */ |
| 2627 | if (!cmp && rem_b >= rem_a) { | 2655 | if (!cmp && rem_b >= rem_a) { |
| 2656 | unsigned char *start; | ||
| 2657 | |||
| 2628 | *consecutive = true; | 2658 | *consecutive = true; |
| 2629 | return buf_b + len_b - (rem_b - rem_a); | 2659 | start = buf_b + len_b - (rem_b - rem_a); |
| 2660 | return adj_for_padding(start, buf_a, len_a); | ||
| 2630 | } | 2661 | } |
| 2631 | if (cmp < 0) | 2662 | if (cmp < 0) |
| 2632 | return buf_b; /* tsc_a < tsc_b => no overlap */ | 2663 | return buf_b; /* tsc_a < tsc_b => no overlap */ |
| @@ -2689,7 +2720,7 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | |||
| 2689 | found = memmem(buf_a, len_a, buf_b, len_a); | 2720 | found = memmem(buf_a, len_a, buf_b, len_a); |
| 2690 | if (found) { | 2721 | if (found) { |
| 2691 | *consecutive = true; | 2722 | *consecutive = true; |
| 2692 | return buf_b + len_a; | 2723 | return adj_for_padding(buf_b + len_a, buf_a, len_a); |
| 2693 | } | 2724 | } |
| 2694 | 2725 | ||
| 2695 | /* Try again at next PSB in buffer 'a' */ | 2726 | /* Try again at next PSB in buffer 'a' */ |
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2e72373ec6df..3b497bab4324 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c | |||
| @@ -1411,7 +1411,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq) | |||
| 1411 | } | 1411 | } |
| 1412 | 1412 | ||
| 1413 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, | 1413 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, |
| 1414 | pid_t pid, pid_t tid, u64 ip) | 1414 | pid_t pid, pid_t tid, u64 ip, u64 timestamp) |
| 1415 | { | 1415 | { |
| 1416 | union perf_event event; | 1416 | union perf_event event; |
| 1417 | char msg[MAX_AUXTRACE_ERROR_MSG]; | 1417 | char msg[MAX_AUXTRACE_ERROR_MSG]; |
| @@ -1420,7 +1420,7 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, | |||
| 1420 | intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); | 1420 | intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); |
| 1421 | 1421 | ||
| 1422 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | 1422 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, |
| 1423 | code, cpu, pid, tid, ip, msg); | 1423 | code, cpu, pid, tid, ip, msg, timestamp); |
| 1424 | 1424 | ||
| 1425 | err = perf_session__deliver_synth_event(pt->session, &event, NULL); | 1425 | err = perf_session__deliver_synth_event(pt->session, &event, NULL); |
| 1426 | if (err) | 1426 | if (err) |
| @@ -1430,6 +1430,18 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, | |||
| 1430 | return err; | 1430 | return err; |
| 1431 | } | 1431 | } |
| 1432 | 1432 | ||
| 1433 | static int intel_ptq_synth_error(struct intel_pt_queue *ptq, | ||
| 1434 | const struct intel_pt_state *state) | ||
| 1435 | { | ||
| 1436 | struct intel_pt *pt = ptq->pt; | ||
| 1437 | u64 tm = ptq->timestamp; | ||
| 1438 | |||
| 1439 | tm = pt->timeless_decoding ? 0 : tsc_to_perf_time(tm, &pt->tc); | ||
| 1440 | |||
| 1441 | return intel_pt_synth_error(pt, state->err, ptq->cpu, ptq->pid, | ||
| 1442 | ptq->tid, state->from_ip, tm); | ||
| 1443 | } | ||
| 1444 | |||
| 1433 | static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) | 1445 | static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) |
| 1434 | { | 1446 | { |
| 1435 | struct auxtrace_queue *queue; | 1447 | struct auxtrace_queue *queue; |
| @@ -1676,10 +1688,7 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | |||
| 1676 | intel_pt_next_tid(pt, ptq); | 1688 | intel_pt_next_tid(pt, ptq); |
| 1677 | } | 1689 | } |
| 1678 | if (pt->synth_opts.errors) { | 1690 | if (pt->synth_opts.errors) { |
| 1679 | err = intel_pt_synth_error(pt, state->err, | 1691 | err = intel_ptq_synth_error(ptq, state); |
| 1680 | ptq->cpu, ptq->pid, | ||
| 1681 | ptq->tid, | ||
| 1682 | state->from_ip); | ||
| 1683 | if (err) | 1692 | if (err) |
| 1684 | return err; | 1693 | return err; |
| 1685 | } | 1694 | } |
| @@ -1804,7 +1813,7 @@ static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, | |||
| 1804 | static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) | 1813 | static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) |
| 1805 | { | 1814 | { |
| 1806 | return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, | 1815 | return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, |
| 1807 | sample->pid, sample->tid, 0); | 1816 | sample->pid, sample->tid, 0, sample->time); |
| 1808 | } | 1817 | } |
| 1809 | 1818 | ||
| 1810 | static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) | 1819 | static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) |
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h index 85bab8735fa9..5c19ee001299 100644 --- a/tools/perf/util/intlist.h +++ b/tools/perf/util/intlist.h | |||
| @@ -45,7 +45,7 @@ static inline unsigned int intlist__nr_entries(const struct intlist *ilist) | |||
| 45 | /* For intlist iteration */ | 45 | /* For intlist iteration */ |
| 46 | static inline struct int_node *intlist__first(struct intlist *ilist) | 46 | static inline struct int_node *intlist__first(struct intlist *ilist) |
| 47 | { | 47 | { |
| 48 | struct rb_node *rn = rb_first(&ilist->rblist.entries); | 48 | struct rb_node *rn = rb_first_cached(&ilist->rblist.entries); |
| 49 | return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; | 49 | return rn ? rb_entry(rn, struct int_node, rb_node) : NULL; |
| 50 | } | 50 | } |
| 51 | static inline struct int_node *intlist__next(struct int_node *in) | 51 | static inline struct int_node *intlist__next(struct int_node *in) |
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index bf249552a9b0..eda28d3570bc 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include <sys/sysmacros.h> | 2 | #include <sys/sysmacros.h> |
| 3 | #include <sys/types.h> | 3 | #include <sys/types.h> |
| 4 | #include <errno.h> | 4 | #include <errno.h> |
| 5 | #include <libgen.h> | ||
| 5 | #include <stdio.h> | 6 | #include <stdio.h> |
| 6 | #include <stdlib.h> | 7 | #include <stdlib.h> |
| 7 | #include <string.h> | 8 | #include <string.h> |
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index 7b1f06567521..1403dec189b4 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h | |||
| @@ -3,12 +3,13 @@ | |||
| 3 | #define __PERF_KVM_STAT_H | 3 | #define __PERF_KVM_STAT_H |
| 4 | 4 | ||
| 5 | #include "../perf.h" | 5 | #include "../perf.h" |
| 6 | #include "evsel.h" | ||
| 7 | #include "evlist.h" | ||
| 8 | #include "session.h" | ||
| 9 | #include "tool.h" | 6 | #include "tool.h" |
| 10 | #include "stat.h" | 7 | #include "stat.h" |
| 11 | 8 | ||
| 9 | struct perf_evsel; | ||
| 10 | struct perf_evlist; | ||
| 11 | struct perf_session; | ||
| 12 | |||
| 12 | struct event_key { | 13 | struct event_key { |
| 13 | #define INVALID_KEY (~0ULL) | 14 | #define INVALID_KEY (~0ULL) |
| 14 | u64 key; | 15 | u64 key; |
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 143f7057d581..61959aba7e27 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #include "hist.h" | 10 | #include "hist.h" |
| 11 | #include "machine.h" | 11 | #include "machine.h" |
| 12 | #include "map.h" | 12 | #include "map.h" |
| 13 | #include "symbol.h" | ||
| 13 | #include "sort.h" | 14 | #include "sort.h" |
| 14 | #include "strlist.h" | 15 | #include "strlist.h" |
| 15 | #include "thread.h" | 16 | #include "thread.h" |
| @@ -21,6 +22,7 @@ | |||
| 21 | #include "unwind.h" | 22 | #include "unwind.h" |
| 22 | #include "linux/hash.h" | 23 | #include "linux/hash.h" |
| 23 | #include "asm/bug.h" | 24 | #include "asm/bug.h" |
| 25 | #include "bpf-event.h" | ||
| 24 | 26 | ||
| 25 | #include "sane_ctype.h" | 27 | #include "sane_ctype.h" |
| 26 | #include <symbol/kallsyms.h> | 28 | #include <symbol/kallsyms.h> |
| @@ -41,7 +43,7 @@ static void machine__threads_init(struct machine *machine) | |||
| 41 | 43 | ||
| 42 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { | 44 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { |
| 43 | struct threads *threads = &machine->threads[i]; | 45 | struct threads *threads = &machine->threads[i]; |
| 44 | threads->entries = RB_ROOT; | 46 | threads->entries = RB_ROOT_CACHED; |
| 45 | init_rwsem(&threads->lock); | 47 | init_rwsem(&threads->lock); |
| 46 | threads->nr = 0; | 48 | threads->nr = 0; |
| 47 | INIT_LIST_HEAD(&threads->dead); | 49 | INIT_LIST_HEAD(&threads->dead); |
| @@ -179,7 +181,7 @@ void machine__delete_threads(struct machine *machine) | |||
| 179 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { | 181 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { |
| 180 | struct threads *threads = &machine->threads[i]; | 182 | struct threads *threads = &machine->threads[i]; |
| 181 | down_write(&threads->lock); | 183 | down_write(&threads->lock); |
| 182 | nd = rb_first(&threads->entries); | 184 | nd = rb_first_cached(&threads->entries); |
| 183 | while (nd) { | 185 | while (nd) { |
| 184 | struct thread *t = rb_entry(nd, struct thread, rb_node); | 186 | struct thread *t = rb_entry(nd, struct thread, rb_node); |
| 185 | 187 | ||
| @@ -222,7 +224,7 @@ void machine__delete(struct machine *machine) | |||
| 222 | void machines__init(struct machines *machines) | 224 | void machines__init(struct machines *machines) |
| 223 | { | 225 | { |
| 224 | machine__init(&machines->host, "", HOST_KERNEL_ID); | 226 | machine__init(&machines->host, "", HOST_KERNEL_ID); |
| 225 | machines->guests = RB_ROOT; | 227 | machines->guests = RB_ROOT_CACHED; |
| 226 | } | 228 | } |
| 227 | 229 | ||
| 228 | void machines__exit(struct machines *machines) | 230 | void machines__exit(struct machines *machines) |
| @@ -234,9 +236,10 @@ void machines__exit(struct machines *machines) | |||
| 234 | struct machine *machines__add(struct machines *machines, pid_t pid, | 236 | struct machine *machines__add(struct machines *machines, pid_t pid, |
| 235 | const char *root_dir) | 237 | const char *root_dir) |
| 236 | { | 238 | { |
| 237 | struct rb_node **p = &machines->guests.rb_node; | 239 | struct rb_node **p = &machines->guests.rb_root.rb_node; |
| 238 | struct rb_node *parent = NULL; | 240 | struct rb_node *parent = NULL; |
| 239 | struct machine *pos, *machine = malloc(sizeof(*machine)); | 241 | struct machine *pos, *machine = malloc(sizeof(*machine)); |
| 242 | bool leftmost = true; | ||
| 240 | 243 | ||
| 241 | if (machine == NULL) | 244 | if (machine == NULL) |
| 242 | return NULL; | 245 | return NULL; |
| @@ -251,12 +254,14 @@ struct machine *machines__add(struct machines *machines, pid_t pid, | |||
| 251 | pos = rb_entry(parent, struct machine, rb_node); | 254 | pos = rb_entry(parent, struct machine, rb_node); |
| 252 | if (pid < pos->pid) | 255 | if (pid < pos->pid) |
| 253 | p = &(*p)->rb_left; | 256 | p = &(*p)->rb_left; |
| 254 | else | 257 | else { |
| 255 | p = &(*p)->rb_right; | 258 | p = &(*p)->rb_right; |
| 259 | leftmost = false; | ||
| 260 | } | ||
| 256 | } | 261 | } |
| 257 | 262 | ||
| 258 | rb_link_node(&machine->rb_node, parent, p); | 263 | rb_link_node(&machine->rb_node, parent, p); |
| 259 | rb_insert_color(&machine->rb_node, &machines->guests); | 264 | rb_insert_color_cached(&machine->rb_node, &machines->guests, leftmost); |
| 260 | 265 | ||
| 261 | return machine; | 266 | return machine; |
| 262 | } | 267 | } |
| @@ -267,7 +272,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec) | |||
| 267 | 272 | ||
| 268 | machines->host.comm_exec = comm_exec; | 273 | machines->host.comm_exec = comm_exec; |
| 269 | 274 | ||
| 270 | for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { | 275 | for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { |
| 271 | struct machine *machine = rb_entry(nd, struct machine, rb_node); | 276 | struct machine *machine = rb_entry(nd, struct machine, rb_node); |
| 272 | 277 | ||
| 273 | machine->comm_exec = comm_exec; | 278 | machine->comm_exec = comm_exec; |
| @@ -276,7 +281,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec) | |||
| 276 | 281 | ||
| 277 | struct machine *machines__find(struct machines *machines, pid_t pid) | 282 | struct machine *machines__find(struct machines *machines, pid_t pid) |
| 278 | { | 283 | { |
| 279 | struct rb_node **p = &machines->guests.rb_node; | 284 | struct rb_node **p = &machines->guests.rb_root.rb_node; |
| 280 | struct rb_node *parent = NULL; | 285 | struct rb_node *parent = NULL; |
| 281 | struct machine *machine; | 286 | struct machine *machine; |
| 282 | struct machine *default_machine = NULL; | 287 | struct machine *default_machine = NULL; |
| @@ -339,7 +344,7 @@ void machines__process_guests(struct machines *machines, | |||
| 339 | { | 344 | { |
| 340 | struct rb_node *nd; | 345 | struct rb_node *nd; |
| 341 | 346 | ||
| 342 | for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { | 347 | for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { |
| 343 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 348 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 344 | process(pos, data); | 349 | process(pos, data); |
| 345 | } | 350 | } |
| @@ -352,7 +357,8 @@ void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) | |||
| 352 | 357 | ||
| 353 | machines->host.id_hdr_size = id_hdr_size; | 358 | machines->host.id_hdr_size = id_hdr_size; |
| 354 | 359 | ||
| 355 | for (node = rb_first(&machines->guests); node; node = rb_next(node)) { | 360 | for (node = rb_first_cached(&machines->guests); node; |
| 361 | node = rb_next(node)) { | ||
| 356 | machine = rb_entry(node, struct machine, rb_node); | 362 | machine = rb_entry(node, struct machine, rb_node); |
| 357 | machine->id_hdr_size = id_hdr_size; | 363 | machine->id_hdr_size = id_hdr_size; |
| 358 | } | 364 | } |
| @@ -465,9 +471,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine, | |||
| 465 | pid_t pid, pid_t tid, | 471 | pid_t pid, pid_t tid, |
| 466 | bool create) | 472 | bool create) |
| 467 | { | 473 | { |
| 468 | struct rb_node **p = &threads->entries.rb_node; | 474 | struct rb_node **p = &threads->entries.rb_root.rb_node; |
| 469 | struct rb_node *parent = NULL; | 475 | struct rb_node *parent = NULL; |
| 470 | struct thread *th; | 476 | struct thread *th; |
| 477 | bool leftmost = true; | ||
| 471 | 478 | ||
| 472 | th = threads__get_last_match(threads, machine, pid, tid); | 479 | th = threads__get_last_match(threads, machine, pid, tid); |
| 473 | if (th) | 480 | if (th) |
| @@ -485,8 +492,10 @@ static struct thread *____machine__findnew_thread(struct machine *machine, | |||
| 485 | 492 | ||
| 486 | if (tid < th->tid) | 493 | if (tid < th->tid) |
| 487 | p = &(*p)->rb_left; | 494 | p = &(*p)->rb_left; |
| 488 | else | 495 | else { |
| 489 | p = &(*p)->rb_right; | 496 | p = &(*p)->rb_right; |
| 497 | leftmost = false; | ||
| 498 | } | ||
| 490 | } | 499 | } |
| 491 | 500 | ||
| 492 | if (!create) | 501 | if (!create) |
| @@ -495,7 +504,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, | |||
| 495 | th = thread__new(pid, tid); | 504 | th = thread__new(pid, tid); |
| 496 | if (th != NULL) { | 505 | if (th != NULL) { |
| 497 | rb_link_node(&th->rb_node, parent, p); | 506 | rb_link_node(&th->rb_node, parent, p); |
| 498 | rb_insert_color(&th->rb_node, &threads->entries); | 507 | rb_insert_color_cached(&th->rb_node, &threads->entries, leftmost); |
| 499 | 508 | ||
| 500 | /* | 509 | /* |
| 501 | * We have to initialize map_groups separately | 510 | * We have to initialize map_groups separately |
| @@ -506,7 +515,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine, | |||
| 506 | * leader and that would screwed the rb tree. | 515 | * leader and that would screwed the rb tree. |
| 507 | */ | 516 | */ |
| 508 | if (thread__init_map_groups(th, machine)) { | 517 | if (thread__init_map_groups(th, machine)) { |
| 509 | rb_erase_init(&th->rb_node, &threads->entries); | 518 | rb_erase_cached(&th->rb_node, &threads->entries); |
| 510 | RB_CLEAR_NODE(&th->rb_node); | 519 | RB_CLEAR_NODE(&th->rb_node); |
| 511 | thread__put(th); | 520 | thread__put(th); |
| 512 | return NULL; | 521 | return NULL; |
| @@ -681,6 +690,59 @@ int machine__process_switch_event(struct machine *machine __maybe_unused, | |||
| 681 | return 0; | 690 | return 0; |
| 682 | } | 691 | } |
| 683 | 692 | ||
| 693 | static int machine__process_ksymbol_register(struct machine *machine, | ||
| 694 | union perf_event *event, | ||
| 695 | struct perf_sample *sample __maybe_unused) | ||
| 696 | { | ||
| 697 | struct symbol *sym; | ||
| 698 | struct map *map; | ||
| 699 | |||
| 700 | map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr); | ||
| 701 | if (!map) { | ||
| 702 | map = dso__new_map(event->ksymbol_event.name); | ||
| 703 | if (!map) | ||
| 704 | return -ENOMEM; | ||
| 705 | |||
| 706 | map->start = event->ksymbol_event.addr; | ||
| 707 | map->pgoff = map->start; | ||
| 708 | map->end = map->start + event->ksymbol_event.len; | ||
| 709 | map_groups__insert(&machine->kmaps, map); | ||
| 710 | } | ||
| 711 | |||
| 712 | sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len, | ||
| 713 | 0, 0, event->ksymbol_event.name); | ||
| 714 | if (!sym) | ||
| 715 | return -ENOMEM; | ||
| 716 | dso__insert_symbol(map->dso, sym); | ||
| 717 | return 0; | ||
| 718 | } | ||
| 719 | |||
| 720 | static int machine__process_ksymbol_unregister(struct machine *machine, | ||
| 721 | union perf_event *event, | ||
| 722 | struct perf_sample *sample __maybe_unused) | ||
| 723 | { | ||
| 724 | struct map *map; | ||
| 725 | |||
| 726 | map = map_groups__find(&machine->kmaps, event->ksymbol_event.addr); | ||
| 727 | if (map) | ||
| 728 | map_groups__remove(&machine->kmaps, map); | ||
| 729 | |||
| 730 | return 0; | ||
| 731 | } | ||
| 732 | |||
| 733 | int machine__process_ksymbol(struct machine *machine __maybe_unused, | ||
| 734 | union perf_event *event, | ||
| 735 | struct perf_sample *sample) | ||
| 736 | { | ||
| 737 | if (dump_trace) | ||
| 738 | perf_event__fprintf_ksymbol(event, stdout); | ||
| 739 | |||
| 740 | if (event->ksymbol_event.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER) | ||
| 741 | return machine__process_ksymbol_unregister(machine, event, | ||
| 742 | sample); | ||
| 743 | return machine__process_ksymbol_register(machine, event, sample); | ||
| 744 | } | ||
| 745 | |||
| 684 | static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename) | 746 | static void dso__adjust_kmod_long_name(struct dso *dso, const char *filename) |
| 685 | { | 747 | { |
| 686 | const char *dup_filename; | 748 | const char *dup_filename; |
| @@ -744,7 +806,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) | |||
| 744 | struct rb_node *nd; | 806 | struct rb_node *nd; |
| 745 | size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); | 807 | size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); |
| 746 | 808 | ||
| 747 | for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { | 809 | for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { |
| 748 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 810 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 749 | ret += __dsos__fprintf(&pos->dsos.head, fp); | 811 | ret += __dsos__fprintf(&pos->dsos.head, fp); |
| 750 | } | 812 | } |
| @@ -764,7 +826,7 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, | |||
| 764 | struct rb_node *nd; | 826 | struct rb_node *nd; |
| 765 | size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm); | 827 | size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm); |
| 766 | 828 | ||
| 767 | for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { | 829 | for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { |
| 768 | struct machine *pos = rb_entry(nd, struct machine, rb_node); | 830 | struct machine *pos = rb_entry(nd, struct machine, rb_node); |
| 769 | ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm); | 831 | ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm); |
| 770 | } | 832 | } |
| @@ -804,7 +866,8 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) | |||
| 804 | 866 | ||
| 805 | ret = fprintf(fp, "Threads: %u\n", threads->nr); | 867 | ret = fprintf(fp, "Threads: %u\n", threads->nr); |
| 806 | 868 | ||
| 807 | for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { | 869 | for (nd = rb_first_cached(&threads->entries); nd; |
| 870 | nd = rb_next(nd)) { | ||
| 808 | struct thread *pos = rb_entry(nd, struct thread, rb_node); | 871 | struct thread *pos = rb_entry(nd, struct thread, rb_node); |
| 809 | 872 | ||
| 810 | ret += thread__fprintf(pos, fp); | 873 | ret += thread__fprintf(pos, fp); |
| @@ -1107,7 +1170,7 @@ failure: | |||
| 1107 | 1170 | ||
| 1108 | void machines__destroy_kernel_maps(struct machines *machines) | 1171 | void machines__destroy_kernel_maps(struct machines *machines) |
| 1109 | { | 1172 | { |
| 1110 | struct rb_node *next = rb_first(&machines->guests); | 1173 | struct rb_node *next = rb_first_cached(&machines->guests); |
| 1111 | 1174 | ||
| 1112 | machine__destroy_kernel_maps(&machines->host); | 1175 | machine__destroy_kernel_maps(&machines->host); |
| 1113 | 1176 | ||
| @@ -1115,7 +1178,7 @@ void machines__destroy_kernel_maps(struct machines *machines) | |||
| 1115 | struct machine *pos = rb_entry(next, struct machine, rb_node); | 1178 | struct machine *pos = rb_entry(next, struct machine, rb_node); |
| 1116 | 1179 | ||
| 1117 | next = rb_next(&pos->rb_node); | 1180 | next = rb_next(&pos->rb_node); |
| 1118 | rb_erase(&pos->rb_node, &machines->guests); | 1181 | rb_erase_cached(&pos->rb_node, &machines->guests); |
| 1119 | machine__delete(pos); | 1182 | machine__delete(pos); |
| 1120 | } | 1183 | } |
| 1121 | } | 1184 | } |
| @@ -1680,7 +1743,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th, | |||
| 1680 | BUG_ON(refcount_read(&th->refcnt) == 0); | 1743 | BUG_ON(refcount_read(&th->refcnt) == 0); |
| 1681 | if (lock) | 1744 | if (lock) |
| 1682 | down_write(&threads->lock); | 1745 | down_write(&threads->lock); |
| 1683 | rb_erase_init(&th->rb_node, &threads->entries); | 1746 | rb_erase_cached(&th->rb_node, &threads->entries); |
| 1684 | RB_CLEAR_NODE(&th->rb_node); | 1747 | RB_CLEAR_NODE(&th->rb_node); |
| 1685 | --threads->nr; | 1748 | --threads->nr; |
| 1686 | /* | 1749 | /* |
| @@ -1812,6 +1875,10 @@ int machine__process_event(struct machine *machine, union perf_event *event, | |||
| 1812 | case PERF_RECORD_SWITCH: | 1875 | case PERF_RECORD_SWITCH: |
| 1813 | case PERF_RECORD_SWITCH_CPU_WIDE: | 1876 | case PERF_RECORD_SWITCH_CPU_WIDE: |
| 1814 | ret = machine__process_switch_event(machine, event); break; | 1877 | ret = machine__process_switch_event(machine, event); break; |
| 1878 | case PERF_RECORD_KSYMBOL: | ||
| 1879 | ret = machine__process_ksymbol(machine, event, sample); break; | ||
| 1880 | case PERF_RECORD_BPF_EVENT: | ||
| 1881 | ret = machine__process_bpf_event(machine, event, sample); break; | ||
| 1815 | default: | 1882 | default: |
| 1816 | ret = -1; | 1883 | ret = -1; |
| 1817 | break; | 1884 | break; |
| @@ -2453,7 +2520,8 @@ int machine__for_each_thread(struct machine *machine, | |||
| 2453 | 2520 | ||
| 2454 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { | 2521 | for (i = 0; i < THREADS__TABLE_SIZE; i++) { |
| 2455 | threads = &machine->threads[i]; | 2522 | threads = &machine->threads[i]; |
| 2456 | for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { | 2523 | for (nd = rb_first_cached(&threads->entries); nd; |
| 2524 | nd = rb_next(nd)) { | ||
| 2457 | thread = rb_entry(nd, struct thread, rb_node); | 2525 | thread = rb_entry(nd, struct thread, rb_node); |
| 2458 | rc = fn(thread, priv); | 2526 | rc = fn(thread, priv); |
| 2459 | if (rc != 0) | 2527 | if (rc != 0) |
| @@ -2480,7 +2548,7 @@ int machines__for_each_thread(struct machines *machines, | |||
| 2480 | if (rc != 0) | 2548 | if (rc != 0) |
| 2481 | return rc; | 2549 | return rc; |
| 2482 | 2550 | ||
| 2483 | for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { | 2551 | for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { |
| 2484 | struct machine *machine = rb_entry(nd, struct machine, rb_node); | 2552 | struct machine *machine = rb_entry(nd, struct machine, rb_node); |
| 2485 | 2553 | ||
| 2486 | rc = machine__for_each_thread(machine, fn, priv); | 2554 | rc = machine__for_each_thread(machine, fn, priv); |
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index a5d1da60f751..f70ab98a7bde 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h | |||
| @@ -4,7 +4,7 @@ | |||
| 4 | 4 | ||
| 5 | #include <sys/types.h> | 5 | #include <sys/types.h> |
| 6 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
| 7 | #include "map.h" | 7 | #include "map_groups.h" |
| 8 | #include "dso.h" | 8 | #include "dso.h" |
| 9 | #include "event.h" | 9 | #include "event.h" |
| 10 | #include "rwsem.h" | 10 | #include "rwsem.h" |
| @@ -29,11 +29,11 @@ struct vdso_info; | |||
| 29 | #define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) | 29 | #define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS) |
| 30 | 30 | ||
| 31 | struct threads { | 31 | struct threads { |
| 32 | struct rb_root entries; | 32 | struct rb_root_cached entries; |
| 33 | struct rw_semaphore lock; | 33 | struct rw_semaphore lock; |
| 34 | unsigned int nr; | 34 | unsigned int nr; |
| 35 | struct list_head dead; | 35 | struct list_head dead; |
| 36 | struct thread *last_match; | 36 | struct thread *last_match; |
| 37 | }; | 37 | }; |
| 38 | 38 | ||
| 39 | struct machine { | 39 | struct machine { |
| @@ -130,6 +130,9 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event | |||
| 130 | struct perf_sample *sample); | 130 | struct perf_sample *sample); |
| 131 | int machine__process_mmap2_event(struct machine *machine, union perf_event *event, | 131 | int machine__process_mmap2_event(struct machine *machine, union perf_event *event, |
| 132 | struct perf_sample *sample); | 132 | struct perf_sample *sample); |
| 133 | int machine__process_ksymbol(struct machine *machine, | ||
| 134 | union perf_event *event, | ||
| 135 | struct perf_sample *sample); | ||
| 133 | int machine__process_event(struct machine *machine, union perf_event *event, | 136 | int machine__process_event(struct machine *machine, union perf_event *event, |
| 134 | struct perf_sample *sample); | 137 | struct perf_sample *sample); |
| 135 | 138 | ||
| @@ -137,7 +140,7 @@ typedef void (*machine__process_t)(struct machine *machine, void *data); | |||
| 137 | 140 | ||
| 138 | struct machines { | 141 | struct machines { |
| 139 | struct machine host; | 142 | struct machine host; |
| 140 | struct rb_root guests; | 143 | struct rb_root_cached guests; |
| 141 | }; | 144 | }; |
| 142 | 145 | ||
| 143 | void machines__init(struct machines *machines); | 146 | void machines__init(struct machines *machines); |
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6751301a755c..fbeb0c6efaa6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
| @@ -286,8 +286,8 @@ void map__put(struct map *map) | |||
| 286 | 286 | ||
| 287 | void map__fixup_start(struct map *map) | 287 | void map__fixup_start(struct map *map) |
| 288 | { | 288 | { |
| 289 | struct rb_root *symbols = &map->dso->symbols; | 289 | struct rb_root_cached *symbols = &map->dso->symbols; |
| 290 | struct rb_node *nd = rb_first(symbols); | 290 | struct rb_node *nd = rb_first_cached(symbols); |
| 291 | if (nd != NULL) { | 291 | if (nd != NULL) { |
| 292 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | 292 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); |
| 293 | map->start = sym->start; | 293 | map->start = sym->start; |
| @@ -296,8 +296,8 @@ void map__fixup_start(struct map *map) | |||
| 296 | 296 | ||
| 297 | void map__fixup_end(struct map *map) | 297 | void map__fixup_end(struct map *map) |
| 298 | { | 298 | { |
| 299 | struct rb_root *symbols = &map->dso->symbols; | 299 | struct rb_root_cached *symbols = &map->dso->symbols; |
| 300 | struct rb_node *nd = rb_last(symbols); | 300 | struct rb_node *nd = rb_last(&symbols->rb_root); |
| 301 | if (nd != NULL) { | 301 | if (nd != NULL) { |
| 302 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); | 302 | struct symbol *sym = rb_entry(nd, struct symbol, rb_node); |
| 303 | map->end = sym->end; | 303 | map->end = sym->end; |
| @@ -557,6 +557,12 @@ void map_groups__init(struct map_groups *mg, struct machine *machine) | |||
| 557 | refcount_set(&mg->refcnt, 1); | 557 | refcount_set(&mg->refcnt, 1); |
| 558 | } | 558 | } |
| 559 | 559 | ||
| 560 | void map_groups__insert(struct map_groups *mg, struct map *map) | ||
| 561 | { | ||
| 562 | maps__insert(&mg->maps, map); | ||
| 563 | map->groups = mg; | ||
| 564 | } | ||
| 565 | |||
| 560 | static void __maps__purge(struct maps *maps) | 566 | static void __maps__purge(struct maps *maps) |
| 561 | { | 567 | { |
| 562 | struct rb_root *root = &maps->entries; | 568 | struct rb_root *root = &maps->entries; |
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 09282aa45c80..0e20749f2c55 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h | |||
| @@ -6,12 +6,10 @@ | |||
| 6 | #include <linux/compiler.h> | 6 | #include <linux/compiler.h> |
| 7 | #include <linux/list.h> | 7 | #include <linux/list.h> |
| 8 | #include <linux/rbtree.h> | 8 | #include <linux/rbtree.h> |
| 9 | #include <pthread.h> | ||
| 10 | #include <stdio.h> | 9 | #include <stdio.h> |
| 11 | #include <string.h> | 10 | #include <string.h> |
| 12 | #include <stdbool.h> | 11 | #include <stdbool.h> |
| 13 | #include <linux/types.h> | 12 | #include <linux/types.h> |
| 14 | #include "rwsem.h" | ||
| 15 | 13 | ||
| 16 | struct dso; | 14 | struct dso; |
| 17 | struct ip_callchain; | 15 | struct ip_callchain; |
| @@ -48,38 +46,7 @@ struct map { | |||
| 48 | refcount_t refcnt; | 46 | refcount_t refcnt; |
| 49 | }; | 47 | }; |
| 50 | 48 | ||
| 51 | #define KMAP_NAME_LEN 256 | 49 | struct kmap; |
| 52 | |||
| 53 | struct kmap { | ||
| 54 | struct ref_reloc_sym *ref_reloc_sym; | ||
| 55 | struct map_groups *kmaps; | ||
| 56 | char name[KMAP_NAME_LEN]; | ||
| 57 | }; | ||
| 58 | |||
| 59 | struct maps { | ||
| 60 | struct rb_root entries; | ||
| 61 | struct rb_root names; | ||
| 62 | struct rw_semaphore lock; | ||
| 63 | }; | ||
| 64 | |||
| 65 | struct map_groups { | ||
| 66 | struct maps maps; | ||
| 67 | struct machine *machine; | ||
| 68 | refcount_t refcnt; | ||
| 69 | }; | ||
| 70 | |||
| 71 | struct map_groups *map_groups__new(struct machine *machine); | ||
| 72 | void map_groups__delete(struct map_groups *mg); | ||
| 73 | bool map_groups__empty(struct map_groups *mg); | ||
| 74 | |||
| 75 | static inline struct map_groups *map_groups__get(struct map_groups *mg) | ||
| 76 | { | ||
| 77 | if (mg) | ||
| 78 | refcount_inc(&mg->refcnt); | ||
| 79 | return mg; | ||
| 80 | } | ||
| 81 | |||
| 82 | void map_groups__put(struct map_groups *mg); | ||
| 83 | 50 | ||
| 84 | struct kmap *__map__kmap(struct map *map); | 51 | struct kmap *__map__kmap(struct map *map); |
| 85 | struct kmap *map__kmap(struct map *map); | 52 | struct kmap *map__kmap(struct map *map); |
| @@ -174,18 +141,7 @@ char *map__srcline(struct map *map, u64 addr, struct symbol *sym); | |||
| 174 | int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, | 141 | int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, |
| 175 | FILE *fp); | 142 | FILE *fp); |
| 176 | 143 | ||
| 177 | struct srccode_state { | 144 | struct srccode_state; |
| 178 | char *srcfile; | ||
| 179 | unsigned line; | ||
| 180 | }; | ||
| 181 | |||
| 182 | static inline void srccode_state_init(struct srccode_state *state) | ||
| 183 | { | ||
| 184 | state->srcfile = NULL; | ||
| 185 | state->line = 0; | ||
| 186 | } | ||
| 187 | |||
| 188 | void srccode_state_free(struct srccode_state *state); | ||
| 189 | 145 | ||
| 190 | int map__fprintf_srccode(struct map *map, u64 addr, | 146 | int map__fprintf_srccode(struct map *map, u64 addr, |
| 191 | FILE *fp, struct srccode_state *state); | 147 | FILE *fp, struct srccode_state *state); |
| @@ -198,61 +154,9 @@ void map__fixup_end(struct map *map); | |||
| 198 | 154 | ||
| 199 | void map__reloc_vmlinux(struct map *map); | 155 | void map__reloc_vmlinux(struct map *map); |
| 200 | 156 | ||
| 201 | void maps__insert(struct maps *maps, struct map *map); | ||
| 202 | void maps__remove(struct maps *maps, struct map *map); | ||
| 203 | struct map *maps__find(struct maps *maps, u64 addr); | ||
| 204 | struct map *maps__first(struct maps *maps); | ||
| 205 | struct map *map__next(struct map *map); | ||
| 206 | struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, | ||
| 207 | struct map **mapp); | ||
| 208 | void map_groups__init(struct map_groups *mg, struct machine *machine); | ||
| 209 | void map_groups__exit(struct map_groups *mg); | ||
| 210 | int map_groups__clone(struct thread *thread, | ||
| 211 | struct map_groups *parent); | ||
| 212 | size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); | ||
| 213 | |||
| 214 | int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, | 157 | int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, |
| 215 | u64 addr); | 158 | u64 addr); |
| 216 | 159 | ||
| 217 | static inline void map_groups__insert(struct map_groups *mg, struct map *map) | ||
| 218 | { | ||
| 219 | maps__insert(&mg->maps, map); | ||
| 220 | map->groups = mg; | ||
| 221 | } | ||
| 222 | |||
| 223 | static inline void map_groups__remove(struct map_groups *mg, struct map *map) | ||
| 224 | { | ||
| 225 | maps__remove(&mg->maps, map); | ||
| 226 | } | ||
| 227 | |||
| 228 | static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) | ||
| 229 | { | ||
| 230 | return maps__find(&mg->maps, addr); | ||
| 231 | } | ||
| 232 | |||
| 233 | struct map *map_groups__first(struct map_groups *mg); | ||
| 234 | |||
| 235 | static inline struct map *map_groups__next(struct map *map) | ||
| 236 | { | ||
| 237 | return map__next(map); | ||
| 238 | } | ||
| 239 | |||
| 240 | struct symbol *map_groups__find_symbol(struct map_groups *mg, | ||
| 241 | u64 addr, struct map **mapp); | ||
| 242 | |||
| 243 | struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, | ||
| 244 | const char *name, | ||
| 245 | struct map **mapp); | ||
| 246 | |||
| 247 | struct addr_map_symbol; | ||
| 248 | |||
| 249 | int map_groups__find_ams(struct addr_map_symbol *ams); | ||
| 250 | |||
| 251 | int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, | ||
| 252 | FILE *fp); | ||
| 253 | |||
| 254 | struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); | ||
| 255 | |||
| 256 | bool __map__is_kernel(const struct map *map); | 160 | bool __map__is_kernel(const struct map *map); |
| 257 | bool __map__is_extra_kernel_map(const struct map *map); | 161 | bool __map__is_extra_kernel_map(const struct map *map); |
| 258 | 162 | ||
diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h new file mode 100644 index 000000000000..4dcda33e0fdf --- /dev/null +++ b/tools/perf/util/map_groups.h | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __PERF_MAP_GROUPS_H | ||
| 3 | #define __PERF_MAP_GROUPS_H | ||
| 4 | |||
| 5 | #include <linux/refcount.h> | ||
| 6 | #include <linux/rbtree.h> | ||
| 7 | #include <stdio.h> | ||
| 8 | #include <stdbool.h> | ||
| 9 | #include <linux/types.h> | ||
| 10 | #include "rwsem.h" | ||
| 11 | |||
| 12 | struct ref_reloc_sym; | ||
| 13 | struct machine; | ||
| 14 | struct map; | ||
| 15 | struct thread; | ||
| 16 | |||
| 17 | struct maps { | ||
| 18 | struct rb_root entries; | ||
| 19 | struct rb_root names; | ||
| 20 | struct rw_semaphore lock; | ||
| 21 | }; | ||
| 22 | |||
| 23 | void maps__insert(struct maps *maps, struct map *map); | ||
| 24 | void maps__remove(struct maps *maps, struct map *map); | ||
| 25 | struct map *maps__find(struct maps *maps, u64 addr); | ||
| 26 | struct map *maps__first(struct maps *maps); | ||
| 27 | struct map *map__next(struct map *map); | ||
| 28 | struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp); | ||
| 29 | |||
| 30 | struct map_groups { | ||
| 31 | struct maps maps; | ||
| 32 | struct machine *machine; | ||
| 33 | refcount_t refcnt; | ||
| 34 | }; | ||
| 35 | |||
| 36 | #define KMAP_NAME_LEN 256 | ||
| 37 | |||
| 38 | struct kmap { | ||
| 39 | struct ref_reloc_sym *ref_reloc_sym; | ||
| 40 | struct map_groups *kmaps; | ||
| 41 | char name[KMAP_NAME_LEN]; | ||
| 42 | }; | ||
| 43 | |||
| 44 | struct map_groups *map_groups__new(struct machine *machine); | ||
| 45 | void map_groups__delete(struct map_groups *mg); | ||
| 46 | bool map_groups__empty(struct map_groups *mg); | ||
| 47 | |||
| 48 | static inline struct map_groups *map_groups__get(struct map_groups *mg) | ||
| 49 | { | ||
| 50 | if (mg) | ||
| 51 | refcount_inc(&mg->refcnt); | ||
| 52 | return mg; | ||
| 53 | } | ||
| 54 | |||
| 55 | void map_groups__put(struct map_groups *mg); | ||
| 56 | void map_groups__init(struct map_groups *mg, struct machine *machine); | ||
| 57 | void map_groups__exit(struct map_groups *mg); | ||
| 58 | int map_groups__clone(struct thread *thread, struct map_groups *parent); | ||
| 59 | size_t map_groups__fprintf(struct map_groups *mg, FILE *fp); | ||
| 60 | |||
| 61 | void map_groups__insert(struct map_groups *mg, struct map *map); | ||
| 62 | |||
| 63 | static inline void map_groups__remove(struct map_groups *mg, struct map *map) | ||
| 64 | { | ||
| 65 | maps__remove(&mg->maps, map); | ||
| 66 | } | ||
| 67 | |||
| 68 | static inline struct map *map_groups__find(struct map_groups *mg, u64 addr) | ||
| 69 | { | ||
| 70 | return maps__find(&mg->maps, addr); | ||
| 71 | } | ||
| 72 | |||
| 73 | struct map *map_groups__first(struct map_groups *mg); | ||
| 74 | |||
| 75 | static inline struct map *map_groups__next(struct map *map) | ||
| 76 | { | ||
| 77 | return map__next(map); | ||
| 78 | } | ||
| 79 | |||
| 80 | struct symbol *map_groups__find_symbol(struct map_groups *mg, u64 addr, struct map **mapp); | ||
| 81 | struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, const char *name, struct map **mapp); | ||
| 82 | |||
| 83 | struct addr_map_symbol; | ||
| 84 | |||
| 85 | int map_groups__find_ams(struct addr_map_symbol *ams); | ||
| 86 | |||
| 87 | int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE *fp); | ||
| 88 | |||
| 89 | struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); | ||
| 90 | |||
| 91 | #endif // __PERF_MAP_GROUPS_H | ||
diff --git a/tools/perf/util/map_symbol.h b/tools/perf/util/map_symbol.h new file mode 100644 index 000000000000..5a1aed9f6bb4 --- /dev/null +++ b/tools/perf/util/map_symbol.h | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | #ifndef __PERF_MAP_SYMBOL | ||
| 3 | #define __PERF_MAP_SYMBOL 1 | ||
| 4 | |||
| 5 | #include <linux/types.h> | ||
| 6 | |||
| 7 | struct map; | ||
| 8 | struct symbol; | ||
| 9 | |||
| 10 | struct map_symbol { | ||
| 11 | struct map *map; | ||
| 12 | struct symbol *sym; | ||
| 13 | }; | ||
| 14 | |||
| 15 | struct addr_map_symbol { | ||
| 16 | struct map *map; | ||
| 17 | struct symbol *sym; | ||
| 18 | u64 addr; | ||
| 19 | u64 al_addr; | ||
| 20 | u64 phys_addr; | ||
| 21 | }; | ||
| 22 | #endif // __PERF_MAP_SYMBOL | ||
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index a28f9b5cc4ff..b8d864ed4afe 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c | |||
| @@ -270,7 +270,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) | |||
| 270 | } | 270 | } |
| 271 | 271 | ||
| 272 | void metricgroup__print(bool metrics, bool metricgroups, char *filter, | 272 | void metricgroup__print(bool metrics, bool metricgroups, char *filter, |
| 273 | bool raw) | 273 | bool raw, bool details) |
| 274 | { | 274 | { |
| 275 | struct pmu_events_map *map = perf_pmu__find_map(NULL); | 275 | struct pmu_events_map *map = perf_pmu__find_map(NULL); |
| 276 | struct pmu_event *pe; | 276 | struct pmu_event *pe; |
| @@ -329,6 +329,12 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, | |||
| 329 | if (asprintf(&s, "%s\n%*s%s]", | 329 | if (asprintf(&s, "%s\n%*s%s]", |
| 330 | pe->metric_name, 8, "[", pe->desc) < 0) | 330 | pe->metric_name, 8, "[", pe->desc) < 0) |
| 331 | return; | 331 | return; |
| 332 | |||
| 333 | if (details) { | ||
| 334 | if (asprintf(&s, "%s\n%*s%s]", | ||
| 335 | s, 8, "[", pe->metric_expr) < 0) | ||
| 336 | return; | ||
| 337 | } | ||
| 332 | } | 338 | } |
| 333 | 339 | ||
| 334 | if (!s) | 340 | if (!s) |
| @@ -352,7 +358,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, | |||
| 352 | else if (metrics && !raw) | 358 | else if (metrics && !raw) |
| 353 | printf("\nMetrics:\n\n"); | 359 | printf("\nMetrics:\n\n"); |
| 354 | 360 | ||
| 355 | for (node = rb_first(&groups.entries); node; node = next) { | 361 | for (node = rb_first_cached(&groups.entries); node; node = next) { |
| 356 | struct mep *me = container_of(node, struct mep, nd); | 362 | struct mep *me = container_of(node, struct mep, nd); |
| 357 | 363 | ||
| 358 | if (metricgroups) | 364 | if (metricgroups) |
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 8a155dba0581..5c52097a5c63 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h | |||
| @@ -27,6 +27,7 @@ int metricgroup__parse_groups(const struct option *opt, | |||
| 27 | const char *str, | 27 | const char *str, |
| 28 | struct rblist *metric_events); | 28 | struct rblist *metric_events); |
| 29 | 29 | ||
| 30 | void metricgroup__print(bool metrics, bool groups, char *filter, bool raw); | 30 | void metricgroup__print(bool metrics, bool groups, char *filter, |
| 31 | bool raw, bool details); | ||
| 31 | bool metricgroup__has_metric(const char *metric); | 32 | bool metricgroup__has_metric(const char *metric); |
| 32 | #endif | 33 | #endif |
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 8fc39311a30d..cdc7740fc181 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c | |||
| @@ -10,6 +10,9 @@ | |||
| 10 | #include <sys/mman.h> | 10 | #include <sys/mman.h> |
| 11 | #include <inttypes.h> | 11 | #include <inttypes.h> |
| 12 | #include <asm/bug.h> | 12 | #include <asm/bug.h> |
| 13 | #ifdef HAVE_LIBNUMA_SUPPORT | ||
| 14 | #include <numaif.h> | ||
| 15 | #endif | ||
| 13 | #include "debug.h" | 16 | #include "debug.h" |
| 14 | #include "event.h" | 17 | #include "event.h" |
| 15 | #include "mmap.h" | 18 | #include "mmap.h" |
| @@ -154,9 +157,72 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb | |||
| 154 | } | 157 | } |
| 155 | 158 | ||
| 156 | #ifdef HAVE_AIO_SUPPORT | 159 | #ifdef HAVE_AIO_SUPPORT |
| 160 | |||
| 161 | #ifdef HAVE_LIBNUMA_SUPPORT | ||
| 162 | static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) | ||
| 163 | { | ||
| 164 | map->aio.data[idx] = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, | ||
| 165 | MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); | ||
| 166 | if (map->aio.data[idx] == MAP_FAILED) { | ||
| 167 | map->aio.data[idx] = NULL; | ||
| 168 | return -1; | ||
| 169 | } | ||
| 170 | |||
| 171 | return 0; | ||
| 172 | } | ||
| 173 | |||
| 174 | static void perf_mmap__aio_free(struct perf_mmap *map, int idx) | ||
| 175 | { | ||
| 176 | if (map->aio.data[idx]) { | ||
| 177 | munmap(map->aio.data[idx], perf_mmap__mmap_len(map)); | ||
| 178 | map->aio.data[idx] = NULL; | ||
| 179 | } | ||
| 180 | } | ||
| 181 | |||
| 182 | static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affinity) | ||
| 183 | { | ||
| 184 | void *data; | ||
| 185 | size_t mmap_len; | ||
| 186 | unsigned long node_mask; | ||
| 187 | |||
| 188 | if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) { | ||
| 189 | data = map->aio.data[idx]; | ||
| 190 | mmap_len = perf_mmap__mmap_len(map); | ||
| 191 | node_mask = 1UL << cpu__get_node(cpu); | ||
| 192 | if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) { | ||
| 193 | pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n", | ||
| 194 | data, data + mmap_len, cpu__get_node(cpu)); | ||
| 195 | return -1; | ||
| 196 | } | ||
| 197 | } | ||
| 198 | |||
| 199 | return 0; | ||
| 200 | } | ||
| 201 | #else | ||
| 202 | static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) | ||
| 203 | { | ||
| 204 | map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); | ||
| 205 | if (map->aio.data[idx] == NULL) | ||
| 206 | return -1; | ||
| 207 | |||
| 208 | return 0; | ||
| 209 | } | ||
| 210 | |||
| 211 | static void perf_mmap__aio_free(struct perf_mmap *map, int idx) | ||
| 212 | { | ||
| 213 | zfree(&(map->aio.data[idx])); | ||
| 214 | } | ||
| 215 | |||
| 216 | static int perf_mmap__aio_bind(struct perf_mmap *map __maybe_unused, int idx __maybe_unused, | ||
| 217 | int cpu __maybe_unused, int affinity __maybe_unused) | ||
| 218 | { | ||
| 219 | return 0; | ||
| 220 | } | ||
| 221 | #endif | ||
| 222 | |||
| 157 | static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) | 223 | static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) |
| 158 | { | 224 | { |
| 159 | int delta_max, i, prio; | 225 | int delta_max, i, prio, ret; |
| 160 | 226 | ||
| 161 | map->aio.nr_cblocks = mp->nr_cblocks; | 227 | map->aio.nr_cblocks = mp->nr_cblocks; |
| 162 | if (map->aio.nr_cblocks) { | 228 | if (map->aio.nr_cblocks) { |
| @@ -177,11 +243,14 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp) | |||
| 177 | } | 243 | } |
| 178 | delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); | 244 | delta_max = sysconf(_SC_AIO_PRIO_DELTA_MAX); |
| 179 | for (i = 0; i < map->aio.nr_cblocks; ++i) { | 245 | for (i = 0; i < map->aio.nr_cblocks; ++i) { |
| 180 | map->aio.data[i] = malloc(perf_mmap__mmap_len(map)); | 246 | ret = perf_mmap__aio_alloc(map, i); |
| 181 | if (!map->aio.data[i]) { | 247 | if (ret == -1) { |
| 182 | pr_debug2("failed to allocate data buffer area, error %m"); | 248 | pr_debug2("failed to allocate data buffer area, error %m"); |
| 183 | return -1; | 249 | return -1; |
| 184 | } | 250 | } |
| 251 | ret = perf_mmap__aio_bind(map, i, map->cpu, mp->affinity); | ||
| 252 | if (ret == -1) | ||
| 253 | return -1; | ||
| 185 | /* | 254 | /* |
| 186 | * Use cblock.aio_fildes value different from -1 | 255 | * Use cblock.aio_fildes value different from -1 |
| 187 | * to denote started aio write operation on the | 256 | * to denote started aio write operation on the |
| @@ -210,7 +279,7 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) | |||
| 210 | int i; | 279 | int i; |
| 211 | 280 | ||
| 212 | for (i = 0; i < map->aio.nr_cblocks; ++i) | 281 | for (i = 0; i < map->aio.nr_cblocks; ++i) |
| 213 | zfree(&map->aio.data[i]); | 282 | perf_mmap__aio_free(map, i); |
| 214 | if (map->aio.data) | 283 | if (map->aio.data) |
| 215 | zfree(&map->aio.data); | 284 | zfree(&map->aio.data); |
| 216 | zfree(&map->aio.cblocks); | 285 | zfree(&map->aio.cblocks); |
| @@ -314,6 +383,32 @@ void perf_mmap__munmap(struct perf_mmap *map) | |||
| 314 | auxtrace_mmap__munmap(&map->auxtrace_mmap); | 383 | auxtrace_mmap__munmap(&map->auxtrace_mmap); |
| 315 | } | 384 | } |
| 316 | 385 | ||
| 386 | static void build_node_mask(int node, cpu_set_t *mask) | ||
| 387 | { | ||
| 388 | int c, cpu, nr_cpus; | ||
| 389 | const struct cpu_map *cpu_map = NULL; | ||
| 390 | |||
| 391 | cpu_map = cpu_map__online(); | ||
| 392 | if (!cpu_map) | ||
| 393 | return; | ||
| 394 | |||
| 395 | nr_cpus = cpu_map__nr(cpu_map); | ||
| 396 | for (c = 0; c < nr_cpus; c++) { | ||
| 397 | cpu = cpu_map->map[c]; /* map c index to online cpu index */ | ||
| 398 | if (cpu__get_node(cpu) == node) | ||
| 399 | CPU_SET(cpu, mask); | ||
| 400 | } | ||
| 401 | } | ||
| 402 | |||
| 403 | static void perf_mmap__setup_affinity_mask(struct perf_mmap *map, struct mmap_params *mp) | ||
| 404 | { | ||
| 405 | CPU_ZERO(&map->affinity_mask); | ||
| 406 | if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1) | ||
| 407 | build_node_mask(cpu__get_node(map->cpu), &map->affinity_mask); | ||
| 408 | else if (mp->affinity == PERF_AFFINITY_CPU) | ||
| 409 | CPU_SET(map->cpu, &map->affinity_mask); | ||
| 410 | } | ||
| 411 | |||
| 317 | int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) | 412 | int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu) |
| 318 | { | 413 | { |
| 319 | /* | 414 | /* |
| @@ -343,6 +438,8 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c | |||
| 343 | map->fd = fd; | 438 | map->fd = fd; |
| 344 | map->cpu = cpu; | 439 | map->cpu = cpu; |
| 345 | 440 | ||
| 441 | perf_mmap__setup_affinity_mask(map, mp); | ||
| 442 | |||
| 346 | if (auxtrace_mmap__mmap(&map->auxtrace_mmap, | 443 | if (auxtrace_mmap__mmap(&map->auxtrace_mmap, |
| 347 | &mp->auxtrace_mp, map->base, fd)) | 444 | &mp->auxtrace_mp, map->base, fd)) |
| 348 | return -1; | 445 | return -1; |
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index aeb6942fdb00..e566c19b242b 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h | |||
| @@ -38,6 +38,7 @@ struct perf_mmap { | |||
| 38 | int nr_cblocks; | 38 | int nr_cblocks; |
| 39 | } aio; | 39 | } aio; |
| 40 | #endif | 40 | #endif |
| 41 | cpu_set_t affinity_mask; | ||
| 41 | }; | 42 | }; |
| 42 | 43 | ||
| 43 | /* | 44 | /* |
| @@ -69,7 +70,7 @@ enum bkw_mmap_state { | |||
| 69 | }; | 70 | }; |
| 70 | 71 | ||
| 71 | struct mmap_params { | 72 | struct mmap_params { |
| 72 | int prot, mask, nr_cblocks; | 73 | int prot, mask, nr_cblocks, affinity; |
| 73 | struct auxtrace_mmap_params auxtrace_mp; | 74 | struct auxtrace_mmap_params auxtrace_mp; |
| 74 | }; | 75 | }; |
| 75 | 76 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 920e1e6551dd..4dcc01b2532c 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
| @@ -2540,7 +2540,7 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag, | |||
| 2540 | 2540 | ||
| 2541 | print_sdt_events(NULL, NULL, name_only); | 2541 | print_sdt_events(NULL, NULL, name_only); |
| 2542 | 2542 | ||
| 2543 | metricgroup__print(true, true, NULL, name_only); | 2543 | metricgroup__print(true, true, NULL, name_only, details_flag); |
| 2544 | } | 2544 | } |
| 2545 | 2545 | ||
| 2546 | int parse_events__is_hardcoded_term(struct parse_events_term *term) | 2546 | int parse_events__is_hardcoded_term(struct parse_events_term *term) |
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index da8fe57691b8..44819bdb037d 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y | |||
| @@ -311,7 +311,7 @@ value_sym '/' event_config '/' | |||
| 311 | $$ = list; | 311 | $$ = list; |
| 312 | } | 312 | } |
| 313 | | | 313 | | |
| 314 | value_sym sep_slash_dc | 314 | value_sym sep_slash_slash_dc |
| 315 | { | 315 | { |
| 316 | struct list_head *list; | 316 | struct list_head *list; |
| 317 | int type = $1 >> 16; | 317 | int type = $1 >> 16; |
| @@ -702,7 +702,7 @@ PE_VALUE PE_ARRAY_RANGE PE_VALUE | |||
| 702 | 702 | ||
| 703 | sep_dc: ':' | | 703 | sep_dc: ':' | |
| 704 | 704 | ||
| 705 | sep_slash_dc: '/' | ':' | | 705 | sep_slash_slash_dc: '/' '/' | ':' | |
| 706 | 706 | ||
| 707 | %% | 707 | %% |
| 708 | 708 | ||
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 11a234740632..51d437f55d18 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c | |||
| @@ -29,8 +29,6 @@ struct perf_pmu_format { | |||
| 29 | struct list_head list; | 29 | struct list_head list; |
| 30 | }; | 30 | }; |
| 31 | 31 | ||
| 32 | #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" | ||
| 33 | |||
| 34 | int perf_pmu_parse(struct list_head *list, char *name); | 32 | int perf_pmu_parse(struct list_head *list, char *name); |
| 35 | extern FILE *perf_pmu_in; | 33 | extern FILE *perf_pmu_in; |
| 36 | 34 | ||
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 76fecec7b3f9..47253c3daf55 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h | |||
| @@ -6,9 +6,10 @@ | |||
| 6 | #include <linux/compiler.h> | 6 | #include <linux/compiler.h> |
| 7 | #include <linux/perf_event.h> | 7 | #include <linux/perf_event.h> |
| 8 | #include <stdbool.h> | 8 | #include <stdbool.h> |
| 9 | #include "evsel.h" | ||
| 10 | #include "parse-events.h" | 9 | #include "parse-events.h" |
| 11 | 10 | ||
| 11 | struct perf_evsel_config_term; | ||
| 12 | |||
| 12 | enum { | 13 | enum { |
| 13 | PERF_PMU_FORMAT_VALUE_CONFIG, | 14 | PERF_PMU_FORMAT_VALUE_CONFIG, |
| 14 | PERF_PMU_FORMAT_VALUE_CONFIG1, | 15 | PERF_PMU_FORMAT_VALUE_CONFIG1, |
| @@ -16,6 +17,7 @@ enum { | |||
| 16 | }; | 17 | }; |
| 17 | 18 | ||
| 18 | #define PERF_PMU_FORMAT_BITS 64 | 19 | #define PERF_PMU_FORMAT_BITS 64 |
| 20 | #define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/" | ||
| 19 | 21 | ||
| 20 | struct perf_event_attr; | 22 | struct perf_event_attr; |
| 21 | 23 | ||
| @@ -29,7 +31,6 @@ struct perf_pmu { | |||
| 29 | struct list_head format; /* HEAD struct perf_pmu_format -> list */ | 31 | struct list_head format; /* HEAD struct perf_pmu_format -> list */ |
| 30 | struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ | 32 | struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ |
| 31 | struct list_head list; /* ELEM */ | 33 | struct list_head list; /* ELEM */ |
| 32 | int (*set_drv_config) (struct perf_evsel_config_term *term); | ||
| 33 | }; | 34 | }; |
| 34 | 35 | ||
| 35 | struct perf_pmu_info { | 36 | struct perf_pmu_info { |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 18a59fba97ff..0030f9b9bf7e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
| @@ -35,11 +35,14 @@ | |||
| 35 | 35 | ||
| 36 | #include "util.h" | 36 | #include "util.h" |
| 37 | #include "event.h" | 37 | #include "event.h" |
| 38 | #include "namespaces.h" | ||
| 38 | #include "strlist.h" | 39 | #include "strlist.h" |
| 39 | #include "strfilter.h" | 40 | #include "strfilter.h" |
| 40 | #include "debug.h" | 41 | #include "debug.h" |
| 41 | #include "cache.h" | 42 | #include "cache.h" |
| 42 | #include "color.h" | 43 | #include "color.h" |
| 44 | #include "map.h" | ||
| 45 | #include "map_groups.h" | ||
| 43 | #include "symbol.h" | 46 | #include "symbol.h" |
| 44 | #include "thread.h" | 47 | #include "thread.h" |
| 45 | #include <api/fs/fs.h> | 48 | #include <api/fs/fs.h> |
| @@ -3528,7 +3531,8 @@ int show_available_funcs(const char *target, struct nsinfo *nsi, | |||
| 3528 | /* Show all (filtered) symbols */ | 3531 | /* Show all (filtered) symbols */ |
| 3529 | setup_pager(); | 3532 | setup_pager(); |
| 3530 | 3533 | ||
| 3531 | for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) { | 3534 | for (nd = rb_first_cached(&map->dso->symbol_names); nd; |
| 3535 | nd = rb_next(nd)) { | ||
| 3532 | struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); | 3536 | struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); |
| 3533 | 3537 | ||
| 3534 | if (strfilter__compare(_filter, pos->sym.name)) | 3538 | if (strfilter__compare(_filter, pos->sym.name)) |
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 15a98c3a2a2f..05c8d571a901 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h | |||
| @@ -4,8 +4,9 @@ | |||
| 4 | 4 | ||
| 5 | #include <linux/compiler.h> | 5 | #include <linux/compiler.h> |
| 6 | #include <stdbool.h> | 6 | #include <stdbool.h> |
| 7 | #include "intlist.h" | 7 | |
| 8 | #include "namespaces.h" | 8 | struct intlist; |
| 9 | struct nsinfo; | ||
| 9 | 10 | ||
| 10 | /* Probe related configurations */ | 11 | /* Probe related configurations */ |
| 11 | struct probe_conf { | 12 | struct probe_conf { |
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 0b1195cad0e5..4062bc4412a9 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <sys/types.h> | 20 | #include <sys/types.h> |
| 21 | #include <sys/uio.h> | 21 | #include <sys/uio.h> |
| 22 | #include <unistd.h> | 22 | #include <unistd.h> |
| 23 | #include "namespaces.h" | ||
| 23 | #include "util.h" | 24 | #include "util.h" |
| 24 | #include "event.h" | 25 | #include "event.h" |
| 25 | #include "strlist.h" | 26 | #include "strlist.h" |
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h index a920f702a74d..376e86cb4c3c 100644 --- a/tools/perf/util/rb_resort.h +++ b/tools/perf/util/rb_resort.h | |||
| @@ -140,12 +140,12 @@ struct __name##_sorted *__name = __name##_sorted__new | |||
| 140 | 140 | ||
| 141 | /* For 'struct intlist' */ | 141 | /* For 'struct intlist' */ |
| 142 | #define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ | 142 | #define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \ |
| 143 | DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \ | 143 | DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries.rb_root, \ |
| 144 | __ilist->rblist.nr_entries) | 144 | __ilist->rblist.nr_entries) |
| 145 | 145 | ||
| 146 | /* For 'struct machine->threads' */ | 146 | /* For 'struct machine->threads' */ |
| 147 | #define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ | 147 | #define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \ |
| 148 | DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \ | 148 | DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries.rb_root, \ |
| 149 | __machine->threads[hash_bucket].nr) | 149 | __machine->threads[hash_bucket].nr) |
| 150 | 150 | ||
| 151 | #endif /* _PERF_RESORT_RB_H_ */ | 151 | #endif /* _PERF_RESORT_RB_H_ */ |
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0efc3258c648..11e07fab20dc 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c | |||
| @@ -13,8 +13,9 @@ | |||
| 13 | 13 | ||
| 14 | int rblist__add_node(struct rblist *rblist, const void *new_entry) | 14 | int rblist__add_node(struct rblist *rblist, const void *new_entry) |
| 15 | { | 15 | { |
| 16 | struct rb_node **p = &rblist->entries.rb_node; | 16 | struct rb_node **p = &rblist->entries.rb_root.rb_node; |
| 17 | struct rb_node *parent = NULL, *new_node; | 17 | struct rb_node *parent = NULL, *new_node; |
| 18 | bool leftmost = true; | ||
| 18 | 19 | ||
| 19 | while (*p != NULL) { | 20 | while (*p != NULL) { |
| 20 | int rc; | 21 | int rc; |
| @@ -24,8 +25,10 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) | |||
| 24 | rc = rblist->node_cmp(parent, new_entry); | 25 | rc = rblist->node_cmp(parent, new_entry); |
| 25 | if (rc > 0) | 26 | if (rc > 0) |
| 26 | p = &(*p)->rb_left; | 27 | p = &(*p)->rb_left; |
| 27 | else if (rc < 0) | 28 | else if (rc < 0) { |
| 28 | p = &(*p)->rb_right; | 29 | p = &(*p)->rb_right; |
| 30 | leftmost = false; | ||
| 31 | } | ||
| 29 | else | 32 | else |
| 30 | return -EEXIST; | 33 | return -EEXIST; |
| 31 | } | 34 | } |
| @@ -35,7 +38,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) | |||
| 35 | return -ENOMEM; | 38 | return -ENOMEM; |
| 36 | 39 | ||
| 37 | rb_link_node(new_node, parent, p); | 40 | rb_link_node(new_node, parent, p); |
| 38 | rb_insert_color(new_node, &rblist->entries); | 41 | rb_insert_color_cached(new_node, &rblist->entries, leftmost); |
| 39 | ++rblist->nr_entries; | 42 | ++rblist->nr_entries; |
| 40 | 43 | ||
| 41 | return 0; | 44 | return 0; |
| @@ -43,7 +46,7 @@ int rblist__add_node(struct rblist *rblist, const void *new_entry) | |||
| 43 | 46 | ||
| 44 | void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) | 47 | void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node) |
| 45 | { | 48 | { |
| 46 | rb_erase(rb_node, &rblist->entries); | 49 | rb_erase_cached(rb_node, &rblist->entries); |
| 47 | --rblist->nr_entries; | 50 | --rblist->nr_entries; |
| 48 | rblist->node_delete(rblist, rb_node); | 51 | rblist->node_delete(rblist, rb_node); |
| 49 | } | 52 | } |
| @@ -52,8 +55,9 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, | |||
| 52 | const void *entry, | 55 | const void *entry, |
| 53 | bool create) | 56 | bool create) |
| 54 | { | 57 | { |
| 55 | struct rb_node **p = &rblist->entries.rb_node; | 58 | struct rb_node **p = &rblist->entries.rb_root.rb_node; |
| 56 | struct rb_node *parent = NULL, *new_node = NULL; | 59 | struct rb_node *parent = NULL, *new_node = NULL; |
| 60 | bool leftmost = true; | ||
| 57 | 61 | ||
| 58 | while (*p != NULL) { | 62 | while (*p != NULL) { |
| 59 | int rc; | 63 | int rc; |
| @@ -63,8 +67,10 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, | |||
| 63 | rc = rblist->node_cmp(parent, entry); | 67 | rc = rblist->node_cmp(parent, entry); |
| 64 | if (rc > 0) | 68 | if (rc > 0) |
| 65 | p = &(*p)->rb_left; | 69 | p = &(*p)->rb_left; |
| 66 | else if (rc < 0) | 70 | else if (rc < 0) { |
| 67 | p = &(*p)->rb_right; | 71 | p = &(*p)->rb_right; |
| 72 | leftmost = false; | ||
| 73 | } | ||
| 68 | else | 74 | else |
| 69 | return parent; | 75 | return parent; |
| 70 | } | 76 | } |
| @@ -73,7 +79,8 @@ static struct rb_node *__rblist__findnew(struct rblist *rblist, | |||
| 73 | new_node = rblist->node_new(rblist, entry); | 79 | new_node = rblist->node_new(rblist, entry); |
| 74 | if (new_node) { | 80 | if (new_node) { |
| 75 | rb_link_node(new_node, parent, p); | 81 | rb_link_node(new_node, parent, p); |
| 76 | rb_insert_color(new_node, &rblist->entries); | 82 | rb_insert_color_cached(new_node, |
| 83 | &rblist->entries, leftmost); | ||
| 77 | ++rblist->nr_entries; | 84 | ++rblist->nr_entries; |
| 78 | } | 85 | } |
| 79 | } | 86 | } |
| @@ -94,7 +101,7 @@ struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry) | |||
| 94 | void rblist__init(struct rblist *rblist) | 101 | void rblist__init(struct rblist *rblist) |
| 95 | { | 102 | { |
| 96 | if (rblist != NULL) { | 103 | if (rblist != NULL) { |
| 97 | rblist->entries = RB_ROOT; | 104 | rblist->entries = RB_ROOT_CACHED; |
| 98 | rblist->nr_entries = 0; | 105 | rblist->nr_entries = 0; |
| 99 | } | 106 | } |
| 100 | 107 | ||
| @@ -103,7 +110,7 @@ void rblist__init(struct rblist *rblist) | |||
| 103 | 110 | ||
| 104 | void rblist__exit(struct rblist *rblist) | 111 | void rblist__exit(struct rblist *rblist) |
| 105 | { | 112 | { |
| 106 | struct rb_node *pos, *next = rb_first(&rblist->entries); | 113 | struct rb_node *pos, *next = rb_first_cached(&rblist->entries); |
| 107 | 114 | ||
| 108 | while (next) { | 115 | while (next) { |
| 109 | pos = next; | 116 | pos = next; |
| @@ -124,7 +131,8 @@ struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx) | |||
| 124 | { | 131 | { |
| 125 | struct rb_node *node; | 132 | struct rb_node *node; |
| 126 | 133 | ||
| 127 | for (node = rb_first(&rblist->entries); node; node = rb_next(node)) { | 134 | for (node = rb_first_cached(&rblist->entries); node; |
| 135 | node = rb_next(node)) { | ||
| 128 | if (!idx--) | 136 | if (!idx--) |
| 129 | return node; | 137 | return node; |
| 130 | } | 138 | } |
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 76df15c27f5f..14b232a4d0b6 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h | |||
| @@ -20,7 +20,7 @@ | |||
| 20 | */ | 20 | */ |
| 21 | 21 | ||
| 22 | struct rblist { | 22 | struct rblist { |
| 23 | struct rb_root entries; | 23 | struct rb_root_cached entries; |
| 24 | unsigned int nr_entries; | 24 | unsigned int nr_entries; |
| 25 | 25 | ||
| 26 | int (*node_cmp)(struct rb_node *rbn, const void *entry); | 26 | int (*node_cmp)(struct rb_node *rbn, const void *entry); |
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h new file mode 100644 index 000000000000..d4356030b504 --- /dev/null +++ b/tools/perf/util/s390-cpumcf-kernel.h | |||
| @@ -0,0 +1,62 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | /* | ||
| 3 | * Support for s390 CPU measurement counter set diagnostic facility | ||
| 4 | * | ||
| 5 | * Copyright IBM Corp. 2019 | ||
| 6 | Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> | ||
| 7 | * Thomas Richter <tmricht@linux.ibm.com> | ||
| 8 | */ | ||
| 9 | #ifndef S390_CPUMCF_KERNEL_H | ||
| 10 | #define S390_CPUMCF_KERNEL_H | ||
| 11 | |||
| 12 | #define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */ | ||
| 13 | #define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */ | ||
| 14 | |||
| 15 | struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */ | ||
| 16 | unsigned int def:16; /* 0-15 Data Entry Format */ | ||
| 17 | unsigned int set:16; /* 16-23 Counter set identifier */ | ||
| 18 | unsigned int ctr:16; /* 24-39 Number of stored counters */ | ||
| 19 | unsigned int res1:16; /* 40-63 Reserved */ | ||
| 20 | }; | ||
| 21 | |||
| 22 | struct cf_trailer_entry { /* CPU-M CF trailer for raw traces (64 byte) */ | ||
| 23 | /* 0 - 7 */ | ||
| 24 | union { | ||
| 25 | struct { | ||
| 26 | unsigned int clock_base:1; /* TOD clock base */ | ||
| 27 | unsigned int speed:1; /* CPU speed */ | ||
| 28 | /* Measurement alerts */ | ||
| 29 | unsigned int mtda:1; /* Loss of MT ctr. data alert */ | ||
| 30 | unsigned int caca:1; /* Counter auth. change alert */ | ||
| 31 | unsigned int lcda:1; /* Loss of counter data alert */ | ||
| 32 | }; | ||
| 33 | unsigned long flags; /* 0-63 All indicators */ | ||
| 34 | }; | ||
| 35 | /* 8 - 15 */ | ||
| 36 | unsigned int cfvn:16; /* 64-79 Ctr First Version */ | ||
| 37 | unsigned int csvn:16; /* 80-95 Ctr Second Version */ | ||
| 38 | unsigned int cpu_speed:32; /* 96-127 CPU speed */ | ||
| 39 | /* 16 - 23 */ | ||
| 40 | unsigned long timestamp; /* 128-191 Timestamp (TOD) */ | ||
| 41 | /* 24 - 55 */ | ||
| 42 | union { | ||
| 43 | struct { | ||
| 44 | unsigned long progusage1; | ||
| 45 | unsigned long progusage2; | ||
| 46 | unsigned long progusage3; | ||
| 47 | unsigned long tod_base; | ||
| 48 | }; | ||
| 49 | unsigned long progusage[4]; | ||
| 50 | }; | ||
| 51 | /* 56 - 63 */ | ||
| 52 | unsigned int mach_type:16; /* Machine type */ | ||
| 53 | unsigned int res1:16; /* Reserved */ | ||
| 54 | unsigned int res2:32; /* Reserved */ | ||
| 55 | }; | ||
| 56 | |||
| 57 | #define CPUMF_CTR_SET_BASIC 0 /* Basic Counter Set */ | ||
| 58 | #define CPUMF_CTR_SET_USER 1 /* Problem-State Counter Set */ | ||
| 59 | #define CPUMF_CTR_SET_CRYPTO 2 /* Crypto-Activity Counter Set */ | ||
| 60 | #define CPUMF_CTR_SET_EXT 3 /* Extended Counter Set */ | ||
| 61 | #define CPUMF_CTR_SET_MT_DIAG 4 /* MT-diagnostic Counter Set */ | ||
| 62 | #endif | ||
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 68b2570304ec..c215704931dc 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c | |||
| @@ -162,6 +162,7 @@ | |||
| 162 | #include "auxtrace.h" | 162 | #include "auxtrace.h" |
| 163 | #include "s390-cpumsf.h" | 163 | #include "s390-cpumsf.h" |
| 164 | #include "s390-cpumsf-kernel.h" | 164 | #include "s390-cpumsf-kernel.h" |
| 165 | #include "s390-cpumcf-kernel.h" | ||
| 165 | #include "config.h" | 166 | #include "config.h" |
| 166 | 167 | ||
| 167 | struct s390_cpumsf { | 168 | struct s390_cpumsf { |
| @@ -184,8 +185,58 @@ struct s390_cpumsf_queue { | |||
| 184 | struct auxtrace_buffer *buffer; | 185 | struct auxtrace_buffer *buffer; |
| 185 | int cpu; | 186 | int cpu; |
| 186 | FILE *logfile; | 187 | FILE *logfile; |
| 188 | FILE *logfile_ctr; | ||
| 187 | }; | 189 | }; |
| 188 | 190 | ||
| 191 | /* Check if the raw data should be dumped to file. If this is the case and | ||
| 192 | * the file to dump to has not been opened for writing, do so. | ||
| 193 | * | ||
| 194 | * Return 0 on success and greater zero on error so processing continues. | ||
| 195 | */ | ||
| 196 | static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf, | ||
| 197 | struct perf_sample *sample) | ||
| 198 | { | ||
| 199 | struct s390_cpumsf_queue *sfq; | ||
| 200 | struct auxtrace_queue *q; | ||
| 201 | int rc = 0; | ||
| 202 | |||
| 203 | if (!sf->use_logfile || sf->queues.nr_queues <= sample->cpu) | ||
| 204 | return rc; | ||
| 205 | |||
| 206 | q = &sf->queues.queue_array[sample->cpu]; | ||
| 207 | sfq = q->priv; | ||
| 208 | if (!sfq) /* Queue not yet allocated */ | ||
| 209 | return rc; | ||
| 210 | |||
| 211 | if (!sfq->logfile_ctr) { | ||
| 212 | char *name; | ||
| 213 | |||
| 214 | rc = (sf->logdir) | ||
| 215 | ? asprintf(&name, "%s/aux.ctr.%02x", | ||
| 216 | sf->logdir, sample->cpu) | ||
| 217 | : asprintf(&name, "aux.ctr.%02x", sample->cpu); | ||
| 218 | if (rc > 0) | ||
| 219 | sfq->logfile_ctr = fopen(name, "w"); | ||
| 220 | if (sfq->logfile_ctr == NULL) { | ||
| 221 | pr_err("Failed to open counter set log file %s, " | ||
| 222 | "continue...\n", name); | ||
| 223 | rc = 1; | ||
| 224 | } | ||
| 225 | free(name); | ||
| 226 | } | ||
| 227 | |||
| 228 | if (sfq->logfile_ctr) { | ||
| 229 | /* See comment above for -4 */ | ||
| 230 | size_t n = fwrite(sample->raw_data, sample->raw_size - 4, 1, | ||
| 231 | sfq->logfile_ctr); | ||
| 232 | if (n != 1) { | ||
| 233 | pr_err("Failed to write counter set data\n"); | ||
| 234 | rc = 1; | ||
| 235 | } | ||
| 236 | } | ||
| 237 | return rc; | ||
| 238 | } | ||
| 239 | |||
| 189 | /* Display s390 CPU measurement facility basic-sampling data entry */ | 240 | /* Display s390 CPU measurement facility basic-sampling data entry */ |
| 190 | static bool s390_cpumsf_basic_show(const char *color, size_t pos, | 241 | static bool s390_cpumsf_basic_show(const char *color, size_t pos, |
| 191 | struct hws_basic_entry *basic) | 242 | struct hws_basic_entry *basic) |
| @@ -301,6 +352,11 @@ static bool s390_cpumsf_validate(int machine_type, | |||
| 301 | *dsdes = 85; | 352 | *dsdes = 85; |
| 302 | *bsdes = 32; | 353 | *bsdes = 32; |
| 303 | break; | 354 | break; |
| 355 | case 2964: | ||
| 356 | case 2965: | ||
| 357 | *dsdes = 112; | ||
| 358 | *bsdes = 32; | ||
| 359 | break; | ||
| 304 | default: | 360 | default: |
| 305 | /* Illegal trailer entry */ | 361 | /* Illegal trailer entry */ |
| 306 | return false; | 362 | return false; |
| @@ -768,7 +824,7 @@ static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp) | |||
| 768 | } | 824 | } |
| 769 | 825 | ||
| 770 | static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, | 826 | static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, |
| 771 | pid_t pid, pid_t tid, u64 ip) | 827 | pid_t pid, pid_t tid, u64 ip, u64 timestamp) |
| 772 | { | 828 | { |
| 773 | char msg[MAX_AUXTRACE_ERROR_MSG]; | 829 | char msg[MAX_AUXTRACE_ERROR_MSG]; |
| 774 | union perf_event event; | 830 | union perf_event event; |
| @@ -776,7 +832,7 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, | |||
| 776 | 832 | ||
| 777 | strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); | 833 | strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1); |
| 778 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | 834 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, |
| 779 | code, cpu, pid, tid, ip, msg); | 835 | code, cpu, pid, tid, ip, msg, timestamp); |
| 780 | 836 | ||
| 781 | err = perf_session__deliver_synth_event(sf->session, &event, NULL); | 837 | err = perf_session__deliver_synth_event(sf->session, &event, NULL); |
| 782 | if (err) | 838 | if (err) |
| @@ -788,11 +844,12 @@ static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu, | |||
| 788 | static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) | 844 | static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample) |
| 789 | { | 845 | { |
| 790 | return s390_cpumsf_synth_error(sf, 1, sample->cpu, | 846 | return s390_cpumsf_synth_error(sf, 1, sample->cpu, |
| 791 | sample->pid, sample->tid, 0); | 847 | sample->pid, sample->tid, 0, |
| 848 | sample->time); | ||
| 792 | } | 849 | } |
| 793 | 850 | ||
| 794 | static int | 851 | static int |
| 795 | s390_cpumsf_process_event(struct perf_session *session __maybe_unused, | 852 | s390_cpumsf_process_event(struct perf_session *session, |
| 796 | union perf_event *event, | 853 | union perf_event *event, |
| 797 | struct perf_sample *sample, | 854 | struct perf_sample *sample, |
| 798 | struct perf_tool *tool) | 855 | struct perf_tool *tool) |
| @@ -801,6 +858,8 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused, | |||
| 801 | struct s390_cpumsf, | 858 | struct s390_cpumsf, |
| 802 | auxtrace); | 859 | auxtrace); |
| 803 | u64 timestamp = sample->time; | 860 | u64 timestamp = sample->time; |
| 861 | struct perf_evsel *ev_bc000; | ||
| 862 | |||
| 804 | int err = 0; | 863 | int err = 0; |
| 805 | 864 | ||
| 806 | if (dump_trace) | 865 | if (dump_trace) |
| @@ -811,6 +870,16 @@ s390_cpumsf_process_event(struct perf_session *session __maybe_unused, | |||
| 811 | return -EINVAL; | 870 | return -EINVAL; |
| 812 | } | 871 | } |
| 813 | 872 | ||
| 873 | if (event->header.type == PERF_RECORD_SAMPLE && | ||
| 874 | sample->raw_size) { | ||
| 875 | /* Handle event with raw data */ | ||
| 876 | ev_bc000 = perf_evlist__event2evsel(session->evlist, event); | ||
| 877 | if (ev_bc000 && | ||
| 878 | ev_bc000->attr.config == PERF_EVENT_CPUM_CF_DIAG) | ||
| 879 | err = s390_cpumcf_dumpctr(sf, sample); | ||
| 880 | return err; | ||
| 881 | } | ||
| 882 | |||
| 814 | if (event->header.type == PERF_RECORD_AUX && | 883 | if (event->header.type == PERF_RECORD_AUX && |
| 815 | event->aux.flags & PERF_AUX_FLAG_TRUNCATED) | 884 | event->aux.flags & PERF_AUX_FLAG_TRUNCATED) |
| 816 | return s390_cpumsf_lost(sf, sample); | 885 | return s390_cpumsf_lost(sf, sample); |
| @@ -891,9 +960,15 @@ static void s390_cpumsf_free_queues(struct perf_session *session) | |||
| 891 | struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) | 960 | struct s390_cpumsf_queue *sfq = (struct s390_cpumsf_queue *) |
| 892 | queues->queue_array[i].priv; | 961 | queues->queue_array[i].priv; |
| 893 | 962 | ||
| 894 | if (sfq != NULL && sfq->logfile) { | 963 | if (sfq != NULL) { |
| 895 | fclose(sfq->logfile); | 964 | if (sfq->logfile) { |
| 896 | sfq->logfile = NULL; | 965 | fclose(sfq->logfile); |
| 966 | sfq->logfile = NULL; | ||
| 967 | } | ||
| 968 | if (sfq->logfile_ctr) { | ||
| 969 | fclose(sfq->logfile_ctr); | ||
| 970 | sfq->logfile_ctr = NULL; | ||
| 971 | } | ||
| 897 | } | 972 | } |
| 898 | zfree(&queues->queue_array[i].priv); | 973 | zfree(&queues->queue_array[i].priv); |
| 899 | } | 974 | } |
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c new file mode 100644 index 000000000000..6650f599ed9c --- /dev/null +++ b/tools/perf/util/s390-sample-raw.c | |||
| @@ -0,0 +1,222 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* | ||
| 3 | * Copyright IBM Corp. 2019 | ||
| 4 | * Author(s): Thomas Richter <tmricht@linux.ibm.com> | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify | ||
| 7 | * it under the terms of the GNU General Public License (version 2 only) | ||
| 8 | * as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * Architecture specific trace_event function. Save event's bc000 raw data | ||
| 11 | * to file. File name is aux.ctr.## where ## stands for the CPU number the | ||
| 12 | * sample was taken from. | ||
| 13 | */ | ||
| 14 | |||
| 15 | #include <unistd.h> | ||
| 16 | #include <stdio.h> | ||
| 17 | #include <string.h> | ||
| 18 | #include <inttypes.h> | ||
| 19 | |||
| 20 | #include <sys/stat.h> | ||
| 21 | #include <linux/compiler.h> | ||
| 22 | #include <asm/byteorder.h> | ||
| 23 | |||
| 24 | #include "debug.h" | ||
| 25 | #include "util.h" | ||
| 26 | #include "auxtrace.h" | ||
| 27 | #include "session.h" | ||
| 28 | #include "evlist.h" | ||
| 29 | #include "config.h" | ||
| 30 | #include "color.h" | ||
| 31 | #include "sample-raw.h" | ||
| 32 | #include "s390-cpumcf-kernel.h" | ||
| 33 | #include "pmu-events/pmu-events.h" | ||
| 34 | |||
| 35 | static size_t ctrset_size(struct cf_ctrset_entry *set) | ||
| 36 | { | ||
| 37 | return sizeof(*set) + set->ctr * sizeof(u64); | ||
| 38 | } | ||
| 39 | |||
| 40 | static bool ctrset_valid(struct cf_ctrset_entry *set) | ||
| 41 | { | ||
| 42 | return set->def == S390_CPUMCF_DIAG_DEF; | ||
| 43 | } | ||
| 44 | |||
| 45 | /* CPU Measurement Counter Facility raw data is a byte stream. It is 8 byte | ||
| 46 | * aligned and might have trailing padding bytes. | ||
| 47 | * Display the raw data on screen. | ||
| 48 | */ | ||
| 49 | static bool s390_cpumcfdg_testctr(struct perf_sample *sample) | ||
| 50 | { | ||
| 51 | size_t len = sample->raw_size, offset = 0; | ||
| 52 | unsigned char *buf = sample->raw_data; | ||
| 53 | struct cf_trailer_entry *te; | ||
| 54 | struct cf_ctrset_entry *cep, ce; | ||
| 55 | |||
| 56 | if (!len) | ||
| 57 | return false; | ||
| 58 | while (offset < len) { | ||
| 59 | cep = (struct cf_ctrset_entry *)(buf + offset); | ||
| 60 | ce.def = be16_to_cpu(cep->def); | ||
| 61 | ce.set = be16_to_cpu(cep->set); | ||
| 62 | ce.ctr = be16_to_cpu(cep->ctr); | ||
| 63 | ce.res1 = be16_to_cpu(cep->res1); | ||
| 64 | |||
| 65 | if (!ctrset_valid(&ce) || offset + ctrset_size(&ce) > len) { | ||
| 66 | /* Raw data for counter sets are always multiple of 8 | ||
| 67 | * bytes. Prepending a 4 bytes size field to the | ||
| 68 | * raw data block in the sample causes the perf tool | ||
| 69 | * to append 4 padding bytes to make the raw data part | ||
| 70 | * of the sample a multiple of eight bytes again. | ||
| 71 | * | ||
| 72 | * If the last entry (trailer) is 4 bytes off the raw | ||
| 73 | * area data end, all is good. | ||
| 74 | */ | ||
| 75 | if (len - offset - sizeof(*te) == 4) | ||
| 76 | break; | ||
| 77 | pr_err("Invalid counter set entry at %zd\n", offset); | ||
| 78 | return false; | ||
| 79 | } | ||
| 80 | offset += ctrset_size(&ce); | ||
| 81 | } | ||
| 82 | return true; | ||
| 83 | } | ||
| 84 | |||
| 85 | /* Dump event bc000 on screen, already tested on correctness. */ | ||
| 86 | static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, | ||
| 87 | struct cf_trailer_entry *tep) | ||
| 88 | { | ||
| 89 | struct cf_trailer_entry te; | ||
| 90 | |||
| 91 | te.flags = be64_to_cpu(tep->flags); | ||
| 92 | te.cfvn = be16_to_cpu(tep->cfvn); | ||
| 93 | te.csvn = be16_to_cpu(tep->csvn); | ||
| 94 | te.cpu_speed = be32_to_cpu(tep->cpu_speed); | ||
| 95 | te.timestamp = be64_to_cpu(tep->timestamp); | ||
| 96 | te.progusage1 = be64_to_cpu(tep->progusage1); | ||
| 97 | te.progusage2 = be64_to_cpu(tep->progusage2); | ||
| 98 | te.progusage3 = be64_to_cpu(tep->progusage3); | ||
| 99 | te.tod_base = be64_to_cpu(tep->tod_base); | ||
| 100 | te.mach_type = be16_to_cpu(tep->mach_type); | ||
| 101 | te.res1 = be16_to_cpu(tep->res1); | ||
| 102 | te.res2 = be32_to_cpu(tep->res2); | ||
| 103 | |||
| 104 | color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" | ||
| 105 | " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", | ||
| 106 | offset, te.clock_base ? 'T' : ' ', | ||
| 107 | te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', | ||
| 108 | te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ', | ||
| 109 | te.cfvn, te.csvn, te.cpu_speed, te.timestamp); | ||
| 110 | color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" | ||
| 111 | " Type:%x\n\n", | ||
| 112 | te.progusage1, te.progusage2, te.progusage3, | ||
| 113 | te.tod_base, te.mach_type); | ||
| 114 | } | ||
| 115 | |||
| 116 | /* Return starting number of a counter set */ | ||
| 117 | static int get_counterset_start(int setnr) | ||
| 118 | { | ||
| 119 | switch (setnr) { | ||
| 120 | case CPUMF_CTR_SET_BASIC: /* Basic counter set */ | ||
| 121 | return 0; | ||
| 122 | case CPUMF_CTR_SET_USER: /* Problem state counter set */ | ||
| 123 | return 32; | ||
| 124 | case CPUMF_CTR_SET_CRYPTO: /* Crypto counter set */ | ||
| 125 | return 64; | ||
| 126 | case CPUMF_CTR_SET_EXT: /* Extended counter set */ | ||
| 127 | return 128; | ||
| 128 | case CPUMF_CTR_SET_MT_DIAG: /* Diagnostic counter set */ | ||
| 129 | return 448; | ||
| 130 | default: | ||
| 131 | return -1; | ||
| 132 | } | ||
| 133 | } | ||
| 134 | |||
| 135 | /* Scan the PMU table and extract the logical name of a counter from the | ||
| 136 | * PMU events table. Input is the counter set and counter number with in the | ||
| 137 | * set. Construct the event number and use this as key. If they match return | ||
| 138 | * the name of this counter. | ||
| 139 | * If no match is found a NULL pointer is returned. | ||
| 140 | */ | ||
| 141 | static const char *get_counter_name(int set, int nr, struct pmu_events_map *map) | ||
| 142 | { | ||
| 143 | int rc, event_nr, wanted = get_counterset_start(set) + nr; | ||
| 144 | |||
| 145 | if (map) { | ||
| 146 | struct pmu_event *evp = map->table; | ||
| 147 | |||
| 148 | for (; evp->name || evp->event || evp->desc; ++evp) { | ||
| 149 | if (evp->name == NULL || evp->event == NULL) | ||
| 150 | continue; | ||
| 151 | rc = sscanf(evp->event, "event=%x", &event_nr); | ||
| 152 | if (rc == 1 && event_nr == wanted) | ||
| 153 | return evp->name; | ||
| 154 | } | ||
| 155 | } | ||
| 156 | return NULL; | ||
| 157 | } | ||
| 158 | |||
| 159 | static void s390_cpumcfdg_dump(struct perf_sample *sample) | ||
| 160 | { | ||
| 161 | size_t i, len = sample->raw_size, offset = 0; | ||
| 162 | unsigned char *buf = sample->raw_data; | ||
| 163 | const char *color = PERF_COLOR_BLUE; | ||
| 164 | struct cf_ctrset_entry *cep, ce; | ||
| 165 | struct pmu_events_map *map; | ||
| 166 | struct perf_pmu pmu; | ||
| 167 | u64 *p; | ||
| 168 | |||
| 169 | memset(&pmu, 0, sizeof(pmu)); | ||
| 170 | map = perf_pmu__find_map(&pmu); | ||
| 171 | while (offset < len) { | ||
| 172 | cep = (struct cf_ctrset_entry *)(buf + offset); | ||
| 173 | |||
| 174 | ce.def = be16_to_cpu(cep->def); | ||
| 175 | ce.set = be16_to_cpu(cep->set); | ||
| 176 | ce.ctr = be16_to_cpu(cep->ctr); | ||
| 177 | ce.res1 = be16_to_cpu(cep->res1); | ||
| 178 | |||
| 179 | if (!ctrset_valid(&ce)) { /* Print trailer */ | ||
| 180 | s390_cpumcfdg_dumptrail(color, offset, | ||
| 181 | (struct cf_trailer_entry *)cep); | ||
| 182 | return; | ||
| 183 | } | ||
| 184 | |||
| 185 | color_fprintf(stdout, color, " [%#08zx] Counterset:%d" | ||
| 186 | " Counters:%d\n", offset, ce.set, ce.ctr); | ||
| 187 | for (i = 0, p = (u64 *)(cep + 1); i < ce.ctr; ++i, ++p) { | ||
| 188 | const char *ev_name = get_counter_name(ce.set, i, map); | ||
| 189 | |||
| 190 | color_fprintf(stdout, color, | ||
| 191 | "\tCounter:%03d %s Value:%#018lx\n", i, | ||
| 192 | ev_name ?: "<unknown>", be64_to_cpu(*p)); | ||
| 193 | } | ||
| 194 | offset += ctrset_size(&ce); | ||
| 195 | } | ||
| 196 | } | ||
| 197 | |||
| 198 | /* S390 specific trace event function. Check for PERF_RECORD_SAMPLE events | ||
| 199 | * and if the event was triggered by a counter set diagnostic event display | ||
| 200 | * its raw data. | ||
| 201 | * The function is only invoked when the dump flag -D is set. | ||
| 202 | */ | ||
| 203 | void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, union perf_event *event, | ||
| 204 | struct perf_sample *sample) | ||
| 205 | { | ||
| 206 | struct perf_evsel *ev_bc000; | ||
| 207 | |||
| 208 | if (event->header.type != PERF_RECORD_SAMPLE) | ||
| 209 | return; | ||
| 210 | |||
| 211 | ev_bc000 = perf_evlist__event2evsel(evlist, event); | ||
| 212 | if (ev_bc000 == NULL || | ||
| 213 | ev_bc000->attr.config != PERF_EVENT_CPUM_CF_DIAG) | ||
| 214 | return; | ||
| 215 | |||
| 216 | /* Display raw data on screen */ | ||
| 217 | if (!s390_cpumcfdg_testctr(sample)) { | ||
| 218 | pr_err("Invalid counter set data encountered\n"); | ||
| 219 | return; | ||
| 220 | } | ||
| 221 | s390_cpumcfdg_dump(sample); | ||
| 222 | } | ||
diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c new file mode 100644 index 000000000000..c21e1311fb0f --- /dev/null +++ b/tools/perf/util/sample-raw.c | |||
| @@ -0,0 +1,18 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | |||
| 3 | #include <string.h> | ||
| 4 | #include "evlist.h" | ||
| 5 | #include "env.h" | ||
| 6 | #include "sample-raw.h" | ||
| 7 | |||
| 8 | /* | ||
| 9 | * Check platform the perf data file was created on and perform platform | ||
| 10 | * specific interpretation. | ||
| 11 | */ | ||
| 12 | void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist) | ||
| 13 | { | ||
| 14 | const char *arch_pf = perf_env__arch(evlist->env); | ||
| 15 | |||
| 16 | if (arch_pf && !strcmp("s390", arch_pf)) | ||
| 17 | evlist->trace_event_sample_raw = perf_evlist__s390_sample_raw; | ||
| 18 | } | ||
diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h new file mode 100644 index 000000000000..95d445c87e93 --- /dev/null +++ b/tools/perf/util/sample-raw.h | |||
| @@ -0,0 +1,14 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __SAMPLE_RAW_H | ||
| 3 | #define __SAMPLE_RAW_H 1 | ||
| 4 | |||
| 5 | struct perf_evlist; | ||
| 6 | union perf_event; | ||
| 7 | struct perf_sample; | ||
| 8 | |||
| 9 | void perf_evlist__s390_sample_raw(struct perf_evlist *evlist, | ||
| 10 | union perf_event *event, | ||
| 11 | struct perf_sample *sample); | ||
| 12 | |||
| 13 | void perf_evlist__init_trace_event_sample_raw(struct perf_evlist *evlist); | ||
| 14 | #endif /* __PERF_EVLIST_H */ | ||
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build index 82d28c67e0f3..7b342ce38d99 100644 --- a/tools/perf/util/scripting-engines/Build +++ b/tools/perf/util/scripting-engines/Build | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | libperf-$(CONFIG_LIBPERL) += trace-event-perl.o | 1 | perf-$(CONFIG_LIBPERL) += trace-event-perl.o |
| 2 | libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o | 2 | perf-$(CONFIG_LIBPYTHON) += trace-event-python.o |
| 3 | 3 | ||
| 4 | CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default | 4 | CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default |
| 5 | 5 | ||
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index b93f36b887b5..5f06378a482b 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c | |||
| @@ -37,6 +37,8 @@ | |||
| 37 | #include "../../perf.h" | 37 | #include "../../perf.h" |
| 38 | #include "../callchain.h" | 38 | #include "../callchain.h" |
| 39 | #include "../machine.h" | 39 | #include "../machine.h" |
| 40 | #include "../map.h" | ||
| 41 | #include "../symbol.h" | ||
| 40 | #include "../thread.h" | 42 | #include "../thread.h" |
| 41 | #include "../event.h" | 43 | #include "../event.h" |
| 42 | #include "../trace-event.h" | 44 | #include "../trace-event.h" |
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 87ef16a1b17e..0e17db41b49b 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c | |||
| @@ -44,6 +44,8 @@ | |||
| 44 | #include "../thread-stack.h" | 44 | #include "../thread-stack.h" |
| 45 | #include "../trace-event.h" | 45 | #include "../trace-event.h" |
| 46 | #include "../call-path.h" | 46 | #include "../call-path.h" |
| 47 | #include "map.h" | ||
| 48 | #include "symbol.h" | ||
| 47 | #include "thread_map.h" | 49 | #include "thread_map.h" |
| 48 | #include "cpumap.h" | 50 | #include "cpumap.h" |
| 49 | #include "print_binary.h" | 51 | #include "print_binary.h" |
| @@ -733,8 +735,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, | |||
| 733 | Py_FatalError("couldn't create Python dictionary"); | 735 | Py_FatalError("couldn't create Python dictionary"); |
| 734 | 736 | ||
| 735 | pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); | 737 | pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel))); |
| 736 | pydict_set_item_string_decref(dict, "attr", _PyUnicode_FromStringAndSize( | 738 | pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->attr, sizeof(evsel->attr))); |
| 737 | (const char *)&evsel->attr, sizeof(evsel->attr))); | ||
| 738 | 739 | ||
| 739 | pydict_set_item_string_decref(dict_sample, "pid", | 740 | pydict_set_item_string_decref(dict_sample, "pid", |
| 740 | _PyLong_FromLong(sample->pid)); | 741 | _PyLong_FromLong(sample->pid)); |
| @@ -1494,34 +1495,40 @@ static void _free_command_line(wchar_t **command_line, int num) | |||
| 1494 | static int python_start_script(const char *script, int argc, const char **argv) | 1495 | static int python_start_script(const char *script, int argc, const char **argv) |
| 1495 | { | 1496 | { |
| 1496 | struct tables *tables = &tables_global; | 1497 | struct tables *tables = &tables_global; |
| 1498 | PyMODINIT_FUNC (*initfunc)(void); | ||
| 1497 | #if PY_MAJOR_VERSION < 3 | 1499 | #if PY_MAJOR_VERSION < 3 |
| 1498 | const char **command_line; | 1500 | const char **command_line; |
| 1499 | #else | 1501 | #else |
| 1500 | wchar_t **command_line; | 1502 | wchar_t **command_line; |
| 1501 | #endif | 1503 | #endif |
| 1502 | char buf[PATH_MAX]; | 1504 | /* |
| 1505 | * Use a non-const name variable to cope with python 2.6's | ||
| 1506 | * PyImport_AppendInittab prototype | ||
| 1507 | */ | ||
| 1508 | char buf[PATH_MAX], name[19] = "perf_trace_context"; | ||
| 1503 | int i, err = 0; | 1509 | int i, err = 0; |
| 1504 | FILE *fp; | 1510 | FILE *fp; |
| 1505 | 1511 | ||
| 1506 | #if PY_MAJOR_VERSION < 3 | 1512 | #if PY_MAJOR_VERSION < 3 |
| 1513 | initfunc = initperf_trace_context; | ||
| 1507 | command_line = malloc((argc + 1) * sizeof(const char *)); | 1514 | command_line = malloc((argc + 1) * sizeof(const char *)); |
| 1508 | command_line[0] = script; | 1515 | command_line[0] = script; |
| 1509 | for (i = 1; i < argc + 1; i++) | 1516 | for (i = 1; i < argc + 1; i++) |
| 1510 | command_line[i] = argv[i - 1]; | 1517 | command_line[i] = argv[i - 1]; |
| 1511 | #else | 1518 | #else |
| 1519 | initfunc = PyInit_perf_trace_context; | ||
| 1512 | command_line = malloc((argc + 1) * sizeof(wchar_t *)); | 1520 | command_line = malloc((argc + 1) * sizeof(wchar_t *)); |
| 1513 | command_line[0] = Py_DecodeLocale(script, NULL); | 1521 | command_line[0] = Py_DecodeLocale(script, NULL); |
| 1514 | for (i = 1; i < argc + 1; i++) | 1522 | for (i = 1; i < argc + 1; i++) |
| 1515 | command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); | 1523 | command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); |
| 1516 | #endif | 1524 | #endif |
| 1517 | 1525 | ||
| 1526 | PyImport_AppendInittab(name, initfunc); | ||
| 1518 | Py_Initialize(); | 1527 | Py_Initialize(); |
| 1519 | 1528 | ||
| 1520 | #if PY_MAJOR_VERSION < 3 | 1529 | #if PY_MAJOR_VERSION < 3 |
| 1521 | initperf_trace_context(); | ||
| 1522 | PySys_SetArgv(argc + 1, (char **)command_line); | 1530 | PySys_SetArgv(argc + 1, (char **)command_line); |
| 1523 | #else | 1531 | #else |
| 1524 | PyInit_perf_trace_context(); | ||
| 1525 | PySys_SetArgv(argc + 1, command_line); | 1532 | PySys_SetArgv(argc + 1, command_line); |
| 1526 | #endif | 1533 | #endif |
| 1527 | 1534 | ||
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5456c84c7dd1..c764bbc91009 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
| @@ -13,6 +13,8 @@ | |||
| 13 | #include "evlist.h" | 13 | #include "evlist.h" |
| 14 | #include "evsel.h" | 14 | #include "evsel.h" |
| 15 | #include "memswap.h" | 15 | #include "memswap.h" |
| 16 | #include "map.h" | ||
| 17 | #include "symbol.h" | ||
| 16 | #include "session.h" | 18 | #include "session.h" |
| 17 | #include "tool.h" | 19 | #include "tool.h" |
| 18 | #include "sort.h" | 20 | #include "sort.h" |
| @@ -23,6 +25,7 @@ | |||
| 23 | #include "auxtrace.h" | 25 | #include "auxtrace.h" |
| 24 | #include "thread.h" | 26 | #include "thread.h" |
| 25 | #include "thread-stack.h" | 27 | #include "thread-stack.h" |
| 28 | #include "sample-raw.h" | ||
| 26 | #include "stat.h" | 29 | #include "stat.h" |
| 27 | #include "arch/common.h" | 30 | #include "arch/common.h" |
| 28 | 31 | ||
| @@ -147,6 +150,8 @@ struct perf_session *perf_session__new(struct perf_data *data, | |||
| 147 | perf_session__set_id_hdr_size(session); | 150 | perf_session__set_id_hdr_size(session); |
| 148 | perf_session__set_comm_exec(session); | 151 | perf_session__set_comm_exec(session); |
| 149 | } | 152 | } |
| 153 | |||
| 154 | perf_evlist__init_trace_event_sample_raw(session->evlist); | ||
| 150 | } | 155 | } |
| 151 | } else { | 156 | } else { |
| 152 | session->machines.host.env = &perf_env; | 157 | session->machines.host.env = &perf_env; |
| @@ -376,6 +381,10 @@ void perf_tool__fill_defaults(struct perf_tool *tool) | |||
| 376 | tool->itrace_start = perf_event__process_itrace_start; | 381 | tool->itrace_start = perf_event__process_itrace_start; |
| 377 | if (tool->context_switch == NULL) | 382 | if (tool->context_switch == NULL) |
| 378 | tool->context_switch = perf_event__process_switch; | 383 | tool->context_switch = perf_event__process_switch; |
| 384 | if (tool->ksymbol == NULL) | ||
| 385 | tool->ksymbol = perf_event__process_ksymbol; | ||
| 386 | if (tool->bpf_event == NULL) | ||
| 387 | tool->bpf_event = perf_event__process_bpf_event; | ||
| 379 | if (tool->read == NULL) | 388 | if (tool->read == NULL) |
| 380 | tool->read = process_event_sample_stub; | 389 | tool->read = process_event_sample_stub; |
| 381 | if (tool->throttle == NULL) | 390 | if (tool->throttle == NULL) |
| @@ -694,7 +703,10 @@ static void perf_event__auxtrace_error_swap(union perf_event *event, | |||
| 694 | event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); | 703 | event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); |
| 695 | event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); | 704 | event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); |
| 696 | event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); | 705 | event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); |
| 706 | event->auxtrace_error.fmt = bswap_32(event->auxtrace_error.fmt); | ||
| 697 | event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); | 707 | event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); |
| 708 | if (event->auxtrace_error.fmt) | ||
| 709 | event->auxtrace_error.time = bswap_64(event->auxtrace_error.time); | ||
| 698 | } | 710 | } |
| 699 | 711 | ||
| 700 | static void perf_event__thread_map_swap(union perf_event *event, | 712 | static void perf_event__thread_map_swap(union perf_event *event, |
| @@ -1065,6 +1077,8 @@ static void dump_event(struct perf_evlist *evlist, union perf_event *event, | |||
| 1065 | file_offset, event->header.size, event->header.type); | 1077 | file_offset, event->header.size, event->header.type); |
| 1066 | 1078 | ||
| 1067 | trace_event(event); | 1079 | trace_event(event); |
| 1080 | if (event->header.type == PERF_RECORD_SAMPLE && evlist->trace_event_sample_raw) | ||
| 1081 | evlist->trace_event_sample_raw(evlist, event, sample); | ||
| 1068 | 1082 | ||
| 1069 | if (sample) | 1083 | if (sample) |
| 1070 | perf_evlist__print_tstamp(evlist, event, sample); | 1084 | perf_evlist__print_tstamp(evlist, event, sample); |
| @@ -1188,6 +1202,13 @@ static int deliver_sample_value(struct perf_evlist *evlist, | |||
| 1188 | return 0; | 1202 | return 0; |
| 1189 | } | 1203 | } |
| 1190 | 1204 | ||
| 1205 | /* | ||
| 1206 | * There's no reason to deliver sample | ||
| 1207 | * for zero period, bail out. | ||
| 1208 | */ | ||
| 1209 | if (!sample->period) | ||
| 1210 | return 0; | ||
| 1211 | |||
| 1191 | return tool->sample(tool, event, sample, sid->evsel, machine); | 1212 | return tool->sample(tool, event, sample, sid->evsel, machine); |
| 1192 | } | 1213 | } |
| 1193 | 1214 | ||
| @@ -1305,6 +1326,10 @@ static int machines__deliver_event(struct machines *machines, | |||
| 1305 | case PERF_RECORD_SWITCH: | 1326 | case PERF_RECORD_SWITCH: |
| 1306 | case PERF_RECORD_SWITCH_CPU_WIDE: | 1327 | case PERF_RECORD_SWITCH_CPU_WIDE: |
| 1307 | return tool->context_switch(tool, event, sample, machine); | 1328 | return tool->context_switch(tool, event, sample, machine); |
| 1329 | case PERF_RECORD_KSYMBOL: | ||
| 1330 | return tool->ksymbol(tool, event, sample, machine); | ||
| 1331 | case PERF_RECORD_BPF_EVENT: | ||
| 1332 | return tool->bpf_event(tool, event, sample, machine); | ||
| 1308 | default: | 1333 | default: |
| 1309 | ++evlist->stats.nr_unknown_events; | 1334 | ++evlist->stats.nr_unknown_events; |
| 1310 | return -1; | 1335 | return -1; |
| @@ -1820,38 +1845,35 @@ fetch_mmaped_event(struct perf_session *session, | |||
| 1820 | #define NUM_MMAPS 128 | 1845 | #define NUM_MMAPS 128 |
| 1821 | #endif | 1846 | #endif |
| 1822 | 1847 | ||
| 1823 | static int __perf_session__process_events(struct perf_session *session, | 1848 | struct reader { |
| 1824 | u64 data_offset, u64 data_size, | 1849 | int fd; |
| 1825 | u64 file_size) | 1850 | u64 data_size; |
| 1851 | u64 data_offset; | ||
| 1852 | }; | ||
| 1853 | |||
| 1854 | static int | ||
| 1855 | reader__process_events(struct reader *rd, struct perf_session *session, | ||
| 1856 | struct ui_progress *prog) | ||
| 1826 | { | 1857 | { |
| 1827 | struct ordered_events *oe = &session->ordered_events; | 1858 | u64 data_size = rd->data_size; |
| 1828 | struct perf_tool *tool = session->tool; | ||
| 1829 | int fd = perf_data__fd(session->data); | ||
| 1830 | u64 head, page_offset, file_offset, file_pos, size; | 1859 | u64 head, page_offset, file_offset, file_pos, size; |
| 1831 | int err, mmap_prot, mmap_flags, map_idx = 0; | 1860 | int err = 0, mmap_prot, mmap_flags, map_idx = 0; |
| 1832 | size_t mmap_size; | 1861 | size_t mmap_size; |
| 1833 | char *buf, *mmaps[NUM_MMAPS]; | 1862 | char *buf, *mmaps[NUM_MMAPS]; |
| 1834 | union perf_event *event; | 1863 | union perf_event *event; |
| 1835 | struct ui_progress prog; | ||
| 1836 | s64 skip; | 1864 | s64 skip; |
| 1837 | 1865 | ||
| 1838 | perf_tool__fill_defaults(tool); | 1866 | page_offset = page_size * (rd->data_offset / page_size); |
| 1839 | |||
| 1840 | page_offset = page_size * (data_offset / page_size); | ||
| 1841 | file_offset = page_offset; | 1867 | file_offset = page_offset; |
| 1842 | head = data_offset - page_offset; | 1868 | head = rd->data_offset - page_offset; |
| 1843 | |||
| 1844 | if (data_size == 0) | ||
| 1845 | goto out; | ||
| 1846 | 1869 | ||
| 1847 | if (data_offset + data_size < file_size) | 1870 | ui_progress__init_size(prog, data_size, "Processing events..."); |
| 1848 | file_size = data_offset + data_size; | ||
| 1849 | 1871 | ||
| 1850 | ui_progress__init_size(&prog, file_size, "Processing events..."); | 1872 | data_size += rd->data_offset; |
| 1851 | 1873 | ||
| 1852 | mmap_size = MMAP_SIZE; | 1874 | mmap_size = MMAP_SIZE; |
| 1853 | if (mmap_size > file_size) { | 1875 | if (mmap_size > data_size) { |
| 1854 | mmap_size = file_size; | 1876 | mmap_size = data_size; |
| 1855 | session->one_mmap = true; | 1877 | session->one_mmap = true; |
| 1856 | } | 1878 | } |
| 1857 | 1879 | ||
| @@ -1865,12 +1887,12 @@ static int __perf_session__process_events(struct perf_session *session, | |||
| 1865 | mmap_flags = MAP_PRIVATE; | 1887 | mmap_flags = MAP_PRIVATE; |
| 1866 | } | 1888 | } |
| 1867 | remap: | 1889 | remap: |
| 1868 | buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd, | 1890 | buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, rd->fd, |
| 1869 | file_offset); | 1891 | file_offset); |
| 1870 | if (buf == MAP_FAILED) { | 1892 | if (buf == MAP_FAILED) { |
| 1871 | pr_err("failed to mmap file\n"); | 1893 | pr_err("failed to mmap file\n"); |
| 1872 | err = -errno; | 1894 | err = -errno; |
| 1873 | goto out_err; | 1895 | goto out; |
| 1874 | } | 1896 | } |
| 1875 | mmaps[map_idx] = buf; | 1897 | mmaps[map_idx] = buf; |
| 1876 | map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); | 1898 | map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1); |
| @@ -1902,7 +1924,7 @@ more: | |||
| 1902 | file_offset + head, event->header.size, | 1924 | file_offset + head, event->header.size, |
| 1903 | event->header.type); | 1925 | event->header.type); |
| 1904 | err = -EINVAL; | 1926 | err = -EINVAL; |
| 1905 | goto out_err; | 1927 | goto out; |
| 1906 | } | 1928 | } |
| 1907 | 1929 | ||
| 1908 | if (skip) | 1930 | if (skip) |
| @@ -1911,15 +1933,40 @@ more: | |||
| 1911 | head += size; | 1933 | head += size; |
| 1912 | file_pos += size; | 1934 | file_pos += size; |
| 1913 | 1935 | ||
| 1914 | ui_progress__update(&prog, size); | 1936 | ui_progress__update(prog, size); |
| 1915 | 1937 | ||
| 1916 | if (session_done()) | 1938 | if (session_done()) |
| 1917 | goto out; | 1939 | goto out; |
| 1918 | 1940 | ||
| 1919 | if (file_pos < file_size) | 1941 | if (file_pos < data_size) |
| 1920 | goto more; | 1942 | goto more; |
| 1921 | 1943 | ||
| 1922 | out: | 1944 | out: |
| 1945 | return err; | ||
| 1946 | } | ||
| 1947 | |||
| 1948 | static int __perf_session__process_events(struct perf_session *session) | ||
| 1949 | { | ||
| 1950 | struct reader rd = { | ||
| 1951 | .fd = perf_data__fd(session->data), | ||
| 1952 | .data_size = session->header.data_size, | ||
| 1953 | .data_offset = session->header.data_offset, | ||
| 1954 | }; | ||
| 1955 | struct ordered_events *oe = &session->ordered_events; | ||
| 1956 | struct perf_tool *tool = session->tool; | ||
| 1957 | struct ui_progress prog; | ||
| 1958 | int err; | ||
| 1959 | |||
| 1960 | perf_tool__fill_defaults(tool); | ||
| 1961 | |||
| 1962 | if (rd.data_size == 0) | ||
| 1963 | return -1; | ||
| 1964 | |||
| 1965 | ui_progress__init_size(&prog, rd.data_size, "Processing events..."); | ||
| 1966 | |||
| 1967 | err = reader__process_events(&rd, session, &prog); | ||
| 1968 | if (err) | ||
| 1969 | goto out_err; | ||
| 1923 | /* do the final flush for ordered samples */ | 1970 | /* do the final flush for ordered samples */ |
| 1924 | err = ordered_events__flush(oe, OE_FLUSH__FINAL); | 1971 | err = ordered_events__flush(oe, OE_FLUSH__FINAL); |
| 1925 | if (err) | 1972 | if (err) |
| @@ -1944,20 +1991,13 @@ out_err: | |||
| 1944 | 1991 | ||
| 1945 | int perf_session__process_events(struct perf_session *session) | 1992 | int perf_session__process_events(struct perf_session *session) |
| 1946 | { | 1993 | { |
| 1947 | u64 size = perf_data__size(session->data); | ||
| 1948 | int err; | ||
| 1949 | |||
| 1950 | if (perf_session__register_idle_thread(session) < 0) | 1994 | if (perf_session__register_idle_thread(session) < 0) |
| 1951 | return -ENOMEM; | 1995 | return -ENOMEM; |
| 1952 | 1996 | ||
| 1953 | if (!perf_data__is_pipe(session->data)) | 1997 | if (perf_data__is_pipe(session->data)) |
| 1954 | err = __perf_session__process_events(session, | 1998 | return __perf_session__process_pipe_events(session); |
| 1955 | session->header.data_offset, | ||
| 1956 | session->header.data_size, size); | ||
| 1957 | else | ||
| 1958 | err = __perf_session__process_pipe_events(session); | ||
| 1959 | 1999 | ||
| 1960 | return err; | 2000 | return __perf_session__process_events(session); |
| 1961 | } | 2001 | } |
| 1962 | 2002 | ||
| 1963 | bool perf_session__has_traces(struct perf_session *session, const char *msg) | 2003 | bool perf_session__has_traces(struct perf_session *session, const char *msg) |
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index 64d1f36dee99..5b5a167b43ce 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py | |||
| @@ -1,5 +1,3 @@ | |||
| 1 | #!/usr/bin/python | ||
| 2 | |||
| 3 | from os import getenv | 1 | from os import getenv |
| 4 | from subprocess import Popen, PIPE | 2 | from subprocess import Popen, PIPE |
| 5 | from re import sub | 3 | from re import sub |
| @@ -55,9 +53,14 @@ ext_sources = [f.strip() for f in open('util/python-ext-sources') | |||
| 55 | # use full paths with source files | 53 | # use full paths with source files |
| 56 | ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) | 54 | ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources)) |
| 57 | 55 | ||
| 56 | extra_libraries = [] | ||
| 57 | if '-DHAVE_LIBNUMA_SUPPORT' in cflags: | ||
| 58 | extra_libraries = [ 'numa' ] | ||
| 59 | |||
| 58 | perf = Extension('perf', | 60 | perf = Extension('perf', |
| 59 | sources = ext_sources, | 61 | sources = ext_sources, |
| 60 | include_dirs = ['util/include'], | 62 | include_dirs = ['util/include'], |
| 63 | libraries = extra_libraries, | ||
| 61 | extra_compile_args = cflags, | 64 | extra_compile_args = cflags, |
| 62 | extra_objects = [libtraceevent, libapikfs], | 65 | extra_objects = [libtraceevent, libapikfs], |
| 63 | ) | 66 | ) |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 6c1a83768eb0..d2299e912e59 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include "sort.h" | 6 | #include "sort.h" |
| 7 | #include "hist.h" | 7 | #include "hist.h" |
| 8 | #include "comm.h" | 8 | #include "comm.h" |
| 9 | #include "map.h" | ||
| 9 | #include "symbol.h" | 10 | #include "symbol.h" |
| 10 | #include "thread.h" | 11 | #include "thread.h" |
| 11 | #include "evsel.h" | 12 | #include "evsel.h" |
| @@ -230,8 +231,14 @@ static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) | |||
| 230 | if (sym_l == sym_r) | 231 | if (sym_l == sym_r) |
| 231 | return 0; | 232 | return 0; |
| 232 | 233 | ||
| 233 | if (sym_l->inlined || sym_r->inlined) | 234 | if (sym_l->inlined || sym_r->inlined) { |
| 234 | return strcmp(sym_l->name, sym_r->name); | 235 | int ret = strcmp(sym_l->name, sym_r->name); |
| 236 | |||
| 237 | if (ret) | ||
| 238 | return ret; | ||
| 239 | if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start)) | ||
| 240 | return 0; | ||
| 241 | } | ||
| 235 | 242 | ||
| 236 | if (sym_l->start != sym_r->start) | 243 | if (sym_l->start != sym_r->start) |
| 237 | return (int64_t)(sym_r->start - sym_l->start); | 244 | return (int64_t)(sym_r->start - sym_l->start); |
| @@ -428,8 +435,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, | |||
| 428 | { | 435 | { |
| 429 | 436 | ||
| 430 | struct symbol *sym = he->ms.sym; | 437 | struct symbol *sym = he->ms.sym; |
| 431 | struct map *map = he->ms.map; | ||
| 432 | struct perf_evsel *evsel = hists_to_evsel(he->hists); | ||
| 433 | struct annotation *notes; | 438 | struct annotation *notes; |
| 434 | double ipc = 0.0, coverage = 0.0; | 439 | double ipc = 0.0, coverage = 0.0; |
| 435 | char tmp[64]; | 440 | char tmp[64]; |
| @@ -437,11 +442,6 @@ static int hist_entry__sym_ipc_snprintf(struct hist_entry *he, char *bf, | |||
| 437 | if (!sym) | 442 | if (!sym) |
| 438 | return repsep_snprintf(bf, size, "%-*s", width, "-"); | 443 | return repsep_snprintf(bf, size, "%-*s", width, "-"); |
| 439 | 444 | ||
| 440 | if (!sym->annotate2 && symbol__annotate2(sym, map, evsel, | ||
| 441 | &annotation__default_options, NULL) < 0) { | ||
| 442 | return 0; | ||
| 443 | } | ||
| 444 | |||
| 445 | notes = symbol__annotation(sym); | 445 | notes = symbol__annotation(sym); |
| 446 | 446 | ||
| 447 | if (notes->hit_cycles) | 447 | if (notes->hit_cycles) |
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 130fe37fe2df..2fbee0b1011c 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
| @@ -9,7 +9,8 @@ | |||
| 9 | #include <linux/list.h> | 9 | #include <linux/list.h> |
| 10 | #include "cache.h" | 10 | #include "cache.h" |
| 11 | #include <linux/rbtree.h> | 11 | #include <linux/rbtree.h> |
| 12 | #include "symbol.h" | 12 | #include "map_symbol.h" |
| 13 | #include "symbol_conf.h" | ||
| 13 | #include "string.h" | 14 | #include "string.h" |
| 14 | #include "callchain.h" | 15 | #include "callchain.h" |
| 15 | #include "values.h" | 16 | #include "values.h" |
| @@ -145,8 +146,8 @@ struct hist_entry { | |||
| 145 | union { | 146 | union { |
| 146 | /* this is for hierarchical entry structure */ | 147 | /* this is for hierarchical entry structure */ |
| 147 | struct { | 148 | struct { |
| 148 | struct rb_root hroot_in; | 149 | struct rb_root_cached hroot_in; |
| 149 | struct rb_root hroot_out; | 150 | struct rb_root_cached hroot_out; |
| 150 | }; /* non-leaf entries */ | 151 | }; /* non-leaf entries */ |
| 151 | struct rb_root sorted_chain; /* leaf entry has callchains */ | 152 | struct rb_root sorted_chain; /* leaf entry has callchains */ |
| 152 | }; | 153 | }; |
diff --git a/tools/perf/util/srccode.h b/tools/perf/util/srccode.h index e500a746d5f1..1b5ed769779c 100644 --- a/tools/perf/util/srccode.h +++ b/tools/perf/util/srccode.h | |||
| @@ -1,6 +1,19 @@ | |||
| 1 | #ifndef SRCCODE_H | 1 | #ifndef SRCCODE_H |
| 2 | #define SRCCODE_H 1 | 2 | #define SRCCODE_H 1 |
| 3 | 3 | ||
| 4 | struct srccode_state { | ||
| 5 | char *srcfile; | ||
| 6 | unsigned line; | ||
| 7 | }; | ||
| 8 | |||
| 9 | static inline void srccode_state_init(struct srccode_state *state) | ||
| 10 | { | ||
| 11 | state->srcfile = NULL; | ||
| 12 | state->line = 0; | ||
| 13 | } | ||
| 14 | |||
| 15 | void srccode_state_free(struct srccode_state *state); | ||
| 16 | |||
| 4 | /* Result is not 0 terminated */ | 17 | /* Result is not 0 terminated */ |
| 5 | char *find_sourceline(char *fn, unsigned line, int *lenp); | 18 | char *find_sourceline(char *fn, unsigned line, int *lenp); |
| 6 | 19 | ||
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index dc86597d0cc4..10ca1533937e 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c | |||
| @@ -104,7 +104,7 @@ static struct symbol *new_inline_sym(struct dso *dso, | |||
| 104 | } else { | 104 | } else { |
| 105 | /* create a fake symbol for the inline frame */ | 105 | /* create a fake symbol for the inline frame */ |
| 106 | inline_sym = symbol__new(base_sym ? base_sym->start : 0, | 106 | inline_sym = symbol__new(base_sym ? base_sym->start : 0, |
| 107 | base_sym ? base_sym->end : 0, | 107 | base_sym ? (base_sym->end - base_sym->start) : 0, |
| 108 | base_sym ? base_sym->binding : 0, | 108 | base_sym ? base_sym->binding : 0, |
| 109 | base_sym ? base_sym->type : 0, | 109 | base_sym ? base_sym->type : 0, |
| 110 | funcname); | 110 | funcname); |
| @@ -594,11 +594,12 @@ struct srcline_node { | |||
| 594 | struct rb_node rb_node; | 594 | struct rb_node rb_node; |
| 595 | }; | 595 | }; |
| 596 | 596 | ||
| 597 | void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) | 597 | void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline) |
| 598 | { | 598 | { |
| 599 | struct rb_node **p = &tree->rb_node; | 599 | struct rb_node **p = &tree->rb_root.rb_node; |
| 600 | struct rb_node *parent = NULL; | 600 | struct rb_node *parent = NULL; |
| 601 | struct srcline_node *i, *node; | 601 | struct srcline_node *i, *node; |
| 602 | bool leftmost = true; | ||
| 602 | 603 | ||
| 603 | node = zalloc(sizeof(struct srcline_node)); | 604 | node = zalloc(sizeof(struct srcline_node)); |
| 604 | if (!node) { | 605 | if (!node) { |
| @@ -614,16 +615,18 @@ void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline) | |||
| 614 | i = rb_entry(parent, struct srcline_node, rb_node); | 615 | i = rb_entry(parent, struct srcline_node, rb_node); |
| 615 | if (addr < i->addr) | 616 | if (addr < i->addr) |
| 616 | p = &(*p)->rb_left; | 617 | p = &(*p)->rb_left; |
| 617 | else | 618 | else { |
| 618 | p = &(*p)->rb_right; | 619 | p = &(*p)->rb_right; |
| 620 | leftmost = false; | ||
| 621 | } | ||
| 619 | } | 622 | } |
| 620 | rb_link_node(&node->rb_node, parent, p); | 623 | rb_link_node(&node->rb_node, parent, p); |
| 621 | rb_insert_color(&node->rb_node, tree); | 624 | rb_insert_color_cached(&node->rb_node, tree, leftmost); |
| 622 | } | 625 | } |
| 623 | 626 | ||
| 624 | char *srcline__tree_find(struct rb_root *tree, u64 addr) | 627 | char *srcline__tree_find(struct rb_root_cached *tree, u64 addr) |
| 625 | { | 628 | { |
| 626 | struct rb_node *n = tree->rb_node; | 629 | struct rb_node *n = tree->rb_root.rb_node; |
| 627 | 630 | ||
| 628 | while (n) { | 631 | while (n) { |
| 629 | struct srcline_node *i = rb_entry(n, struct srcline_node, | 632 | struct srcline_node *i = rb_entry(n, struct srcline_node, |
| @@ -640,15 +643,15 @@ char *srcline__tree_find(struct rb_root *tree, u64 addr) | |||
| 640 | return NULL; | 643 | return NULL; |
| 641 | } | 644 | } |
| 642 | 645 | ||
| 643 | void srcline__tree_delete(struct rb_root *tree) | 646 | void srcline__tree_delete(struct rb_root_cached *tree) |
| 644 | { | 647 | { |
| 645 | struct srcline_node *pos; | 648 | struct srcline_node *pos; |
| 646 | struct rb_node *next = rb_first(tree); | 649 | struct rb_node *next = rb_first_cached(tree); |
| 647 | 650 | ||
| 648 | while (next) { | 651 | while (next) { |
| 649 | pos = rb_entry(next, struct srcline_node, rb_node); | 652 | pos = rb_entry(next, struct srcline_node, rb_node); |
| 650 | next = rb_next(&pos->rb_node); | 653 | next = rb_next(&pos->rb_node); |
| 651 | rb_erase(&pos->rb_node, tree); | 654 | rb_erase_cached(&pos->rb_node, tree); |
| 652 | free_srcline(pos->srcline); | 655 | free_srcline(pos->srcline); |
| 653 | zfree(&pos); | 656 | zfree(&pos); |
| 654 | } | 657 | } |
| @@ -682,28 +685,32 @@ void inline_node__delete(struct inline_node *node) | |||
| 682 | free(node); | 685 | free(node); |
| 683 | } | 686 | } |
| 684 | 687 | ||
| 685 | void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines) | 688 | void inlines__tree_insert(struct rb_root_cached *tree, |
| 689 | struct inline_node *inlines) | ||
| 686 | { | 690 | { |
| 687 | struct rb_node **p = &tree->rb_node; | 691 | struct rb_node **p = &tree->rb_root.rb_node; |
| 688 | struct rb_node *parent = NULL; | 692 | struct rb_node *parent = NULL; |
| 689 | const u64 addr = inlines->addr; | 693 | const u64 addr = inlines->addr; |
| 690 | struct inline_node *i; | 694 | struct inline_node *i; |
| 695 | bool leftmost = true; | ||
| 691 | 696 | ||
| 692 | while (*p != NULL) { | 697 | while (*p != NULL) { |
| 693 | parent = *p; | 698 | parent = *p; |
| 694 | i = rb_entry(parent, struct inline_node, rb_node); | 699 | i = rb_entry(parent, struct inline_node, rb_node); |
| 695 | if (addr < i->addr) | 700 | if (addr < i->addr) |
| 696 | p = &(*p)->rb_left; | 701 | p = &(*p)->rb_left; |
| 697 | else | 702 | else { |
| 698 | p = &(*p)->rb_right; | 703 | p = &(*p)->rb_right; |
| 704 | leftmost = false; | ||
| 705 | } | ||
| 699 | } | 706 | } |
| 700 | rb_link_node(&inlines->rb_node, parent, p); | 707 | rb_link_node(&inlines->rb_node, parent, p); |
| 701 | rb_insert_color(&inlines->rb_node, tree); | 708 | rb_insert_color_cached(&inlines->rb_node, tree, leftmost); |
| 702 | } | 709 | } |
| 703 | 710 | ||
| 704 | struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) | 711 | struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr) |
| 705 | { | 712 | { |
| 706 | struct rb_node *n = tree->rb_node; | 713 | struct rb_node *n = tree->rb_root.rb_node; |
| 707 | 714 | ||
| 708 | while (n) { | 715 | while (n) { |
| 709 | struct inline_node *i = rb_entry(n, struct inline_node, | 716 | struct inline_node *i = rb_entry(n, struct inline_node, |
| @@ -720,15 +727,15 @@ struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr) | |||
| 720 | return NULL; | 727 | return NULL; |
| 721 | } | 728 | } |
| 722 | 729 | ||
| 723 | void inlines__tree_delete(struct rb_root *tree) | 730 | void inlines__tree_delete(struct rb_root_cached *tree) |
| 724 | { | 731 | { |
| 725 | struct inline_node *pos; | 732 | struct inline_node *pos; |
| 726 | struct rb_node *next = rb_first(tree); | 733 | struct rb_node *next = rb_first_cached(tree); |
| 727 | 734 | ||
| 728 | while (next) { | 735 | while (next) { |
| 729 | pos = rb_entry(next, struct inline_node, rb_node); | 736 | pos = rb_entry(next, struct inline_node, rb_node); |
| 730 | next = rb_next(&pos->rb_node); | 737 | next = rb_next(&pos->rb_node); |
| 731 | rb_erase(&pos->rb_node, tree); | 738 | rb_erase_cached(&pos->rb_node, tree); |
| 732 | inline_node__delete(pos); | 739 | inline_node__delete(pos); |
| 733 | } | 740 | } |
| 734 | } | 741 | } |
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 5762212dc342..b11a0aaaa676 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h | |||
| @@ -19,11 +19,11 @@ void free_srcline(char *srcline); | |||
| 19 | char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); | 19 | char *get_srcline_split(struct dso *dso, u64 addr, unsigned *line); |
| 20 | 20 | ||
| 21 | /* insert the srcline into the DSO, which will take ownership */ | 21 | /* insert the srcline into the DSO, which will take ownership */ |
| 22 | void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline); | 22 | void srcline__tree_insert(struct rb_root_cached *tree, u64 addr, char *srcline); |
| 23 | /* find previously inserted srcline */ | 23 | /* find previously inserted srcline */ |
| 24 | char *srcline__tree_find(struct rb_root *tree, u64 addr); | 24 | char *srcline__tree_find(struct rb_root_cached *tree, u64 addr); |
| 25 | /* delete all srclines within the tree */ | 25 | /* delete all srclines within the tree */ |
| 26 | void srcline__tree_delete(struct rb_root *tree); | 26 | void srcline__tree_delete(struct rb_root_cached *tree); |
| 27 | 27 | ||
| 28 | #define SRCLINE_UNKNOWN ((char *) "??:0") | 28 | #define SRCLINE_UNKNOWN ((char *) "??:0") |
| 29 | 29 | ||
| @@ -46,10 +46,11 @@ struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr, | |||
| 46 | void inline_node__delete(struct inline_node *node); | 46 | void inline_node__delete(struct inline_node *node); |
| 47 | 47 | ||
| 48 | /* insert the inline node list into the DSO, which will take ownership */ | 48 | /* insert the inline node list into the DSO, which will take ownership */ |
| 49 | void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines); | 49 | void inlines__tree_insert(struct rb_root_cached *tree, |
| 50 | struct inline_node *inlines); | ||
| 50 | /* find previously inserted inline node list */ | 51 | /* find previously inserted inline node list */ |
| 51 | struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr); | 52 | struct inline_node *inlines__tree_find(struct rb_root_cached *tree, u64 addr); |
| 52 | /* delete all nodes within the tree of inline_node s */ | 53 | /* delete all nodes within the tree of inline_node s */ |
| 53 | void inlines__tree_delete(struct rb_root *tree); | 54 | void inlines__tree_delete(struct rb_root_cached *tree); |
| 54 | 55 | ||
| 55 | #endif /* PERF_SRCLINE_H */ | 56 | #endif /* PERF_SRCLINE_H */ |
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index 665ee374fc01..6d043c78f3c2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include <inttypes.h> | 2 | #include <inttypes.h> |
| 3 | #include <linux/time64.h> | 3 | #include <linux/time64.h> |
| 4 | #include <math.h> | 4 | #include <math.h> |
| 5 | #include "color.h" | ||
| 5 | #include "evlist.h" | 6 | #include "evlist.h" |
| 6 | #include "evsel.h" | 7 | #include "evsel.h" |
| 7 | #include "stat.h" | 8 | #include "stat.h" |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 3c22c58b3e90..83d8094be4fe 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c | |||
| @@ -168,7 +168,7 @@ static void reset_stat(struct runtime_stat *st) | |||
| 168 | struct rb_node *pos, *next; | 168 | struct rb_node *pos, *next; |
| 169 | 169 | ||
| 170 | rblist = &st->value_list; | 170 | rblist = &st->value_list; |
| 171 | next = rb_first(&rblist->entries); | 171 | next = rb_first_cached(&rblist->entries); |
| 172 | while (next) { | 172 | while (next) { |
| 173 | pos = next; | 173 | pos = next; |
| 174 | next = rb_next(pos); | 174 | next = rb_next(pos); |
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index d58f1e08b170..7e82c71dcc42 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h | |||
| @@ -57,7 +57,7 @@ static inline unsigned int strlist__nr_entries(const struct strlist *slist) | |||
| 57 | /* For strlist iteration */ | 57 | /* For strlist iteration */ |
| 58 | static inline struct str_node *strlist__first(struct strlist *slist) | 58 | static inline struct str_node *strlist__first(struct strlist *slist) |
| 59 | { | 59 | { |
| 60 | struct rb_node *rn = rb_first(&slist->rblist.entries); | 60 | struct rb_node *rn = rb_first_cached(&slist->rblist.entries); |
| 61 | return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; | 61 | return rn ? rb_entry(rn, struct str_node, rb_node) : NULL; |
| 62 | } | 62 | } |
| 63 | static inline struct str_node *strlist__next(struct str_node *sn) | 63 | static inline struct str_node *strlist__next(struct str_node *sn) |
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index dca7dfae69ad..4ad106a5f2c0 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c | |||
| @@ -6,6 +6,8 @@ | |||
| 6 | #include <unistd.h> | 6 | #include <unistd.h> |
| 7 | #include <inttypes.h> | 7 | #include <inttypes.h> |
| 8 | 8 | ||
| 9 | #include "map.h" | ||
| 10 | #include "map_groups.h" | ||
| 9 | #include "symbol.h" | 11 | #include "symbol.h" |
| 10 | #include "demangle-java.h" | 12 | #include "demangle-java.h" |
| 11 | #include "demangle-rust.h" | 13 | #include "demangle-rust.h" |
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index 7119df77dc0b..17edbd4f6f85 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include "util.h" | 3 | #include "util.h" |
| 4 | 4 | ||
| 5 | #include <errno.h> | 5 | #include <errno.h> |
| 6 | #include <unistd.h> | ||
| 6 | #include <stdio.h> | 7 | #include <stdio.h> |
| 7 | #include <fcntl.h> | 8 | #include <fcntl.h> |
| 8 | #include <string.h> | 9 | #include <string.h> |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 48efad6d0f90..758bf5f74e6e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | #include "util.h" | 17 | #include "util.h" |
| 18 | #include "debug.h" | 18 | #include "debug.h" |
| 19 | #include "machine.h" | 19 | #include "machine.h" |
| 20 | #include "map.h" | ||
| 20 | #include "symbol.h" | 21 | #include "symbol.h" |
| 21 | #include "strlist.h" | 22 | #include "strlist.h" |
| 22 | #include "intlist.h" | 23 | #include "intlist.h" |
| @@ -163,7 +164,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) | |||
| 163 | return arch__choose_best_symbol(syma, symb); | 164 | return arch__choose_best_symbol(syma, symb); |
| 164 | } | 165 | } |
| 165 | 166 | ||
| 166 | void symbols__fixup_duplicate(struct rb_root *symbols) | 167 | void symbols__fixup_duplicate(struct rb_root_cached *symbols) |
| 167 | { | 168 | { |
| 168 | struct rb_node *nd; | 169 | struct rb_node *nd; |
| 169 | struct symbol *curr, *next; | 170 | struct symbol *curr, *next; |
| @@ -171,7 +172,7 @@ void symbols__fixup_duplicate(struct rb_root *symbols) | |||
| 171 | if (symbol_conf.allow_aliases) | 172 | if (symbol_conf.allow_aliases) |
| 172 | return; | 173 | return; |
| 173 | 174 | ||
| 174 | nd = rb_first(symbols); | 175 | nd = rb_first_cached(symbols); |
| 175 | 176 | ||
| 176 | while (nd) { | 177 | while (nd) { |
| 177 | curr = rb_entry(nd, struct symbol, rb_node); | 178 | curr = rb_entry(nd, struct symbol, rb_node); |
| @@ -186,20 +187,20 @@ again: | |||
| 186 | continue; | 187 | continue; |
| 187 | 188 | ||
| 188 | if (choose_best_symbol(curr, next) == SYMBOL_A) { | 189 | if (choose_best_symbol(curr, next) == SYMBOL_A) { |
| 189 | rb_erase(&next->rb_node, symbols); | 190 | rb_erase_cached(&next->rb_node, symbols); |
| 190 | symbol__delete(next); | 191 | symbol__delete(next); |
| 191 | goto again; | 192 | goto again; |
| 192 | } else { | 193 | } else { |
| 193 | nd = rb_next(&curr->rb_node); | 194 | nd = rb_next(&curr->rb_node); |
| 194 | rb_erase(&curr->rb_node, symbols); | 195 | rb_erase_cached(&curr->rb_node, symbols); |
| 195 | symbol__delete(curr); | 196 | symbol__delete(curr); |
| 196 | } | 197 | } |
| 197 | } | 198 | } |
| 198 | } | 199 | } |
| 199 | 200 | ||
| 200 | void symbols__fixup_end(struct rb_root *symbols) | 201 | void symbols__fixup_end(struct rb_root_cached *symbols) |
| 201 | { | 202 | { |
| 202 | struct rb_node *nd, *prevnd = rb_first(symbols); | 203 | struct rb_node *nd, *prevnd = rb_first_cached(symbols); |
| 203 | struct symbol *curr, *prev; | 204 | struct symbol *curr, *prev; |
| 204 | 205 | ||
| 205 | if (prevnd == NULL) | 206 | if (prevnd == NULL) |
| @@ -282,25 +283,27 @@ void symbol__delete(struct symbol *sym) | |||
| 282 | free(((void *)sym) - symbol_conf.priv_size); | 283 | free(((void *)sym) - symbol_conf.priv_size); |
| 283 | } | 284 | } |
| 284 | 285 | ||
| 285 | void symbols__delete(struct rb_root *symbols) | 286 | void symbols__delete(struct rb_root_cached *symbols) |
| 286 | { | 287 | { |
| 287 | struct symbol *pos; | 288 | struct symbol *pos; |
| 288 | struct rb_node *next = rb_first(symbols); | 289 | struct rb_node *next = rb_first_cached(symbols); |
| 289 | 290 | ||
| 290 | while (next) { | 291 | while (next) { |
| 291 | pos = rb_entry(next, struct symbol, rb_node); | 292 | pos = rb_entry(next, struct symbol, rb_node); |
| 292 | next = rb_next(&pos->rb_node); | 293 | next = rb_next(&pos->rb_node); |
| 293 | rb_erase(&pos->rb_node, symbols); | 294 | rb_erase_cached(&pos->rb_node, symbols); |
| 294 | symbol__delete(pos); | 295 | symbol__delete(pos); |
| 295 | } | 296 | } |
| 296 | } | 297 | } |
| 297 | 298 | ||
| 298 | void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) | 299 | void __symbols__insert(struct rb_root_cached *symbols, |
| 300 | struct symbol *sym, bool kernel) | ||
| 299 | { | 301 | { |
| 300 | struct rb_node **p = &symbols->rb_node; | 302 | struct rb_node **p = &symbols->rb_root.rb_node; |
| 301 | struct rb_node *parent = NULL; | 303 | struct rb_node *parent = NULL; |
| 302 | const u64 ip = sym->start; | 304 | const u64 ip = sym->start; |
| 303 | struct symbol *s; | 305 | struct symbol *s; |
| 306 | bool leftmost = true; | ||
| 304 | 307 | ||
| 305 | if (kernel) { | 308 | if (kernel) { |
| 306 | const char *name = sym->name; | 309 | const char *name = sym->name; |
| @@ -318,26 +321,28 @@ void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel) | |||
| 318 | s = rb_entry(parent, struct symbol, rb_node); | 321 | s = rb_entry(parent, struct symbol, rb_node); |
| 319 | if (ip < s->start) | 322 | if (ip < s->start) |
| 320 | p = &(*p)->rb_left; | 323 | p = &(*p)->rb_left; |
| 321 | else | 324 | else { |
| 322 | p = &(*p)->rb_right; | 325 | p = &(*p)->rb_right; |
| 326 | leftmost = false; | ||
| 327 | } | ||
| 323 | } | 328 | } |
| 324 | rb_link_node(&sym->rb_node, parent, p); | 329 | rb_link_node(&sym->rb_node, parent, p); |
| 325 | rb_insert_color(&sym->rb_node, symbols); | 330 | rb_insert_color_cached(&sym->rb_node, symbols, leftmost); |
| 326 | } | 331 | } |
| 327 | 332 | ||
| 328 | void symbols__insert(struct rb_root *symbols, struct symbol *sym) | 333 | void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym) |
| 329 | { | 334 | { |
| 330 | __symbols__insert(symbols, sym, false); | 335 | __symbols__insert(symbols, sym, false); |
| 331 | } | 336 | } |
| 332 | 337 | ||
| 333 | static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) | 338 | static struct symbol *symbols__find(struct rb_root_cached *symbols, u64 ip) |
| 334 | { | 339 | { |
| 335 | struct rb_node *n; | 340 | struct rb_node *n; |
| 336 | 341 | ||
| 337 | if (symbols == NULL) | 342 | if (symbols == NULL) |
| 338 | return NULL; | 343 | return NULL; |
| 339 | 344 | ||
| 340 | n = symbols->rb_node; | 345 | n = symbols->rb_root.rb_node; |
| 341 | 346 | ||
| 342 | while (n) { | 347 | while (n) { |
| 343 | struct symbol *s = rb_entry(n, struct symbol, rb_node); | 348 | struct symbol *s = rb_entry(n, struct symbol, rb_node); |
| @@ -353,9 +358,9 @@ static struct symbol *symbols__find(struct rb_root *symbols, u64 ip) | |||
| 353 | return NULL; | 358 | return NULL; |
| 354 | } | 359 | } |
| 355 | 360 | ||
| 356 | static struct symbol *symbols__first(struct rb_root *symbols) | 361 | static struct symbol *symbols__first(struct rb_root_cached *symbols) |
| 357 | { | 362 | { |
| 358 | struct rb_node *n = rb_first(symbols); | 363 | struct rb_node *n = rb_first_cached(symbols); |
| 359 | 364 | ||
| 360 | if (n) | 365 | if (n) |
| 361 | return rb_entry(n, struct symbol, rb_node); | 366 | return rb_entry(n, struct symbol, rb_node); |
| @@ -363,9 +368,9 @@ static struct symbol *symbols__first(struct rb_root *symbols) | |||
| 363 | return NULL; | 368 | return NULL; |
| 364 | } | 369 | } |
| 365 | 370 | ||
| 366 | static struct symbol *symbols__last(struct rb_root *symbols) | 371 | static struct symbol *symbols__last(struct rb_root_cached *symbols) |
| 367 | { | 372 | { |
| 368 | struct rb_node *n = rb_last(symbols); | 373 | struct rb_node *n = rb_last(&symbols->rb_root); |
| 369 | 374 | ||
| 370 | if (n) | 375 | if (n) |
| 371 | return rb_entry(n, struct symbol, rb_node); | 376 | return rb_entry(n, struct symbol, rb_node); |
| @@ -383,11 +388,12 @@ static struct symbol *symbols__next(struct symbol *sym) | |||
| 383 | return NULL; | 388 | return NULL; |
| 384 | } | 389 | } |
| 385 | 390 | ||
| 386 | static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) | 391 | static void symbols__insert_by_name(struct rb_root_cached *symbols, struct symbol *sym) |
| 387 | { | 392 | { |
| 388 | struct rb_node **p = &symbols->rb_node; | 393 | struct rb_node **p = &symbols->rb_root.rb_node; |
| 389 | struct rb_node *parent = NULL; | 394 | struct rb_node *parent = NULL; |
| 390 | struct symbol_name_rb_node *symn, *s; | 395 | struct symbol_name_rb_node *symn, *s; |
| 396 | bool leftmost = true; | ||
| 391 | 397 | ||
| 392 | symn = container_of(sym, struct symbol_name_rb_node, sym); | 398 | symn = container_of(sym, struct symbol_name_rb_node, sym); |
| 393 | 399 | ||
| @@ -396,19 +402,21 @@ static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym) | |||
| 396 | s = rb_entry(parent, struct symbol_name_rb_node, rb_node); | 402 | s = rb_entry(parent, struct symbol_name_rb_node, rb_node); |
| 397 | if (strcmp(sym->name, s->sym.name) < 0) | 403 | if (strcmp(sym->name, s->sym.name) < 0) |
| 398 | p = &(*p)->rb_left; | 404 | p = &(*p)->rb_left; |
| 399 | else | 405 | else { |
| 400 | p = &(*p)->rb_right; | 406 | p = &(*p)->rb_right; |
| 407 | leftmost = false; | ||
| 408 | } | ||
| 401 | } | 409 | } |
| 402 | rb_link_node(&symn->rb_node, parent, p); | 410 | rb_link_node(&symn->rb_node, parent, p); |
| 403 | rb_insert_color(&symn->rb_node, symbols); | 411 | rb_insert_color_cached(&symn->rb_node, symbols, leftmost); |
| 404 | } | 412 | } |
| 405 | 413 | ||
| 406 | static void symbols__sort_by_name(struct rb_root *symbols, | 414 | static void symbols__sort_by_name(struct rb_root_cached *symbols, |
| 407 | struct rb_root *source) | 415 | struct rb_root_cached *source) |
| 408 | { | 416 | { |
| 409 | struct rb_node *nd; | 417 | struct rb_node *nd; |
| 410 | 418 | ||
| 411 | for (nd = rb_first(source); nd; nd = rb_next(nd)) { | 419 | for (nd = rb_first_cached(source); nd; nd = rb_next(nd)) { |
| 412 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); | 420 | struct symbol *pos = rb_entry(nd, struct symbol, rb_node); |
| 413 | symbols__insert_by_name(symbols, pos); | 421 | symbols__insert_by_name(symbols, pos); |
| 414 | } | 422 | } |
| @@ -431,7 +439,7 @@ int symbol__match_symbol_name(const char *name, const char *str, | |||
| 431 | return arch__compare_symbol_names(name, str); | 439 | return arch__compare_symbol_names(name, str); |
| 432 | } | 440 | } |
| 433 | 441 | ||
| 434 | static struct symbol *symbols__find_by_name(struct rb_root *symbols, | 442 | static struct symbol *symbols__find_by_name(struct rb_root_cached *symbols, |
| 435 | const char *name, | 443 | const char *name, |
| 436 | enum symbol_tag_include includes) | 444 | enum symbol_tag_include includes) |
| 437 | { | 445 | { |
| @@ -441,7 +449,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, | |||
| 441 | if (symbols == NULL) | 449 | if (symbols == NULL) |
| 442 | return NULL; | 450 | return NULL; |
| 443 | 451 | ||
| 444 | n = symbols->rb_node; | 452 | n = symbols->rb_root.rb_node; |
| 445 | 453 | ||
| 446 | while (n) { | 454 | while (n) { |
| 447 | int cmp; | 455 | int cmp; |
| @@ -644,7 +652,7 @@ static int map__process_kallsym_symbol(void *arg, const char *name, | |||
| 644 | { | 652 | { |
| 645 | struct symbol *sym; | 653 | struct symbol *sym; |
| 646 | struct dso *dso = arg; | 654 | struct dso *dso = arg; |
| 647 | struct rb_root *root = &dso->symbols; | 655 | struct rb_root_cached *root = &dso->symbols; |
| 648 | 656 | ||
| 649 | if (!symbol_type__filter(type)) | 657 | if (!symbol_type__filter(type)) |
| 650 | return 0; | 658 | return 0; |
| @@ -681,14 +689,14 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct | |||
| 681 | struct map *curr_map; | 689 | struct map *curr_map; |
| 682 | struct symbol *pos; | 690 | struct symbol *pos; |
| 683 | int count = 0; | 691 | int count = 0; |
| 684 | struct rb_root old_root = dso->symbols; | 692 | struct rb_root_cached old_root = dso->symbols; |
| 685 | struct rb_root *root = &dso->symbols; | 693 | struct rb_root_cached *root = &dso->symbols; |
| 686 | struct rb_node *next = rb_first(root); | 694 | struct rb_node *next = rb_first_cached(root); |
| 687 | 695 | ||
| 688 | if (!kmaps) | 696 | if (!kmaps) |
| 689 | return -1; | 697 | return -1; |
| 690 | 698 | ||
| 691 | *root = RB_ROOT; | 699 | *root = RB_ROOT_CACHED; |
| 692 | 700 | ||
| 693 | while (next) { | 701 | while (next) { |
| 694 | char *module; | 702 | char *module; |
| @@ -696,8 +704,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct | |||
| 696 | pos = rb_entry(next, struct symbol, rb_node); | 704 | pos = rb_entry(next, struct symbol, rb_node); |
| 697 | next = rb_next(&pos->rb_node); | 705 | next = rb_next(&pos->rb_node); |
| 698 | 706 | ||
| 699 | rb_erase_init(&pos->rb_node, &old_root); | 707 | rb_erase_cached(&pos->rb_node, &old_root); |
| 700 | 708 | RB_CLEAR_NODE(&pos->rb_node); | |
| 701 | module = strchr(pos->name, '\t'); | 709 | module = strchr(pos->name, '\t'); |
| 702 | if (module) | 710 | if (module) |
| 703 | *module = '\0'; | 711 | *module = '\0'; |
| @@ -710,6 +718,8 @@ static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct | |||
| 710 | } | 718 | } |
| 711 | 719 | ||
| 712 | pos->start -= curr_map->start - curr_map->pgoff; | 720 | pos->start -= curr_map->start - curr_map->pgoff; |
| 721 | if (pos->end > curr_map->end) | ||
| 722 | pos->end = curr_map->end; | ||
| 713 | if (pos->end) | 723 | if (pos->end) |
| 714 | pos->end -= curr_map->start - curr_map->pgoff; | 724 | pos->end -= curr_map->start - curr_map->pgoff; |
| 715 | symbols__insert(&curr_map->dso->symbols, pos); | 725 | symbols__insert(&curr_map->dso->symbols, pos); |
| @@ -734,8 +744,8 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, | |||
| 734 | struct map *curr_map = initial_map; | 744 | struct map *curr_map = initial_map; |
| 735 | struct symbol *pos; | 745 | struct symbol *pos; |
| 736 | int count = 0, moved = 0; | 746 | int count = 0, moved = 0; |
| 737 | struct rb_root *root = &dso->symbols; | 747 | struct rb_root_cached *root = &dso->symbols; |
| 738 | struct rb_node *next = rb_first(root); | 748 | struct rb_node *next = rb_first_cached(root); |
| 739 | int kernel_range = 0; | 749 | int kernel_range = 0; |
| 740 | bool x86_64; | 750 | bool x86_64; |
| 741 | 751 | ||
| @@ -849,7 +859,7 @@ static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, | |||
| 849 | } | 859 | } |
| 850 | add_symbol: | 860 | add_symbol: |
| 851 | if (curr_map != initial_map) { | 861 | if (curr_map != initial_map) { |
| 852 | rb_erase(&pos->rb_node, root); | 862 | rb_erase_cached(&pos->rb_node, root); |
| 853 | symbols__insert(&curr_map->dso->symbols, pos); | 863 | symbols__insert(&curr_map->dso->symbols, pos); |
| 854 | ++moved; | 864 | ++moved; |
| 855 | } else | 865 | } else |
| @@ -857,7 +867,7 @@ add_symbol: | |||
| 857 | 867 | ||
| 858 | continue; | 868 | continue; |
| 859 | discard_symbol: | 869 | discard_symbol: |
| 860 | rb_erase(&pos->rb_node, root); | 870 | rb_erase_cached(&pos->rb_node, root); |
| 861 | symbol__delete(pos); | 871 | symbol__delete(pos); |
| 862 | } | 872 | } |
| 863 | 873 | ||
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 14d9d438e7e2..9a8fe012910a 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
| @@ -5,16 +5,13 @@ | |||
| 5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
| 6 | #include <stdbool.h> | 6 | #include <stdbool.h> |
| 7 | #include <stdint.h> | 7 | #include <stdint.h> |
| 8 | #include "map.h" | ||
| 9 | #include "../perf.h" | ||
| 10 | #include <linux/list.h> | 8 | #include <linux/list.h> |
| 11 | #include <linux/rbtree.h> | 9 | #include <linux/rbtree.h> |
| 12 | #include <stdio.h> | 10 | #include <stdio.h> |
| 13 | #include <byteswap.h> | 11 | #include "map_symbol.h" |
| 14 | #include <libgen.h> | 12 | #include "branch.h" |
| 15 | #include "build-id.h" | ||
| 16 | #include "event.h" | ||
| 17 | #include "path.h" | 13 | #include "path.h" |
| 14 | #include "symbol_conf.h" | ||
| 18 | 15 | ||
| 19 | #ifdef HAVE_LIBELF_SUPPORT | 16 | #ifdef HAVE_LIBELF_SUPPORT |
| 20 | #include <libelf.h> | 17 | #include <libelf.h> |
| @@ -24,6 +21,10 @@ | |||
| 24 | 21 | ||
| 25 | #include "dso.h" | 22 | #include "dso.h" |
| 26 | 23 | ||
| 24 | struct map; | ||
| 25 | struct map_groups; | ||
| 26 | struct option; | ||
| 27 | |||
| 27 | /* | 28 | /* |
| 28 | * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; | 29 | * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP; |
| 29 | * for newer versions we can use mmap to reduce memory usage: | 30 | * for newer versions we can use mmap to reduce memory usage: |
| @@ -68,7 +69,7 @@ struct symbol { | |||
| 68 | }; | 69 | }; |
| 69 | 70 | ||
| 70 | void symbol__delete(struct symbol *sym); | 71 | void symbol__delete(struct symbol *sym); |
| 71 | void symbols__delete(struct rb_root *symbols); | 72 | void symbols__delete(struct rb_root_cached *symbols); |
| 72 | 73 | ||
| 73 | /* symbols__for_each_entry - iterate over symbols (rb_root) | 74 | /* symbols__for_each_entry - iterate over symbols (rb_root) |
| 74 | * | 75 | * |
| @@ -77,7 +78,7 @@ void symbols__delete(struct rb_root *symbols); | |||
| 77 | * @nd: the 'struct rb_node *' to use as a temporary storage | 78 | * @nd: the 'struct rb_node *' to use as a temporary storage |
| 78 | */ | 79 | */ |
| 79 | #define symbols__for_each_entry(symbols, pos, nd) \ | 80 | #define symbols__for_each_entry(symbols, pos, nd) \ |
| 80 | for (nd = rb_first(symbols); \ | 81 | for (nd = rb_first_cached(symbols); \ |
| 81 | nd && (pos = rb_entry(nd, struct symbol, rb_node)); \ | 82 | nd && (pos = rb_entry(nd, struct symbol, rb_node)); \ |
| 82 | nd = rb_next(nd)) | 83 | nd = rb_next(nd)) |
| 83 | 84 | ||
| @@ -89,69 +90,6 @@ static inline size_t symbol__size(const struct symbol *sym) | |||
| 89 | struct strlist; | 90 | struct strlist; |
| 90 | struct intlist; | 91 | struct intlist; |
| 91 | 92 | ||
| 92 | struct symbol_conf { | ||
| 93 | unsigned short priv_size; | ||
| 94 | bool try_vmlinux_path, | ||
| 95 | init_annotation, | ||
| 96 | force, | ||
| 97 | ignore_vmlinux, | ||
| 98 | ignore_vmlinux_buildid, | ||
| 99 | show_kernel_path, | ||
| 100 | use_modules, | ||
| 101 | allow_aliases, | ||
| 102 | sort_by_name, | ||
| 103 | show_nr_samples, | ||
| 104 | show_total_period, | ||
| 105 | use_callchain, | ||
| 106 | cumulate_callchain, | ||
| 107 | show_branchflag_count, | ||
| 108 | exclude_other, | ||
| 109 | show_cpu_utilization, | ||
| 110 | initialized, | ||
| 111 | kptr_restrict, | ||
| 112 | event_group, | ||
| 113 | demangle, | ||
| 114 | demangle_kernel, | ||
| 115 | filter_relative, | ||
| 116 | show_hist_headers, | ||
| 117 | branch_callstack, | ||
| 118 | has_filter, | ||
| 119 | show_ref_callgraph, | ||
| 120 | hide_unresolved, | ||
| 121 | raw_trace, | ||
| 122 | report_hierarchy, | ||
| 123 | inline_name; | ||
| 124 | const char *vmlinux_name, | ||
| 125 | *kallsyms_name, | ||
| 126 | *source_prefix, | ||
| 127 | *field_sep, | ||
| 128 | *graph_function; | ||
| 129 | const char *default_guest_vmlinux_name, | ||
| 130 | *default_guest_kallsyms, | ||
| 131 | *default_guest_modules; | ||
| 132 | const char *guestmount; | ||
| 133 | const char *dso_list_str, | ||
| 134 | *comm_list_str, | ||
| 135 | *pid_list_str, | ||
| 136 | *tid_list_str, | ||
| 137 | *sym_list_str, | ||
| 138 | *col_width_list_str, | ||
| 139 | *bt_stop_list_str; | ||
| 140 | struct strlist *dso_list, | ||
| 141 | *comm_list, | ||
| 142 | *sym_list, | ||
| 143 | *dso_from_list, | ||
| 144 | *dso_to_list, | ||
| 145 | *sym_from_list, | ||
| 146 | *sym_to_list, | ||
| 147 | *bt_stop_list; | ||
| 148 | struct intlist *pid_list, | ||
| 149 | *tid_list; | ||
| 150 | const char *symfs; | ||
| 151 | }; | ||
| 152 | |||
| 153 | extern struct symbol_conf symbol_conf; | ||
| 154 | |||
| 155 | struct symbol_name_rb_node { | 93 | struct symbol_name_rb_node { |
| 156 | struct rb_node rb_node; | 94 | struct rb_node rb_node; |
| 157 | struct symbol sym; | 95 | struct symbol sym; |
| @@ -178,19 +116,6 @@ struct ref_reloc_sym { | |||
| 178 | u64 unrelocated_addr; | 116 | u64 unrelocated_addr; |
| 179 | }; | 117 | }; |
| 180 | 118 | ||
| 181 | struct map_symbol { | ||
| 182 | struct map *map; | ||
| 183 | struct symbol *sym; | ||
| 184 | }; | ||
| 185 | |||
| 186 | struct addr_map_symbol { | ||
| 187 | struct map *map; | ||
| 188 | struct symbol *sym; | ||
| 189 | u64 addr; | ||
| 190 | u64 al_addr; | ||
| 191 | u64 phys_addr; | ||
| 192 | }; | ||
| 193 | |||
| 194 | struct branch_info { | 119 | struct branch_info { |
| 195 | struct addr_map_symbol from; | 120 | struct addr_map_symbol from; |
| 196 | struct addr_map_symbol to; | 121 | struct addr_map_symbol to; |
| @@ -310,10 +235,11 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss); | |||
| 310 | 235 | ||
| 311 | char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); | 236 | char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name); |
| 312 | 237 | ||
| 313 | void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel); | 238 | void __symbols__insert(struct rb_root_cached *symbols, struct symbol *sym, |
| 314 | void symbols__insert(struct rb_root *symbols, struct symbol *sym); | 239 | bool kernel); |
| 315 | void symbols__fixup_duplicate(struct rb_root *symbols); | 240 | void symbols__insert(struct rb_root_cached *symbols, struct symbol *sym); |
| 316 | void symbols__fixup_end(struct rb_root *symbols); | 241 | void symbols__fixup_duplicate(struct rb_root_cached *symbols); |
| 242 | void symbols__fixup_end(struct rb_root_cached *symbols); | ||
| 317 | void map_groups__fixup_end(struct map_groups *mg); | 243 | void map_groups__fixup_end(struct map_groups *mg); |
| 318 | 244 | ||
| 319 | typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); | 245 | typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data); |
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h new file mode 100644 index 000000000000..fffea68c1203 --- /dev/null +++ b/tools/perf/util/symbol_conf.h | |||
| @@ -0,0 +1,73 @@ | |||
| 1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
| 2 | #ifndef __PERF_SYMBOL_CONF | ||
| 3 | #define __PERF_SYMBOL_CONF 1 | ||
| 4 | |||
| 5 | #include <stdbool.h> | ||
| 6 | |||
| 7 | struct strlist; | ||
| 8 | struct intlist; | ||
| 9 | |||
| 10 | struct symbol_conf { | ||
| 11 | unsigned short priv_size; | ||
| 12 | bool try_vmlinux_path, | ||
| 13 | init_annotation, | ||
| 14 | force, | ||
| 15 | ignore_vmlinux, | ||
| 16 | ignore_vmlinux_buildid, | ||
| 17 | show_kernel_path, | ||
| 18 | use_modules, | ||
| 19 | allow_aliases, | ||
| 20 | sort_by_name, | ||
| 21 | show_nr_samples, | ||
| 22 | show_total_period, | ||
| 23 | use_callchain, | ||
| 24 | cumulate_callchain, | ||
| 25 | show_branchflag_count, | ||
| 26 | exclude_other, | ||
| 27 | show_cpu_utilization, | ||
| 28 | initialized, | ||
| 29 | kptr_restrict, | ||
| 30 | event_group, | ||
| 31 | demangle, | ||
| 32 | demangle_kernel, | ||
| 33 | filter_relative, | ||
| 34 | show_hist_headers, | ||
| 35 | branch_callstack, | ||
| 36 | has_filter, | ||
| 37 | show_ref_callgraph, | ||
| 38 | hide_unresolved, | ||
| 39 | raw_trace, | ||
| 40 | report_hierarchy, | ||
| 41 | inline_name; | ||
| 42 | const char *vmlinux_name, | ||
| 43 | *kallsyms_name, | ||
| 44 | *source_prefix, | ||
| 45 | *field_sep, | ||
| 46 | *graph_function; | ||
| 47 | const char *default_guest_vmlinux_name, | ||
| 48 | *default_guest_kallsyms, | ||
| 49 | *default_guest_modules; | ||
| 50 | const char *guestmount; | ||
| 51 | const char *dso_list_str, | ||
| 52 | *comm_list_str, | ||
| 53 | *pid_list_str, | ||
| 54 | *tid_list_str, | ||
| 55 | *sym_list_str, | ||
| 56 | *col_width_list_str, | ||
| 57 | *bt_stop_list_str; | ||
| 58 | struct strlist *dso_list, | ||
| 59 | *comm_list, | ||
| 60 | *sym_list, | ||
| 61 | *dso_from_list, | ||
| 62 | *dso_to_list, | ||
| 63 | *sym_from_list, | ||
| 64 | *sym_to_list, | ||
| 65 | *bt_stop_list; | ||
| 66 | struct intlist *pid_list, | ||
| 67 | *tid_list; | ||
| 68 | const char *symfs; | ||
| 69 | }; | ||
| 70 | |||
| 71 | extern struct symbol_conf symbol_conf; | ||
| 72 | |||
| 73 | #endif // __PERF_SYMBOL_CONF | ||
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c index ed0205cc7942..02e89b02c2ce 100644 --- a/tools/perf/util/symbol_fprintf.c +++ b/tools/perf/util/symbol_fprintf.c | |||
| @@ -3,6 +3,7 @@ | |||
| 3 | #include <inttypes.h> | 3 | #include <inttypes.h> |
| 4 | #include <stdio.h> | 4 | #include <stdio.h> |
| 5 | 5 | ||
| 6 | #include "map.h" | ||
| 6 | #include "symbol.h" | 7 | #include "symbol.h" |
| 7 | 8 | ||
| 8 | size_t symbol__fprintf(struct symbol *sym, FILE *fp) | 9 | size_t symbol__fprintf(struct symbol *sym, FILE *fp) |
| @@ -64,7 +65,7 @@ size_t dso__fprintf_symbols_by_name(struct dso *dso, | |||
| 64 | struct rb_node *nd; | 65 | struct rb_node *nd; |
| 65 | struct symbol_name_rb_node *pos; | 66 | struct symbol_name_rb_node *pos; |
| 66 | 67 | ||
| 67 | for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) { | 68 | for (nd = rb_first_cached(&dso->symbol_names); nd; nd = rb_next(nd)) { |
| 68 | pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); | 69 | pos = rb_entry(nd, struct symbol_name_rb_node, rb_node); |
| 69 | fprintf(fp, "%s\n", pos->sym.name); | 70 | fprintf(fp, "%s\n", pos->sym.name); |
| 70 | } | 71 | } |
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index d52f27f373ce..a8b45168513c 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include "thread.h" | 20 | #include "thread.h" |
| 21 | #include "event.h" | 21 | #include "event.h" |
| 22 | #include "machine.h" | 22 | #include "machine.h" |
| 23 | #include "env.h" | ||
| 23 | #include "util.h" | 24 | #include "util.h" |
| 24 | #include "debug.h" | 25 | #include "debug.h" |
| 25 | #include "symbol.h" | 26 | #include "symbol.h" |
| @@ -29,6 +30,19 @@ | |||
| 29 | 30 | ||
| 30 | #define STACK_GROWTH 2048 | 31 | #define STACK_GROWTH 2048 |
| 31 | 32 | ||
| 33 | /* | ||
| 34 | * State of retpoline detection. | ||
| 35 | * | ||
| 36 | * RETPOLINE_NONE: no retpoline detection | ||
| 37 | * X86_RETPOLINE_POSSIBLE: x86 retpoline possible | ||
| 38 | * X86_RETPOLINE_DETECTED: x86 retpoline detected | ||
| 39 | */ | ||
| 40 | enum retpoline_state_t { | ||
| 41 | RETPOLINE_NONE, | ||
| 42 | X86_RETPOLINE_POSSIBLE, | ||
| 43 | X86_RETPOLINE_DETECTED, | ||
| 44 | }; | ||
| 45 | |||
| 32 | /** | 46 | /** |
| 33 | * struct thread_stack_entry - thread stack entry. | 47 | * struct thread_stack_entry - thread stack entry. |
| 34 | * @ret_addr: return address | 48 | * @ret_addr: return address |
| @@ -38,6 +52,7 @@ | |||
| 38 | * @cp: call path | 52 | * @cp: call path |
| 39 | * @no_call: a 'call' was not seen | 53 | * @no_call: a 'call' was not seen |
| 40 | * @trace_end: a 'call' but trace ended | 54 | * @trace_end: a 'call' but trace ended |
| 55 | * @non_call: a branch but not a 'call' to the start of a different symbol | ||
| 41 | */ | 56 | */ |
| 42 | struct thread_stack_entry { | 57 | struct thread_stack_entry { |
| 43 | u64 ret_addr; | 58 | u64 ret_addr; |
| @@ -47,6 +62,7 @@ struct thread_stack_entry { | |||
| 47 | struct call_path *cp; | 62 | struct call_path *cp; |
| 48 | bool no_call; | 63 | bool no_call; |
| 49 | bool trace_end; | 64 | bool trace_end; |
| 65 | bool non_call; | ||
| 50 | }; | 66 | }; |
| 51 | 67 | ||
| 52 | /** | 68 | /** |
| @@ -62,6 +78,7 @@ struct thread_stack_entry { | |||
| 62 | * @crp: call/return processor | 78 | * @crp: call/return processor |
| 63 | * @comm: current comm | 79 | * @comm: current comm |
| 64 | * @arr_sz: size of array if this is the first element of an array | 80 | * @arr_sz: size of array if this is the first element of an array |
| 81 | * @rstate: used to detect retpolines | ||
| 65 | */ | 82 | */ |
| 66 | struct thread_stack { | 83 | struct thread_stack { |
| 67 | struct thread_stack_entry *stack; | 84 | struct thread_stack_entry *stack; |
| @@ -74,6 +91,7 @@ struct thread_stack { | |||
| 74 | struct call_return_processor *crp; | 91 | struct call_return_processor *crp; |
| 75 | struct comm *comm; | 92 | struct comm *comm; |
| 76 | unsigned int arr_sz; | 93 | unsigned int arr_sz; |
| 94 | enum retpoline_state_t rstate; | ||
| 77 | }; | 95 | }; |
| 78 | 96 | ||
| 79 | /* | 97 | /* |
| @@ -113,10 +131,16 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread, | |||
| 113 | if (err) | 131 | if (err) |
| 114 | return err; | 132 | return err; |
| 115 | 133 | ||
| 116 | if (thread->mg && thread->mg->machine) | 134 | if (thread->mg && thread->mg->machine) { |
| 117 | ts->kernel_start = machine__kernel_start(thread->mg->machine); | 135 | struct machine *machine = thread->mg->machine; |
| 118 | else | 136 | const char *arch = perf_env__arch(machine->env); |
| 137 | |||
| 138 | ts->kernel_start = machine__kernel_start(machine); | ||
| 139 | if (!strcmp(arch, "x86")) | ||
| 140 | ts->rstate = X86_RETPOLINE_POSSIBLE; | ||
| 141 | } else { | ||
| 119 | ts->kernel_start = 1ULL << 63; | 142 | ts->kernel_start = 1ULL << 63; |
| 143 | } | ||
| 120 | ts->crp = crp; | 144 | ts->crp = crp; |
| 121 | 145 | ||
| 122 | return 0; | 146 | return 0; |
| @@ -268,6 +292,8 @@ static int thread_stack__call_return(struct thread *thread, | |||
| 268 | cr.flags |= CALL_RETURN_NO_CALL; | 292 | cr.flags |= CALL_RETURN_NO_CALL; |
| 269 | if (no_return) | 293 | if (no_return) |
| 270 | cr.flags |= CALL_RETURN_NO_RETURN; | 294 | cr.flags |= CALL_RETURN_NO_RETURN; |
| 295 | if (tse->non_call) | ||
| 296 | cr.flags |= CALL_RETURN_NON_CALL; | ||
| 271 | 297 | ||
| 272 | return crp->process(&cr, crp->data); | 298 | return crp->process(&cr, crp->data); |
| 273 | } | 299 | } |
| @@ -493,6 +519,9 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, | |||
| 493 | struct thread_stack_entry *tse; | 519 | struct thread_stack_entry *tse; |
| 494 | int err; | 520 | int err; |
| 495 | 521 | ||
| 522 | if (!cp) | ||
| 523 | return -ENOMEM; | ||
| 524 | |||
| 496 | if (ts->cnt == ts->sz) { | 525 | if (ts->cnt == ts->sz) { |
| 497 | err = thread_stack__grow(ts); | 526 | err = thread_stack__grow(ts); |
| 498 | if (err) | 527 | if (err) |
| @@ -507,6 +536,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, | |||
| 507 | tse->cp = cp; | 536 | tse->cp = cp; |
| 508 | tse->no_call = no_call; | 537 | tse->no_call = no_call; |
| 509 | tse->trace_end = trace_end; | 538 | tse->trace_end = trace_end; |
| 539 | tse->non_call = false; | ||
| 510 | 540 | ||
| 511 | return 0; | 541 | return 0; |
| 512 | } | 542 | } |
| @@ -528,14 +558,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, | |||
| 528 | timestamp, ref, false); | 558 | timestamp, ref, false); |
| 529 | } | 559 | } |
| 530 | 560 | ||
| 531 | if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { | 561 | if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && |
| 562 | !ts->stack[ts->cnt - 1].non_call) { | ||
| 532 | return thread_stack__call_return(thread, ts, --ts->cnt, | 563 | return thread_stack__call_return(thread, ts, --ts->cnt, |
| 533 | timestamp, ref, false); | 564 | timestamp, ref, false); |
| 534 | } else { | 565 | } else { |
| 535 | size_t i = ts->cnt - 1; | 566 | size_t i = ts->cnt - 1; |
| 536 | 567 | ||
| 537 | while (i--) { | 568 | while (i--) { |
| 538 | if (ts->stack[i].ret_addr != ret_addr) | 569 | if (ts->stack[i].ret_addr != ret_addr || |
| 570 | ts->stack[i].non_call) | ||
| 539 | continue; | 571 | continue; |
| 540 | i += 1; | 572 | i += 1; |
| 541 | while (ts->cnt > i) { | 573 | while (ts->cnt > i) { |
| @@ -576,8 +608,6 @@ static int thread_stack__bottom(struct thread_stack *ts, | |||
| 576 | 608 | ||
| 577 | cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, | 609 | cp = call_path__findnew(cpr, &cpr->call_path, sym, ip, |
| 578 | ts->kernel_start); | 610 | ts->kernel_start); |
| 579 | if (!cp) | ||
| 580 | return -ENOMEM; | ||
| 581 | 611 | ||
| 582 | return thread_stack__push_cp(ts, ip, sample->time, ref, cp, | 612 | return thread_stack__push_cp(ts, ip, sample->time, ref, cp, |
| 583 | true, false); | 613 | true, false); |
| @@ -590,36 +620,36 @@ static int thread_stack__no_call_return(struct thread *thread, | |||
| 590 | struct addr_location *to_al, u64 ref) | 620 | struct addr_location *to_al, u64 ref) |
| 591 | { | 621 | { |
| 592 | struct call_path_root *cpr = ts->crp->cpr; | 622 | struct call_path_root *cpr = ts->crp->cpr; |
| 623 | struct call_path *root = &cpr->call_path; | ||
| 624 | struct symbol *fsym = from_al->sym; | ||
| 625 | struct symbol *tsym = to_al->sym; | ||
| 593 | struct call_path *cp, *parent; | 626 | struct call_path *cp, *parent; |
| 594 | u64 ks = ts->kernel_start; | 627 | u64 ks = ts->kernel_start; |
| 628 | u64 addr = sample->addr; | ||
| 629 | u64 tm = sample->time; | ||
| 630 | u64 ip = sample->ip; | ||
| 595 | int err; | 631 | int err; |
| 596 | 632 | ||
| 597 | if (sample->ip >= ks && sample->addr < ks) { | 633 | if (ip >= ks && addr < ks) { |
| 598 | /* Return to userspace, so pop all kernel addresses */ | 634 | /* Return to userspace, so pop all kernel addresses */ |
| 599 | while (thread_stack__in_kernel(ts)) { | 635 | while (thread_stack__in_kernel(ts)) { |
| 600 | err = thread_stack__call_return(thread, ts, --ts->cnt, | 636 | err = thread_stack__call_return(thread, ts, --ts->cnt, |
| 601 | sample->time, ref, | 637 | tm, ref, true); |
| 602 | true); | ||
| 603 | if (err) | 638 | if (err) |
| 604 | return err; | 639 | return err; |
| 605 | } | 640 | } |
| 606 | 641 | ||
| 607 | /* If the stack is empty, push the userspace address */ | 642 | /* If the stack is empty, push the userspace address */ |
| 608 | if (!ts->cnt) { | 643 | if (!ts->cnt) { |
| 609 | cp = call_path__findnew(cpr, &cpr->call_path, | 644 | cp = call_path__findnew(cpr, root, tsym, addr, ks); |
| 610 | to_al->sym, sample->addr, | 645 | return thread_stack__push_cp(ts, 0, tm, ref, cp, true, |
| 611 | ts->kernel_start); | 646 | false); |
| 612 | if (!cp) | ||
| 613 | return -ENOMEM; | ||
| 614 | return thread_stack__push_cp(ts, 0, sample->time, ref, | ||
| 615 | cp, true, false); | ||
| 616 | } | 647 | } |
| 617 | } else if (thread_stack__in_kernel(ts) && sample->ip < ks) { | 648 | } else if (thread_stack__in_kernel(ts) && ip < ks) { |
| 618 | /* Return to userspace, so pop all kernel addresses */ | 649 | /* Return to userspace, so pop all kernel addresses */ |
| 619 | while (thread_stack__in_kernel(ts)) { | 650 | while (thread_stack__in_kernel(ts)) { |
| 620 | err = thread_stack__call_return(thread, ts, --ts->cnt, | 651 | err = thread_stack__call_return(thread, ts, --ts->cnt, |
| 621 | sample->time, ref, | 652 | tm, ref, true); |
| 622 | true); | ||
| 623 | if (err) | 653 | if (err) |
| 624 | return err; | 654 | return err; |
| 625 | } | 655 | } |
| @@ -628,21 +658,59 @@ static int thread_stack__no_call_return(struct thread *thread, | |||
| 628 | if (ts->cnt) | 658 | if (ts->cnt) |
| 629 | parent = ts->stack[ts->cnt - 1].cp; | 659 | parent = ts->stack[ts->cnt - 1].cp; |
| 630 | else | 660 | else |
| 631 | parent = &cpr->call_path; | 661 | parent = root; |
| 632 | 662 | ||
| 633 | /* This 'return' had no 'call', so push and pop top of stack */ | 663 | if (parent->sym == from_al->sym) { |
| 634 | cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip, | 664 | /* |
| 635 | ts->kernel_start); | 665 | * At the bottom of the stack, assume the missing 'call' was |
| 636 | if (!cp) | 666 | * before the trace started. So, pop the current symbol and push |
| 637 | return -ENOMEM; | 667 | * the 'to' symbol. |
| 668 | */ | ||
| 669 | if (ts->cnt == 1) { | ||
| 670 | err = thread_stack__call_return(thread, ts, --ts->cnt, | ||
| 671 | tm, ref, false); | ||
| 672 | if (err) | ||
| 673 | return err; | ||
| 674 | } | ||
| 675 | |||
| 676 | if (!ts->cnt) { | ||
| 677 | cp = call_path__findnew(cpr, root, tsym, addr, ks); | ||
| 638 | 678 | ||
| 639 | err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp, | 679 | return thread_stack__push_cp(ts, addr, tm, ref, cp, |
| 640 | true, false); | 680 | true, false); |
| 681 | } | ||
| 682 | |||
| 683 | /* | ||
| 684 | * Otherwise assume the 'return' is being used as a jump (e.g. | ||
| 685 | * retpoline) and just push the 'to' symbol. | ||
| 686 | */ | ||
| 687 | cp = call_path__findnew(cpr, parent, tsym, addr, ks); | ||
| 688 | |||
| 689 | err = thread_stack__push_cp(ts, 0, tm, ref, cp, true, false); | ||
| 690 | if (!err) | ||
| 691 | ts->stack[ts->cnt - 1].non_call = true; | ||
| 692 | |||
| 693 | return err; | ||
| 694 | } | ||
| 695 | |||
| 696 | /* | ||
| 697 | * Assume 'parent' has not yet returned, so push 'to', and then push and | ||
| 698 | * pop 'from'. | ||
| 699 | */ | ||
| 700 | |||
| 701 | cp = call_path__findnew(cpr, parent, tsym, addr, ks); | ||
| 702 | |||
| 703 | err = thread_stack__push_cp(ts, addr, tm, ref, cp, true, false); | ||
| 641 | if (err) | 704 | if (err) |
| 642 | return err; | 705 | return err; |
| 643 | 706 | ||
| 644 | return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref, | 707 | cp = call_path__findnew(cpr, cp, fsym, ip, ks); |
| 645 | to_al->sym); | 708 | |
| 709 | err = thread_stack__push_cp(ts, ip, tm, ref, cp, true, false); | ||
| 710 | if (err) | ||
| 711 | return err; | ||
| 712 | |||
| 713 | return thread_stack__call_return(thread, ts, --ts->cnt, tm, ref, false); | ||
| 646 | } | 714 | } |
| 647 | 715 | ||
| 648 | static int thread_stack__trace_begin(struct thread *thread, | 716 | static int thread_stack__trace_begin(struct thread *thread, |
| @@ -680,8 +748,6 @@ static int thread_stack__trace_end(struct thread_stack *ts, | |||
| 680 | 748 | ||
| 681 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, | 749 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0, |
| 682 | ts->kernel_start); | 750 | ts->kernel_start); |
| 683 | if (!cp) | ||
| 684 | return -ENOMEM; | ||
| 685 | 751 | ||
| 686 | ret_addr = sample->ip + sample->insn_len; | 752 | ret_addr = sample->ip + sample->insn_len; |
| 687 | 753 | ||
| @@ -689,6 +755,70 @@ static int thread_stack__trace_end(struct thread_stack *ts, | |||
| 689 | false, true); | 755 | false, true); |
| 690 | } | 756 | } |
| 691 | 757 | ||
| 758 | static bool is_x86_retpoline(const char *name) | ||
| 759 | { | ||
| 760 | const char *p = strstr(name, "__x86_indirect_thunk_"); | ||
| 761 | |||
| 762 | return p == name || !strcmp(name, "__indirect_thunk_start"); | ||
| 763 | } | ||
| 764 | |||
| 765 | /* | ||
| 766 | * x86 retpoline functions pollute the call graph. This function removes them. | ||
| 767 | * This does not handle function return thunks, nor is there any improvement | ||
| 768 | * for the handling of inline thunks or extern thunks. | ||
| 769 | */ | ||
| 770 | static int thread_stack__x86_retpoline(struct thread_stack *ts, | ||
| 771 | struct perf_sample *sample, | ||
| 772 | struct addr_location *to_al) | ||
| 773 | { | ||
| 774 | struct thread_stack_entry *tse = &ts->stack[ts->cnt - 1]; | ||
| 775 | struct call_path_root *cpr = ts->crp->cpr; | ||
| 776 | struct symbol *sym = tse->cp->sym; | ||
| 777 | struct symbol *tsym = to_al->sym; | ||
| 778 | struct call_path *cp; | ||
| 779 | |||
| 780 | if (sym && is_x86_retpoline(sym->name)) { | ||
| 781 | /* | ||
| 782 | * This is a x86 retpoline fn. It pollutes the call graph by | ||
| 783 | * showing up everywhere there is an indirect branch, but does | ||
| 784 | * not itself mean anything. Here the top-of-stack is removed, | ||
| 785 | * by decrementing the stack count, and then further down, the | ||
| 786 | * resulting top-of-stack is replaced with the actual target. | ||
| 787 | * The result is that the retpoline functions will no longer | ||
| 788 | * appear in the call graph. Note this only affects the call | ||
| 789 | * graph, since all the original branches are left unchanged. | ||
| 790 | */ | ||
| 791 | ts->cnt -= 1; | ||
| 792 | sym = ts->stack[ts->cnt - 2].cp->sym; | ||
| 793 | if (sym && sym == tsym && to_al->addr != tsym->start) { | ||
| 794 | /* | ||
| 795 | * Target is back to the middle of the symbol we came | ||
| 796 | * from so assume it is an indirect jmp and forget it | ||
| 797 | * altogether. | ||
| 798 | */ | ||
| 799 | ts->cnt -= 1; | ||
| 800 | return 0; | ||
| 801 | } | ||
| 802 | } else if (sym && sym == tsym) { | ||
| 803 | /* | ||
| 804 | * Target is back to the symbol we came from so assume it is an | ||
| 805 | * indirect jmp and forget it altogether. | ||
| 806 | */ | ||
| 807 | ts->cnt -= 1; | ||
| 808 | return 0; | ||
| 809 | } | ||
| 810 | |||
| 811 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 2].cp, tsym, | ||
| 812 | sample->addr, ts->kernel_start); | ||
| 813 | if (!cp) | ||
| 814 | return -ENOMEM; | ||
| 815 | |||
| 816 | /* Replace the top-of-stack with the actual target */ | ||
| 817 | ts->stack[ts->cnt - 1].cp = cp; | ||
| 818 | |||
| 819 | return 0; | ||
| 820 | } | ||
| 821 | |||
| 692 | int thread_stack__process(struct thread *thread, struct comm *comm, | 822 | int thread_stack__process(struct thread *thread, struct comm *comm, |
| 693 | struct perf_sample *sample, | 823 | struct perf_sample *sample, |
| 694 | struct addr_location *from_al, | 824 | struct addr_location *from_al, |
| @@ -696,6 +826,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, | |||
| 696 | struct call_return_processor *crp) | 826 | struct call_return_processor *crp) |
| 697 | { | 827 | { |
| 698 | struct thread_stack *ts = thread__stack(thread, sample->cpu); | 828 | struct thread_stack *ts = thread__stack(thread, sample->cpu); |
| 829 | enum retpoline_state_t rstate; | ||
| 699 | int err = 0; | 830 | int err = 0; |
| 700 | 831 | ||
| 701 | if (ts && !ts->crp) { | 832 | if (ts && !ts->crp) { |
| @@ -711,6 +842,10 @@ int thread_stack__process(struct thread *thread, struct comm *comm, | |||
| 711 | ts->comm = comm; | 842 | ts->comm = comm; |
| 712 | } | 843 | } |
| 713 | 844 | ||
| 845 | rstate = ts->rstate; | ||
| 846 | if (rstate == X86_RETPOLINE_DETECTED) | ||
| 847 | ts->rstate = X86_RETPOLINE_POSSIBLE; | ||
| 848 | |||
| 714 | /* Flush stack on exec */ | 849 | /* Flush stack on exec */ |
| 715 | if (ts->comm != comm && thread->pid_ == thread->tid) { | 850 | if (ts->comm != comm && thread->pid_ == thread->tid) { |
| 716 | err = __thread_stack__flush(thread, ts); | 851 | err = __thread_stack__flush(thread, ts); |
| @@ -745,14 +880,27 @@ int thread_stack__process(struct thread *thread, struct comm *comm, | |||
| 745 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | 880 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, |
| 746 | to_al->sym, sample->addr, | 881 | to_al->sym, sample->addr, |
| 747 | ts->kernel_start); | 882 | ts->kernel_start); |
| 748 | if (!cp) | ||
| 749 | return -ENOMEM; | ||
| 750 | err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, | 883 | err = thread_stack__push_cp(ts, ret_addr, sample->time, ref, |
| 751 | cp, false, trace_end); | 884 | cp, false, trace_end); |
| 885 | |||
| 886 | /* | ||
| 887 | * A call to the same symbol but not the start of the symbol, | ||
| 888 | * may be the start of a x86 retpoline. | ||
| 889 | */ | ||
| 890 | if (!err && rstate == X86_RETPOLINE_POSSIBLE && to_al->sym && | ||
| 891 | from_al->sym == to_al->sym && | ||
| 892 | to_al->addr != to_al->sym->start) | ||
| 893 | ts->rstate = X86_RETPOLINE_DETECTED; | ||
| 894 | |||
| 752 | } else if (sample->flags & PERF_IP_FLAG_RETURN) { | 895 | } else if (sample->flags & PERF_IP_FLAG_RETURN) { |
| 753 | if (!sample->ip || !sample->addr) | 896 | if (!sample->ip || !sample->addr) |
| 754 | return 0; | 897 | return 0; |
| 755 | 898 | ||
| 899 | /* x86 retpoline 'return' doesn't match the stack */ | ||
| 900 | if (rstate == X86_RETPOLINE_DETECTED && ts->cnt > 2 && | ||
| 901 | ts->stack[ts->cnt - 1].ret_addr != sample->addr) | ||
| 902 | return thread_stack__x86_retpoline(ts, sample, to_al); | ||
| 903 | |||
| 756 | err = thread_stack__pop_cp(thread, ts, sample->addr, | 904 | err = thread_stack__pop_cp(thread, ts, sample->addr, |
| 757 | sample->time, ref, from_al->sym); | 905 | sample->time, ref, from_al->sym); |
| 758 | if (err) { | 906 | if (err) { |
| @@ -765,6 +913,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, | |||
| 765 | err = thread_stack__trace_begin(thread, ts, sample->time, ref); | 913 | err = thread_stack__trace_begin(thread, ts, sample->time, ref); |
| 766 | } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { | 914 | } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { |
| 767 | err = thread_stack__trace_end(ts, sample, ref); | 915 | err = thread_stack__trace_end(ts, sample, ref); |
| 916 | } else if (sample->flags & PERF_IP_FLAG_BRANCH && | ||
| 917 | from_al->sym != to_al->sym && to_al->sym && | ||
| 918 | to_al->addr == to_al->sym->start) { | ||
| 919 | struct call_path_root *cpr = ts->crp->cpr; | ||
| 920 | struct call_path *cp; | ||
| 921 | |||
| 922 | /* | ||
| 923 | * The compiler might optimize a call/ret combination by making | ||
| 924 | * it a jmp. Make that visible by recording on the stack a | ||
| 925 | * branch to the start of a different symbol. Note, that means | ||
| 926 | * when a ret pops the stack, all jmps must be popped off first. | ||
| 927 | */ | ||
| 928 | cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, | ||
| 929 | to_al->sym, sample->addr, | ||
| 930 | ts->kernel_start); | ||
| 931 | err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, | ||
| 932 | false); | ||
| 933 | if (!err) | ||
| 934 | ts->stack[ts->cnt - 1].non_call = true; | ||
| 768 | } | 935 | } |
| 769 | 936 | ||
| 770 | return err; | 937 | return err; |
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 1f626f4a1c40..b7c04e19ad41 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h | |||
| @@ -35,10 +35,13 @@ struct call_path; | |||
| 35 | * | 35 | * |
| 36 | * CALL_RETURN_NO_CALL: 'return' but no matching 'call' | 36 | * CALL_RETURN_NO_CALL: 'return' but no matching 'call' |
| 37 | * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' | 37 | * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' |
| 38 | * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different | ||
| 39 | * symbol | ||
| 38 | */ | 40 | */ |
| 39 | enum { | 41 | enum { |
| 40 | CALL_RETURN_NO_CALL = 1 << 0, | 42 | CALL_RETURN_NO_CALL = 1 << 0, |
| 41 | CALL_RETURN_NO_RETURN = 1 << 1, | 43 | CALL_RETURN_NO_RETURN = 1 << 1, |
| 44 | CALL_RETURN_NON_CALL = 1 << 2, | ||
| 42 | }; | 45 | }; |
| 43 | 46 | ||
| 44 | /** | 47 | /** |
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index c83372329f89..4c179fef442d 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | #include "debug.h" | 12 | #include "debug.h" |
| 13 | #include "namespaces.h" | 13 | #include "namespaces.h" |
| 14 | #include "comm.h" | 14 | #include "comm.h" |
| 15 | #include "symbol.h" | ||
| 15 | #include "unwind.h" | 16 | #include "unwind.h" |
| 16 | 17 | ||
| 17 | #include <api/fs/fs.h> | 18 | #include <api/fs/fs.h> |
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 712dd48cc0ca..8276ffeec556 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h | |||
| @@ -5,14 +5,18 @@ | |||
| 5 | #include <linux/refcount.h> | 5 | #include <linux/refcount.h> |
| 6 | #include <linux/rbtree.h> | 6 | #include <linux/rbtree.h> |
| 7 | #include <linux/list.h> | 7 | #include <linux/list.h> |
| 8 | #include <stdio.h> | ||
| 8 | #include <unistd.h> | 9 | #include <unistd.h> |
| 9 | #include <sys/types.h> | 10 | #include <sys/types.h> |
| 10 | #include "symbol.h" | 11 | #include "srccode.h" |
| 11 | #include "map.h" | 12 | #include "symbol_conf.h" |
| 12 | #include <strlist.h> | 13 | #include <strlist.h> |
| 13 | #include <intlist.h> | 14 | #include <intlist.h> |
| 14 | #include "rwsem.h" | 15 | #include "rwsem.h" |
| 15 | 16 | ||
| 17 | struct addr_location; | ||
| 18 | struct map; | ||
| 19 | struct namespaces_event; | ||
| 16 | struct thread_stack; | 20 | struct thread_stack; |
| 17 | struct unwind_libunwind_ops; | 21 | struct unwind_libunwind_ops; |
| 18 | 22 | ||
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 56e4ca54020a..250391672f9f 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h | |||
| @@ -53,7 +53,10 @@ struct perf_tool { | |||
| 53 | itrace_start, | 53 | itrace_start, |
| 54 | context_switch, | 54 | context_switch, |
| 55 | throttle, | 55 | throttle, |
| 56 | unthrottle; | 56 | unthrottle, |
| 57 | ksymbol, | ||
| 58 | bpf_event; | ||
| 59 | |||
| 57 | event_attr_op attr; | 60 | event_attr_op attr; |
| 58 | event_attr_op event_update; | 61 | event_attr_op event_update; |
| 59 | event_op2 tracing_data; | 62 | event_op2 tracing_data; |
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 5eff9bfc5758..407d0167b942 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c | |||
| @@ -8,6 +8,8 @@ | |||
| 8 | #include "unwind.h" | 8 | #include "unwind.h" |
| 9 | #include "unwind-libdw.h" | 9 | #include "unwind-libdw.h" |
| 10 | #include "machine.h" | 10 | #include "machine.h" |
| 11 | #include "map.h" | ||
| 12 | #include "symbol.h" | ||
| 11 | #include "thread.h" | 13 | #include "thread.h" |
| 12 | #include <linux/types.h> | 14 | #include <linux/types.h> |
| 13 | #include "event.h" | 15 | #include "event.h" |
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 79f521a552cf..f3c666a84e4d 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | #include "session.h" | 34 | #include "session.h" |
| 35 | #include "perf_regs.h" | 35 | #include "perf_regs.h" |
| 36 | #include "unwind.h" | 36 | #include "unwind.h" |
| 37 | #include "map.h" | ||
| 37 | #include "symbol.h" | 38 | #include "symbol.h" |
| 38 | #include "util.h" | 39 | #include "util.h" |
| 39 | #include "debug.h" | 40 | #include "debug.h" |
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index b029a5e9ae49..9778b3133b77 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c | |||
| @@ -1,5 +1,6 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | #include "unwind.h" | 2 | #include "unwind.h" |
| 3 | #include "map.h" | ||
| 3 | #include "thread.h" | 4 | #include "thread.h" |
| 4 | #include "session.h" | 5 | #include "session.h" |
| 5 | #include "debug.h" | 6 | #include "debug.h" |
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 093352e93d50..d388f80d8703 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
| @@ -2,6 +2,7 @@ | |||
| 2 | #include "../perf.h" | 2 | #include "../perf.h" |
| 3 | #include "util.h" | 3 | #include "util.h" |
| 4 | #include "debug.h" | 4 | #include "debug.h" |
| 5 | #include "namespaces.h" | ||
| 5 | #include <api/fs/fs.h> | 6 | #include <api/fs/fs.h> |
| 6 | #include <sys/mman.h> | 7 | #include <sys/mman.h> |
| 7 | #include <sys/stat.h> | 8 | #include <sys/stat.h> |
| @@ -20,6 +21,7 @@ | |||
| 20 | #include <linux/time64.h> | 21 | #include <linux/time64.h> |
| 21 | #include <unistd.h> | 22 | #include <unistd.h> |
| 22 | #include "strlist.h" | 23 | #include "strlist.h" |
| 24 | #include "string2.h" | ||
| 23 | 25 | ||
| 24 | /* | 26 | /* |
| 25 | * XXX We need to find a better place for these things... | 27 | * XXX We need to find a better place for these things... |
| @@ -116,23 +118,67 @@ int mkdir_p(char *path, mode_t mode) | |||
| 116 | return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; | 118 | return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; |
| 117 | } | 119 | } |
| 118 | 120 | ||
| 119 | int rm_rf(const char *path) | 121 | static bool match_pat(char *file, const char **pat) |
| 122 | { | ||
| 123 | int i = 0; | ||
| 124 | |||
| 125 | if (!pat) | ||
| 126 | return true; | ||
| 127 | |||
| 128 | while (pat[i]) { | ||
| 129 | if (strglobmatch(file, pat[i])) | ||
| 130 | return true; | ||
| 131 | |||
| 132 | i++; | ||
| 133 | } | ||
| 134 | |||
| 135 | return false; | ||
| 136 | } | ||
| 137 | |||
| 138 | /* | ||
| 139 | * The depth specify how deep the removal will go. | ||
| 140 | * 0 - will remove only files under the 'path' directory | ||
| 141 | * 1 .. x - will dive in x-level deep under the 'path' directory | ||
| 142 | * | ||
| 143 | * If specified the pat is array of string patterns ended with NULL, | ||
| 144 | * which are checked upon every file/directory found. Only matching | ||
| 145 | * ones are removed. | ||
| 146 | * | ||
| 147 | * The function returns: | ||
| 148 | * 0 on success | ||
| 149 | * -1 on removal failure with errno set | ||
| 150 | * -2 on pattern failure | ||
| 151 | */ | ||
| 152 | static int rm_rf_depth_pat(const char *path, int depth, const char **pat) | ||
| 120 | { | 153 | { |
| 121 | DIR *dir; | 154 | DIR *dir; |
| 122 | int ret = 0; | 155 | int ret; |
| 123 | struct dirent *d; | 156 | struct dirent *d; |
| 124 | char namebuf[PATH_MAX]; | 157 | char namebuf[PATH_MAX]; |
| 158 | struct stat statbuf; | ||
| 125 | 159 | ||
| 160 | /* Do not fail if there's no file. */ | ||
| 161 | ret = lstat(path, &statbuf); | ||
| 162 | if (ret) | ||
| 163 | return 0; | ||
| 164 | |||
| 165 | /* Try to remove any file we get. */ | ||
| 166 | if (!(statbuf.st_mode & S_IFDIR)) | ||
| 167 | return unlink(path); | ||
| 168 | |||
| 169 | /* We have directory in path. */ | ||
| 126 | dir = opendir(path); | 170 | dir = opendir(path); |
| 127 | if (dir == NULL) | 171 | if (dir == NULL) |
| 128 | return 0; | 172 | return -1; |
| 129 | 173 | ||
| 130 | while ((d = readdir(dir)) != NULL && !ret) { | 174 | while ((d = readdir(dir)) != NULL && !ret) { |
| 131 | struct stat statbuf; | ||
| 132 | 175 | ||
| 133 | if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) | 176 | if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) |
| 134 | continue; | 177 | continue; |
| 135 | 178 | ||
| 179 | if (!match_pat(d->d_name, pat)) | ||
| 180 | return -2; | ||
| 181 | |||
| 136 | scnprintf(namebuf, sizeof(namebuf), "%s/%s", | 182 | scnprintf(namebuf, sizeof(namebuf), "%s/%s", |
| 137 | path, d->d_name); | 183 | path, d->d_name); |
| 138 | 184 | ||
| @@ -144,7 +190,7 @@ int rm_rf(const char *path) | |||
| 144 | } | 190 | } |
| 145 | 191 | ||
| 146 | if (S_ISDIR(statbuf.st_mode)) | 192 | if (S_ISDIR(statbuf.st_mode)) |
| 147 | ret = rm_rf(namebuf); | 193 | ret = depth ? rm_rf_depth_pat(namebuf, depth - 1, pat) : 0; |
| 148 | else | 194 | else |
| 149 | ret = unlink(namebuf); | 195 | ret = unlink(namebuf); |
| 150 | } | 196 | } |
| @@ -156,6 +202,22 @@ int rm_rf(const char *path) | |||
| 156 | return rmdir(path); | 202 | return rmdir(path); |
| 157 | } | 203 | } |
| 158 | 204 | ||
| 205 | int rm_rf_perf_data(const char *path) | ||
| 206 | { | ||
| 207 | const char *pat[] = { | ||
| 208 | "header", | ||
| 209 | "data.*", | ||
| 210 | NULL, | ||
| 211 | }; | ||
| 212 | |||
| 213 | return rm_rf_depth_pat(path, 0, pat); | ||
| 214 | } | ||
| 215 | |||
| 216 | int rm_rf(const char *path) | ||
| 217 | { | ||
| 218 | return rm_rf_depth_pat(path, INT_MAX, NULL); | ||
| 219 | } | ||
| 220 | |||
| 159 | /* A filter which removes dot files */ | 221 | /* A filter which removes dot files */ |
| 160 | bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) | 222 | bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d) |
| 161 | { | 223 | { |
| @@ -506,3 +568,13 @@ out: | |||
| 506 | 568 | ||
| 507 | return tip; | 569 | return tip; |
| 508 | } | 570 | } |
| 571 | |||
| 572 | char *perf_exe(char *buf, int len) | ||
| 573 | { | ||
| 574 | int n = readlink("/proc/self/exe", buf, len); | ||
| 575 | if (n > 0) { | ||
| 576 | buf[n] = 0; | ||
| 577 | return buf; | ||
| 578 | } | ||
| 579 | return strcpy(buf, "perf"); | ||
| 580 | } | ||
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index ece040b799f6..09c1b0f91f65 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
| @@ -31,6 +31,7 @@ struct strlist; | |||
| 31 | 31 | ||
| 32 | int mkdir_p(char *path, mode_t mode); | 32 | int mkdir_p(char *path, mode_t mode); |
| 33 | int rm_rf(const char *path); | 33 | int rm_rf(const char *path); |
| 34 | int rm_rf_perf_data(const char *path); | ||
| 34 | struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); | 35 | struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *)); |
| 35 | bool lsdir_no_dot_filter(const char *name, struct dirent *d); | 36 | bool lsdir_no_dot_filter(const char *name, struct dirent *d); |
| 36 | int copyfile(const char *from, const char *to); | 37 | int copyfile(const char *from, const char *to); |
| @@ -76,6 +77,8 @@ extern bool perf_singlethreaded; | |||
| 76 | void perf_set_singlethreaded(void); | 77 | void perf_set_singlethreaded(void); |
| 77 | void perf_set_multithreaded(void); | 78 | void perf_set_multithreaded(void); |
| 78 | 79 | ||
| 80 | char *perf_exe(char *buf, int len); | ||
| 81 | |||
| 79 | #ifndef O_CLOEXEC | 82 | #ifndef O_CLOEXEC |
| 80 | #ifdef __sparc__ | 83 | #ifdef __sparc__ |
| 81 | #define O_CLOEXEC 0x400000 | 84 | #define O_CLOEXEC 0x400000 |
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 3702cba11d7d..5031b7b22bbd 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c | |||
| @@ -11,6 +11,7 @@ | |||
| 11 | 11 | ||
| 12 | #include "vdso.h" | 12 | #include "vdso.h" |
| 13 | #include "util.h" | 13 | #include "util.h" |
| 14 | #include "map.h" | ||
| 14 | #include "symbol.h" | 15 | #include "symbol.h" |
| 15 | #include "machine.h" | 16 | #include "machine.h" |
| 16 | #include "thread.h" | 17 | #include "thread.h" |
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c index 902ce6384f57..512ad7c09b13 100644 --- a/tools/perf/util/zlib.c +++ b/tools/perf/util/zlib.c | |||
| @@ -6,7 +6,6 @@ | |||
| 6 | #include <sys/mman.h> | 6 | #include <sys/mman.h> |
| 7 | #include <zlib.h> | 7 | #include <zlib.h> |
| 8 | #include <linux/compiler.h> | 8 | #include <linux/compiler.h> |
| 9 | #include <unistd.h> | ||
| 10 | 9 | ||
| 11 | #include "util/compress.h" | 10 | #include "util/compress.h" |
| 12 | #include "util/util.h" | 11 | #include "util/util.h" |
