aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2019-07-08 12:06:57 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2019-07-08 12:06:57 -0400
commite3b22a65348ab54261a98b6bc90ecf8977ff8ebf (patch)
tree81c517d6f0e5585be7af5ffa1a4d4136b1f4a9c6 /tools/perf
parent05c78468a60f2fd961cd0a0c01c27f288bf81204 (diff)
parent552a031ba12a4236be107a5b082a399237758a5d (diff)
Merge remote-tracking branch 'tip/perf/core' into perf/urgent
To pick up fixes. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/db-export.txt41
-rw-r--r--tools/perf/Documentation/intel-pt.txt40
-rw-r--r--tools/perf/Documentation/perf-config.txt9
-rw-r--r--tools/perf/Documentation/perf-diff.txt31
-rw-r--r--tools/perf/Documentation/perf-record.txt11
-rw-r--r--tools/perf/Documentation/perf-report.txt11
-rw-r--r--tools/perf/Documentation/perf-script.txt17
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/Documentation/perf-top.txt5
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt97
-rw-r--r--tools/perf/Documentation/tips.txt2
-rw-r--r--tools/perf/MANIFEST2
-rw-r--r--tools/perf/Makefile.config15
-rw-r--r--tools/perf/Makefile.perf44
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c310
-rw-r--r--tools/perf/arch/arm64/Build2
-rw-r--r--tools/perf/arch/arm64/tests/Build2
-rw-r--r--tools/perf/arch/csky/annotate/instructions.c48
-rw-r--r--tools/perf/arch/s390/util/header.c2
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h1
-rw-r--r--tools/perf/arch/x86/tests/Build2
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/intel-cqm.c1
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c304
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c1
-rw-r--r--tools/perf/arch/x86/util/machine.c3
-rw-r--r--tools/perf/builtin-diff.c382
-rw-r--r--tools/perf/builtin-kmem.c3
-rw-r--r--tools/perf/builtin-record.c4
-rw-r--r--tools/perf/builtin-report.c13
-rw-r--r--tools/perf/builtin-sched.c3
-rw-r--r--tools/perf/builtin-script.c107
-rw-r--r--tools/perf/builtin-stat.c89
-rw-r--r--tools/perf/builtin-top.c10
-rw-r--r--tools/perf/builtin-trace.c139
-rwxr-xr-xtools/perf/check-headers.sh2
-rw-r--r--tools/perf/examples/bpf/augmented_raw_syscalls.c268
-rw-r--r--tools/perf/perf-with-kcore.sh5
-rw-r--r--tools/perf/perf.c1
-rw-r--r--tools/perf/perf.h4
-rw-r--r--tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json44
-rw-r--r--tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json51
-rw-r--r--tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json37
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json22
-rw-r--r--tools/perf/pmu-events/jevents.c7
-rw-r--r--tools/perf/scripts/python/export-to-postgresql.py330
-rw-r--r--tools/perf/scripts/python/export-to-sqlite.py319
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py345
-rw-r--r--tools/perf/tests/Build4
-rw-r--r--tools/perf/tests/bp_account.c1
-rw-r--r--tools/perf/tests/bpf-script-example.c1
-rw-r--r--tools/perf/tests/bpf-script-test-kbuild.c1
-rw-r--r--tools/perf/tests/bpf-script-test-prologue.c1
-rw-r--r--tools/perf/tests/bpf-script-test-relocation.c1
-rw-r--r--tools/perf/tests/bpf.c1
-rw-r--r--tools/perf/tests/builtin-test.c11
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/map_groups.c121
-rw-r--r--tools/perf/tests/mem.c1
-rw-r--r--tools/perf/tests/mem2node.c1
-rw-r--r--tools/perf/tests/parse-events.c27
-rw-r--r--tools/perf/tests/shell/lib/probe.sh1
-rwxr-xr-xtools/perf/tests/shell/probe_vfs_getname.sh3
-rwxr-xr-xtools/perf/tests/shell/record+probe_libc_inet_pton.sh1
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh1
-rwxr-xr-xtools/perf/tests/shell/record+zstd_comp_decomp.sh2
-rwxr-xr-xtools/perf/tests/shell/trace+probe_vfs_getname.sh1
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/tests/time-utils-test.c251
-rw-r--r--tools/perf/trace/beauty/Build4
-rw-r--r--tools/perf/trace/beauty/beauty.h15
-rw-r--r--tools/perf/trace/beauty/clone.c1
-rwxr-xr-xtools/perf/trace/beauty/fsconfig.sh17
-rw-r--r--tools/perf/trace/beauty/fsmount.c34
-rwxr-xr-xtools/perf/trace/beauty/fsmount.sh22
-rw-r--r--tools/perf/trace/beauty/fspick.c24
-rwxr-xr-xtools/perf/trace/beauty/fspick.sh17
-rw-r--r--tools/perf/trace/beauty/move_mount.c24
-rwxr-xr-xtools/perf/trace/beauty/move_mount_flags.sh17
-rw-r--r--tools/perf/trace/beauty/sync_file_range.c31
-rwxr-xr-xtools/perf/trace/beauty/sync_file_range.sh17
-rw-r--r--tools/perf/ui/browser.c4
-rw-r--r--tools/perf/ui/browsers/hists.c10
-rw-r--r--tools/perf/ui/browsers/map.c2
-rw-r--r--tools/perf/ui/gtk/hists.c5
-rw-r--r--tools/perf/ui/libslang.h5
-rw-r--r--tools/perf/ui/progress.c2
-rw-r--r--tools/perf/ui/stdio/hist.c43
-rw-r--r--tools/perf/util/Build9
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN2
-rw-r--r--tools/perf/util/annotate.c25
-rw-r--r--tools/perf/util/auxtrace.c5
-rw-r--r--tools/perf/util/auxtrace.h34
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/config.c10
-rw-r--r--tools/perf/util/cpumap.c66
-rw-r--r--tools/perf/util/cpumap.h10
-rw-r--r--tools/perf/util/cputopo.c84
-rw-r--r--tools/perf/util/cputopo.h2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c268
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h39
-rw-r--r--tools/perf/util/cs-etm.c1026
-rw-r--r--tools/perf/util/cs-etm.h94
-rw-r--r--tools/perf/util/ctype.c49
-rw-r--r--tools/perf/util/data-convert-bt.c2
-rw-r--r--tools/perf/util/debug.c2
-rw-r--r--tools/perf/util/demangle-java.c2
-rw-r--r--tools/perf/util/dso.c128
-rw-r--r--tools/perf/util/env.c3
-rw-r--r--tools/perf/util/env.h3
-rw-r--r--tools/perf/util/event.c10
-rw-r--r--tools/perf/util/event.h2
-rw-r--r--tools/perf/util/evsel.c27
-rw-r--r--tools/perf/util/header.c111
-rw-r--r--tools/perf/util/hist.c43
-rw-r--r--tools/perf/util/hist.h8
-rw-r--r--tools/perf/util/include/linux/ctype.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c467
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h144
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c140
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h21
-rw-r--r--tools/perf/util/intel-pt.c762
-rw-r--r--tools/perf/util/jitdump.c2
-rw-r--r--tools/perf/util/machine.c11
-rw-r--r--tools/perf/util/map.c6
-rw-r--r--tools/perf/util/map_groups.h2
-rw-r--r--tools/perf/util/metricgroup.c73
-rw-r--r--tools/perf/util/perf_regs.h4
-rw-r--r--tools/perf/util/pmu.c69
-rw-r--r--tools/perf/util/print_binary.c2
-rw-r--r--tools/perf/util/probe-event.c2
-rw-r--r--tools/perf/util/probe-finder.h2
-rw-r--r--tools/perf/util/python-ext-sources3
-rw-r--r--tools/perf/util/python.c1
-rw-r--r--tools/perf/util/s390-cpumsf.c96
-rw-r--r--tools/perf/util/sane_ctype.h52
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c54
-rw-r--r--tools/perf/util/smt.c8
-rw-r--r--tools/perf/util/sort.h13
-rw-r--r--tools/perf/util/srcline.c7
-rw-r--r--tools/perf/util/stat-display.c43
-rw-r--r--tools/perf/util/stat-shadow.c24
-rw-r--r--tools/perf/util/stat.c1
-rw-r--r--tools/perf/util/stat.h1
-rw-r--r--tools/perf/util/strfilter.c6
-rw-r--r--tools/perf/util/string.c169
-rw-r--r--tools/perf/util/string2.h15
-rw-r--r--tools/perf/util/symbol-elf.c6
-rw-r--r--tools/perf/util/symbol.c121
-rw-r--r--tools/perf/util/symbol.h23
-rw-r--r--tools/perf/util/symbol_conf.h5
-rw-r--r--tools/perf/util/thread-stack.c62
-rw-r--r--tools/perf/util/thread-stack.h4
-rw-r--r--tools/perf/util/thread.c12
-rw-r--r--tools/perf/util/thread.h4
-rw-r--r--tools/perf/util/thread_map.c3
-rw-r--r--tools/perf/util/time-utils.c130
-rw-r--r--tools/perf/util/trace-event-parse.c2
-rw-r--r--tools/perf/util/util.c13
-rw-r--r--tools/perf/util/util.h1
161 files changed, 7101 insertions, 1485 deletions
diff --git a/tools/perf/Documentation/db-export.txt b/tools/perf/Documentation/db-export.txt
new file mode 100644
index 000000000000..52ffccb02d55
--- /dev/null
+++ b/tools/perf/Documentation/db-export.txt
@@ -0,0 +1,41 @@
1Database Export
2===============
3
4perf tool's python scripting engine:
5
6 tools/perf/util/scripting-engines/trace-event-python.c
7
8supports scripts:
9
10 tools/perf/scripts/python/export-to-sqlite.py
11 tools/perf/scripts/python/export-to-postgresql.py
12
13which export data to a SQLite3 or PostgreSQL database.
14
15The export process provides records with unique sequential ids which allows the
16data to be imported directly to a database and provides the relationships
17between tables.
18
19Over time it is possible to continue to expand the export while maintaining
20backward and forward compatibility, by following some simple rules:
21
221. Because of the nature of SQL, existing tables and columns can continue to be
23used so long as the names and meanings (and to some extent data types) remain
24the same.
25
262. New tables and columns can be added, without affecting existing SQL queries,
27so long as the new names are unique.
28
293. Scripts that use a database (e.g. exported-sql-viewer.py) can maintain
30backward compatibility by testing for the presence of new tables and columns
31before using them. e.g. function IsSelectable() in exported-sql-viewer.py
32
334. The export scripts themselves maintain forward compatibility (i.e. an existing
34script will continue to work with new versions of perf) by accepting a variable
35number of arguments (e.g. def call_return_table(*x)) i.e. perf can pass more
36arguments which old scripts will ignore.
37
385. The scripting engine tests for the existence of script handler functions
39before calling them. The scripting engine can also test for the support of new
40or optional features by checking for the existence and value of script global
41variables.
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 115eaacc455f..50c5b60101bd 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -88,21 +88,51 @@ smaller.
88 88
89To represent software control flow, "branches" samples are produced. By default 89To represent software control flow, "branches" samples are produced. By default
90a branch sample is synthesized for every single branch. To get an idea what 90a branch sample is synthesized for every single branch. To get an idea what
91data is available you can use the 'perf script' tool with no parameters, which 91data is available you can use the 'perf script' tool with all itrace sampling
92will list all the samples. 92options, which will list all the samples.
93 93
94 perf record -e intel_pt//u ls 94 perf record -e intel_pt//u ls
95 perf script 95 perf script --itrace=ibxwpe
96 96
97An interesting field that is not printed by default is 'flags' which can be 97An interesting field that is not printed by default is 'flags' which can be
98displayed as follows: 98displayed as follows:
99 99
100 perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags 100 perf script --itrace=ibxwpe -F+flags
101 101
102The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, 102The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
103system, asynchronous, interrupt, transaction abort, trace begin, trace end, and 103system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
104in transaction, respectively. 104in transaction, respectively.
105 105
106Another interesting field that is not printed by default is 'ipc' which can be
107displayed as follows:
108
109 perf script --itrace=be -F+ipc
110
111There are two ways that instructions-per-cycle (IPC) can be calculated depending
112on the recording.
113
114If the 'cyc' config term (see config terms section below) was used, then IPC is
115calculated using the cycle count from CYC packets, otherwise MTC packets are
116used - refer to the 'mtc' config term. When MTC is used, however, the values
117are less accurate because the timing is less accurate.
118
119Because Intel PT does not update the cycle count on every branch or instruction,
120the values will often be zero. When there are values, they will be the number
121of instructions and number of cycles since the last update, and thus represent
122the average IPC since the last IPC for that event type. Note IPC for "branches"
123events is calculated separately from IPC for "instructions" events.
124
125Also note that the IPC instruction count may or may not include the current
126instruction. If the cycle count is associated with an asynchronous branch
127(e.g. page fault or interrupt), then the instruction count does not include the
128current instruction, otherwise it does. That is consistent with whether or not
129that instruction has retired when the cycle count is updated.
130
131Another note, in the case of "branches" events, non-taken branches are not
132presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
133TNT packet that starts with a non-taken branch. To see every possible IPC
134value, "instructions" events can be used e.g. --itrace=i0ns
135
106While it is possible to create scripts to analyze the data, an alternative 136While it is possible to create scripts to analyze the data, an alternative
107approach is available to export the data to a sqlite or postgresql database. 137approach is available to export the data to a sqlite or postgresql database.
108Refer to script export-to-sqlite.py or export-to-postgresql.py for more details, 138Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
@@ -713,7 +743,7 @@ Having no option is the same as
713 743
714which, in turn, is the same as 744which, in turn, is the same as
715 745
716 --itrace=ibxwpe 746 --itrace=cepwx
717 747
718The letters are: 748The letters are:
719 749
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 462b3cde0675..e4aa268d2e38 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -564,9 +564,12 @@ llvm.*::
564 llvm.clang-bpf-cmd-template:: 564 llvm.clang-bpf-cmd-template::
565 Cmdline template. Below lines show its default value. Environment 565 Cmdline template. Below lines show its default value. Environment
566 variable is used to pass options. 566 variable is used to pass options.
567 "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS $KERNEL_INC_OPTIONS \ 567 "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
568 -Wno-unused-value -Wno-pointer-sign -working-directory \ 568 "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \
569 $WORKING_DIR -c $CLANG_SOURCE -target bpf -O2 -o -" 569 "$CLANG_OPTIONS $PERF_BPF_INC_OPTIONS $KERNEL_INC_OPTIONS " \
570 "-Wno-unused-value -Wno-pointer-sign " \
571 "-working-directory $WORKING_DIR " \
572 "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
570 573
571 llvm.clang-opt:: 574 llvm.clang-opt::
572 Options passed to clang. 575 Options passed to clang.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index da7809b15cc9..d5cc15e651cf 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -90,9 +90,10 @@ OPTIONS
90 90
91-c:: 91-c::
92--compute:: 92--compute::
93 Differential computation selection - delta, ratio, wdiff, delta-abs 93 Differential computation selection - delta, ratio, wdiff, cycles,
94 (default is delta-abs). Default can be changed using diff.compute 94 delta-abs (default is delta-abs). Default can be changed using
95 config option. See COMPARISON METHODS section for more info. 95 diff.compute config option. See COMPARISON METHODS section for
96 more info.
96 97
97-p:: 98-p::
98--period:: 99--period::
@@ -142,12 +143,14 @@ OPTIONS
142 perf diff --time 0%-10%,30%-40% 143 perf diff --time 0%-10%,30%-40%
143 144
144 It also supports analyzing samples within a given time window 145 It also supports analyzing samples within a given time window
145 <start>,<stop>. Times have the format seconds.microseconds. If 'start' 146 <start>,<stop>. Times have the format seconds.nanoseconds. If 'start'
146 is not given (i.e., time string is ',x.y') then analysis starts at 147 is not given (i.e. time string is ',x.y') then analysis starts at
147 the beginning of the file. If stop time is not given (i.e, time 148 the beginning of the file. If stop time is not given (i.e. time
148 string is 'x.y,') then analysis goes to the end of the file. Time string is 149 string is 'x.y,') then analysis goes to the end of the file.
149 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps for different 150 Multiple ranges can be separated by spaces, which requires the argument
150 perf.data files. 151 to be quoted e.g. --time "1234.567,1234.789 1235,"
 152	 Time string is 'a1.b1,c1.d1:a2.b2,c2.d2'. Use ':' to separate timestamps
153 for different perf.data files.
151 154
152 For example, we get the timestamp information from 'perf script'. 155 For example, we get the timestamp information from 'perf script'.
153 156
@@ -278,6 +281,16 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as:
278 - WEIGHT-A being the weight of the data file 281 - WEIGHT-A being the weight of the data file
279 - WEIGHT-B being the weight of the baseline data file 282 - WEIGHT-B being the weight of the baseline data file
280 283
284cycles
285~~~~~~
286If specified the '[Program Block Range] Cycles Diff' column is displayed.
287It displays the cycles difference of same program basic block amongst
288two perf.data. The program basic block is the code between two branches.
289
290'[Program Block Range]' indicates the range of a program basic block.
291Source line is reported if it can be found otherwise uses symbol+offset
292instead.
293
281SEE ALSO 294SEE ALSO
282-------- 295--------
283linkperf:perf-record[1], linkperf:perf-report[1] 296linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index de269430720a..15e0fa87241b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -490,6 +490,17 @@ Configure all used events to run in kernel space.
490--all-user:: 490--all-user::
491Configure all used events to run in user space. 491Configure all used events to run in user space.
492 492
493--kernel-callchains::
494Collect callchains only from kernel space. I.e. this option sets
495perf_event_attr.exclude_callchain_user to 1.
496
497--user-callchains::
498Collect callchains only from user space. I.e. this option sets
499perf_event_attr.exclude_callchain_kernel to 1.
500
501Don't use both --kernel-callchains and --user-callchains at the same time or no
502callchains will be collected.
503
493--timestamp-filename 504--timestamp-filename
494Append timestamp to output file name. 505Append timestamp to output file name.
495 506
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f441baa794ce..987261d158d4 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -89,7 +89,7 @@ OPTIONS
89 - socket: processor socket number the task ran at the time of sample 89 - socket: processor socket number the task ran at the time of sample
90 - srcline: filename and line number executed at the time of sample. The 90 - srcline: filename and line number executed at the time of sample. The
91 DWARF debugging info must be provided. 91 DWARF debugging info must be provided.
92 - srcfile: file name of the source file of the same. Requires dwarf 92 - srcfile: file name of the source file of the samples. Requires dwarf
93 information. 93 information.
94 - weight: Event specific weight, e.g. memory latency or transaction 94 - weight: Event specific weight, e.g. memory latency or transaction
95 abort cost. This is the global weight. 95 abort cost. This is the global weight.
@@ -412,12 +412,13 @@ OPTIONS
412 412
413--time:: 413--time::
414 Only analyze samples within given time window: <start>,<stop>. Times 414 Only analyze samples within given time window: <start>,<stop>. Times
415 have the format seconds.microseconds. If start is not given (i.e., time 415 have the format seconds.nanoseconds. If start is not given (i.e. time
416 string is ',x.y') then analysis starts at the beginning of the file. If 416 string is ',x.y') then analysis starts at the beginning of the file. If
417 stop time is not given (i.e, time string is 'x.y,') then analysis goes 417 stop time is not given (i.e. time string is 'x.y,') then analysis goes
418 to end of file. 418 to end of file. Multiple ranges can be separated by spaces, which
419 requires the argument to be quoted e.g. --time "1234.567,1234.789 1235,"
419 420
420 Also support time percent with multiple time range. Time string is 421 Also support time percent with multiple time ranges. Time string is
421 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. 422 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
422 423
423 For example: 424 For example:
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 9b0d04dd2a61..d4e2e18a5881 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -117,7 +117,7 @@ OPTIONS
117 Comma separated list of fields to print. Options are: 117 Comma separated list of fields to print. Options are:
118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, 118 comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, 119 srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
120 brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode. 120 brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc, srccode, ipc.
121 Field list can be prepended with the type, trace, sw or hw, 121 Field list can be prepended with the type, trace, sw or hw,
122 to indicate to which event type the field list applies. 122 to indicate to which event type the field list applies.
123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace 123 e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
@@ -203,6 +203,9 @@ OPTIONS
203 The synth field is used by synthesized events which may be created when 203 The synth field is used by synthesized events which may be created when
204 Instruction Trace decoding. 204 Instruction Trace decoding.
205 205
206 The ipc (instructions per cycle) field is synthesized and may have a value when
207 Instruction Trace decoding.
208
206 Finally, a user may not set fields to none for all event types. 209 Finally, a user may not set fields to none for all event types.
207 i.e., -F "" is not allowed. 210 i.e., -F "" is not allowed.
208 211
@@ -313,6 +316,9 @@ OPTIONS
313--show-round-events 316--show-round-events
314 Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND. 317 Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
315 318
319--show-bpf-events
320 Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT.
321
316--demangle:: 322--demangle::
317 Demangle symbol names to human readable form. It's enabled by default, 323 Demangle symbol names to human readable form. It's enabled by default,
318 disable with --no-demangle. 324 disable with --no-demangle.
@@ -355,12 +361,13 @@ include::itrace.txt[]
355 361
356--time:: 362--time::
357 Only analyze samples within given time window: <start>,<stop>. Times 363 Only analyze samples within given time window: <start>,<stop>. Times
358 have the format seconds.microseconds. If start is not given (i.e., time 364 have the format seconds.nanoseconds. If start is not given (i.e. time
359 string is ',x.y') then analysis starts at the beginning of the file. If 365 string is ',x.y') then analysis starts at the beginning of the file. If
360 stop time is not given (i.e, time string is 'x.y,') then analysis goes 366 stop time is not given (i.e. time string is 'x.y,') then analysis goes
361 to end of file. 367 to end of file. Multiple ranges can be separated by spaces, which
368 requires the argument to be quoted e.g. --time "1234.567,1234.789 1235,"
362 369
363 Also support time percent with multipe time range. Time string is 370 Also support time percent with multiple time ranges. Time string is
364 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. 371 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'.
365 372
366 For example: 373 For example:
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 1e312c2672e4..930c51c01201 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -200,6 +200,13 @@ use --per-socket in addition to -a. (system-wide). The output includes the
200socket number and the number of online processors on that socket. This is 200socket number and the number of online processors on that socket. This is
201useful to gauge the amount of aggregation. 201useful to gauge the amount of aggregation.
202 202
203--per-die::
204Aggregate counts per processor die for system-wide mode measurements. This
205is a useful mode to detect imbalance between dies. To enable this mode,
206use --per-die in addition to -a. (system-wide). The output includes the
207die number and the number of online processors on that die. This is
208useful to gauge the amount of aggregation.
209
203--per-core:: 210--per-core::
204Aggregate counts per physical processor for system-wide mode measurements. This 211Aggregate counts per physical processor for system-wide mode measurements. This
205is a useful mode to detect imbalance between physical cores. To enable this mode, 212is a useful mode to detect imbalance between physical cores. To enable this mode,
@@ -239,6 +246,9 @@ Input file name.
239--per-socket:: 246--per-socket::
240Aggregate counts per processor socket for system-wide mode measurements. 247Aggregate counts per processor socket for system-wide mode measurements.
241 248
249--per-die::
250Aggregate counts per processor die for system-wide mode measurements.
251
242--per-core:: 252--per-core::
243Aggregate counts per physical processor for system-wide mode measurements. 253Aggregate counts per physical processor for system-wide mode measurements.
244 254
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 44d89fb9c788..cfea87c6f38e 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -262,6 +262,11 @@ Default is to monitor all CPUS.
262 The number of threads to run when synthesizing events for existing processes. 262 The number of threads to run when synthesizing events for existing processes.
263 By default, the number of threads equals to the number of online CPUs. 263 By default, the number of threads equals to the number of online CPUs.
264 264
265--namespaces::
266 Record events of type PERF_RECORD_NAMESPACES and display it with the
267 'cgroup_id' sort key.
268
269
265INTERACTIVE PROMPTING KEYS 270INTERACTIVE PROMPTING KEYS
266-------------------------- 271--------------------------
267 272
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 6967e9b02be5..5f54feb19977 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -151,25 +151,45 @@ struct {
151 151
152 HEADER_CPU_TOPOLOGY = 13, 152 HEADER_CPU_TOPOLOGY = 13,
153 153
154String lists defining the core and CPU threads topology.
155The string lists are followed by a variable length array
156which contains core_id and socket_id of each cpu.
157The number of entries can be determined by the size of the
158section minus the sizes of both string lists.
159
160struct { 154struct {
155 /*
156 * First revision of HEADER_CPU_TOPOLOGY
157 *
158 * See 'struct perf_header_string_list' definition earlier
159 * in this file.
160 */
161
161 struct perf_header_string_list cores; /* Variable length */ 162 struct perf_header_string_list cores; /* Variable length */
162 struct perf_header_string_list threads; /* Variable length */ 163 struct perf_header_string_list threads; /* Variable length */
164
165 /*
166 * Second revision of HEADER_CPU_TOPOLOGY, older tools
167 * will not consider what comes next
168 */
169
163 struct { 170 struct {
164 uint32_t core_id; 171 uint32_t core_id;
165 uint32_t socket_id; 172 uint32_t socket_id;
166 } cpus[nr]; /* Variable length records */ 173 } cpus[nr]; /* Variable length records */
174 /* 'nr' comes from previously processed HEADER_NRCPUS's nr_cpu_avail */
175
176 /*
177 * Third revision of HEADER_CPU_TOPOLOGY, older tools
178 * will not consider what comes next
179 */
180
181 struct perf_header_string_list dies; /* Variable length */
182 uint32_t die_id[nr_cpus_avail]; /* from previously processed HEADER_NR_CPUS, VLA */
167}; 183};
168 184
169Example: 185Example:
170 sibling cores : 0-3 186 sibling sockets : 0-8
187 sibling dies : 0-3
188 sibling dies : 4-7
171 sibling threads : 0-1 189 sibling threads : 0-1
172 sibling threads : 2-3 190 sibling threads : 2-3
191 sibling threads : 4-5
192 sibling threads : 6-7
173 193
174 HEADER_NUMA_TOPOLOGY = 14, 194 HEADER_NUMA_TOPOLOGY = 14,
175 195
@@ -272,6 +292,69 @@ struct {
272 292
273Two uint64_t for the time of first sample and the time of last sample. 293Two uint64_t for the time of first sample and the time of last sample.
274 294
 295	 HEADER_MEM_TOPOLOGY = 22,
296
297Physical memory map and its node assignments.
298
299The format of data in MEM_TOPOLOGY is as follows:
300
301 0 - version | for future changes
302 8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
303 16 - count | number of nodes
304
305For each node we store map of physical indexes:
306
307 32 - node id | node index
308 40 - size | size of bitmap
309 48 - bitmap | bitmap of memory indexes that belongs to node
310 | /sys/devices/system/node/node<NODE>/memory<INDEX>
311
312The MEM_TOPOLOGY can be displayed with following command:
313
314$ perf report --header-only -I
315...
316# memory nodes (nr 1, block size 0x8000000):
317# 0 [7G]: 0-23,32-69
318
319 HEADER_CLOCKID = 23,
320
321One uint64_t for the clockid frequency, specified, for instance, via 'perf
322record -k' (see clock_gettime()), to enable timestamps derived metrics
323conversion into wall clock time on the reporting stage.
324
325 HEADER_DIR_FORMAT = 24,
326
327The data files layout is described by HEADER_DIR_FORMAT feature. Currently it
328holds only version number (1):
329
330 uint64_t version;
331
332The current version holds only version value (1) means that data files:
333
334- Follow the 'data.*' name format.
335
336- Contain raw events data in standard perf format as read from kernel (and need
337 to be sorted)
338
339Future versions are expected to describe different data files layout according
340to special needs.
341
342 HEADER_BPF_PROG_INFO = 25,
343
344struct bpf_prog_info_linear, which contains detailed information about
345a BPF program, including type, id, tag, jited/xlated instructions, etc.
346
347 HEADER_BPF_BTF = 26,
348
349Contains BPF Type Format (BTF). For more information about BTF, please
350refer to Documentation/bpf/btf.rst.
351
352struct {
353 u32 id;
354 u32 data_size;
355 char data[];
356};
357
275 HEADER_COMPRESSED = 27, 358 HEADER_COMPRESSED = 27,
276 359
277struct { 360struct {
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
index 869965d629ce..825745a645c1 100644
--- a/tools/perf/Documentation/tips.txt
+++ b/tools/perf/Documentation/tips.txt
@@ -38,6 +38,6 @@ To report cacheline events from previous recording: perf c2c report
38To browse sample contexts use perf report --sample 10 and select in context menu 38To browse sample contexts use perf report --sample 10 and select in context menu
39To separate samples by time use perf report --sort time,overhead,sym 39To separate samples by time use perf report --sort time,overhead,sym
40To set sample time separation other than 100ms with --sort time use --time-quantum 40To set sample time separation other than 100ms with --sort time use --time-quantum
41Add -I to perf report to sample register values visible in perf report context. 41Add -I to perf record to sample register values, which will be visible in perf report sample context.
42To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context 42To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context
43To show context switches in perf report sample context add --switch-events to perf record. 43To show context switches in perf report sample context add --switch-events to perf record.
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 627b7cada144..6a5de44b2de9 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -7,6 +7,8 @@ tools/lib/traceevent
7tools/lib/api 7tools/lib/api
8tools/lib/bpf 8tools/lib/bpf
9tools/lib/subcmd 9tools/lib/subcmd
10tools/lib/argv_split.c
11tools/lib/ctype.c
10tools/lib/hweight.c 12tools/lib/hweight.c
11tools/lib/rbtree.c 13tools/lib/rbtree.c
12tools/lib/string.c 14tools/lib/string.c
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index a6251e622c7e..89ac5a1f1550 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -417,6 +417,9 @@ ifdef CORESIGHT
417 $(call feature_check,libopencsd) 417 $(call feature_check,libopencsd)
418 ifeq ($(feature-libopencsd), 1) 418 ifeq ($(feature-libopencsd), 1)
419 CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) 419 CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
420 ifeq ($(feature-reallocarray), 0)
421 CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
422 endif
420 LDFLAGS += $(LIBOPENCSD_LDFLAGS) 423 LDFLAGS += $(LIBOPENCSD_LDFLAGS)
421 EXTLIBS += $(OPENCSDLIBS) 424 EXTLIBS += $(OPENCSDLIBS)
422 $(call detected,CONFIG_LIBOPENCSD) 425 $(call detected,CONFIG_LIBOPENCSD)
@@ -641,11 +644,15 @@ endif
641 644
642ifndef NO_SLANG 645ifndef NO_SLANG
643 ifneq ($(feature-libslang), 1) 646 ifneq ($(feature-libslang), 1)
644 msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev); 647 ifneq ($(feature-libslang-include-subdir), 1)
645 NO_SLANG := 1 648 msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
646 else 649 NO_SLANG := 1
650 else
651 CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR
652 endif
653 endif
654 ifndef NO_SLANG
647 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h 655 # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
648 CFLAGS += -I/usr/include/slang
649 CFLAGS += -DHAVE_SLANG_SUPPORT 656 CFLAGS += -DHAVE_SLANG_SUPPORT
650 EXTLIBS += -lslang 657 EXTLIBS += -lslang
651 $(call detected,CONFIG_SLANG) 658 $(call detected,CONFIG_SLANG)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4d46ca6d7e20..0fffd2bb6cd9 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -420,6 +420,24 @@ fadvise_advice_tbl := $(srctree)/tools/perf/trace/beauty/fadvise.sh
420$(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl) 420$(fadvise_advice_array): $(linux_uapi_dir)/in.h $(fadvise_advice_tbl)
421 $(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@ 421 $(Q)$(SHELL) '$(fadvise_advice_tbl)' $(linux_uapi_dir) > $@
422 422
423fsmount_arrays := $(beauty_outdir)/fsmount_arrays.c
424fsmount_tbls := $(srctree)/tools/perf/trace/beauty/fsmount.sh
425
426$(fsmount_arrays): $(linux_uapi_dir)/fs.h $(fsmount_tbls)
427 $(Q)$(SHELL) '$(fsmount_tbls)' $(linux_uapi_dir) > $@
428
429fspick_arrays := $(beauty_outdir)/fspick_arrays.c
430fspick_tbls := $(srctree)/tools/perf/trace/beauty/fspick.sh
431
432$(fspick_arrays): $(linux_uapi_dir)/fs.h $(fspick_tbls)
433 $(Q)$(SHELL) '$(fspick_tbls)' $(linux_uapi_dir) > $@
434
435fsconfig_arrays := $(beauty_outdir)/fsconfig_arrays.c
436fsconfig_tbls := $(srctree)/tools/perf/trace/beauty/fsconfig.sh
437
438$(fsconfig_arrays): $(linux_uapi_dir)/fs.h $(fsconfig_tbls)
439 $(Q)$(SHELL) '$(fsconfig_tbls)' $(linux_uapi_dir) > $@
440
423pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c 441pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
424asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/ 442asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
425pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh 443pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -494,6 +512,12 @@ mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh
494$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl) 512$(mount_flags_array): $(linux_uapi_dir)/fs.h $(mount_flags_tbl)
495 $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@ 513 $(Q)$(SHELL) '$(mount_flags_tbl)' $(linux_uapi_dir) > $@
496 514
515move_mount_flags_array := $(beauty_outdir)/move_mount_flags_array.c
516move_mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/move_mount_flags.sh
517
518$(move_mount_flags_array): $(linux_uapi_dir)/fs.h $(move_mount_flags_tbl)
519 $(Q)$(SHELL) '$(move_mount_flags_tbl)' $(linux_uapi_dir) > $@
520
497prctl_option_array := $(beauty_outdir)/prctl_option_array.c 521prctl_option_array := $(beauty_outdir)/prctl_option_array.c
498prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/ 522prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
499prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh 523prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
@@ -526,6 +550,12 @@ arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
526$(arch_errno_name_array): $(arch_errno_tbl) 550$(arch_errno_name_array): $(arch_errno_tbl)
527 $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ 551 $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
528 552
553sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
554sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
555
556$(sync_file_range_arrays): $(linux_uapi_dir)/fs.h $(sync_file_range_tbls)
557 $(Q)$(SHELL) '$(sync_file_range_tbls)' $(linux_uapi_dir) > $@
558
529all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) 559all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
530 560
531# Create python binding output directory if not already present 561# Create python binding output directory if not already present
@@ -629,6 +659,9 @@ build-dir = $(if $(__build-dir),$(__build-dir),.)
629 659
630prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \ 660prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \
631 $(fadvise_advice_array) \ 661 $(fadvise_advice_array) \
662 $(fsconfig_arrays) \
663 $(fsmount_arrays) \
664 $(fspick_arrays) \
632 $(pkey_alloc_access_rights_array) \ 665 $(pkey_alloc_access_rights_array) \
633 $(sndrv_pcm_ioctl_array) \ 666 $(sndrv_pcm_ioctl_array) \
634 $(sndrv_ctl_ioctl_array) \ 667 $(sndrv_ctl_ioctl_array) \
@@ -639,12 +672,14 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
639 $(madvise_behavior_array) \ 672 $(madvise_behavior_array) \
640 $(mmap_flags_array) \ 673 $(mmap_flags_array) \
641 $(mount_flags_array) \ 674 $(mount_flags_array) \
675 $(move_mount_flags_array) \
642 $(perf_ioctl_array) \ 676 $(perf_ioctl_array) \
643 $(prctl_option_array) \ 677 $(prctl_option_array) \
644 $(usbdevfs_ioctl_array) \ 678 $(usbdevfs_ioctl_array) \
645 $(x86_arch_prctl_code_array) \ 679 $(x86_arch_prctl_code_array) \
646 $(rename_flags_array) \ 680 $(rename_flags_array) \
647 $(arch_errno_name_array) 681 $(arch_errno_name_array) \
682 $(sync_file_range_arrays)
648 683
649$(OUTPUT)%.o: %.c prepare FORCE 684$(OUTPUT)%.o: %.c prepare FORCE
650 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ 685 $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
@@ -923,9 +958,13 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
923 $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \ 958 $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
924 $(OUTPUT)pmu-events/pmu-events.c \ 959 $(OUTPUT)pmu-events/pmu-events.c \
925 $(OUTPUT)$(fadvise_advice_array) \ 960 $(OUTPUT)$(fadvise_advice_array) \
961 $(OUTPUT)$(fsconfig_arrays) \
962 $(OUTPUT)$(fsmount_arrays) \
963 $(OUTPUT)$(fspick_arrays) \
926 $(OUTPUT)$(madvise_behavior_array) \ 964 $(OUTPUT)$(madvise_behavior_array) \
927 $(OUTPUT)$(mmap_flags_array) \ 965 $(OUTPUT)$(mmap_flags_array) \
928 $(OUTPUT)$(mount_flags_array) \ 966 $(OUTPUT)$(mount_flags_array) \
967 $(OUTPUT)$(move_mount_flags_array) \
929 $(OUTPUT)$(drm_ioctl_array) \ 968 $(OUTPUT)$(drm_ioctl_array) \
930 $(OUTPUT)$(pkey_alloc_access_rights_array) \ 969 $(OUTPUT)$(pkey_alloc_access_rights_array) \
931 $(OUTPUT)$(sndrv_ctl_ioctl_array) \ 970 $(OUTPUT)$(sndrv_ctl_ioctl_array) \
@@ -939,7 +978,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
939 $(OUTPUT)$(usbdevfs_ioctl_array) \ 978 $(OUTPUT)$(usbdevfs_ioctl_array) \
940 $(OUTPUT)$(x86_arch_prctl_code_array) \ 979 $(OUTPUT)$(x86_arch_prctl_code_array) \
941 $(OUTPUT)$(rename_flags_array) \ 980 $(OUTPUT)$(rename_flags_array) \
942 $(OUTPUT)$(arch_errno_name_array) 981 $(OUTPUT)$(arch_errno_name_array) \
982 $(OUTPUT)$(sync_file_range_arrays)
943 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean 983 $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
944 984
945# 985#
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 911426721170..2b83cc8e4796 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -22,6 +22,7 @@
22#include "../../util/pmu.h" 22#include "../../util/pmu.h"
23#include "../../util/thread_map.h" 23#include "../../util/thread_map.h"
24#include "../../util/cs-etm.h" 24#include "../../util/cs-etm.h"
25#include "../../util/util.h"
25 26
26#include <errno.h> 27#include <errno.h>
27#include <stdlib.h> 28#include <stdlib.h>
@@ -31,12 +32,158 @@ struct cs_etm_recording {
31 struct auxtrace_record itr; 32 struct auxtrace_record itr;
32 struct perf_pmu *cs_etm_pmu; 33 struct perf_pmu *cs_etm_pmu;
33 struct perf_evlist *evlist; 34 struct perf_evlist *evlist;
35 int wrapped_cnt;
36 bool *wrapped;
34 bool snapshot_mode; 37 bool snapshot_mode;
35 size_t snapshot_size; 38 size_t snapshot_size;
36}; 39};
37 40
41static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
42 [CS_ETM_ETMCCER] = "mgmt/etmccer",
43 [CS_ETM_ETMIDR] = "mgmt/etmidr",
44};
45
46static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
47 [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
48 [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
49 [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
50 [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
51 [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
52};
53
38static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu); 54static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
39 55
56static int cs_etm_set_context_id(struct auxtrace_record *itr,
57 struct perf_evsel *evsel, int cpu)
58{
59 struct cs_etm_recording *ptr;
60 struct perf_pmu *cs_etm_pmu;
61 char path[PATH_MAX];
62 int err = -EINVAL;
63 u32 val;
64
65 ptr = container_of(itr, struct cs_etm_recording, itr);
66 cs_etm_pmu = ptr->cs_etm_pmu;
67
68 if (!cs_etm_is_etmv4(itr, cpu))
69 goto out;
70
71 /* Get a handle on TRCIRD2 */
72 snprintf(path, PATH_MAX, "cpu%d/%s",
73 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
74 err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
75
76 /* There was a problem reading the file, bailing out */
77 if (err != 1) {
78 pr_err("%s: can't read file %s\n",
79 CORESIGHT_ETM_PMU_NAME, path);
80 goto out;
81 }
82
83 /*
84 * TRCIDR2.CIDSIZE, bit [9-5], indicates whether contextID tracing
85 * is supported:
86 * 0b00000 Context ID tracing is not supported.
87 * 0b00100 Maximum of 32-bit Context ID size.
88 * All other values are reserved.
89 */
90 val = BMVAL(val, 5, 9);
91 if (!val || val != 0x4) {
92 err = -EINVAL;
93 goto out;
94 }
95
96 /* All good, let the kernel know */
97 evsel->attr.config |= (1 << ETM_OPT_CTXTID);
98 err = 0;
99
100out:
101
102 return err;
103}
104
105static int cs_etm_set_timestamp(struct auxtrace_record *itr,
106 struct perf_evsel *evsel, int cpu)
107{
108 struct cs_etm_recording *ptr;
109 struct perf_pmu *cs_etm_pmu;
110 char path[PATH_MAX];
111 int err = -EINVAL;
112 u32 val;
113
114 ptr = container_of(itr, struct cs_etm_recording, itr);
115 cs_etm_pmu = ptr->cs_etm_pmu;
116
117 if (!cs_etm_is_etmv4(itr, cpu))
118 goto out;
119
120 /* Get a handle on TRCIRD0 */
121 snprintf(path, PATH_MAX, "cpu%d/%s",
122 cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
123 err = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
124
125 /* There was a problem reading the file, bailing out */
126 if (err != 1) {
127 pr_err("%s: can't read file %s\n",
128 CORESIGHT_ETM_PMU_NAME, path);
129 goto out;
130 }
131
132 /*
133 * TRCIDR0.TSSIZE, bit [28-24], indicates whether global timestamping
134 * is supported:
135 * 0b00000 Global timestamping is not implemented
136 * 0b00110 Implementation supports a maximum timestamp of 48bits.
137 * 0b01000 Implementation supports a maximum timestamp of 64bits.
138 */
139 val &= GENMASK(28, 24);
140 if (!val) {
141 err = -EINVAL;
142 goto out;
143 }
144
145 /* All good, let the kernel know */
146 evsel->attr.config |= (1 << ETM_OPT_TS);
147 err = 0;
148
149out:
150 return err;
151}
152
153static int cs_etm_set_option(struct auxtrace_record *itr,
154 struct perf_evsel *evsel, u32 option)
155{
156 int i, err = -EINVAL;
157 struct cpu_map *event_cpus = evsel->evlist->cpus;
158 struct cpu_map *online_cpus = cpu_map__new(NULL);
159
160 /* Set option of each CPU we have */
161 for (i = 0; i < cpu__max_cpu(); i++) {
162 if (!cpu_map__has(event_cpus, i) ||
163 !cpu_map__has(online_cpus, i))
164 continue;
165
166 if (option & ETM_OPT_CTXTID) {
167 err = cs_etm_set_context_id(itr, evsel, i);
168 if (err)
169 goto out;
170 }
171 if (option & ETM_OPT_TS) {
172 err = cs_etm_set_timestamp(itr, evsel, i);
173 if (err)
174 goto out;
175 }
176 if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS))
177 /* Nothing else is currently supported */
178 goto out;
179 }
180
181 err = 0;
182out:
183 cpu_map__put(online_cpus);
184 return err;
185}
186
40static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr, 187static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
41 struct record_opts *opts, 188 struct record_opts *opts,
42 const char *str) 189 const char *str)
@@ -105,12 +252,16 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
105 container_of(itr, struct cs_etm_recording, itr); 252 container_of(itr, struct cs_etm_recording, itr);
106 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; 253 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
107 struct perf_evsel *evsel, *cs_etm_evsel = NULL; 254 struct perf_evsel *evsel, *cs_etm_evsel = NULL;
108 const struct cpu_map *cpus = evlist->cpus; 255 struct cpu_map *cpus = evlist->cpus;
109 bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0); 256 bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
257 int err = 0;
110 258
111 ptr->evlist = evlist; 259 ptr->evlist = evlist;
112 ptr->snapshot_mode = opts->auxtrace_snapshot_mode; 260 ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
113 261
262 if (perf_can_record_switch_events())
263 opts->record_switch_events = true;
264
114 evlist__for_each_entry(evlist, evsel) { 265 evlist__for_each_entry(evlist, evsel) {
115 if (evsel->attr.type == cs_etm_pmu->type) { 266 if (evsel->attr.type == cs_etm_pmu->type) {
116 if (cs_etm_evsel) { 267 if (cs_etm_evsel) {
@@ -241,19 +392,25 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
241 392
242 /* 393 /*
243 * In the case of per-cpu mmaps, we need the CPU on the 394 * In the case of per-cpu mmaps, we need the CPU on the
244 * AUX event. 395 * AUX event. We also need the contextID in order to be notified
396 * when a context switch happened.
245 */ 397 */
246 if (!cpu_map__empty(cpus)) 398 if (!cpu_map__empty(cpus)) {
247 perf_evsel__set_sample_bit(cs_etm_evsel, CPU); 399 perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
248 400
401 err = cs_etm_set_option(itr, cs_etm_evsel,
402 ETM_OPT_CTXTID | ETM_OPT_TS);
403 if (err)
404 goto out;
405 }
406
249 /* Add dummy event to keep tracking */ 407 /* Add dummy event to keep tracking */
250 if (opts->full_auxtrace) { 408 if (opts->full_auxtrace) {
251 struct perf_evsel *tracking_evsel; 409 struct perf_evsel *tracking_evsel;
252 int err;
253 410
254 err = parse_events(evlist, "dummy:u", NULL); 411 err = parse_events(evlist, "dummy:u", NULL);
255 if (err) 412 if (err)
256 return err; 413 goto out;
257 414
258 tracking_evsel = perf_evlist__last(evlist); 415 tracking_evsel = perf_evlist__last(evlist);
259 perf_evlist__set_tracking_event(evlist, tracking_evsel); 416 perf_evlist__set_tracking_event(evlist, tracking_evsel);
@@ -266,7 +423,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
266 perf_evsel__set_sample_bit(tracking_evsel, TIME); 423 perf_evsel__set_sample_bit(tracking_evsel, TIME);
267 } 424 }
268 425
269 return 0; 426out:
427 return err;
270} 428}
271 429
272static u64 cs_etm_get_config(struct auxtrace_record *itr) 430static u64 cs_etm_get_config(struct auxtrace_record *itr)
@@ -314,6 +472,8 @@ static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
314 config_opts = cs_etm_get_config(itr); 472 config_opts = cs_etm_get_config(itr);
315 if (config_opts & BIT(ETM_OPT_CYCACC)) 473 if (config_opts & BIT(ETM_OPT_CYCACC))
316 config |= BIT(ETM4_CFG_BIT_CYCACC); 474 config |= BIT(ETM4_CFG_BIT_CYCACC);
475 if (config_opts & BIT(ETM_OPT_CTXTID))
476 config |= BIT(ETM4_CFG_BIT_CTXTID);
317 if (config_opts & BIT(ETM_OPT_TS)) 477 if (config_opts & BIT(ETM_OPT_TS))
318 config |= BIT(ETM4_CFG_BIT_TS); 478 config |= BIT(ETM4_CFG_BIT_TS);
319 if (config_opts & BIT(ETM_OPT_RETSTK)) 479 if (config_opts & BIT(ETM_OPT_RETSTK))
@@ -363,19 +523,6 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
363 (etmv3 * CS_ETMV3_PRIV_SIZE)); 523 (etmv3 * CS_ETMV3_PRIV_SIZE));
364} 524}
365 525
366static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
367 [CS_ETM_ETMCCER] = "mgmt/etmccer",
368 [CS_ETM_ETMIDR] = "mgmt/etmidr",
369};
370
371static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
372 [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
373 [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
374 [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
375 [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
376 [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
377};
378
379static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu) 526static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
380{ 527{
381 bool ret = false; 528 bool ret = false;
@@ -536,16 +683,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
536 return 0; 683 return 0;
537} 684}
538 685
539static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused, 686static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
687{
688 bool *wrapped;
689 int cnt = ptr->wrapped_cnt;
690
691 /* Make @ptr->wrapped as big as @idx */
692 while (cnt <= idx)
693 cnt++;
694
695 /*
696 * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
697 * cross compilation problems where the host's system supports
698 * reallocarray() but not the target.
699 */
700 wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
701 if (!wrapped)
702 return -ENOMEM;
703
704 wrapped[cnt - 1] = false;
705 ptr->wrapped_cnt = cnt;
706 ptr->wrapped = wrapped;
707
708 return 0;
709}
710
711static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
712 size_t buffer_size, u64 head)
713{
714 u64 i, watermark;
715 u64 *buf = (u64 *)buffer;
716 size_t buf_size = buffer_size;
717
718 /*
719 * We want to look the very last 512 byte (chosen arbitrarily) in
720 * the ring buffer.
721 */
722 watermark = buf_size - 512;
723
724 /*
725 * @head is continuously increasing - if its value is equal or greater
726 * than the size of the ring buffer, it has wrapped around.
727 */
728 if (head >= buffer_size)
729 return true;
730
731 /*
732 * The value of @head is somewhere within the size of the ring buffer.
733 * This can be that there hasn't been enough data to fill the ring
734 * buffer yet or the trace time was so long that @head has numerically
735 * wrapped around. To find we need to check if we have data at the very
736 * end of the ring buffer. We can reliably do this because mmap'ed
737 * pages are zeroed out and there is a fresh mapping with every new
738 * session.
739 */
740
741 /* @head is less than 512 byte from the end of the ring buffer */
742 if (head > watermark)
743 watermark = head;
744
745 /*
746 * Speed things up by using 64 bit transactions (see "u64 *buf" above)
747 */
748 watermark >>= 3;
749 buf_size >>= 3;
750
751 /*
752 * If we find trace data at the end of the ring buffer, @head has
753 * been there and has numerically wrapped around at least once.
754 */
755 for (i = watermark; i < buf_size; i++)
756 if (buf[i])
757 return true;
758
759 return false;
760}
761
762static int cs_etm_find_snapshot(struct auxtrace_record *itr,
540 int idx, struct auxtrace_mmap *mm, 763 int idx, struct auxtrace_mmap *mm,
541 unsigned char *data __maybe_unused, 764 unsigned char *data,
542 u64 *head, u64 *old) 765 u64 *head, u64 *old)
543{ 766{
767 int err;
768 bool wrapped;
769 struct cs_etm_recording *ptr =
770 container_of(itr, struct cs_etm_recording, itr);
771
772 /*
773 * Allocate memory to keep track of wrapping if this is the first
774 * time we deal with this *mm.
775 */
776 if (idx >= ptr->wrapped_cnt) {
777 err = cs_etm_alloc_wrapped_array(ptr, idx);
778 if (err)
779 return err;
780 }
781
782 /*
783 * Check to see if *head has wrapped around. If it hasn't only the
784 * amount of data between *head and *old is snapshot'ed to avoid
785 * bloating the perf.data file with zeros. But as soon as *head has
786 * wrapped around the entire size of the AUX ring buffer it taken.
787 */
788 wrapped = ptr->wrapped[idx];
789 if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
790 wrapped = true;
791 ptr->wrapped[idx] = true;
792 }
793
544 pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n", 794 pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
545 __func__, idx, (size_t)*old, (size_t)*head, mm->len); 795 __func__, idx, (size_t)*old, (size_t)*head, mm->len);
546 796
547 *old = *head; 797 /* No wrap has occurred, we can just use *head and *old. */
548 *head += mm->len; 798 if (!wrapped)
799 return 0;
800
801 /*
802 * *head has wrapped around - adjust *head and *old to pickup the
803 * entire content of the AUX buffer.
804 */
805 if (*head >= mm->len) {
806 *old = *head - mm->len;
807 } else {
808 *head += mm->len;
809 *old = *head - mm->len;
810 }
549 811
550 return 0; 812 return 0;
551} 813}
@@ -586,6 +848,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
586{ 848{
587 struct cs_etm_recording *ptr = 849 struct cs_etm_recording *ptr =
588 container_of(itr, struct cs_etm_recording, itr); 850 container_of(itr, struct cs_etm_recording, itr);
851
852 zfree(&ptr->wrapped);
589 free(ptr); 853 free(ptr);
590} 854}
591 855
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
index 36222e64bbf7..a7dd46a5b678 100644
--- a/tools/perf/arch/arm64/Build
+++ b/tools/perf/arch/arm64/Build
@@ -1,2 +1,2 @@
1perf-y += util/ 1perf-y += util/
2perf-$(CONFIG_DWARF_UNWIND) += tests/ 2perf-y += tests/
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
index 41707fea74b3..a61c06bdb757 100644
--- a/tools/perf/arch/arm64/tests/Build
+++ b/tools/perf/arch/arm64/tests/Build
@@ -1,4 +1,4 @@
1perf-y += regs_load.o 1perf-y += regs_load.o
2perf-y += dwarf-unwind.o 2perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
3 3
4perf-y += arch-tests.o 4perf-y += arch-tests.o
diff --git a/tools/perf/arch/csky/annotate/instructions.c b/tools/perf/arch/csky/annotate/instructions.c
new file mode 100644
index 000000000000..5337bfb7d5fc
--- /dev/null
+++ b/tools/perf/arch/csky/annotate/instructions.c
@@ -0,0 +1,48 @@
1// SPDX-License-Identifier: GPL-2.0
2// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
3
4#include <linux/compiler.h>
5
6static struct ins_ops *csky__associate_ins_ops(struct arch *arch,
7 const char *name)
8{
9 struct ins_ops *ops = NULL;
10
11 /* catch all kind of jumps */
12 if (!strcmp(name, "bt") ||
13 !strcmp(name, "bf") ||
14 !strcmp(name, "bez") ||
15 !strcmp(name, "bnez") ||
16 !strcmp(name, "bnezad") ||
17 !strcmp(name, "bhsz") ||
18 !strcmp(name, "bhz") ||
19 !strcmp(name, "blsz") ||
20 !strcmp(name, "blz") ||
21 !strcmp(name, "br") ||
22 !strcmp(name, "jmpi") ||
23 !strcmp(name, "jmp"))
24 ops = &jump_ops;
25
26 /* catch function call */
27 if (!strcmp(name, "bsr") ||
28 !strcmp(name, "jsri") ||
29 !strcmp(name, "jsr"))
30 ops = &call_ops;
31
32 /* catch function return */
33 if (!strcmp(name, "rts"))
34 ops = &ret_ops;
35
36 if (ops)
37 arch__associate_ins_ops(arch, name, ops);
38 return ops;
39}
40
41static int csky__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
42{
43 arch->initialized = true;
44 arch->objdump.comment_char = '/';
45 arch->associate_instruction_ops = csky__associate_ins_ops;
46
47 return 0;
48}
diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c
index 3db85cd2069e..a25896135abe 100644
--- a/tools/perf/arch/s390/util/header.c
+++ b/tools/perf/arch/s390/util/header.c
@@ -11,7 +11,7 @@
11#include <unistd.h> 11#include <unistd.h>
12#include <stdio.h> 12#include <stdio.h>
13#include <string.h> 13#include <string.h>
14#include <ctype.h> 14#include <linux/ctype.h>
15 15
16#include "../../util/header.h" 16#include "../../util/header.h"
17#include "../../util/util.h" 17#include "../../util/util.h"
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 613709cfbbd0..c41c5affe4be 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -9,6 +9,7 @@ struct test;
9int test__rdpmc(struct test *test __maybe_unused, int subtest); 9int test__rdpmc(struct test *test __maybe_unused, int subtest);
10int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest); 10int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
11int test__insn_x86(struct test *test __maybe_unused, int subtest); 11int test__insn_x86(struct test *test __maybe_unused, int subtest);
12int test__intel_pt_pkt_decoder(struct test *test, int subtest);
12int test__bp_modify(struct test *test, int subtest); 13int test__bp_modify(struct test *test, int subtest);
13 14
14#ifdef HAVE_DWARF_UNWIND_SUPPORT 15#ifdef HAVE_DWARF_UNWIND_SUPPORT
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 3d83d0c6982d..2997c506550c 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -4,5 +4,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
4perf-y += arch-tests.o 4perf-y += arch-tests.o
5perf-y += rdpmc.o 5perf-y += rdpmc.o
6perf-y += perf-time-to-tsc.o 6perf-y += perf-time-to-tsc.o
7perf-$(CONFIG_AUXTRACE) += insn-x86.o 7perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
8perf-$(CONFIG_X86_64) += bp-modify.o 8perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index d47d3f8e3c8e..6763135aec17 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -23,6 +23,10 @@ struct test arch_tests[] = {
23 .desc = "x86 instruction decoder - new instructions", 23 .desc = "x86 instruction decoder - new instructions",
24 .func = test__insn_x86, 24 .func = test__insn_x86,
25 }, 25 },
26 {
27 .desc = "Intel PT packet decoder",
28 .func = test__intel_pt_pkt_decoder,
29 },
26#endif 30#endif
27#if defined(__x86_64__) 31#if defined(__x86_64__)
28 { 32 {
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
index 90a4a8c58a62..94aa0b673b7f 100644
--- a/tools/perf/arch/x86/tests/intel-cqm.c
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -6,6 +6,7 @@
6#include "evlist.h" 6#include "evlist.h"
7#include "evsel.h" 7#include "evsel.h"
8#include "arch-tests.h" 8#include "arch-tests.h"
9#include "util.h"
9 10
10#include <signal.h> 11#include <signal.h>
11#include <sys/mman.h> 12#include <sys/mman.h>
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
new file mode 100644
index 000000000000..901bf1f449c4
--- /dev/null
+++ b/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
@@ -0,0 +1,304 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <string.h>
4
5#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
6
7#include "debug.h"
8#include "tests/tests.h"
9#include "arch-tests.h"
10
11/**
12 * struct test_data - Test data.
13 * @len: number of bytes to decode
14 * @bytes: bytes to decode
15 * @ctx: packet context to decode
16 * @packet: expected packet
17 * @new_ctx: expected new packet context
18 * @ctx_unchanged: the packet context must not change
19 */
20struct test_data {
21 int len;
22 u8 bytes[INTEL_PT_PKT_MAX_SZ];
23 enum intel_pt_pkt_ctx ctx;
24 struct intel_pt_pkt packet;
25 enum intel_pt_pkt_ctx new_ctx;
26 int ctx_unchanged;
27} data[] = {
28 /* Padding Packet */
29 {1, {0}, 0, {INTEL_PT_PAD, 0, 0}, 0, 1 },
30 /* Short Taken/Not Taken Packet */
31 {1, {4}, 0, {INTEL_PT_TNT, 1, 0}, 0, 0 },
32 {1, {6}, 0, {INTEL_PT_TNT, 1, 0x20ULL << 58}, 0, 0 },
33 {1, {0x80}, 0, {INTEL_PT_TNT, 6, 0}, 0, 0 },
34 {1, {0xfe}, 0, {INTEL_PT_TNT, 6, 0x3fULL << 58}, 0, 0 },
35 /* Long Taken/Not Taken Packet */
36 {8, {0x02, 0xa3, 2}, 0, {INTEL_PT_TNT, 1, 0xa302ULL << 47}, 0, 0 },
37 {8, {0x02, 0xa3, 3}, 0, {INTEL_PT_TNT, 1, 0x1a302ULL << 47}, 0, 0 },
38 {8, {0x02, 0xa3, 0, 0, 0, 0, 0, 0x80}, 0, {INTEL_PT_TNT, 47, 0xa302ULL << 1}, 0, 0 },
39 {8, {0x02, 0xa3, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_TNT, 47, 0xffffffffffffa302ULL << 1}, 0, 0 },
40 /* Target IP Packet */
41 {1, {0x0d}, 0, {INTEL_PT_TIP, 0, 0}, 0, 0 },
42 {3, {0x2d, 1, 2}, 0, {INTEL_PT_TIP, 1, 0x201}, 0, 0 },
43 {5, {0x4d, 1, 2, 3, 4}, 0, {INTEL_PT_TIP, 2, 0x4030201}, 0, 0 },
44 {7, {0x6d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 3, 0x60504030201}, 0, 0 },
45 {7, {0x8d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP, 4, 0x60504030201}, 0, 0 },
46 {9, {0xcd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP, 6, 0x807060504030201}, 0, 0 },
47 /* Packet Generation Enable */
48 {1, {0x11}, 0, {INTEL_PT_TIP_PGE, 0, 0}, 0, 0 },
49 {3, {0x31, 1, 2}, 0, {INTEL_PT_TIP_PGE, 1, 0x201}, 0, 0 },
50 {5, {0x51, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGE, 2, 0x4030201}, 0, 0 },
51 {7, {0x71, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 3, 0x60504030201}, 0, 0 },
52 {7, {0x91, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGE, 4, 0x60504030201}, 0, 0 },
53 {9, {0xd1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGE, 6, 0x807060504030201}, 0, 0 },
54 /* Packet Generation Disable */
55 {1, {0x01}, 0, {INTEL_PT_TIP_PGD, 0, 0}, 0, 0 },
56 {3, {0x21, 1, 2}, 0, {INTEL_PT_TIP_PGD, 1, 0x201}, 0, 0 },
57 {5, {0x41, 1, 2, 3, 4}, 0, {INTEL_PT_TIP_PGD, 2, 0x4030201}, 0, 0 },
58 {7, {0x61, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 3, 0x60504030201}, 0, 0 },
59 {7, {0x81, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_TIP_PGD, 4, 0x60504030201}, 0, 0 },
60 {9, {0xc1, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_TIP_PGD, 6, 0x807060504030201}, 0, 0 },
61 /* Flow Update Packet */
62 {1, {0x1d}, 0, {INTEL_PT_FUP, 0, 0}, 0, 0 },
63 {3, {0x3d, 1, 2}, 0, {INTEL_PT_FUP, 1, 0x201}, 0, 0 },
64 {5, {0x5d, 1, 2, 3, 4}, 0, {INTEL_PT_FUP, 2, 0x4030201}, 0, 0 },
65 {7, {0x7d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 3, 0x60504030201}, 0, 0 },
66 {7, {0x9d, 1, 2, 3, 4, 5, 6}, 0, {INTEL_PT_FUP, 4, 0x60504030201}, 0, 0 },
67 {9, {0xdd, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_FUP, 6, 0x807060504030201}, 0, 0 },
68 /* Paging Information Packet */
69 {8, {0x02, 0x43, 2, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201}, 0, 0 },
70 {8, {0x02, 0x43, 3, 4, 6, 8, 10, 12}, 0, {INTEL_PT_PIP, 0, 0x60504030201 | (1ULL << 63)}, 0, 0 },
71 /* Mode Exec Packet */
72 {2, {0x99, 0x00}, 0, {INTEL_PT_MODE_EXEC, 0, 16}, 0, 0 },
73 {2, {0x99, 0x01}, 0, {INTEL_PT_MODE_EXEC, 0, 64}, 0, 0 },
74 {2, {0x99, 0x02}, 0, {INTEL_PT_MODE_EXEC, 0, 32}, 0, 0 },
75 /* Mode TSX Packet */
76 {2, {0x99, 0x20}, 0, {INTEL_PT_MODE_TSX, 0, 0}, 0, 0 },
77 {2, {0x99, 0x21}, 0, {INTEL_PT_MODE_TSX, 0, 1}, 0, 0 },
78 {2, {0x99, 0x22}, 0, {INTEL_PT_MODE_TSX, 0, 2}, 0, 0 },
79 /* Trace Stop Packet */
80 {2, {0x02, 0x83}, 0, {INTEL_PT_TRACESTOP, 0, 0}, 0, 0 },
81 /* Core:Bus Ratio Packet */
82 {4, {0x02, 0x03, 0x12, 0}, 0, {INTEL_PT_CBR, 0, 0x12}, 0, 1 },
83 /* Timestamp Counter Packet */
84 {8, {0x19, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_TSC, 0, 0x7060504030201}, 0, 1 },
85 /* Mini Time Counter Packet */
86 {2, {0x59, 0x12}, 0, {INTEL_PT_MTC, 0, 0x12}, 0, 1 },
87 /* TSC / MTC Alignment Packet */
88 {7, {0x02, 0x73}, 0, {INTEL_PT_TMA, 0, 0}, 0, 1 },
89 {7, {0x02, 0x73, 1, 2}, 0, {INTEL_PT_TMA, 0, 0x201}, 0, 1 },
90 {7, {0x02, 0x73, 0, 0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0}, 0, 1 },
91 {7, {0x02, 0x73, 0x80, 0xc0, 0, 0xff, 1}, 0, {INTEL_PT_TMA, 0x1ff, 0xc080}, 0, 1 },
92 /* Cycle Count Packet */
93 {1, {0x03}, 0, {INTEL_PT_CYC, 0, 0}, 0, 1 },
94 {1, {0x0b}, 0, {INTEL_PT_CYC, 0, 1}, 0, 1 },
95 {1, {0xfb}, 0, {INTEL_PT_CYC, 0, 0x1f}, 0, 1 },
96 {2, {0x07, 2}, 0, {INTEL_PT_CYC, 0, 0x20}, 0, 1 },
97 {2, {0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xfff}, 0, 1 },
98 {3, {0x07, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x1000}, 0, 1 },
99 {3, {0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7ffff}, 0, 1 },
100 {4, {0x07, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x80000}, 0, 1 },
101 {4, {0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3ffffff}, 0, 1 },
102 {5, {0x07, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x4000000}, 0, 1 },
103 {5, {0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1ffffffff}, 0, 1 },
104 {6, {0x07, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x200000000}, 0, 1 },
105 {6, {0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0xffffffffff}, 0, 1 },
106 {7, {0x07, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x10000000000}, 0, 1 },
107 {7, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x7fffffffffff}, 0, 1 },
108 {8, {0x07, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x800000000000}, 0, 1 },
109 {8, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x3fffffffffffff}, 0, 1 },
110 {9, {0x07, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x40000000000000}, 0, 1 },
111 {9, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe}, 0, {INTEL_PT_CYC, 0, 0x1fffffffffffffff}, 0, 1 },
112 {10, {0x07, 1, 1, 1, 1, 1, 1, 1, 1, 2}, 0, {INTEL_PT_CYC, 0, 0x2000000000000000}, 0, 1 },
113 {10, {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe}, 0, {INTEL_PT_CYC, 0, 0xffffffffffffffff}, 0, 1 },
114 /* Virtual-Machine Control Structure Packet */
115 {7, {0x02, 0xc8, 1, 2, 3, 4, 5}, 0, {INTEL_PT_VMCS, 5, 0x504030201}, 0, 0 },
116 /* Overflow Packet */
117 {2, {0x02, 0xf3}, 0, {INTEL_PT_OVF, 0, 0}, 0, 0 },
118 {2, {0x02, 0xf3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 },
119 {2, {0x02, 0xf3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_OVF, 0, 0}, 0, 0 },
120 /* Packet Stream Boundary*/
121 {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, 0, {INTEL_PT_PSB, 0, 0}, 0, 0 },
122 {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_4_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 },
123 {16, {0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82, 0x02, 0x82}, INTEL_PT_BLK_8_CTX, {INTEL_PT_PSB, 0, 0}, 0, 0 },
124 /* PSB End Packet */
125 {2, {0x02, 0x23}, 0, {INTEL_PT_PSBEND, 0, 0}, 0, 0 },
126 /* Maintenance Packet */
127 {11, {0x02, 0xc3, 0x88, 1, 2, 3, 4, 5, 6, 7}, 0, {INTEL_PT_MNT, 0, 0x7060504030201}, 0, 1 },
128 /* Write Data to PT Packet */
129 {6, {0x02, 0x12, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE, 0, 0x4030201}, 0, 0 },
130 {10, {0x02, 0x32, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE, 1, 0x807060504030201}, 0, 0 },
131 {6, {0x02, 0x92, 1, 2, 3, 4}, 0, {INTEL_PT_PTWRITE_IP, 0, 0x4030201}, 0, 0 },
132 {10, {0x02, 0xb2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_PTWRITE_IP, 1, 0x807060504030201}, 0, 0 },
133 /* Execution Stop Packet */
134 {2, {0x02, 0x62}, 0, {INTEL_PT_EXSTOP, 0, 0}, 0, 1 },
135 {2, {0x02, 0xe2}, 0, {INTEL_PT_EXSTOP_IP, 0, 0}, 0, 1 },
136 /* Monitor Wait Packet */
137 {10, {0x02, 0xc2}, 0, {INTEL_PT_MWAIT, 0, 0}, 0, 0 },
138 {10, {0x02, 0xc2, 1, 2, 3, 4, 5, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x807060504030201}, 0, 0 },
139 {10, {0x02, 0xc2, 0xff, 2, 3, 4, 7, 6, 7, 8}, 0, {INTEL_PT_MWAIT, 0, 0x8070607040302ff}, 0, 0 },
140 /* Power Entry Packet */
141 {4, {0x02, 0x22}, 0, {INTEL_PT_PWRE, 0, 0}, 0, 1 },
142 {4, {0x02, 0x22, 1, 2}, 0, {INTEL_PT_PWRE, 0, 0x0201}, 0, 1 },
143 {4, {0x02, 0x22, 0x80, 0x34}, 0, {INTEL_PT_PWRE, 0, 0x3480}, 0, 1 },
144 {4, {0x02, 0x22, 0x00, 0x56}, 0, {INTEL_PT_PWRE, 0, 0x5600}, 0, 1 },
145 /* Power Exit Packet */
146 {7, {0x02, 0xa2}, 0, {INTEL_PT_PWRX, 0, 0}, 0, 1 },
147 {7, {0x02, 0xa2, 1, 2, 3, 4, 5}, 0, {INTEL_PT_PWRX, 0, 0x504030201}, 0, 1 },
148 {7, {0x02, 0xa2, 0xff, 0xff, 0xff, 0xff, 0xff}, 0, {INTEL_PT_PWRX, 0, 0xffffffffff}, 0, 1 },
149 /* Block Begin Packet */
150 {3, {0x02, 0x63, 0x00}, 0, {INTEL_PT_BBP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 },
151 {3, {0x02, 0x63, 0x80}, 0, {INTEL_PT_BBP, 1, 0}, INTEL_PT_BLK_4_CTX, 0 },
152 {3, {0x02, 0x63, 0x1f}, 0, {INTEL_PT_BBP, 0, 0x1f}, INTEL_PT_BLK_8_CTX, 0 },
153 {3, {0x02, 0x63, 0x9f}, 0, {INTEL_PT_BBP, 1, 0x1f}, INTEL_PT_BLK_4_CTX, 0 },
154 /* 4-byte Block Item Packet */
155 {5, {0x04}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_4_CTX, 0 },
156 {5, {0xfc}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_4_CTX, 0 },
157 {5, {0x04, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 },
158 {5, {0xfc, 1, 2, 3, 4}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BIP, 0x1f, 0x04030201}, INTEL_PT_BLK_4_CTX, 0 },
159 /* 8-byte Block Item Packet */
160 {9, {0x04}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0}, INTEL_PT_BLK_8_CTX, 0 },
161 {9, {0xfc}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0}, INTEL_PT_BLK_8_CTX, 0 },
162 {9, {0x04, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 },
163 {9, {0xfc, 1, 2, 3, 4, 5, 6, 7, 8}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BIP, 0x1f, 0x0807060504030201}, INTEL_PT_BLK_8_CTX, 0 },
164 /* Block End Packet */
165 {2, {0x02, 0x33}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 },
166 {2, {0x02, 0xb3}, INTEL_PT_BLK_4_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
167 {2, {0x02, 0x33}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP, 0, 0}, 0, 0 },
168 {2, {0x02, 0xb3}, INTEL_PT_BLK_8_CTX, {INTEL_PT_BEP_IP, 0, 0}, 0, 0 },
169 /* Terminator */
170 {0, {0}, 0, {0, 0, 0}, 0, 0 },
171};
172
173static int dump_packet(struct intel_pt_pkt *packet, u8 *bytes, int len)
174{
175 char desc[INTEL_PT_PKT_DESC_MAX];
176 int ret, i;
177
178 for (i = 0; i < len; i++)
179 pr_debug(" %02x", bytes[i]);
180 for (; i < INTEL_PT_PKT_MAX_SZ; i++)
181 pr_debug(" ");
182 pr_debug(" ");
183 ret = intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
184 if (ret < 0) {
185 pr_debug("intel_pt_pkt_desc failed!\n");
186 return TEST_FAIL;
187 }
188 pr_debug("%s\n", desc);
189
190 return TEST_OK;
191}
192
193static void decoding_failed(struct test_data *d)
194{
195 pr_debug("Decoding failed!\n");
196 pr_debug("Decoding: ");
197 dump_packet(&d->packet, d->bytes, d->len);
198}
199
200static int fail(struct test_data *d, struct intel_pt_pkt *packet, int len,
201 enum intel_pt_pkt_ctx new_ctx)
202{
203 decoding_failed(d);
204
205 if (len != d->len)
206 pr_debug("Expected length: %d Decoded length %d\n",
207 d->len, len);
208
209 if (packet->type != d->packet.type)
210 pr_debug("Expected type: %d Decoded type %d\n",
211 d->packet.type, packet->type);
212
213 if (packet->count != d->packet.count)
214 pr_debug("Expected count: %d Decoded count %d\n",
215 d->packet.count, packet->count);
216
217 if (packet->payload != d->packet.payload)
218 pr_debug("Expected payload: 0x%llx Decoded payload 0x%llx\n",
219 (unsigned long long)d->packet.payload,
220 (unsigned long long)packet->payload);
221
222 if (new_ctx != d->new_ctx)
223 pr_debug("Expected packet context: %d Decoded packet context %d\n",
224 d->new_ctx, new_ctx);
225
226 return TEST_FAIL;
227}
228
229static int test_ctx_unchanged(struct test_data *d, struct intel_pt_pkt *packet,
230 enum intel_pt_pkt_ctx ctx)
231{
232 enum intel_pt_pkt_ctx old_ctx = ctx;
233
234 intel_pt_upd_pkt_ctx(packet, &ctx);
235
236 if (ctx != old_ctx) {
237 decoding_failed(d);
238 pr_debug("Packet context changed!\n");
239 return TEST_FAIL;
240 }
241
242 return TEST_OK;
243}
244
245static int test_one(struct test_data *d)
246{
247 struct intel_pt_pkt packet;
248 enum intel_pt_pkt_ctx ctx = d->ctx;
249 int ret;
250
251 memset(&packet, 0xff, sizeof(packet));
252
253 /* Decode a packet */
254 ret = intel_pt_get_packet(d->bytes, d->len, &packet, &ctx);
255 if (ret < 0 || ret > INTEL_PT_PKT_MAX_SZ) {
256 decoding_failed(d);
257 pr_debug("intel_pt_get_packet returned %d\n", ret);
258 return TEST_FAIL;
259 }
260
261 /* Some packets must always leave the packet context unchanged */
262 if (d->ctx_unchanged) {
263 int err;
264
265 err = test_ctx_unchanged(d, &packet, INTEL_PT_NO_CTX);
266 if (err)
267 return err;
268 err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_4_CTX);
269 if (err)
270 return err;
271 err = test_ctx_unchanged(d, &packet, INTEL_PT_BLK_8_CTX);
272 if (err)
273 return err;
274 }
275
276 /* Compare to the expected values */
277 if (ret != d->len || packet.type != d->packet.type ||
278 packet.count != d->packet.count ||
279 packet.payload != d->packet.payload || ctx != d->new_ctx)
280 return fail(d, &packet, ret, ctx);
281
282 pr_debug("Decoded ok:");
283 ret = dump_packet(&d->packet, d->bytes, d->len);
284
285 return ret;
286}
287
288/*
289 * This test feeds byte sequences to the Intel PT packet decoder and checks the
290 * results. Changes to the packet context are also checked.
291 */
292int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused)
293{
294 struct test_data *d = data;
295 int ret;
296
297 for (d = data; d->len; d++) {
298 ret = test_one(d);
299 if (ret)
300 return ret;
301 }
302
303 return TEST_OK;
304}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 1869f62a10cd..9804098dcefb 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -25,6 +25,7 @@
25#include "../../util/auxtrace.h" 25#include "../../util/auxtrace.h"
26#include "../../util/tsc.h" 26#include "../../util/tsc.h"
27#include "../../util/intel-pt.h" 27#include "../../util/intel-pt.h"
28#include "../../util/util.h"
28 29
29#define KiB(x) ((x) * 1024) 30#define KiB(x) ((x) * 1024)
30#define MiB(x) ((x) * 1024 * 1024) 31#define MiB(x) ((x) * 1024 * 1024)
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
index 4520ac53caa9..1e9ec783b9a1 100644
--- a/tools/perf/arch/x86/util/machine.c
+++ b/tools/perf/arch/x86/util/machine.c
@@ -3,10 +3,11 @@
3#include <linux/string.h> 3#include <linux/string.h>
4#include <stdlib.h> 4#include <stdlib.h>
5 5
6#include "../../util/util.h"
6#include "../../util/machine.h" 7#include "../../util/machine.h"
7#include "../../util/map.h" 8#include "../../util/map.h"
8#include "../../util/symbol.h" 9#include "../../util/symbol.h"
9#include "../../util/sane_ctype.h" 10#include <linux/ctype.h>
10 11
11#include <symbol/kallsyms.h> 12#include <symbol/kallsyms.h>
12 13
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 6e7920793729..f924b46910b5 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -20,6 +20,8 @@
20#include "util/data.h" 20#include "util/data.h"
21#include "util/config.h" 21#include "util/config.h"
22#include "util/time-utils.h" 22#include "util/time-utils.h"
23#include "util/annotate.h"
24#include "util/map.h"
23 25
24#include <errno.h> 26#include <errno.h>
25#include <inttypes.h> 27#include <inttypes.h>
@@ -32,6 +34,7 @@ struct perf_diff {
32 struct perf_time_interval *ptime_range; 34 struct perf_time_interval *ptime_range;
33 int range_size; 35 int range_size;
34 int range_num; 36 int range_num;
37 bool has_br_stack;
35}; 38};
36 39
37/* Diff command specific HPP columns. */ 40/* Diff command specific HPP columns. */
@@ -44,6 +47,7 @@ enum {
44 PERF_HPP_DIFF__WEIGHTED_DIFF, 47 PERF_HPP_DIFF__WEIGHTED_DIFF,
45 PERF_HPP_DIFF__FORMULA, 48 PERF_HPP_DIFF__FORMULA,
46 PERF_HPP_DIFF__DELTA_ABS, 49 PERF_HPP_DIFF__DELTA_ABS,
50 PERF_HPP_DIFF__CYCLES,
47 51
48 PERF_HPP_DIFF__MAX_INDEX 52 PERF_HPP_DIFF__MAX_INDEX
49}; 53};
@@ -86,11 +90,14 @@ static s64 compute_wdiff_w2;
86static const char *cpu_list; 90static const char *cpu_list;
87static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); 91static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
88 92
93static struct addr_location dummy_al;
94
89enum { 95enum {
90 COMPUTE_DELTA, 96 COMPUTE_DELTA,
91 COMPUTE_RATIO, 97 COMPUTE_RATIO,
92 COMPUTE_WEIGHTED_DIFF, 98 COMPUTE_WEIGHTED_DIFF,
93 COMPUTE_DELTA_ABS, 99 COMPUTE_DELTA_ABS,
100 COMPUTE_CYCLES,
94 COMPUTE_MAX, 101 COMPUTE_MAX,
95}; 102};
96 103
@@ -99,6 +106,7 @@ const char *compute_names[COMPUTE_MAX] = {
99 [COMPUTE_DELTA_ABS] = "delta-abs", 106 [COMPUTE_DELTA_ABS] = "delta-abs",
100 [COMPUTE_RATIO] = "ratio", 107 [COMPUTE_RATIO] = "ratio",
101 [COMPUTE_WEIGHTED_DIFF] = "wdiff", 108 [COMPUTE_WEIGHTED_DIFF] = "wdiff",
109 [COMPUTE_CYCLES] = "cycles",
102}; 110};
103 111
104static int compute = COMPUTE_DELTA_ABS; 112static int compute = COMPUTE_DELTA_ABS;
@@ -108,6 +116,7 @@ static int compute_2_hpp[COMPUTE_MAX] = {
108 [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS, 116 [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS,
109 [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO, 117 [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO,
110 [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF, 118 [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF,
119 [COMPUTE_CYCLES] = PERF_HPP_DIFF__CYCLES,
111}; 120};
112 121
113#define MAX_COL_WIDTH 70 122#define MAX_COL_WIDTH 70
@@ -146,6 +155,10 @@ static struct header_column {
146 [PERF_HPP_DIFF__FORMULA] = { 155 [PERF_HPP_DIFF__FORMULA] = {
147 .name = "Formula", 156 .name = "Formula",
148 .width = MAX_COL_WIDTH, 157 .width = MAX_COL_WIDTH,
158 },
159 [PERF_HPP_DIFF__CYCLES] = {
160 .name = "[Program Block Range] Cycles Diff",
161 .width = 70,
149 } 162 }
150}; 163};
151 164
@@ -335,6 +348,31 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
335 return -1; 348 return -1;
336} 349}
337 350
351static void *block_hist_zalloc(size_t size)
352{
353 struct block_hist *bh;
354
355 bh = zalloc(size + sizeof(*bh));
356 if (!bh)
357 return NULL;
358
359 return &bh->he;
360}
361
362static void block_hist_free(void *he)
363{
364 struct block_hist *bh;
365
366 bh = container_of(he, struct block_hist, he);
367 hists__delete_entries(&bh->block_hists);
368 free(bh);
369}
370
371struct hist_entry_ops block_hist_ops = {
372 .new = block_hist_zalloc,
373 .free = block_hist_free,
374};
375
338static int diff__process_sample_event(struct perf_tool *tool, 376static int diff__process_sample_event(struct perf_tool *tool,
339 union perf_event *event, 377 union perf_event *event,
340 struct perf_sample *sample, 378 struct perf_sample *sample,
@@ -362,9 +400,22 @@ static int diff__process_sample_event(struct perf_tool *tool,
362 goto out_put; 400 goto out_put;
363 } 401 }
364 402
365 if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) { 403 if (compute != COMPUTE_CYCLES) {
366 pr_warning("problem incrementing symbol period, skipping event\n"); 404 if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample,
367 goto out_put; 405 true)) {
406 pr_warning("problem incrementing symbol period, "
407 "skipping event\n");
408 goto out_put;
409 }
410 } else {
411 if (!hists__add_entry_ops(hists, &block_hist_ops, &al, NULL,
412 NULL, NULL, sample, true)) {
413 pr_warning("problem incrementing symbol period, "
414 "skipping event\n");
415 goto out_put;
416 }
417
418 hist__account_cycles(sample->branch_stack, &al, sample, false);
368 } 419 }
369 420
370 /* 421 /*
@@ -474,6 +525,203 @@ static void hists__baseline_only(struct hists *hists)
474 } 525 }
475} 526}
476 527
528static int64_t block_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
529 struct hist_entry *left, struct hist_entry *right)
530{
531 struct block_info *bi_l = left->block_info;
532 struct block_info *bi_r = right->block_info;
533 int cmp;
534
535 if (!bi_l->sym || !bi_r->sym) {
536 if (!bi_l->sym && !bi_r->sym)
537 return 0;
538 else if (!bi_l->sym)
539 return -1;
540 else
541 return 1;
542 }
543
544 if (bi_l->sym == bi_r->sym) {
545 if (bi_l->start == bi_r->start) {
546 if (bi_l->end == bi_r->end)
547 return 0;
548 else
549 return (int64_t)(bi_r->end - bi_l->end);
550 } else
551 return (int64_t)(bi_r->start - bi_l->start);
552 } else {
553 cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
554 return cmp;
555 }
556
557 if (bi_l->sym->start != bi_r->sym->start)
558 return (int64_t)(bi_r->sym->start - bi_l->sym->start);
559
560 return (int64_t)(bi_r->sym->end - bi_l->sym->end);
561}
562
563static int64_t block_cycles_diff_cmp(struct hist_entry *left,
564 struct hist_entry *right)
565{
566 bool pairs_left = hist_entry__has_pairs(left);
567 bool pairs_right = hist_entry__has_pairs(right);
568 s64 l, r;
569
570 if (!pairs_left && !pairs_right)
571 return 0;
572
573 l = labs(left->diff.cycles);
574 r = labs(right->diff.cycles);
575 return r - l;
576}
577
578static int64_t block_sort(struct perf_hpp_fmt *fmt __maybe_unused,
579 struct hist_entry *left, struct hist_entry *right)
580{
581 return block_cycles_diff_cmp(right, left);
582}
583
584static void init_block_hist(struct block_hist *bh)
585{
586 __hists__init(&bh->block_hists, &bh->block_list);
587 perf_hpp_list__init(&bh->block_list);
588
589 INIT_LIST_HEAD(&bh->block_fmt.list);
590 INIT_LIST_HEAD(&bh->block_fmt.sort_list);
591 bh->block_fmt.cmp = block_cmp;
592 bh->block_fmt.sort = block_sort;
593 perf_hpp_list__register_sort_field(&bh->block_list,
594 &bh->block_fmt);
595 bh->valid = true;
596}
597
598static void init_block_info(struct block_info *bi, struct symbol *sym,
599 struct cyc_hist *ch, int offset)
600{
601 bi->sym = sym;
602 bi->start = ch->start;
603 bi->end = offset;
604 bi->cycles = ch->cycles;
605 bi->cycles_aggr = ch->cycles_aggr;
606 bi->num = ch->num;
607 bi->num_aggr = ch->num_aggr;
608}
609
610static int process_block_per_sym(struct hist_entry *he)
611{
612 struct annotation *notes;
613 struct cyc_hist *ch;
614 struct block_hist *bh;
615
616 if (!he->ms.map || !he->ms.sym)
617 return 0;
618
619 notes = symbol__annotation(he->ms.sym);
620 if (!notes || !notes->src || !notes->src->cycles_hist)
621 return 0;
622
623 bh = container_of(he, struct block_hist, he);
624 init_block_hist(bh);
625
626 ch = notes->src->cycles_hist;
627 for (unsigned int i = 0; i < symbol__size(he->ms.sym); i++) {
628 if (ch[i].num_aggr) {
629 struct block_info *bi;
630 struct hist_entry *he_block;
631
632 bi = block_info__new();
633 if (!bi)
634 return -1;
635
636 init_block_info(bi, he->ms.sym, &ch[i], i);
637 he_block = hists__add_entry_block(&bh->block_hists,
638 &dummy_al, bi);
639 if (!he_block) {
640 block_info__put(bi);
641 return -1;
642 }
643 }
644 }
645
646 return 0;
647}
648
649static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b)
650{
651 struct block_info *bi_a = a->block_info;
652 struct block_info *bi_b = b->block_info;
653 int cmp;
654
655 if (!bi_a->sym || !bi_b->sym)
656 return -1;
657
658 cmp = strcmp(bi_a->sym->name, bi_b->sym->name);
659
660 if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end))
661 return 0;
662
663 return -1;
664}
665
666static struct hist_entry *get_block_pair(struct hist_entry *he,
667 struct hists *hists_pair)
668{
669 struct rb_root_cached *root = hists_pair->entries_in;
670 struct rb_node *next = rb_first_cached(root);
671 int cmp;
672
673 while (next != NULL) {
674 struct hist_entry *he_pair = rb_entry(next, struct hist_entry,
675 rb_node_in);
676
677 next = rb_next(&he_pair->rb_node_in);
678
679 cmp = block_pair_cmp(he_pair, he);
680 if (!cmp)
681 return he_pair;
682 }
683
684 return NULL;
685}
686
687static void compute_cycles_diff(struct hist_entry *he,
688 struct hist_entry *pair)
689{
690 pair->diff.computed = true;
691 if (pair->block_info->num && he->block_info->num) {
692 pair->diff.cycles =
693 pair->block_info->cycles_aggr / pair->block_info->num_aggr -
694 he->block_info->cycles_aggr / he->block_info->num_aggr;
695 }
696}
697
698static void block_hists_match(struct hists *hists_base,
699 struct hists *hists_pair)
700{
701 struct rb_root_cached *root = hists_base->entries_in;
702 struct rb_node *next = rb_first_cached(root);
703
704 while (next != NULL) {
705 struct hist_entry *he = rb_entry(next, struct hist_entry,
706 rb_node_in);
707 struct hist_entry *pair = get_block_pair(he, hists_pair);
708
709 next = rb_next(&he->rb_node_in);
710
711 if (pair) {
712 hist_entry__add_pair(pair, he);
713 compute_cycles_diff(he, pair);
714 }
715 }
716}
717
718static int filter_cb(struct hist_entry *he, void *arg __maybe_unused)
719{
720 /* Skip the calculation of column length in output_resort */
721 he->filtered = true;
722 return 0;
723}
724
477static void hists__precompute(struct hists *hists) 725static void hists__precompute(struct hists *hists)
478{ 726{
479 struct rb_root_cached *root; 727 struct rb_root_cached *root;
@@ -486,6 +734,7 @@ static void hists__precompute(struct hists *hists)
486 734
487 next = rb_first_cached(root); 735 next = rb_first_cached(root);
488 while (next != NULL) { 736 while (next != NULL) {
737 struct block_hist *bh, *pair_bh;
489 struct hist_entry *he, *pair; 738 struct hist_entry *he, *pair;
490 struct data__file *d; 739 struct data__file *d;
491 int i; 740 int i;
@@ -493,6 +742,9 @@ static void hists__precompute(struct hists *hists)
493 he = rb_entry(next, struct hist_entry, rb_node_in); 742 he = rb_entry(next, struct hist_entry, rb_node_in);
494 next = rb_next(&he->rb_node_in); 743 next = rb_next(&he->rb_node_in);
495 744
745 if (compute == COMPUTE_CYCLES)
746 process_block_per_sym(he);
747
496 data__for_each_file_new(i, d) { 748 data__for_each_file_new(i, d) {
497 pair = get_pair_data(he, d); 749 pair = get_pair_data(he, d);
498 if (!pair) 750 if (!pair)
@@ -509,6 +761,19 @@ static void hists__precompute(struct hists *hists)
509 case COMPUTE_WEIGHTED_DIFF: 761 case COMPUTE_WEIGHTED_DIFF:
510 compute_wdiff(he, pair); 762 compute_wdiff(he, pair);
511 break; 763 break;
764 case COMPUTE_CYCLES:
765 process_block_per_sym(pair);
766 bh = container_of(he, struct block_hist, he);
767 pair_bh = container_of(pair, struct block_hist,
768 he);
769
770 if (bh->valid && pair_bh->valid) {
771 block_hists_match(&bh->block_hists,
772 &pair_bh->block_hists);
773 hists__output_resort_cb(&pair_bh->block_hists,
774 NULL, filter_cb);
775 }
776 break;
512 default: 777 default:
513 BUG_ON(1); 778 BUG_ON(1);
514 } 779 }
@@ -720,6 +985,9 @@ static void hists__process(struct hists *hists)
720 hists__precompute(hists); 985 hists__precompute(hists);
721 hists__output_resort(hists, NULL); 986 hists__output_resort(hists, NULL);
722 987
988 if (compute == COMPUTE_CYCLES)
989 symbol_conf.report_block = true;
990
723 hists__fprintf(hists, !quiet, 0, 0, 0, stdout, 991 hists__fprintf(hists, !quiet, 0, 0, 0, stdout,
724 !symbol_conf.use_callchain); 992 !symbol_conf.use_callchain);
725} 993}
@@ -873,6 +1141,31 @@ static int parse_time_str(struct data__file *d, char *abstime_ostr,
873 return ret; 1141 return ret;
874} 1142}
875 1143
1144static int check_file_brstack(void)
1145{
1146 struct data__file *d;
1147 bool has_br_stack;
1148 int i;
1149
1150 data__for_each_file(i, d) {
1151 d->session = perf_session__new(&d->data, false, &pdiff.tool);
1152 if (!d->session) {
1153 pr_err("Failed to open %s\n", d->data.path);
1154 return -1;
1155 }
1156
1157 has_br_stack = perf_header__has_feat(&d->session->header,
1158 HEADER_BRANCH_STACK);
1159 perf_session__delete(d->session);
1160 if (!has_br_stack)
1161 return 0;
1162 }
1163
1164 /* Set only all files having branch stacks */
1165 pdiff.has_br_stack = true;
1166 return 0;
1167}
1168
876static int __cmd_diff(void) 1169static int __cmd_diff(void)
877{ 1170{
878 struct data__file *d; 1171 struct data__file *d;
@@ -950,7 +1243,7 @@ static const struct option options[] = {
950 OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, 1243 OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
951 "Show only items with match in baseline"), 1244 "Show only items with match in baseline"),
952 OPT_CALLBACK('c', "compute", &compute, 1245 OPT_CALLBACK('c', "compute", &compute,
953 "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)", 1246 "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs),cycles",
954 "Entries differential computation selection", 1247 "Entries differential computation selection",
955 setup_compute), 1248 setup_compute),
956 OPT_BOOLEAN('p', "period", &show_period, 1249 OPT_BOOLEAN('p', "period", &show_period,
@@ -1028,6 +1321,49 @@ static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size)
1028 return ret; 1321 return ret;
1029} 1322}
1030 1323
1324static int cycles_printf(struct hist_entry *he, struct hist_entry *pair,
1325 struct perf_hpp *hpp, int width)
1326{
1327 struct block_hist *bh = container_of(he, struct block_hist, he);
1328 struct block_hist *bh_pair = container_of(pair, struct block_hist, he);
1329 struct hist_entry *block_he;
1330 struct block_info *bi;
1331 char buf[128];
1332 char *start_line, *end_line;
1333
1334 block_he = hists__get_entry(&bh_pair->block_hists, bh->block_idx);
1335 if (!block_he) {
1336 hpp->skip = true;
1337 return 0;
1338 }
1339
1340 /*
1341 * Avoid printing the warning "addr2line_init failed for ..."
1342 */
1343 symbol_conf.disable_add2line_warn = true;
1344
1345 bi = block_he->block_info;
1346
1347 start_line = map__srcline(he->ms.map, bi->sym->start + bi->start,
1348 he->ms.sym);
1349
1350 end_line = map__srcline(he->ms.map, bi->sym->start + bi->end,
1351 he->ms.sym);
1352
1353 if ((start_line != SRCLINE_UNKNOWN) && (end_line != SRCLINE_UNKNOWN)) {
1354 scnprintf(buf, sizeof(buf), "[%s -> %s] %4ld",
1355 start_line, end_line, block_he->diff.cycles);
1356 } else {
1357 scnprintf(buf, sizeof(buf), "[%7lx -> %7lx] %4ld",
1358 bi->start, bi->end, block_he->diff.cycles);
1359 }
1360
1361 free_srcline(start_line);
1362 free_srcline(end_line);
1363
1364 return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
1365}
1366
1031static int __hpp__color_compare(struct perf_hpp_fmt *fmt, 1367static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
1032 struct perf_hpp *hpp, struct hist_entry *he, 1368 struct perf_hpp *hpp, struct hist_entry *he,
1033 int comparison_method) 1369 int comparison_method)
@@ -1039,8 +1375,17 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
1039 s64 wdiff; 1375 s64 wdiff;
1040 char pfmt[20] = " "; 1376 char pfmt[20] = " ";
1041 1377
1042 if (!pair) 1378 if (!pair) {
1379 if (comparison_method == COMPUTE_CYCLES) {
1380 struct block_hist *bh;
1381
1382 bh = container_of(he, struct block_hist, he);
1383 if (bh->block_idx)
1384 hpp->skip = true;
1385 }
1386
1043 goto no_print; 1387 goto no_print;
1388 }
1044 1389
1045 switch (comparison_method) { 1390 switch (comparison_method) {
1046 case COMPUTE_DELTA: 1391 case COMPUTE_DELTA:
@@ -1075,6 +1420,8 @@ static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
1075 return color_snprintf(hpp->buf, hpp->size, 1420 return color_snprintf(hpp->buf, hpp->size,
1076 get_percent_color(wdiff), 1421 get_percent_color(wdiff),
1077 pfmt, wdiff); 1422 pfmt, wdiff);
1423 case COMPUTE_CYCLES:
1424 return cycles_printf(he, pair, hpp, dfmt->header_width);
1078 default: 1425 default:
1079 BUG_ON(1); 1426 BUG_ON(1);
1080 } 1427 }
@@ -1104,6 +1451,12 @@ static int hpp__color_wdiff(struct perf_hpp_fmt *fmt,
1104 return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF); 1451 return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF);
1105} 1452}
1106 1453
1454static int hpp__color_cycles(struct perf_hpp_fmt *fmt,
1455 struct perf_hpp *hpp, struct hist_entry *he)
1456{
1457 return __hpp__color_compare(fmt, hpp, he, COMPUTE_CYCLES);
1458}
1459
1107static void 1460static void
1108hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size) 1461hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
1109{ 1462{
@@ -1305,6 +1658,10 @@ static void data__hpp_register(struct data__file *d, int idx)
1305 fmt->color = hpp__color_delta; 1658 fmt->color = hpp__color_delta;
1306 fmt->sort = hist_entry__cmp_delta_abs; 1659 fmt->sort = hist_entry__cmp_delta_abs;
1307 break; 1660 break;
1661 case PERF_HPP_DIFF__CYCLES:
1662 fmt->color = hpp__color_cycles;
1663 fmt->sort = hist_entry__cmp_nop;
1664 break;
1308 default: 1665 default:
1309 fmt->sort = hist_entry__cmp_nop; 1666 fmt->sort = hist_entry__cmp_nop;
1310 break; 1667 break;
@@ -1385,6 +1742,13 @@ static int ui_init(void)
1385 case COMPUTE_DELTA_ABS: 1742 case COMPUTE_DELTA_ABS:
1386 fmt->sort = hist_entry__cmp_delta_abs_idx; 1743 fmt->sort = hist_entry__cmp_delta_abs_idx;
1387 break; 1744 break;
1745 case COMPUTE_CYCLES:
1746 /*
1747 * Should set since 'fmt->sort' is called without
1748 * checking valid during sorting
1749 */
1750 fmt->sort = hist_entry__cmp_nop;
1751 break;
1388 default: 1752 default:
1389 BUG_ON(1); 1753 BUG_ON(1);
1390 } 1754 }
@@ -1481,12 +1845,20 @@ int cmd_diff(int argc, const char **argv)
1481 if (quiet) 1845 if (quiet)
1482 perf_quiet_option(); 1846 perf_quiet_option();
1483 1847
1848 symbol__annotation_init();
1849
1484 if (symbol__init(NULL) < 0) 1850 if (symbol__init(NULL) < 0)
1485 return -1; 1851 return -1;
1486 1852
1487 if (data_init(argc, argv) < 0) 1853 if (data_init(argc, argv) < 0)
1488 return -1; 1854 return -1;
1489 1855
1856 if (check_file_brstack() < 0)
1857 return -1;
1858
1859 if (compute == COMPUTE_CYCLES && !pdiff.has_br_stack)
1860 return -1;
1861
1490 if (ui_init() < 0) 1862 if (ui_init() < 0)
1491 return -1; 1863 return -1;
1492 1864
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index b80eee455111..9bd3829de76d 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -21,6 +21,7 @@
21#include "util/cpumap.h" 21#include "util/cpumap.h"
22 22
23#include "util/debug.h" 23#include "util/debug.h"
24#include "util/string2.h"
24 25
25#include <linux/kernel.h> 26#include <linux/kernel.h>
26#include <linux/rbtree.h> 27#include <linux/rbtree.h>
@@ -30,7 +31,7 @@
30#include <locale.h> 31#include <locale.h>
31#include <regex.h> 32#include <regex.h>
32 33
33#include "sane_ctype.h" 34#include <linux/ctype.h>
34 35
35static int kmem_slab; 36static int kmem_slab;
36static int kmem_page; 37static int kmem_page;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e2c3a585a61e..dca55997934e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2191,6 +2191,10 @@ static struct option __record_options[] = {
2191 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user, 2191 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
2192 "Configure all used events to run in user space.", 2192 "Configure all used events to run in user space.",
2193 PARSE_OPT_EXCLUSIVE), 2193 PARSE_OPT_EXCLUSIVE),
2194 OPT_BOOLEAN(0, "kernel-callchains", &record.opts.kernel_callchains,
2195 "collect kernel callchains"),
2196 OPT_BOOLEAN(0, "user-callchains", &record.opts.user_callchains,
2197 "collect user callchains"),
2194 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path", 2198 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
2195 "clang binary to use for compiling BPF scriptlets"), 2199 "clang binary to use for compiling BPF scriptlets"),
2196 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options", 2200 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1ca533f06a4c..aef59f318a67 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,7 @@
47#include <errno.h> 47#include <errno.h>
48#include <inttypes.h> 48#include <inttypes.h>
49#include <regex.h> 49#include <regex.h>
50#include "sane_ctype.h" 50#include <linux/ctype.h>
51#include <signal.h> 51#include <signal.h>
52#include <linux/bitmap.h> 52#include <linux/bitmap.h>
53#include <linux/stringify.h> 53#include <linux/stringify.h>
@@ -941,8 +941,7 @@ parse_time_quantum(const struct option *opt, const char *arg,
941 pr_err("time quantum cannot be 0"); 941 pr_err("time quantum cannot be 0");
942 return -1; 942 return -1;
943 } 943 }
944 while (isspace(*end)) 944 end = skip_spaces(end);
945 end++;
946 if (*end == 0) 945 if (*end == 0)
947 return 0; 946 return 0;
948 if (!strcmp(end, "s")) { 947 if (!strcmp(end, "s")) {
@@ -1428,6 +1427,10 @@ repeat:
1428 &report.range_num); 1427 &report.range_num);
1429 if (ret < 0) 1428 if (ret < 0)
1430 goto error; 1429 goto error;
1430
1431 itrace_synth_opts__set_time_range(&itrace_synth_opts,
1432 report.ptime_range,
1433 report.range_num);
1431 } 1434 }
1432 1435
1433 if (session->tevent.pevent && 1436 if (session->tevent.pevent &&
@@ -1449,8 +1452,10 @@ repeat:
1449 ret = 0; 1452 ret = 0;
1450 1453
1451error: 1454error:
1452 if (report.ptime_range) 1455 if (report.ptime_range) {
1456 itrace_synth_opts__clear_time_range(&itrace_synth_opts);
1453 zfree(&report.ptime_range); 1457 zfree(&report.ptime_range);
1458 }
1454 zstd_fini(&(session->zstd_data)); 1459 zstd_fini(&(session->zstd_data));
1455 perf_session__delete(session); 1460 perf_session__delete(session);
1456 return ret; 1461 return ret;
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 275f2d92a7bf..1519989961ff 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -15,6 +15,7 @@
15#include "util/thread_map.h" 15#include "util/thread_map.h"
16#include "util/color.h" 16#include "util/color.h"
17#include "util/stat.h" 17#include "util/stat.h"
18#include "util/string2.h"
18#include "util/callchain.h" 19#include "util/callchain.h"
19#include "util/time-utils.h" 20#include "util/time-utils.h"
20 21
@@ -36,7 +37,7 @@
36#include <api/fs/fs.h> 37#include <api/fs/fs.h>
37#include <linux/time64.h> 38#include <linux/time64.h>
38 39
39#include "sane_ctype.h" 40#include <linux/ctype.h>
40 41
41#define PR_SET_NAME 15 /* Set process name */ 42#define PR_SET_NAME 15 /* Set process name */
42#define MAX_CPUS 4096 43#define MAX_CPUS 4096
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 61cfd8f70989..2f6232f1bfdc 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -49,7 +49,7 @@
49#include <unistd.h> 49#include <unistd.h>
50#include <subcmd/pager.h> 50#include <subcmd/pager.h>
51 51
52#include "sane_ctype.h" 52#include <linux/ctype.h>
53 53
54static char const *script_name; 54static char const *script_name;
55static char const *generate_script_lang; 55static char const *generate_script_lang;
@@ -102,6 +102,7 @@ enum perf_output_field {
102 PERF_OUTPUT_METRIC = 1U << 28, 102 PERF_OUTPUT_METRIC = 1U << 28,
103 PERF_OUTPUT_MISC = 1U << 29, 103 PERF_OUTPUT_MISC = 1U << 29,
104 PERF_OUTPUT_SRCCODE = 1U << 30, 104 PERF_OUTPUT_SRCCODE = 1U << 30,
105 PERF_OUTPUT_IPC = 1U << 31,
105}; 106};
106 107
107struct output_option { 108struct output_option {
@@ -139,6 +140,7 @@ struct output_option {
139 {.str = "metric", .field = PERF_OUTPUT_METRIC}, 140 {.str = "metric", .field = PERF_OUTPUT_METRIC},
140 {.str = "misc", .field = PERF_OUTPUT_MISC}, 141 {.str = "misc", .field = PERF_OUTPUT_MISC},
141 {.str = "srccode", .field = PERF_OUTPUT_SRCCODE}, 142 {.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
143 {.str = "ipc", .field = PERF_OUTPUT_IPC},
142}; 144};
143 145
144enum { 146enum {
@@ -1268,6 +1270,20 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample,
1268 return printed; 1270 return printed;
1269} 1271}
1270 1272
1273static int perf_sample__fprintf_ipc(struct perf_sample *sample,
1274 struct perf_event_attr *attr, FILE *fp)
1275{
1276 unsigned int ipc;
1277
1278 if (!PRINT_FIELD(IPC) || !sample->cyc_cnt || !sample->insn_cnt)
1279 return 0;
1280
1281 ipc = (sample->insn_cnt * 100) / sample->cyc_cnt;
1282
1283 return fprintf(fp, " \t IPC: %u.%02u (%" PRIu64 "/%" PRIu64 ") ",
1284 ipc / 100, ipc % 100, sample->insn_cnt, sample->cyc_cnt);
1285}
1286
1271static int perf_sample__fprintf_bts(struct perf_sample *sample, 1287static int perf_sample__fprintf_bts(struct perf_sample *sample,
1272 struct perf_evsel *evsel, 1288 struct perf_evsel *evsel,
1273 struct thread *thread, 1289 struct thread *thread,
@@ -1312,6 +1328,8 @@ static int perf_sample__fprintf_bts(struct perf_sample *sample,
1312 printed += perf_sample__fprintf_addr(sample, thread, attr, fp); 1328 printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
1313 } 1329 }
1314 1330
1331 printed += perf_sample__fprintf_ipc(sample, attr, fp);
1332
1315 if (print_srcline_last) 1333 if (print_srcline_last)
1316 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp); 1334 printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
1317 1335
@@ -1606,6 +1624,7 @@ struct perf_script {
1606 bool show_namespace_events; 1624 bool show_namespace_events;
1607 bool show_lost_events; 1625 bool show_lost_events;
1608 bool show_round_events; 1626 bool show_round_events;
1627 bool show_bpf_events;
1609 bool allocated; 1628 bool allocated;
1610 bool per_event_dump; 1629 bool per_event_dump;
1611 struct cpu_map *cpus; 1630 struct cpu_map *cpus;
@@ -1858,6 +1877,9 @@ static void process_event(struct perf_script *script,
1858 1877
1859 if (PRINT_FIELD(PHYS_ADDR)) 1878 if (PRINT_FIELD(PHYS_ADDR))
1860 fprintf(fp, "%16" PRIx64, sample->phys_addr); 1879 fprintf(fp, "%16" PRIx64, sample->phys_addr);
1880
1881 perf_sample__fprintf_ipc(sample, attr, fp);
1882
1861 fprintf(fp, "\n"); 1883 fprintf(fp, "\n");
1862 1884
1863 if (PRINT_FIELD(SRCCODE)) { 1885 if (PRINT_FIELD(SRCCODE)) {
@@ -2318,6 +2340,41 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
2318 return 0; 2340 return 0;
2319} 2341}
2320 2342
2343static int
2344process_bpf_events(struct perf_tool *tool __maybe_unused,
2345 union perf_event *event,
2346 struct perf_sample *sample,
2347 struct machine *machine)
2348{
2349 struct thread *thread;
2350 struct perf_script *script = container_of(tool, struct perf_script, tool);
2351 struct perf_session *session = script->session;
2352 struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
2353
2354 if (machine__process_ksymbol(machine, event, sample) < 0)
2355 return -1;
2356
2357 if (!evsel->attr.sample_id_all) {
2358 perf_event__fprintf(event, stdout);
2359 return 0;
2360 }
2361
2362 thread = machine__findnew_thread(machine, sample->pid, sample->tid);
2363 if (thread == NULL) {
2364 pr_debug("problem processing MMAP event, skipping it.\n");
2365 return -1;
2366 }
2367
2368 if (!filter_cpu(sample)) {
2369 perf_sample__fprintf_start(sample, thread, evsel,
2370 event->header.type, stdout);
2371 perf_event__fprintf(event, stdout);
2372 }
2373
2374 thread__put(thread);
2375 return 0;
2376}
2377
2321static void sig_handler(int sig __maybe_unused) 2378static void sig_handler(int sig __maybe_unused)
2322{ 2379{
2323 session_done = 1; 2380 session_done = 1;
@@ -2420,6 +2477,10 @@ static int __cmd_script(struct perf_script *script)
2420 script->tool.ordered_events = false; 2477 script->tool.ordered_events = false;
2421 script->tool.finished_round = process_finished_round_event; 2478 script->tool.finished_round = process_finished_round_event;
2422 } 2479 }
2480 if (script->show_bpf_events) {
2481 script->tool.ksymbol = process_bpf_events;
2482 script->tool.bpf_event = process_bpf_events;
2483 }
2423 2484
2424 if (perf_script__setup_per_event_dump(script)) { 2485 if (perf_script__setup_per_event_dump(script)) {
2425 pr_err("Couldn't create the per event dump files\n"); 2486 pr_err("Couldn't create the per event dump files\n");
@@ -2819,7 +2880,7 @@ static int read_script_info(struct script_desc *desc, const char *filename)
2819 return -1; 2880 return -1;
2820 2881
2821 while (fgets(line, sizeof(line), fp)) { 2882 while (fgets(line, sizeof(line), fp)) {
2822 p = ltrim(line); 2883 p = skip_spaces(line);
2823 if (strlen(p) == 0) 2884 if (strlen(p) == 0)
2824 continue; 2885 continue;
2825 if (*p != '#') 2886 if (*p != '#')
@@ -2828,19 +2889,19 @@ static int read_script_info(struct script_desc *desc, const char *filename)
2828 if (strlen(p) && *p == '!') 2889 if (strlen(p) && *p == '!')
2829 continue; 2890 continue;
2830 2891
2831 p = ltrim(p); 2892 p = skip_spaces(p);
2832 if (strlen(p) && p[strlen(p) - 1] == '\n') 2893 if (strlen(p) && p[strlen(p) - 1] == '\n')
2833 p[strlen(p) - 1] = '\0'; 2894 p[strlen(p) - 1] = '\0';
2834 2895
2835 if (!strncmp(p, "description:", strlen("description:"))) { 2896 if (!strncmp(p, "description:", strlen("description:"))) {
2836 p += strlen("description:"); 2897 p += strlen("description:");
2837 desc->half_liner = strdup(ltrim(p)); 2898 desc->half_liner = strdup(skip_spaces(p));
2838 continue; 2899 continue;
2839 } 2900 }
2840 2901
2841 if (!strncmp(p, "args:", strlen("args:"))) { 2902 if (!strncmp(p, "args:", strlen("args:"))) {
2842 p += strlen("args:"); 2903 p += strlen("args:");
2843 desc->args = strdup(ltrim(p)); 2904 desc->args = strdup(skip_spaces(p));
2844 continue; 2905 continue;
2845 } 2906 }
2846 } 2907 }
@@ -2947,7 +3008,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
2947 return -1; 3008 return -1;
2948 3009
2949 while (fgets(line, sizeof(line), fp)) { 3010 while (fgets(line, sizeof(line), fp)) {
2950 p = ltrim(line); 3011 p = skip_spaces(line);
2951 if (*p == '#') 3012 if (*p == '#')
2952 continue; 3013 continue;
2953 3014
@@ -2957,7 +3018,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
2957 break; 3018 break;
2958 3019
2959 p += 2; 3020 p += 2;
2960 p = ltrim(p); 3021 p = skip_spaces(p);
2961 len = strcspn(p, " \t"); 3022 len = strcspn(p, " \t");
2962 if (!len) 3023 if (!len)
2963 break; 3024 break;
@@ -3297,6 +3358,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused,
3297 parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); 3358 parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0);
3298 itrace_parse_synth_opts(opt, "cewp", 0); 3359 itrace_parse_synth_opts(opt, "cewp", 0);
3299 symbol_conf.nanosecs = true; 3360 symbol_conf.nanosecs = true;
3361 symbol_conf.pad_output_len_dso = 50;
3300 return 0; 3362 return 0;
3301} 3363}
3302 3364
@@ -3392,7 +3454,7 @@ int cmd_script(int argc, const char **argv)
3392 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," 3454 "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
3393 "addr,symoff,srcline,period,iregs,uregs,brstack," 3455 "addr,symoff,srcline,period,iregs,uregs,brstack,"
3394 "brstacksym,flags,bpf-output,brstackinsn,brstackoff," 3456 "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
3395 "callindent,insn,insnlen,synth,phys_addr,metric,misc", 3457 "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
3396 parse_output_fields), 3458 parse_output_fields),
3397 OPT_BOOLEAN('a', "all-cpus", &system_wide, 3459 OPT_BOOLEAN('a', "all-cpus", &system_wide,
3398 "system-wide collection from all CPUs"), 3460 "system-wide collection from all CPUs"),
@@ -3438,6 +3500,8 @@ int cmd_script(int argc, const char **argv)
3438 "Show lost events (if recorded)"), 3500 "Show lost events (if recorded)"),
3439 OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events, 3501 OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
3440 "Show round events (if recorded)"), 3502 "Show round events (if recorded)"),
3503 OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events,
3504 "Show bpf related events (if recorded)"),
3441 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, 3505 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
3442 "Dump trace output to files named by the monitored events"), 3506 "Dump trace output to files named by the monitored events"),
3443 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), 3507 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
@@ -3458,6 +3522,15 @@ int cmd_script(int argc, const char **argv)
3458 "Time span of interest (start,stop)"), 3522 "Time span of interest (start,stop)"),
3459 OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name, 3523 OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
3460 "Show inline function"), 3524 "Show inline function"),
3525 OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
3526 "guest mount directory under which every guest os"
3527 " instance has a subdir"),
3528 OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
3529 "file", "file saving guest os vmlinux"),
3530 OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
3531 "file", "file saving guest os /proc/kallsyms"),
3532 OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
3533 "file", "file saving guest os /proc/modules"),
3461 OPT_END() 3534 OPT_END()
3462 }; 3535 };
3463 const char * const script_subcommands[] = { "record", "report", NULL }; 3536 const char * const script_subcommands[] = { "record", "report", NULL };
@@ -3477,6 +3550,16 @@ int cmd_script(int argc, const char **argv)
3477 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, 3550 argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
3478 PARSE_OPT_STOP_AT_NON_OPTION); 3551 PARSE_OPT_STOP_AT_NON_OPTION);
3479 3552
3553 if (symbol_conf.guestmount ||
3554 symbol_conf.default_guest_vmlinux_name ||
3555 symbol_conf.default_guest_kallsyms ||
3556 symbol_conf.default_guest_modules) {
3557 /*
3558 * Enable guest sample processing.
3559 */
3560 perf_guest = true;
3561 }
3562
3480 data.path = input_name; 3563 data.path = input_name;
3481 data.force = symbol_conf.force; 3564 data.force = symbol_conf.force;
3482 3565
@@ -3765,6 +3848,10 @@ int cmd_script(int argc, const char **argv)
3765 &script.range_num); 3848 &script.range_num);
3766 if (err < 0) 3849 if (err < 0)
3767 goto out_delete; 3850 goto out_delete;
3851
3852 itrace_synth_opts__set_time_range(&itrace_synth_opts,
3853 script.ptime_range,
3854 script.range_num);
3768 } 3855 }
3769 3856
3770 err = __cmd_script(&script); 3857 err = __cmd_script(&script);
@@ -3772,8 +3859,10 @@ int cmd_script(int argc, const char **argv)
3772 flush_scripting(); 3859 flush_scripting();
3773 3860
3774out_delete: 3861out_delete:
3775 if (script.ptime_range) 3862 if (script.ptime_range) {
3863 itrace_synth_opts__clear_time_range(&itrace_synth_opts);
3776 zfree(&script.ptime_range); 3864 zfree(&script.ptime_range);
3865 }
3777 3866
3778 perf_evlist__free_stats(session->evlist); 3867 perf_evlist__free_stats(session->evlist);
3779 perf_session__delete(session); 3868 perf_session__delete(session);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1ae66f09dc7d..e5e19b461061 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -82,7 +82,7 @@
82#include <sys/time.h> 82#include <sys/time.h>
83#include <sys/resource.h> 83#include <sys/resource.h>
84 84
85#include "sane_ctype.h" 85#include <linux/ctype.h>
86 86
87#define DEFAULT_SEPARATOR " " 87#define DEFAULT_SEPARATOR " "
88#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" 88#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
@@ -776,6 +776,8 @@ static struct option stat_options[] = {
776 "stop workload and print counts after a timeout period in ms (>= 10ms)"), 776 "stop workload and print counts after a timeout period in ms (>= 10ms)"),
777 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 777 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
778 "aggregate counts per processor socket", AGGR_SOCKET), 778 "aggregate counts per processor socket", AGGR_SOCKET),
779 OPT_SET_UINT(0, "per-die", &stat_config.aggr_mode,
780 "aggregate counts per processor die", AGGR_DIE),
779 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 781 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
780 "aggregate counts per physical processor core", AGGR_CORE), 782 "aggregate counts per physical processor core", AGGR_CORE),
781 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, 783 OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
@@ -800,6 +802,12 @@ static int perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
800 return cpu_map__get_socket(map, cpu, NULL); 802 return cpu_map__get_socket(map, cpu, NULL);
801} 803}
802 804
805static int perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
806 struct cpu_map *map, int cpu)
807{
808 return cpu_map__get_die(map, cpu, NULL);
809}
810
803static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused, 811static int perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
804 struct cpu_map *map, int cpu) 812 struct cpu_map *map, int cpu)
805{ 813{
@@ -840,6 +848,12 @@ static int perf_stat__get_socket_cached(struct perf_stat_config *config,
840 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx); 848 return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
841} 849}
842 850
851static int perf_stat__get_die_cached(struct perf_stat_config *config,
852 struct cpu_map *map, int idx)
853{
854 return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
855}
856
843static int perf_stat__get_core_cached(struct perf_stat_config *config, 857static int perf_stat__get_core_cached(struct perf_stat_config *config,
844 struct cpu_map *map, int idx) 858 struct cpu_map *map, int idx)
845{ 859{
@@ -870,6 +884,13 @@ static int perf_stat_init_aggr_mode(void)
870 } 884 }
871 stat_config.aggr_get_id = perf_stat__get_socket_cached; 885 stat_config.aggr_get_id = perf_stat__get_socket_cached;
872 break; 886 break;
887 case AGGR_DIE:
888 if (cpu_map__build_die_map(evsel_list->cpus, &stat_config.aggr_map)) {
889 perror("cannot build die map");
890 return -1;
891 }
892 stat_config.aggr_get_id = perf_stat__get_die_cached;
893 break;
873 case AGGR_CORE: 894 case AGGR_CORE:
874 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) { 895 if (cpu_map__build_core_map(evsel_list->cpus, &stat_config.aggr_map)) {
875 perror("cannot build core map"); 896 perror("cannot build core map");
@@ -935,21 +956,55 @@ static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
935 return cpu == -1 ? -1 : env->cpu[cpu].socket_id; 956 return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
936} 957}
937 958
959static int perf_env__get_die(struct cpu_map *map, int idx, void *data)
960{
961 struct perf_env *env = data;
962 int die_id = -1, cpu = perf_env__get_cpu(env, map, idx);
963
964 if (cpu != -1) {
965 /*
966 * Encode socket in bit range 15:8
967 * die_id is relative to socket,
968 * we need a global id. So we combine
969 * socket + die id
970 */
971 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
972 return -1;
973
974 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
975 return -1;
976
977 die_id = (env->cpu[cpu].socket_id << 8) | (env->cpu[cpu].die_id & 0xff);
978 }
979
980 return die_id;
981}
982
938static int perf_env__get_core(struct cpu_map *map, int idx, void *data) 983static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
939{ 984{
940 struct perf_env *env = data; 985 struct perf_env *env = data;
941 int core = -1, cpu = perf_env__get_cpu(env, map, idx); 986 int core = -1, cpu = perf_env__get_cpu(env, map, idx);
942 987
943 if (cpu != -1) { 988 if (cpu != -1) {
944 int socket_id = env->cpu[cpu].socket_id;
945
946 /* 989 /*
947 * Encode socket in upper 16 bits 990 * Encode socket in bit range 31:24
948 * core_id is relative to socket, and 991 * encode die id in bit range 23:16
992 * core_id is relative to socket and die,
949 * we need a global id. So we combine 993 * we need a global id. So we combine
950 * socket + core id. 994 * socket + die id + core id
951 */ 995 */
952 core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff); 996 if (WARN_ONCE(env->cpu[cpu].socket_id >> 8, "The socket id number is too big.\n"))
997 return -1;
998
999 if (WARN_ONCE(env->cpu[cpu].die_id >> 8, "The die id number is too big.\n"))
1000 return -1;
1001
1002 if (WARN_ONCE(env->cpu[cpu].core_id >> 16, "The core id number is too big.\n"))
1003 return -1;
1004
1005 core = (env->cpu[cpu].socket_id << 24) |
1006 (env->cpu[cpu].die_id << 16) |
1007 (env->cpu[cpu].core_id & 0xffff);
953 } 1008 }
954 1009
955 return core; 1010 return core;
@@ -961,6 +1016,12 @@ static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus
961 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env); 1016 return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
962} 1017}
963 1018
1019static int perf_env__build_die_map(struct perf_env *env, struct cpu_map *cpus,
1020 struct cpu_map **diep)
1021{
1022 return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
1023}
1024
964static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus, 1025static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
965 struct cpu_map **corep) 1026 struct cpu_map **corep)
966{ 1027{
@@ -972,6 +1033,11 @@ static int perf_stat__get_socket_file(struct perf_stat_config *config __maybe_un
972{ 1033{
973 return perf_env__get_socket(map, idx, &perf_stat.session->header.env); 1034 return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
974} 1035}
1036static int perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
1037 struct cpu_map *map, int idx)
1038{
1039 return perf_env__get_die(map, idx, &perf_stat.session->header.env);
1040}
975 1041
976static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused, 1042static int perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
977 struct cpu_map *map, int idx) 1043 struct cpu_map *map, int idx)
@@ -991,6 +1057,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
991 } 1057 }
992 stat_config.aggr_get_id = perf_stat__get_socket_file; 1058 stat_config.aggr_get_id = perf_stat__get_socket_file;
993 break; 1059 break;
1060 case AGGR_DIE:
1061 if (perf_env__build_die_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
1062 perror("cannot build die map");
1063 return -1;
1064 }
1065 stat_config.aggr_get_id = perf_stat__get_die_file;
1066 break;
994 case AGGR_CORE: 1067 case AGGR_CORE:
995 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) { 1068 if (perf_env__build_core_map(env, evsel_list->cpus, &stat_config.aggr_map)) {
996 perror("cannot build core map"); 1069 perror("cannot build core map");
@@ -1541,6 +1614,8 @@ static int __cmd_report(int argc, const char **argv)
1541 OPT_STRING('i', "input", &input_name, "file", "input file name"), 1614 OPT_STRING('i', "input", &input_name, "file", "input file name"),
1542 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode, 1615 OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
1543 "aggregate counts per processor socket", AGGR_SOCKET), 1616 "aggregate counts per processor socket", AGGR_SOCKET),
1617 OPT_SET_UINT(0, "per-die", &perf_stat.aggr_mode,
1618 "aggregate counts per processor die", AGGR_DIE),
1544 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode, 1619 OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
1545 "aggregate counts per physical processor core", AGGR_CORE), 1620 "aggregate counts per physical processor core", AGGR_CORE),
1546 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode, 1621 OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 466621cd1017..6d40a4ef58c5 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -40,6 +40,7 @@
40#include "util/cpumap.h" 40#include "util/cpumap.h"
41#include "util/xyarray.h" 41#include "util/xyarray.h"
42#include "util/sort.h" 42#include "util/sort.h"
43#include "util/string2.h"
43#include "util/term.h" 44#include "util/term.h"
44#include "util/intlist.h" 45#include "util/intlist.h"
45#include "util/parse-branch-options.h" 46#include "util/parse-branch-options.h"
@@ -75,7 +76,7 @@
75#include <linux/time64.h> 76#include <linux/time64.h>
76#include <linux/types.h> 77#include <linux/types.h>
77 78
78#include "sane_ctype.h" 79#include <linux/ctype.h>
79 80
80static volatile int done; 81static volatile int done;
81static volatile int resize; 82static volatile int resize;
@@ -1207,11 +1208,14 @@ static int __cmd_top(struct perf_top *top)
1207 1208
1208 init_process_thread(top); 1209 init_process_thread(top);
1209 1210
1211 if (opts->record_namespaces)
1212 top->tool.namespace_events = true;
1213
1210 ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, 1214 ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
1211 &top->session->machines.host, 1215 &top->session->machines.host,
1212 &top->record_opts); 1216 &top->record_opts);
1213 if (ret < 0) 1217 if (ret < 0)
1214 pr_warning("Couldn't synthesize bpf events.\n"); 1218 pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
1215 1219
1216 machine__synthesize_threads(&top->session->machines.host, &opts->target, 1220 machine__synthesize_threads(&top->session->machines.host, &opts->target,
1217 top->evlist->threads, false, 1221 top->evlist->threads, false,
@@ -1499,6 +1503,8 @@ int cmd_top(int argc, const char **argv)
1499 OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"), 1503 OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
1500 OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize, 1504 OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
1501 "number of thread to run event synthesize"), 1505 "number of thread to run event synthesize"),
1506 OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
1507 "Record namespaces events"),
1502 OPT_END() 1508 OPT_END()
1503 }; 1509 };
1504 struct perf_evlist *sb_evlist = NULL; 1510 struct perf_evlist *sb_evlist = NULL;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 52fadc858ef0..d0eb7224dd36 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -64,7 +64,7 @@
64#include <fcntl.h> 64#include <fcntl.h>
65#include <sys/sysmacros.h> 65#include <sys/sysmacros.h>
66 66
67#include "sane_ctype.h" 67#include <linux/ctype.h>
68 68
69#ifndef O_CLOEXEC 69#ifndef O_CLOEXEC
70# define O_CLOEXEC 02000000 70# define O_CLOEXEC 02000000
@@ -402,6 +402,11 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
402 402
403#define SCA_STRARRAY syscall_arg__scnprintf_strarray 403#define SCA_STRARRAY syscall_arg__scnprintf_strarray
404 404
405size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg)
406{
407 return strarray__scnprintf_flags(arg->parm, bf, size, arg->show_string_prefix, arg->val);
408}
409
405size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val) 410size_t strarrays__scnprintf(struct strarrays *sas, char *bf, size_t size, const char *intfmt, bool show_prefix, int val)
406{ 411{
407 size_t printed; 412 size_t printed;
@@ -481,6 +486,15 @@ static const char *bpf_cmd[] = {
481}; 486};
482static DEFINE_STRARRAY(bpf_cmd, "BPF_"); 487static DEFINE_STRARRAY(bpf_cmd, "BPF_");
483 488
489static const char *fsmount_flags[] = {
490 [1] = "CLOEXEC",
491};
492static DEFINE_STRARRAY(fsmount_flags, "FSMOUNT_");
493
494#include "trace/beauty/generated/fsconfig_arrays.c"
495
496static DEFINE_STRARRAY(fsconfig_cmds, "FSCONFIG_");
497
484static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", }; 498static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
485static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1); 499static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, "EPOLL_CTL_", 1);
486 500
@@ -641,6 +655,10 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
641 { .scnprintf = SCA_STRARRAY, \ 655 { .scnprintf = SCA_STRARRAY, \
642 .parm = &strarray__##array, } 656 .parm = &strarray__##array, }
643 657
658#define STRARRAY_FLAGS(name, array) \
659 { .scnprintf = SCA_STRARRAY_FLAGS, \
660 .parm = &strarray__##array, }
661
644#include "trace/beauty/arch_errno_names.c" 662#include "trace/beauty/arch_errno_names.c"
645#include "trace/beauty/eventfd.c" 663#include "trace/beauty/eventfd.c"
646#include "trace/beauty/futex_op.c" 664#include "trace/beauty/futex_op.c"
@@ -712,6 +730,15 @@ static struct syscall_fmt {
712 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, }, 730 [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
713 { .name = "flock", 731 { .name = "flock",
714 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, }, 732 .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
733 { .name = "fsconfig",
734 .arg = { [1] = STRARRAY(cmd, fsconfig_cmds), }, },
735 { .name = "fsmount",
736 .arg = { [1] = STRARRAY_FLAGS(flags, fsmount_flags),
737 [2] = { .scnprintf = SCA_FSMOUNT_ATTR_FLAGS, /* attr_flags */ }, }, },
738 { .name = "fspick",
739 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
740 [1] = { .scnprintf = SCA_FILENAME, /* path */ },
741 [2] = { .scnprintf = SCA_FSPICK_FLAGS, /* flags */ }, }, },
715 { .name = "fstat", .alias = "newfstat", }, 742 { .name = "fstat", .alias = "newfstat", },
716 { .name = "fstatat", .alias = "newfstatat", }, 743 { .name = "fstatat", .alias = "newfstatat", },
717 { .name = "futex", 744 { .name = "futex",
@@ -774,6 +801,12 @@ static struct syscall_fmt {
774 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ }, 801 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* dev_name */ },
775 [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */ 802 [3] = { .scnprintf = SCA_MOUNT_FLAGS, /* flags */
776 .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, }, 803 .mask_val = SCAMV_MOUNT_FLAGS, /* flags */ }, }, },
804 { .name = "move_mount",
805 .arg = { [0] = { .scnprintf = SCA_FDAT, /* from_dfd */ },
806 [1] = { .scnprintf = SCA_FILENAME, /* from_pathname */ },
807 [2] = { .scnprintf = SCA_FDAT, /* to_dfd */ },
808 [3] = { .scnprintf = SCA_FILENAME, /* to_pathname */ },
809 [4] = { .scnprintf = SCA_MOVE_MOUNT_FLAGS, /* flags */ }, }, },
777 { .name = "mprotect", 810 { .name = "mprotect",
778 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ }, 811 .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
779 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, }, 812 [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
@@ -878,6 +911,8 @@ static struct syscall_fmt {
878 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, }, 911 .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
879 { .name = "symlinkat", 912 { .name = "symlinkat",
880 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, }, 913 .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
914 { .name = "sync_file_range",
915 .arg = { [3] = { .scnprintf = SCA_SYNC_FILE_RANGE_FLAGS, /* flags */ }, }, },
881 { .name = "tgkill", 916 { .name = "tgkill",
882 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, 917 .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
883 { .name = "tkill", 918 { .name = "tkill",
@@ -936,8 +971,14 @@ struct syscall {
936 struct syscall_arg_fmt *arg_fmt; 971 struct syscall_arg_fmt *arg_fmt;
937}; 972};
938 973
974/*
975 * Must match what is in the BPF program:
976 *
977 * tools/perf/examples/bpf/augmented_raw_syscalls.c
978 */
939struct bpf_map_syscall_entry { 979struct bpf_map_syscall_entry {
940 bool enabled; 980 bool enabled;
981 u16 string_args_len[6];
941}; 982};
942 983
943/* 984/*
@@ -1191,8 +1232,17 @@ static void thread__set_filename_pos(struct thread *thread, const char *bf,
1191static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size) 1232static size_t syscall_arg__scnprintf_augmented_string(struct syscall_arg *arg, char *bf, size_t size)
1192{ 1233{
1193 struct augmented_arg *augmented_arg = arg->augmented.args; 1234 struct augmented_arg *augmented_arg = arg->augmented.args;
1235 size_t printed = scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value);
1236 /*
1237 * So that the next arg with a payload can consume its augmented arg, i.e. for rename* syscalls
1238 * we would have two strings, each prefixed by its size.
1239 */
1240 int consumed = sizeof(*augmented_arg) + augmented_arg->size;
1241
1242 arg->augmented.args = ((void *)arg->augmented.args) + consumed;
1243 arg->augmented.size -= consumed;
1194 1244
1195 return scnprintf(bf, size, "\"%.*s\"", augmented_arg->size, augmented_arg->value); 1245 return printed;
1196} 1246}
1197 1247
1198static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, 1248static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
@@ -1380,10 +1430,11 @@ static int syscall__set_arg_fmts(struct syscall *sc)
1380 if (sc->fmt && sc->fmt->arg[idx].scnprintf) 1430 if (sc->fmt && sc->fmt->arg[idx].scnprintf)
1381 continue; 1431 continue;
1382 1432
1433 len = strlen(field->name);
1434
1383 if (strcmp(field->type, "const char *") == 0 && 1435 if (strcmp(field->type, "const char *") == 0 &&
1384 (strcmp(field->name, "filename") == 0 || 1436 ((len >= 4 && strcmp(field->name + len - 4, "name") == 0) ||
1385 strcmp(field->name, "path") == 0 || 1437 strstr(field->name, "path") != NULL))
1386 strcmp(field->name, "pathname") == 0))
1387 sc->arg_fmt[idx].scnprintf = SCA_FILENAME; 1438 sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
1388 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr")) 1439 else if ((field->flags & TEP_FIELD_IS_POINTER) || strstr(field->name, "addr"))
1389 sc->arg_fmt[idx].scnprintf = SCA_PTR; 1440 sc->arg_fmt[idx].scnprintf = SCA_PTR;
@@ -1394,8 +1445,7 @@ static int syscall__set_arg_fmts(struct syscall *sc)
1394 else if ((strcmp(field->type, "int") == 0 || 1445 else if ((strcmp(field->type, "int") == 0 ||
1395 strcmp(field->type, "unsigned int") == 0 || 1446 strcmp(field->type, "unsigned int") == 0 ||
1396 strcmp(field->type, "long") == 0) && 1447 strcmp(field->type, "long") == 0) &&
1397 (len = strlen(field->name)) >= 2 && 1448 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
1398 strcmp(field->name + len - 2, "fd") == 0) {
1399 /* 1449 /*
1400 * /sys/kernel/tracing/events/syscalls/sys_enter* 1450 * /sys/kernel/tracing/events/syscalls/sys_enter*
1401 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c 1451 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
@@ -1477,12 +1527,12 @@ static int trace__read_syscall_info(struct trace *trace, int id)
1477 1527
1478static int trace__validate_ev_qualifier(struct trace *trace) 1528static int trace__validate_ev_qualifier(struct trace *trace)
1479{ 1529{
1480 int err = 0, i; 1530 int err = 0;
1481 size_t nr_allocated; 1531 bool printed_invalid_prefix = false;
1482 struct str_node *pos; 1532 struct str_node *pos;
1533 size_t nr_used = 0, nr_allocated = strlist__nr_entries(trace->ev_qualifier);
1483 1534
1484 trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); 1535 trace->ev_qualifier_ids.entries = malloc(nr_allocated *
1485 trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
1486 sizeof(trace->ev_qualifier_ids.entries[0])); 1536 sizeof(trace->ev_qualifier_ids.entries[0]));
1487 1537
1488 if (trace->ev_qualifier_ids.entries == NULL) { 1538 if (trace->ev_qualifier_ids.entries == NULL) {
@@ -1492,9 +1542,6 @@ static int trace__validate_ev_qualifier(struct trace *trace)
1492 goto out; 1542 goto out;
1493 } 1543 }
1494 1544
1495 nr_allocated = trace->ev_qualifier_ids.nr;
1496 i = 0;
1497
1498 strlist__for_each_entry(pos, trace->ev_qualifier) { 1545 strlist__for_each_entry(pos, trace->ev_qualifier) {
1499 const char *sc = pos->s; 1546 const char *sc = pos->s;
1500 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; 1547 int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
@@ -1504,17 +1551,18 @@ static int trace__validate_ev_qualifier(struct trace *trace)
1504 if (id >= 0) 1551 if (id >= 0)
1505 goto matches; 1552 goto matches;
1506 1553
1507 if (err == 0) { 1554 if (!printed_invalid_prefix) {
1508 fputs("Error:\tInvalid syscall ", trace->output); 1555 pr_debug("Skipping unknown syscalls: ");
1509 err = -EINVAL; 1556 printed_invalid_prefix = true;
1510 } else { 1557 } else {
1511 fputs(", ", trace->output); 1558 pr_debug(", ");
1512 } 1559 }
1513 1560
1514 fputs(sc, trace->output); 1561 pr_debug("%s", sc);
1562 continue;
1515 } 1563 }
1516matches: 1564matches:
1517 trace->ev_qualifier_ids.entries[i++] = id; 1565 trace->ev_qualifier_ids.entries[nr_used++] = id;
1518 if (match_next == -1) 1566 if (match_next == -1)
1519 continue; 1567 continue;
1520 1568
@@ -1522,7 +1570,7 @@ matches:
1522 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); 1570 id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
1523 if (id < 0) 1571 if (id < 0)
1524 break; 1572 break;
1525 if (nr_allocated == trace->ev_qualifier_ids.nr) { 1573 if (nr_allocated == nr_used) {
1526 void *entries; 1574 void *entries;
1527 1575
1528 nr_allocated += 8; 1576 nr_allocated += 8;
@@ -1535,20 +1583,19 @@ matches:
1535 } 1583 }
1536 trace->ev_qualifier_ids.entries = entries; 1584 trace->ev_qualifier_ids.entries = entries;
1537 } 1585 }
1538 trace->ev_qualifier_ids.nr++; 1586 trace->ev_qualifier_ids.entries[nr_used++] = id;
1539 trace->ev_qualifier_ids.entries[i++] = id;
1540 } 1587 }
1541 } 1588 }
1542 1589
1543 if (err < 0) { 1590 trace->ev_qualifier_ids.nr = nr_used;
1544 fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
1545 "\nHint:\tand: 'man syscalls'\n", trace->output);
1546out_free:
1547 zfree(&trace->ev_qualifier_ids.entries);
1548 trace->ev_qualifier_ids.nr = 0;
1549 }
1550out: 1591out:
1592 if (printed_invalid_prefix)
1593 pr_debug("\n");
1551 return err; 1594 return err;
1595out_free:
1596 zfree(&trace->ev_qualifier_ids.entries);
1597 trace->ev_qualifier_ids.nr = 0;
1598 goto out;
1552} 1599}
1553 1600
1554/* 1601/*
@@ -2675,6 +2722,25 @@ out_enomem:
2675} 2722}
2676 2723
2677#ifdef HAVE_LIBBPF_SUPPORT 2724#ifdef HAVE_LIBBPF_SUPPORT
2725static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
2726{
2727 struct syscall *sc = trace__syscall_info(trace, NULL, id);
2728 int arg = 0;
2729
2730 if (sc == NULL)
2731 goto out;
2732
2733 for (; arg < sc->nr_args; ++arg) {
2734 entry->string_args_len[arg] = 0;
2735 if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
2736 /* Should be set like strace -s strsize */
2737 entry->string_args_len[arg] = PATH_MAX;
2738 }
2739 }
2740out:
2741 for (; arg < 6; ++arg)
2742 entry->string_args_len[arg] = 0;
2743}
2678static int trace__set_ev_qualifier_bpf_filter(struct trace *trace) 2744static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
2679{ 2745{
2680 int fd = bpf_map__fd(trace->syscalls.map); 2746 int fd = bpf_map__fd(trace->syscalls.map);
@@ -2687,6 +2753,9 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
2687 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) { 2753 for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
2688 int key = trace->ev_qualifier_ids.entries[i]; 2754 int key = trace->ev_qualifier_ids.entries[i];
2689 2755
2756 if (value.enabled)
2757 trace__init_bpf_map_syscall_args(trace, key, &value);
2758
2690 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST); 2759 err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
2691 if (err) 2760 if (err)
2692 break; 2761 break;
@@ -2704,6 +2773,9 @@ static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
2704 int err = 0, key; 2773 int err = 0, key;
2705 2774
2706 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) { 2775 for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
2776 if (enabled)
2777 trace__init_bpf_map_syscall_args(trace, key, &value);
2778
2707 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY); 2779 err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
2708 if (err) 2780 if (err)
2709 break; 2781 break;
@@ -3627,7 +3699,12 @@ static int trace__config(const char *var, const char *value, void *arg)
3627 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", 3699 struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
3628 "event selector. use 'perf list' to list available events", 3700 "event selector. use 'perf list' to list available events",
3629 parse_events_option); 3701 parse_events_option);
3630 err = parse_events_option(&o, value, 0); 3702 /*
3703 * We can't propagate parse_event_option() return, as it is 1
3704 * for failure while perf_config() expects -1.
3705 */
3706 if (parse_events_option(&o, value, 0))
3707 err = -1;
3631 } else if (!strcmp(var, "trace.show_timestamp")) { 3708 } else if (!strcmp(var, "trace.show_timestamp")) {
3632 trace->show_tstamp = perf_config_bool(var, value); 3709 trace->show_tstamp = perf_config_bool(var, value);
3633 } else if (!strcmp(var, "trace.show_duration")) { 3710 } else if (!strcmp(var, "trace.show_duration")) {
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index c68ee06cae63..f211c015cb76 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -105,6 +105,8 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex
105check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"' 105check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
106check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' 106check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
107check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' 107check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
108check include/linux/ctype.h '-I "isdigit("'
109check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
108 110
109# diff non-symmetric files 111# diff non-symmetric files
110check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl 112check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 2422894a8194..2f822bb51717 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -21,8 +21,14 @@
21/* bpf-output associated map */ 21/* bpf-output associated map */
22bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); 22bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
23 23
24/*
25 * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
26 * PATH_MAX for copying everything, any other value to limit
27 * it a la 'strace -s strsize'.
28 */
24struct syscall { 29struct syscall {
25 bool enabled; 30 bool enabled;
31 u16 string_args_len[6];
26}; 32};
27 33
28bpf_map(syscalls, ARRAY, int, struct syscall, 512); 34bpf_map(syscalls, ARRAY, int, struct syscall, 512);
@@ -41,83 +47,10 @@ struct syscall_exit_args {
41 47
42struct augmented_filename { 48struct augmented_filename {
43 unsigned int size; 49 unsigned int size;
44 int reserved; 50 int err;
45 char value[PATH_MAX]; 51 char value[PATH_MAX];
46}; 52};
47 53
48/* syscalls where the first arg is a string */
49#define SYS_OPEN 2
50#define SYS_STAT 4
51#define SYS_LSTAT 6
52#define SYS_ACCESS 21
53#define SYS_EXECVE 59
54#define SYS_TRUNCATE 76
55#define SYS_CHDIR 80
56#define SYS_RENAME 82
57#define SYS_MKDIR 83
58#define SYS_RMDIR 84
59#define SYS_CREAT 85
60#define SYS_LINK 86
61#define SYS_UNLINK 87
62#define SYS_SYMLINK 88
63#define SYS_READLINK 89
64#define SYS_CHMOD 90
65#define SYS_CHOWN 92
66#define SYS_LCHOWN 94
67#define SYS_MKNOD 133
68#define SYS_STATFS 137
69#define SYS_PIVOT_ROOT 155
70#define SYS_CHROOT 161
71#define SYS_ACCT 163
72#define SYS_SWAPON 167
73#define SYS_SWAPOFF 168
74#define SYS_DELETE_MODULE 176
75#define SYS_SETXATTR 188
76#define SYS_LSETXATTR 189
77#define SYS_GETXATTR 191
78#define SYS_LGETXATTR 192
79#define SYS_LISTXATTR 194
80#define SYS_LLISTXATTR 195
81#define SYS_REMOVEXATTR 197
82#define SYS_LREMOVEXATTR 198
83#define SYS_MQ_OPEN 240
84#define SYS_MQ_UNLINK 241
85#define SYS_ADD_KEY 248
86#define SYS_REQUEST_KEY 249
87#define SYS_SYMLINKAT 266
88#define SYS_MEMFD_CREATE 319
89
90/* syscalls where the first arg is a string */
91
92#define SYS_PWRITE64 18
93#define SYS_EXECVE 59
94#define SYS_RENAME 82
95#define SYS_QUOTACTL 179
96#define SYS_FSETXATTR 190
97#define SYS_FGETXATTR 193
98#define SYS_FREMOVEXATTR 199
99#define SYS_MQ_TIMEDSEND 242
100#define SYS_REQUEST_KEY 249
101#define SYS_INOTIFY_ADD_WATCH 254
102#define SYS_OPENAT 257
103#define SYS_MKDIRAT 258
104#define SYS_MKNODAT 259
105#define SYS_FCHOWNAT 260
106#define SYS_FUTIMESAT 261
107#define SYS_NEWFSTATAT 262
108#define SYS_UNLINKAT 263
109#define SYS_RENAMEAT 264
110#define SYS_LINKAT 265
111#define SYS_READLINKAT 267
112#define SYS_FCHMODAT 268
113#define SYS_FACCESSAT 269
114#define SYS_UTIMENSAT 280
115#define SYS_NAME_TO_HANDLE_AT 303
116#define SYS_FINIT_MODULE 313
117#define SYS_RENAMEAT2 316
118#define SYS_EXECVEAT 322
119#define SYS_STATX 332
120
121pid_filter(pids_filtered); 54pid_filter(pids_filtered);
122 55
123struct augmented_args_filename { 56struct augmented_args_filename {
@@ -127,12 +60,48 @@ struct augmented_args_filename {
127 60
128bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1); 61bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1);
129 62
63static inline
64unsigned int augmented_filename__read(struct augmented_filename *augmented_filename,
65 const void *filename_arg, unsigned int filename_len)
66{
67 unsigned int len = sizeof(*augmented_filename);
68 int size = probe_read_str(&augmented_filename->value, filename_len, filename_arg);
69
70 augmented_filename->size = augmented_filename->err = 0;
71 /*
72 * probe_read_str may return < 0, e.g. -EFAULT
73 * So we leave that in the augmented_filename->size that userspace will
74 */
75 if (size > 0) {
76 len -= sizeof(augmented_filename->value) - size;
77 len &= sizeof(augmented_filename->value) - 1;
78 augmented_filename->size = size;
79 } else {
80 /*
81 * So that username notice the error while still being able
82 * to skip this augmented arg record
83 */
84 augmented_filename->err = size;
85 len = offsetof(struct augmented_filename, value);
86 }
87
88 return len;
89}
90
130SEC("raw_syscalls:sys_enter") 91SEC("raw_syscalls:sys_enter")
131int sys_enter(struct syscall_enter_args *args) 92int sys_enter(struct syscall_enter_args *args)
132{ 93{
133 struct augmented_args_filename *augmented_args; 94 struct augmented_args_filename *augmented_args;
134 unsigned int len = sizeof(*augmented_args); 95 /*
135 const void *filename_arg = NULL; 96 * We start len, the amount of data that will be in the perf ring
97 * buffer, if this is not filtered out by one of pid_filter__has(),
98 * syscall->enabled, etc, with the non-augmented raw syscall payload,
99 * i.e. sizeof(augmented_args->args).
100 *
101 * We'll add to this as we add augmented syscalls right after that
102 * initial, non-augmented raw_syscalls:sys_enter payload.
103 */
104 unsigned int len = sizeof(augmented_args->args);
136 struct syscall *syscall; 105 struct syscall *syscall;
137 int key = 0; 106 int key = 0;
138 107
@@ -189,102 +158,67 @@ int sys_enter(struct syscall_enter_args *args)
189 * after the ctx memory access to prevent their down stream merging. 158 * after the ctx memory access to prevent their down stream merging.
190 */ 159 */
191 /* 160 /*
192 * This table of what args are strings will be provided by userspace, 161 * For now copy just the first string arg, we need to improve the protocol
193 * in the syscalls map, i.e. we will already have to do the lookup to 162 * and have more than one.
194 * see if this specific syscall is filtered, so we can as well get more
195 * info about what syscall args are strings or pointers, and how many
196 * bytes to copy, per arg, etc.
197 * 163 *
198 * For now hard code it, till we have all the basic mechanisms in place 164 * Using the unrolled loop is not working, only when we do it manually,
199 * to automate everything and make the kernel part be completely driven 165 * check this out later...
200 * by information obtained in userspace for each kernel version and 166
201 * processor architecture, making the kernel part the same no matter what 167 u8 arg;
202 * kernel version or processor architecture it runs on. 168#pragma clang loop unroll(full)
203 */ 169 for (arg = 0; arg < 6; ++arg) {
204 switch (augmented_args->args.syscall_nr) { 170 if (syscall->string_args_len[arg] != 0) {
205 case SYS_ACCT: 171 filename_len = syscall->string_args_len[arg];
206 case SYS_ADD_KEY: 172 filename_arg = (const void *)args->args[arg];
207 case SYS_CHDIR:
208 case SYS_CHMOD:
209 case SYS_CHOWN:
210 case SYS_CHROOT:
211 case SYS_CREAT:
212 case SYS_DELETE_MODULE:
213 case SYS_EXECVE:
214 case SYS_GETXATTR:
215 case SYS_LCHOWN:
216 case SYS_LGETXATTR:
217 case SYS_LINK:
218 case SYS_LISTXATTR:
219 case SYS_LLISTXATTR:
220 case SYS_LREMOVEXATTR:
221 case SYS_LSETXATTR:
222 case SYS_LSTAT:
223 case SYS_MEMFD_CREATE:
224 case SYS_MKDIR:
225 case SYS_MKNOD:
226 case SYS_MQ_OPEN:
227 case SYS_MQ_UNLINK:
228 case SYS_PIVOT_ROOT:
229 case SYS_READLINK:
230 case SYS_REMOVEXATTR:
231 case SYS_RENAME:
232 case SYS_REQUEST_KEY:
233 case SYS_RMDIR:
234 case SYS_SETXATTR:
235 case SYS_STAT:
236 case SYS_STATFS:
237 case SYS_SWAPOFF:
238 case SYS_SWAPON:
239 case SYS_SYMLINK:
240 case SYS_SYMLINKAT:
241 case SYS_TRUNCATE:
242 case SYS_UNLINK:
243 case SYS_ACCESS:
244 case SYS_OPEN: filename_arg = (const void *)args->args[0];
245 __asm__ __volatile__("": : :"memory"); 173 __asm__ __volatile__("": : :"memory");
246 break; 174 break;
247 case SYS_EXECVEAT:
248 case SYS_FACCESSAT:
249 case SYS_FCHMODAT:
250 case SYS_FCHOWNAT:
251 case SYS_FGETXATTR:
252 case SYS_FINIT_MODULE:
253 case SYS_FREMOVEXATTR:
254 case SYS_FSETXATTR:
255 case SYS_FUTIMESAT:
256 case SYS_INOTIFY_ADD_WATCH:
257 case SYS_LINKAT:
258 case SYS_MKDIRAT:
259 case SYS_MKNODAT:
260 case SYS_MQ_TIMEDSEND:
261 case SYS_NAME_TO_HANDLE_AT:
262 case SYS_NEWFSTATAT:
263 case SYS_PWRITE64:
264 case SYS_QUOTACTL:
265 case SYS_READLINKAT:
266 case SYS_RENAMEAT:
267 case SYS_RENAMEAT2:
268 case SYS_STATX:
269 case SYS_UNLINKAT:
270 case SYS_UTIMENSAT:
271 case SYS_OPENAT: filename_arg = (const void *)args->args[1];
272 break;
273 }
274
275 if (filename_arg != NULL) {
276 augmented_args->filename.reserved = 0;
277 augmented_args->filename.size = probe_read_str(&augmented_args->filename.value,
278 sizeof(augmented_args->filename.value),
279 filename_arg);
280 if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
281 len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
282 len &= sizeof(augmented_args->filename.value) - 1;
283 } 175 }
284 } else {
285 len = sizeof(augmented_args->args);
286 } 176 }
287 177
178 verifier log:
179
180; if (syscall->string_args_len[arg] != 0) {
18137: (69) r3 = *(u16 *)(r0 +2)
182 R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv0 R2_w=map_value(id=0,off=2,ks=4,vs=14,imm=0) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
183; if (syscall->string_args_len[arg] != 0) {
18438: (55) if r3 != 0x0 goto pc+5
185 R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv0 R2=map_value(id=0,off=2,ks=4,vs=14,imm=0) R3=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
18639: (b7) r1 = 1
187; if (syscall->string_args_len[arg] != 0) {
18840: (bf) r2 = r0
18941: (07) r2 += 4
19042: (69) r3 = *(u16 *)(r0 +4)
191 R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1_w=inv1 R2_w=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3_w=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
192; if (syscall->string_args_len[arg] != 0) {
19343: (15) if r3 == 0x0 goto pc+32
194 R0=map_value(id=0,off=0,ks=4,vs=14,imm=0) R1=inv1 R2=map_value(id=0,off=4,ks=4,vs=14,imm=0) R3=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)) R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=4168,imm=0) R10=fp0,call_-1 fp-8=mmmmmmmm
195; filename_arg = (const void *)args->args[arg];
19644: (67) r1 <<= 3
19745: (bf) r3 = r6
19846: (0f) r3 += r1
19947: (b7) r5 = 64
20048: (79) r3 = *(u64 *)(r3 +16)
201dereference of modified ctx ptr R3 off=8 disallowed
202processed 46 insns (limit 1000000) max_states_per_insn 0 total_states 12 peak_states 12 mark_read 7
203 */
204
205#define __loop_iter(arg) \
206 if (syscall->string_args_len[arg] != 0) { \
207 unsigned int filename_len = syscall->string_args_len[arg]; \
208 const void *filename_arg = (const void *)args->args[arg]; \
209 if (filename_len <= sizeof(augmented_args->filename.value)) \
210 len += augmented_filename__read(&augmented_args->filename, filename_arg, filename_len);
211#define loop_iter_first() __loop_iter(0); }
212#define loop_iter(arg) else __loop_iter(arg); }
213#define loop_iter_last(arg) else __loop_iter(arg); __asm__ __volatile__("": : :"memory"); }
214
215 loop_iter_first()
216 loop_iter(1)
217 loop_iter(2)
218 loop_iter(3)
219 loop_iter(4)
220 loop_iter_last(5)
221
288 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ 222 /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
289 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len); 223 return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len);
290} 224}
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh
index 74e4627ca278..0b96545c8184 100644
--- a/tools/perf/perf-with-kcore.sh
+++ b/tools/perf/perf-with-kcore.sh
@@ -104,11 +104,6 @@ fix_buildid_cache_permissions()
104 104
105 USER_HOME=$(bash <<< "echo ~$SUDO_USER") 105 USER_HOME=$(bash <<< "echo ~$SUDO_USER")
106 106
107 if [ "$HOME" != "$USER_HOME" ] ; then
108 echo "Fix unnecessary because root has a home: $HOME" >&2
109 exit 1
110 fi
111
112 echo "Fixing buildid cache permissions" 107 echo "Fixing buildid cache permissions"
113 108
114 find "$USER_HOME/.debug" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \; 109 find "$USER_HOME/.debug" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \;
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 72df4b6fa36f..2123b3cc4dcf 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -18,6 +18,7 @@
18#include "util/bpf-loader.h" 18#include "util/bpf-loader.h"
19#include "util/debug.h" 19#include "util/debug.h"
20#include "util/event.h" 20#include "util/event.h"
21#include "util/util.h"
21#include <api/fs/fs.h> 22#include <api/fs/fs.h>
22#include <api/fs/tracing_path.h> 23#include <api/fs/tracing_path.h>
23#include <errno.h> 24#include <errno.h>
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index d59dee61b64d..74d0124d38f3 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -26,7 +26,7 @@ static inline unsigned long long rdclock(void)
26} 26}
27 27
28#ifndef MAX_NR_CPUS 28#ifndef MAX_NR_CPUS
29#define MAX_NR_CPUS 1024 29#define MAX_NR_CPUS 2048
30#endif 30#endif
31 31
32extern const char *input_name; 32extern const char *input_name;
@@ -61,6 +61,8 @@ struct record_opts {
61 bool record_switch_events; 61 bool record_switch_events;
62 bool all_kernel; 62 bool all_kernel;
63 bool all_user; 63 bool all_user;
64 bool kernel_callchains;
65 bool user_callchains;
64 bool tail_synthesize; 66 bool tail_synthesize;
65 bool overwrite; 67 bool overwrite;
66 bool ignore_missing_thread; 68 bool ignore_missing_thread;
diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json
new file mode 100644
index 000000000000..0d1556fcdffe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-ddrc.json
@@ -0,0 +1,44 @@
1[
2 {
3 "EventCode": "0x02",
4 "EventName": "uncore_hisi_ddrc.flux_wcmd",
5 "BriefDescription": "DDRC write commands",
6 "PublicDescription": "DDRC write commands",
7 "Unit": "hisi_sccl,ddrc",
8 },
9 {
10 "EventCode": "0x03",
11 "EventName": "uncore_hisi_ddrc.flux_rcmd",
12 "BriefDescription": "DDRC read commands",
13 "PublicDescription": "DDRC read commands",
14 "Unit": "hisi_sccl,ddrc",
15 },
16 {
17 "EventCode": "0x04",
18 "EventName": "uncore_hisi_ddrc.flux_wr",
19 "BriefDescription": "DDRC precharge commands",
20 "PublicDescription": "DDRC precharge commands",
21 "Unit": "hisi_sccl,ddrc",
22 },
23 {
24 "EventCode": "0x05",
25 "EventName": "uncore_hisi_ddrc.act_cmd",
26 "BriefDescription": "DDRC active commands",
27 "PublicDescription": "DDRC active commands",
28 "Unit": "hisi_sccl,ddrc",
29 },
30 {
31 "EventCode": "0x06",
32 "EventName": "uncore_hisi_ddrc.rnk_chg",
33 "BriefDescription": "DDRC rank commands",
34 "PublicDescription": "DDRC rank commands",
35 "Unit": "hisi_sccl,ddrc",
36 },
37 {
38 "EventCode": "0x07",
39 "EventName": "uncore_hisi_ddrc.rw_chg",
40 "BriefDescription": "DDRC read and write changes",
41 "PublicDescription": "DDRC read and write changes",
42 "Unit": "hisi_sccl,ddrc",
43 },
44]
diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json
new file mode 100644
index 000000000000..447d3064de90
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-hha.json
@@ -0,0 +1,51 @@
1[
2 {
3 "EventCode": "0x00",
4 "EventName": "uncore_hisi_hha.rx_ops_num",
5 "BriefDescription": "The number of all operations received by the HHA",
6 "PublicDescription": "The number of all operations received by the HHA",
7 "Unit": "hisi_sccl,hha",
8 },
9 {
10 "EventCode": "0x01",
11 "EventName": "uncore_hisi_hha.rx_outer",
12 "BriefDescription": "The number of all operations received by the HHA from another socket",
13 "PublicDescription": "The number of all operations received by the HHA from another socket",
14 "Unit": "hisi_sccl,hha",
15 },
16 {
17 "EventCode": "0x02",
18 "EventName": "uncore_hisi_hha.rx_sccl",
19 "BriefDescription": "The number of all operations received by the HHA from another SCCL in this socket",
20 "PublicDescription": "The number of all operations received by the HHA from another SCCL in this socket",
21 "Unit": "hisi_sccl,hha",
22 },
23 {
24 "EventCode": "0x1c",
25 "EventName": "uncore_hisi_hha.rd_ddr_64b",
26 "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 64 bytes",
27 "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 64bytes",
28 "Unit": "hisi_sccl,hha",
29 },
30 {
31 "EventCode": "0x1d",
32 "EventName": "uncore_hisi_hha.wr_dr_64b",
33 "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes",
34 "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 64 bytes",
35 "Unit": "hisi_sccl,hha",
36 },
37 {
38 "EventCode": "0x1e",
39 "EventName": "uncore_hisi_hha.rd_ddr_128b",
40 "BriefDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes",
41 "PublicDescription": "The number of read operations sent by HHA to DDRC which size is 128 bytes",
42 "Unit": "hisi_sccl,hha",
43 },
44 {
45 "EventCode": "0x1f",
46 "EventName": "uncore_hisi_hha.wr_ddr_128b",
47 "BriefDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes",
48 "PublicDescription": "The number of write operations sent by HHA to DDRC which size is 128 bytes",
49 "Unit": "hisi_sccl,hha",
50 },
51]
diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json
new file mode 100644
index 000000000000..ca48747642e1
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/uncore-l3c.json
@@ -0,0 +1,37 @@
1[
2 {
3 "EventCode": "0x00",
4 "EventName": "uncore_hisi_l3c.rd_cpipe",
5 "BriefDescription": "Total read accesses",
6 "PublicDescription": "Total read accesses",
7 "Unit": "hisi_sccl,l3c",
8 },
9 {
10 "EventCode": "0x01",
11 "EventName": "uncore_hisi_l3c.wr_cpipe",
12 "BriefDescription": "Total write accesses",
13 "PublicDescription": "Total write accesses",
14 "Unit": "hisi_sccl,l3c",
15 },
16 {
17 "EventCode": "0x02",
18 "EventName": "uncore_hisi_l3c.rd_hit_cpipe",
19 "BriefDescription": "Total read hits",
20 "PublicDescription": "Total read hits",
21 "Unit": "hisi_sccl,l3c",
22 },
23 {
24 "EventCode": "0x03",
25 "EventName": "uncore_hisi_l3c.wr_hit_cpipe",
26 "BriefDescription": "Total write hits",
27 "PublicDescription": "Total write hits",
28 "Unit": "hisi_sccl,l3c",
29 },
30 {
31 "EventCode": "0x04",
32 "EventName": "uncore_hisi_l3c.victim_num",
33 "BriefDescription": "l3c precharge commands",
34 "PublicDescription": "l3c precharge commands",
35 "Unit": "hisi_sccl,l3c",
36 },
37]
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 1a1a3501180a..a382b115633d 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -314,13 +314,13 @@
314 "MetricName": "DRAM_BW_Use" 314 "MetricName": "DRAM_BW_Use"
315 }, 315 },
316 { 316 {
317 "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 317 "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
318 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", 318 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
319 "MetricGroup": "Memory_Lat", 319 "MetricGroup": "Memory_Lat",
320 "MetricName": "DRAM_Read_Latency" 320 "MetricName": "DRAM_Read_Latency"
321 }, 321 },
322 { 322 {
323 "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", 323 "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@",
324 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", 324 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
325 "MetricGroup": "Memory_BW", 325 "MetricGroup": "Memory_BW",
326 "MetricName": "DRAM_Parallel_Reads" 326 "MetricName": "DRAM_Parallel_Reads"
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 56e03ba771f4..35b255fa6a79 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -314,36 +314,18 @@
314 "MetricName": "DRAM_BW_Use" 314 "MetricName": "DRAM_BW_Use"
315 }, 315 },
316 { 316 {
317 "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )", 317 "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x35\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ ) / ( cha_0@event\\=0x0@ / duration_time )",
318 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches", 318 "BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
319 "MetricGroup": "Memory_Lat", 319 "MetricGroup": "Memory_Lat",
320 "MetricName": "DRAM_Read_Latency" 320 "MetricName": "DRAM_Read_Latency"
321 }, 321 },
322 { 322 {
323 "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@", 323 "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,config\\=0x40433@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1\\\\\\,config\\=0x40433@",
324 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches", 324 "BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
325 "MetricGroup": "Memory_BW", 325 "MetricGroup": "Memory_BW",
326 "MetricName": "DRAM_Parallel_Reads" 326 "MetricName": "DRAM_Parallel_Reads"
327 }, 327 },
328 { 328 {
329 "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0",
330 "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
331 "MetricGroup": "Memory_Lat",
332 "MetricName": "MEM_PMM_Read_Latency"
333 },
334 {
335 "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
336 "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
337 "MetricGroup": "Memory_BW",
338 "MetricName": "PMM_Read_BW"
339 },
340 {
341 "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
342 "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
343 "MetricGroup": "Memory_BW",
344 "MetricName": "PMM_Write_BW"
345 },
346 {
347 "MetricExpr": "cha_0@event\\=0x0@", 329 "MetricExpr": "cha_0@event\\=0x0@",
348 "BriefDescription": "Socket actual clocks when any core is active on that socket", 330 "BriefDescription": "Socket actual clocks when any core is active on that socket",
349 "MetricGroup": "", 331 "MetricGroup": "",
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 58f77fd0f59f..287a6f10ca48 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -236,6 +236,9 @@ static struct map {
236 { "CPU-M-CF", "cpum_cf" }, 236 { "CPU-M-CF", "cpum_cf" },
237 { "CPU-M-SF", "cpum_sf" }, 237 { "CPU-M-SF", "cpum_sf" },
238 { "UPI LL", "uncore_upi" }, 238 { "UPI LL", "uncore_upi" },
239 { "hisi_sccl,ddrc", "hisi_sccl,ddrc" },
240 { "hisi_sccl,hha", "hisi_sccl,hha" },
241 { "hisi_sccl,l3c", "hisi_sccl,l3c" },
239 {} 242 {}
240}; 243};
241 244
@@ -841,7 +844,7 @@ static void create_empty_mapping(const char *output_file)
841 _Exit(1); 844 _Exit(1);
842 } 845 }
843 846
844 fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n"); 847 fprintf(outfp, "#include \"pmu-events/pmu-events.h\"\n");
845 print_mapping_table_prefix(outfp); 848 print_mapping_table_prefix(outfp);
846 print_mapping_table_suffix(outfp); 849 print_mapping_table_suffix(outfp);
847 fclose(outfp); 850 fclose(outfp);
@@ -1096,7 +1099,7 @@ int main(int argc, char *argv[])
1096 } 1099 }
1097 1100
1098 /* Include pmu-events.h first */ 1101 /* Include pmu-events.h first */
1099 fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n"); 1102 fprintf(eventsfp, "#include \"pmu-events/pmu-events.h\"\n");
1100 1103
1101 /* 1104 /*
1102 * The mapfile allows multiple CPUids to point to the same JSON file, 1105 * The mapfile allows multiple CPUids to point to the same JSON file,
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
index c3eae1d77d36..4447f0d7c754 100644
--- a/tools/perf/scripts/python/export-to-postgresql.py
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -27,18 +27,31 @@ import datetime
27# 27#
28# fedora: 28# fedora:
29# 29#
30# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql 30# $ sudo yum install postgresql postgresql-server qt-postgresql
31# $ sudo su - postgres -c initdb 31# $ sudo su - postgres -c initdb
32# $ sudo service postgresql start 32# $ sudo service postgresql start
33# $ sudo su - postgres 33# $ sudo su - postgres
34# $ createuser <your user id here> 34# $ createuser -s <your user id here> # Older versions may not support -s, in which case answer the prompt below:
35# Shall the new role be a superuser? (y/n) y 35# Shall the new role be a superuser? (y/n) y
36# $ sudo yum install python-pyside
37#
38# Alternately, to use Python3 and/or pyside 2, one of the following:
39# $ sudo yum install python3-pyside
40# $ pip install --user PySide2
41# $ pip3 install --user PySide2
36# 42#
37# ubuntu: 43# ubuntu:
38# 44#
39# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql 45# $ sudo apt-get install postgresql
40# $ sudo su - postgres 46# $ sudo su - postgres
41# $ createuser -s <your user id here> 47# $ createuser -s <your user id here>
48# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql
49#
50# Alternately, to use Python3 and/or pyside 2, one of the following:
51#
52# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql
53# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql
54# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql
42# 55#
43# An example of using this script with Intel PT: 56# An example of using this script with Intel PT:
44# 57#
@@ -199,7 +212,16 @@ import datetime
199# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5)) 212# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5))
200# call_path_id = query.value(6) 213# call_path_id = query.value(6)
201 214
202from PySide.QtSql import * 215pyside_version_1 = True
216if not "pyside-version-1" in sys.argv:
217 try:
218 from PySide2.QtSql import *
219 pyside_version_1 = False
220 except:
221 pass
222
223if pyside_version_1:
224 from PySide.QtSql import *
203 225
204if sys.version_info < (3, 0): 226if sys.version_info < (3, 0):
205 def toserverstr(str): 227 def toserverstr(str):
@@ -255,11 +277,12 @@ def printdate(*args, **kw_args):
255 print(datetime.datetime.today(), *args, sep=' ', **kw_args) 277 print(datetime.datetime.today(), *args, sep=' ', **kw_args)
256 278
257def usage(): 279def usage():
258 printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]") 280 printerr("Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]");
259 printerr("where: columns 'all' or 'branches'") 281 printerr("where: columns 'all' or 'branches'");
260 printerr(" calls 'calls' => create calls and call_paths table") 282 printerr(" calls 'calls' => create calls and call_paths table");
261 printerr(" callchains 'callchains' => create call_paths table") 283 printerr(" callchains 'callchains' => create call_paths table");
262 raise Exception("Too few arguments") 284 printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1");
285 raise Exception("Too few or bad arguments")
263 286
264if (len(sys.argv) < 2): 287if (len(sys.argv) < 2):
265 usage() 288 usage()
@@ -281,6 +304,8 @@ for i in range(3,len(sys.argv)):
281 perf_db_export_calls = True 304 perf_db_export_calls = True
282 elif (sys.argv[i] == "callchains"): 305 elif (sys.argv[i] == "callchains"):
283 perf_db_export_callchains = True 306 perf_db_export_callchains = True
307 elif (sys.argv[i] == "pyside-version-1"):
308 pass
284 else: 309 else:
285 usage() 310 usage()
286 311
@@ -369,7 +394,9 @@ if branches:
369 'to_ip bigint,' 394 'to_ip bigint,'
370 'branch_type integer,' 395 'branch_type integer,'
371 'in_tx boolean,' 396 'in_tx boolean,'
372 'call_path_id bigint)') 397 'call_path_id bigint,'
398 'insn_count bigint,'
399 'cyc_count bigint)')
373else: 400else:
374 do_query(query, 'CREATE TABLE samples (' 401 do_query(query, 'CREATE TABLE samples ('
375 'id bigint NOT NULL,' 402 'id bigint NOT NULL,'
@@ -393,7 +420,9 @@ else:
393 'data_src bigint,' 420 'data_src bigint,'
394 'branch_type integer,' 421 'branch_type integer,'
395 'in_tx boolean,' 422 'in_tx boolean,'
396 'call_path_id bigint)') 423 'call_path_id bigint,'
424 'insn_count bigint,'
425 'cyc_count bigint)')
397 426
398if perf_db_export_calls or perf_db_export_callchains: 427if perf_db_export_calls or perf_db_export_callchains:
399 do_query(query, 'CREATE TABLE call_paths (' 428 do_query(query, 'CREATE TABLE call_paths ('
@@ -414,7 +443,41 @@ if perf_db_export_calls:
414 'return_id bigint,' 443 'return_id bigint,'
415 'parent_call_path_id bigint,' 444 'parent_call_path_id bigint,'
416 'flags integer,' 445 'flags integer,'
417 'parent_id bigint)') 446 'parent_id bigint,'
447 'insn_count bigint,'
448 'cyc_count bigint)')
449
450do_query(query, 'CREATE TABLE ptwrite ('
451 'id bigint NOT NULL,'
452 'payload bigint,'
453 'exact_ip boolean)')
454
455do_query(query, 'CREATE TABLE cbr ('
456 'id bigint NOT NULL,'
457 'cbr integer,'
458 'mhz integer,'
459 'percent integer)')
460
461do_query(query, 'CREATE TABLE mwait ('
462 'id bigint NOT NULL,'
463 'hints integer,'
464 'extensions integer)')
465
466do_query(query, 'CREATE TABLE pwre ('
467 'id bigint NOT NULL,'
468 'cstate integer,'
469 'subcstate integer,'
470 'hw boolean)')
471
472do_query(query, 'CREATE TABLE exstop ('
473 'id bigint NOT NULL,'
474 'exact_ip boolean)')
475
476do_query(query, 'CREATE TABLE pwrx ('
477 'id bigint NOT NULL,'
478 'deepest_cstate integer,'
479 'last_cstate integer,'
480 'wake_reason integer)')
418 481
419do_query(query, 'CREATE VIEW machines_view AS ' 482do_query(query, 'CREATE VIEW machines_view AS '
420 'SELECT ' 483 'SELECT '
@@ -496,6 +559,9 @@ if perf_db_export_calls:
496 'return_time,' 559 'return_time,'
497 'return_time - call_time AS elapsed_time,' 560 'return_time - call_time AS elapsed_time,'
498 'branch_count,' 561 'branch_count,'
562 'insn_count,'
563 'cyc_count,'
564 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC,'
499 'call_id,' 565 'call_id,'
500 'return_id,' 566 'return_id,'
501 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,' 567 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE CAST ( flags AS VARCHAR(6) ) END AS flags,'
@@ -521,9 +587,110 @@ do_query(query, 'CREATE VIEW samples_view AS '
521 'to_sym_offset,' 587 'to_sym_offset,'
522 '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' 588 '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
523 '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' 589 '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
524 'in_tx' 590 'in_tx,'
591 'insn_count,'
592 'cyc_count,'
593 'CASE WHEN cyc_count=0 THEN CAST(0 AS NUMERIC(20, 2)) ELSE CAST((CAST(insn_count AS FLOAT) / cyc_count) AS NUMERIC(20, 2)) END AS IPC'
525 ' FROM samples') 594 ' FROM samples')
526 595
596do_query(query, 'CREATE VIEW ptwrite_view AS '
597 'SELECT '
598 'ptwrite.id,'
599 'time,'
600 'cpu,'
601 'to_hex(payload) AS payload_hex,'
602 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip'
603 ' FROM ptwrite'
604 ' INNER JOIN samples ON samples.id = ptwrite.id')
605
606do_query(query, 'CREATE VIEW cbr_view AS '
607 'SELECT '
608 'cbr.id,'
609 'time,'
610 'cpu,'
611 'cbr,'
612 'mhz,'
613 'percent'
614 ' FROM cbr'
615 ' INNER JOIN samples ON samples.id = cbr.id')
616
617do_query(query, 'CREATE VIEW mwait_view AS '
618 'SELECT '
619 'mwait.id,'
620 'time,'
621 'cpu,'
622 'to_hex(hints) AS hints_hex,'
623 'to_hex(extensions) AS extensions_hex'
624 ' FROM mwait'
625 ' INNER JOIN samples ON samples.id = mwait.id')
626
627do_query(query, 'CREATE VIEW pwre_view AS '
628 'SELECT '
629 'pwre.id,'
630 'time,'
631 'cpu,'
632 'cstate,'
633 'subcstate,'
634 'CASE WHEN hw=FALSE THEN \'False\' ELSE \'True\' END AS hw'
635 ' FROM pwre'
636 ' INNER JOIN samples ON samples.id = pwre.id')
637
638do_query(query, 'CREATE VIEW exstop_view AS '
639 'SELECT '
640 'exstop.id,'
641 'time,'
642 'cpu,'
643 'CASE WHEN exact_ip=FALSE THEN \'False\' ELSE \'True\' END AS exact_ip'
644 ' FROM exstop'
645 ' INNER JOIN samples ON samples.id = exstop.id')
646
647do_query(query, 'CREATE VIEW pwrx_view AS '
648 'SELECT '
649 'pwrx.id,'
650 'time,'
651 'cpu,'
652 'deepest_cstate,'
653 'last_cstate,'
654 'CASE WHEN wake_reason=1 THEN \'Interrupt\''
655 ' WHEN wake_reason=2 THEN \'Timer Deadline\''
656 ' WHEN wake_reason=4 THEN \'Monitored Address\''
657 ' WHEN wake_reason=8 THEN \'HW\''
658 ' ELSE CAST ( wake_reason AS VARCHAR(2) )'
659 'END AS wake_reason'
660 ' FROM pwrx'
661 ' INNER JOIN samples ON samples.id = pwrx.id')
662
663do_query(query, 'CREATE VIEW power_events_view AS '
664 'SELECT '
665 'samples.id,'
666 'samples.time,'
667 'samples.cpu,'
668 'selected_events.name AS event,'
669 'FORMAT(\'%6s\', cbr.cbr) AS cbr,'
670 'FORMAT(\'%6s\', cbr.mhz) AS MHz,'
671 'FORMAT(\'%5s\', cbr.percent) AS percent,'
672 'to_hex(mwait.hints) AS hints_hex,'
673 'to_hex(mwait.extensions) AS extensions_hex,'
674 'FORMAT(\'%3s\', pwre.cstate) AS cstate,'
675 'FORMAT(\'%3s\', pwre.subcstate) AS subcstate,'
676 'CASE WHEN pwre.hw=FALSE THEN \'False\' WHEN pwre.hw=TRUE THEN \'True\' ELSE NULL END AS hw,'
677 'CASE WHEN exstop.exact_ip=FALSE THEN \'False\' WHEN exstop.exact_ip=TRUE THEN \'True\' ELSE NULL END AS exact_ip,'
678 'FORMAT(\'%3s\', pwrx.deepest_cstate) AS deepest_cstate,'
679 'FORMAT(\'%3s\', pwrx.last_cstate) AS last_cstate,'
680 'CASE WHEN pwrx.wake_reason=1 THEN \'Interrupt\''
681 ' WHEN pwrx.wake_reason=2 THEN \'Timer Deadline\''
682 ' WHEN pwrx.wake_reason=4 THEN \'Monitored Address\''
683 ' WHEN pwrx.wake_reason=8 THEN \'HW\''
684 ' ELSE FORMAT(\'%2s\', pwrx.wake_reason)'
685 'END AS wake_reason'
686 ' FROM cbr'
687 ' FULL JOIN mwait ON mwait.id = cbr.id'
688 ' FULL JOIN pwre ON pwre.id = cbr.id'
689 ' FULL JOIN exstop ON exstop.id = cbr.id'
690 ' FULL JOIN pwrx ON pwrx.id = cbr.id'
691 ' INNER JOIN samples ON samples.id = coalesce(cbr.id, mwait.id, pwre.id, exstop.id, pwrx.id)'
692 ' INNER JOIN selected_events ON selected_events.id = samples.evsel_id'
693 ' ORDER BY samples.id')
527 694
528file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) 695file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0)
529file_trailer = b"\377\377" 696file_trailer = b"\377\377"
@@ -583,6 +750,12 @@ if perf_db_export_calls or perf_db_export_callchains:
583 call_path_file = open_output_file("call_path_table.bin") 750 call_path_file = open_output_file("call_path_table.bin")
584if perf_db_export_calls: 751if perf_db_export_calls:
585 call_file = open_output_file("call_table.bin") 752 call_file = open_output_file("call_table.bin")
753ptwrite_file = open_output_file("ptwrite_table.bin")
754cbr_file = open_output_file("cbr_table.bin")
755mwait_file = open_output_file("mwait_table.bin")
756pwre_file = open_output_file("pwre_table.bin")
757exstop_file = open_output_file("exstop_table.bin")
758pwrx_file = open_output_file("pwrx_table.bin")
586 759
587def trace_begin(): 760def trace_begin():
588 printdate("Writing to intermediate files...") 761 printdate("Writing to intermediate files...")
@@ -593,13 +766,23 @@ def trace_begin():
593 comm_table(0, "unknown") 766 comm_table(0, "unknown")
594 dso_table(0, 0, "unknown", "unknown", "") 767 dso_table(0, 0, "unknown", "unknown", "")
595 symbol_table(0, 0, 0, 0, 0, "unknown") 768 symbol_table(0, 0, 0, 0, 0, "unknown")
596 sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 769 sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
597 if perf_db_export_calls or perf_db_export_callchains: 770 if perf_db_export_calls or perf_db_export_callchains:
598 call_path_table(0, 0, 0, 0) 771 call_path_table(0, 0, 0, 0)
599 call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 772 call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
600 773
601unhandled_count = 0 774unhandled_count = 0
602 775
776def is_table_empty(table_name):
777 do_query(query, 'SELECT * FROM ' + table_name + ' LIMIT 1');
778 if query.next():
779 return False
780 return True
781
782def drop(table_name):
783 do_query(query, 'DROP VIEW ' + table_name + '_view');
784 do_query(query, 'DROP TABLE ' + table_name);
785
603def trace_end(): 786def trace_end():
604 printdate("Copying to database...") 787 printdate("Copying to database...")
605 copy_output_file(evsel_file, "selected_events") 788 copy_output_file(evsel_file, "selected_events")
@@ -615,6 +798,12 @@ def trace_end():
615 copy_output_file(call_path_file, "call_paths") 798 copy_output_file(call_path_file, "call_paths")
616 if perf_db_export_calls: 799 if perf_db_export_calls:
617 copy_output_file(call_file, "calls") 800 copy_output_file(call_file, "calls")
801 copy_output_file(ptwrite_file, "ptwrite")
802 copy_output_file(cbr_file, "cbr")
803 copy_output_file(mwait_file, "mwait")
804 copy_output_file(pwre_file, "pwre")
805 copy_output_file(exstop_file, "exstop")
806 copy_output_file(pwrx_file, "pwrx")
618 807
619 printdate("Removing intermediate files...") 808 printdate("Removing intermediate files...")
620 remove_output_file(evsel_file) 809 remove_output_file(evsel_file)
@@ -630,6 +819,12 @@ def trace_end():
630 remove_output_file(call_path_file) 819 remove_output_file(call_path_file)
631 if perf_db_export_calls: 820 if perf_db_export_calls:
632 remove_output_file(call_file) 821 remove_output_file(call_file)
822 remove_output_file(ptwrite_file)
823 remove_output_file(cbr_file)
824 remove_output_file(mwait_file)
825 remove_output_file(pwre_file)
826 remove_output_file(exstop_file)
827 remove_output_file(pwrx_file)
633 os.rmdir(output_dir_name) 828 os.rmdir(output_dir_name)
634 printdate("Adding primary keys") 829 printdate("Adding primary keys")
635 do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') 830 do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
@@ -645,6 +840,12 @@ def trace_end():
645 do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)') 840 do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
646 if perf_db_export_calls: 841 if perf_db_export_calls:
647 do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') 842 do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
843 do_query(query, 'ALTER TABLE ptwrite ADD PRIMARY KEY (id)')
844 do_query(query, 'ALTER TABLE cbr ADD PRIMARY KEY (id)')
845 do_query(query, 'ALTER TABLE mwait ADD PRIMARY KEY (id)')
846 do_query(query, 'ALTER TABLE pwre ADD PRIMARY KEY (id)')
847 do_query(query, 'ALTER TABLE exstop ADD PRIMARY KEY (id)')
848 do_query(query, 'ALTER TABLE pwrx ADD PRIMARY KEY (id)')
648 849
649 printdate("Adding foreign keys") 850 printdate("Adding foreign keys")
650 do_query(query, 'ALTER TABLE threads ' 851 do_query(query, 'ALTER TABLE threads '
@@ -680,6 +881,30 @@ def trace_end():
680 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)') 881 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
681 do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') 882 do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
682 do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') 883 do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
884 do_query(query, 'ALTER TABLE ptwrite '
885 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
886 do_query(query, 'ALTER TABLE cbr '
887 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
888 do_query(query, 'ALTER TABLE mwait '
889 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
890 do_query(query, 'ALTER TABLE pwre '
891 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
892 do_query(query, 'ALTER TABLE exstop '
893 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
894 do_query(query, 'ALTER TABLE pwrx '
895 'ADD CONSTRAINT idfk FOREIGN KEY (id) REFERENCES samples (id)')
896
897 printdate("Dropping unused tables")
898 if is_table_empty("ptwrite"):
899 drop("ptwrite")
900 if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"):
901 drop("mwait")
902 drop("pwre")
903 drop("exstop")
904 drop("pwrx")
905 do_query(query, 'DROP VIEW power_events_view');
906 if is_table_empty("cbr"):
907 drop("cbr")
683 908
684 if (unhandled_count): 909 if (unhandled_count):
685 printdate("Warning: ", unhandled_count, " unhandled events") 910 printdate("Warning: ", unhandled_count, " unhandled events")
@@ -747,11 +972,11 @@ def branch_type_table(branch_type, name, *x):
747 value = struct.pack(fmt, 2, 4, branch_type, n, name) 972 value = struct.pack(fmt, 2, 4, branch_type, n, name)
748 branch_type_file.write(value) 973 branch_type_file.write(value)
749 974
750def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x): 975def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, insn_cnt, cyc_cnt, *x):
751 if branches: 976 if branches:
752 value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id) 977 value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiqiqiq", 20, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt)
753 else: 978 else:
754 value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id) 979 value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiqiqiq", 24, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id, 8, insn_cnt, 8, cyc_cnt)
755 sample_file.write(value) 980 sample_file.write(value)
756 981
757def call_path_table(cp_id, parent_id, symbol_id, ip, *x): 982def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
@@ -759,7 +984,70 @@ def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
759 value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip) 984 value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
760 call_path_file.write(value) 985 call_path_file.write(value)
761 986
762def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, *x): 987def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, parent_id, insn_cnt, cyc_cnt, *x):
763 fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiq" 988 fmt = "!hiqiqiqiqiqiqiqiqiqiqiiiqiqiq"
764 value = struct.pack(fmt, 12, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id) 989 value = struct.pack(fmt, 14, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags, 8, parent_id, 8, insn_cnt, 8, cyc_cnt)
765 call_file.write(value) 990 call_file.write(value)
991
992def ptwrite(id, raw_buf):
993 data = struct.unpack_from("<IQ", raw_buf)
994 flags = data[0]
995 payload = data[1]
996 exact_ip = flags & 1
997 value = struct.pack("!hiqiqiB", 3, 8, id, 8, payload, 1, exact_ip)
998 ptwrite_file.write(value)
999
1000def cbr(id, raw_buf):
1001 data = struct.unpack_from("<BBBBII", raw_buf)
1002 cbr = data[0]
1003 MHz = (data[4] + 500) / 1000
1004 percent = ((cbr * 1000 / data[2]) + 5) / 10
1005 value = struct.pack("!hiqiiiiii", 4, 8, id, 4, cbr, 4, MHz, 4, percent)
1006 cbr_file.write(value)
1007
1008def mwait(id, raw_buf):
1009 data = struct.unpack_from("<IQ", raw_buf)
1010 payload = data[1]
1011 hints = payload & 0xff
1012 extensions = (payload >> 32) & 0x3
1013 value = struct.pack("!hiqiiii", 3, 8, id, 4, hints, 4, extensions)
1014 mwait_file.write(value)
1015
1016def pwre(id, raw_buf):
1017 data = struct.unpack_from("<IQ", raw_buf)
1018 payload = data[1]
1019 hw = (payload >> 7) & 1
1020 cstate = (payload >> 12) & 0xf
1021 subcstate = (payload >> 8) & 0xf
1022 value = struct.pack("!hiqiiiiiB", 4, 8, id, 4, cstate, 4, subcstate, 1, hw)
1023 pwre_file.write(value)
1024
1025def exstop(id, raw_buf):
1026 data = struct.unpack_from("<I", raw_buf)
1027 flags = data[0]
1028 exact_ip = flags & 1
1029 value = struct.pack("!hiqiB", 2, 8, id, 1, exact_ip)
1030 exstop_file.write(value)
1031
1032def pwrx(id, raw_buf):
1033 data = struct.unpack_from("<IQ", raw_buf)
1034 payload = data[1]
1035 deepest_cstate = payload & 0xf
1036 last_cstate = (payload >> 4) & 0xf
1037 wake_reason = (payload >> 8) & 0xf
1038 value = struct.pack("!hiqiiiiii", 4, 8, id, 4, deepest_cstate, 4, last_cstate, 4, wake_reason)
1039 pwrx_file.write(value)
1040
1041def synth_data(id, config, raw_buf, *x):
1042 if config == 0:
1043 ptwrite(id, raw_buf)
1044 elif config == 1:
1045 mwait(id, raw_buf)
1046 elif config == 2:
1047 pwre(id, raw_buf)
1048 elif config == 3:
1049 exstop(id, raw_buf)
1050 elif config == 4:
1051 pwrx(id, raw_buf)
1052 elif config == 5:
1053 cbr(id, raw_buf)
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
index bf271fbc3a88..3222a83f4184 100644
--- a/tools/perf/scripts/python/export-to-sqlite.py
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -21,6 +21,26 @@ import datetime
21# provides LGPL-licensed Python bindings for Qt. You will also need the package 21# provides LGPL-licensed Python bindings for Qt. You will also need the package
22# libqt4-sql-sqlite for Qt sqlite3 support. 22# libqt4-sql-sqlite for Qt sqlite3 support.
23# 23#
24# Examples of installing pyside:
25#
26# ubuntu:
27#
28# $ sudo apt-get install python-pyside.qtsql libqt4-sql-psql
29#
30# Alternately, to use Python3 and/or pyside 2, one of the following:
31#
32# $ sudo apt-get install python3-pyside.qtsql libqt4-sql-psql
33# $ sudo apt-get install python-pyside2.qtsql libqt5sql5-psql
34# $ sudo apt-get install python3-pyside2.qtsql libqt5sql5-psql
35# fedora:
36#
37# $ sudo yum install python-pyside
38#
39# Alternately, to use Python3 and/or pyside 2, one of the following:
40# $ sudo yum install python3-pyside
41# $ pip install --user PySide2
42# $ pip3 install --user PySide2
43#
24# An example of using this script with Intel PT: 44# An example of using this script with Intel PT:
25# 45#
26# $ perf record -e intel_pt//u ls 46# $ perf record -e intel_pt//u ls
@@ -49,7 +69,16 @@ import datetime
49# difference is the 'transaction' column of the 'samples' table which is 69# difference is the 'transaction' column of the 'samples' table which is
50# renamed 'transaction_' in sqlite because 'transaction' is a reserved word. 70# renamed 'transaction_' in sqlite because 'transaction' is a reserved word.
51 71
52from PySide.QtSql import * 72pyside_version_1 = True
73if not "pyside-version-1" in sys.argv:
74 try:
75 from PySide2.QtSql import *
76 pyside_version_1 = False
77 except:
78 pass
79
80if pyside_version_1:
81 from PySide.QtSql import *
53 82
54sys.path.append(os.environ['PERF_EXEC_PATH'] + \ 83sys.path.append(os.environ['PERF_EXEC_PATH'] + \
55 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') 84 '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
@@ -69,11 +98,12 @@ def printdate(*args, **kw_args):
69 print(datetime.datetime.today(), *args, sep=' ', **kw_args) 98 print(datetime.datetime.today(), *args, sep=' ', **kw_args)
70 99
71def usage(): 100def usage():
72 printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"); 101 printerr("Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>] [<pyside-version-1>]");
73 printerr("where: columns 'all' or 'branches'"); 102 printerr("where: columns 'all' or 'branches'");
74 printerr(" calls 'calls' => create calls and call_paths table"); 103 printerr(" calls 'calls' => create calls and call_paths table");
75 printerr(" callchains 'callchains' => create call_paths table"); 104 printerr(" callchains 'callchains' => create call_paths table");
76 raise Exception("Too few arguments") 105 printerr(" pyside-version-1 'pyside-version-1' => use pyside version 1");
106 raise Exception("Too few or bad arguments")
77 107
78if (len(sys.argv) < 2): 108if (len(sys.argv) < 2):
79 usage() 109 usage()
@@ -95,6 +125,8 @@ for i in range(3,len(sys.argv)):
95 perf_db_export_calls = True 125 perf_db_export_calls = True
96 elif (sys.argv[i] == "callchains"): 126 elif (sys.argv[i] == "callchains"):
97 perf_db_export_callchains = True 127 perf_db_export_callchains = True
128 elif (sys.argv[i] == "pyside-version-1"):
129 pass
98 else: 130 else:
99 usage() 131 usage()
100 132
@@ -186,7 +218,9 @@ if branches:
186 'to_ip bigint,' 218 'to_ip bigint,'
187 'branch_type integer,' 219 'branch_type integer,'
188 'in_tx boolean,' 220 'in_tx boolean,'
189 'call_path_id bigint)') 221 'call_path_id bigint,'
222 'insn_count bigint,'
223 'cyc_count bigint)')
190else: 224else:
191 do_query(query, 'CREATE TABLE samples (' 225 do_query(query, 'CREATE TABLE samples ('
192 'id integer NOT NULL PRIMARY KEY,' 226 'id integer NOT NULL PRIMARY KEY,'
@@ -210,7 +244,9 @@ else:
210 'data_src bigint,' 244 'data_src bigint,'
211 'branch_type integer,' 245 'branch_type integer,'
212 'in_tx boolean,' 246 'in_tx boolean,'
213 'call_path_id bigint)') 247 'call_path_id bigint,'
248 'insn_count bigint,'
249 'cyc_count bigint)')
214 250
215if perf_db_export_calls or perf_db_export_callchains: 251if perf_db_export_calls or perf_db_export_callchains:
216 do_query(query, 'CREATE TABLE call_paths (' 252 do_query(query, 'CREATE TABLE call_paths ('
@@ -231,7 +267,41 @@ if perf_db_export_calls:
231 'return_id bigint,' 267 'return_id bigint,'
232 'parent_call_path_id bigint,' 268 'parent_call_path_id bigint,'
233 'flags integer,' 269 'flags integer,'
234 'parent_id bigint)') 270 'parent_id bigint,'
271 'insn_count bigint,'
272 'cyc_count bigint)')
273
274do_query(query, 'CREATE TABLE ptwrite ('
275 'id integer NOT NULL PRIMARY KEY,'
276 'payload bigint,'
277 'exact_ip integer)')
278
279do_query(query, 'CREATE TABLE cbr ('
280 'id integer NOT NULL PRIMARY KEY,'
281 'cbr integer,'
282 'mhz integer,'
283 'percent integer)')
284
285do_query(query, 'CREATE TABLE mwait ('
286 'id integer NOT NULL PRIMARY KEY,'
287 'hints integer,'
288 'extensions integer)')
289
290do_query(query, 'CREATE TABLE pwre ('
291 'id integer NOT NULL PRIMARY KEY,'
292 'cstate integer,'
293 'subcstate integer,'
294 'hw integer)')
295
296do_query(query, 'CREATE TABLE exstop ('
297 'id integer NOT NULL PRIMARY KEY,'
298 'exact_ip integer)')
299
300do_query(query, 'CREATE TABLE pwrx ('
301 'id integer NOT NULL PRIMARY KEY,'
302 'deepest_cstate integer,'
303 'last_cstate integer,'
304 'wake_reason integer)')
235 305
236# printf was added to sqlite in version 3.8.3 306# printf was added to sqlite in version 3.8.3
237sqlite_has_printf = False 307sqlite_has_printf = False
@@ -327,6 +397,9 @@ if perf_db_export_calls:
327 'return_time,' 397 'return_time,'
328 'return_time - call_time AS elapsed_time,' 398 'return_time - call_time AS elapsed_time,'
329 'branch_count,' 399 'branch_count,'
400 'insn_count,'
401 'cyc_count,'
402 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC,'
330 'call_id,' 403 'call_id,'
331 'return_id,' 404 'return_id,'
332 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 405 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,'
@@ -352,9 +425,108 @@ do_query(query, 'CREATE VIEW samples_view AS '
352 'to_sym_offset,' 425 'to_sym_offset,'
353 '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,' 426 '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
354 '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,' 427 '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
355 'in_tx' 428 'in_tx,'
429 'insn_count,'
430 'cyc_count,'
431 'CASE WHEN cyc_count=0 THEN CAST(0 AS FLOAT) ELSE ROUND(CAST(insn_count AS FLOAT) / cyc_count, 2) END AS IPC'
356 ' FROM samples') 432 ' FROM samples')
357 433
434do_query(query, 'CREATE VIEW ptwrite_view AS '
435 'SELECT '
436 'ptwrite.id,'
437 'time,'
438 'cpu,'
439 + emit_to_hex('payload') + ' AS payload_hex,'
440 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip'
441 ' FROM ptwrite'
442 ' INNER JOIN samples ON samples.id = ptwrite.id')
443
444do_query(query, 'CREATE VIEW cbr_view AS '
445 'SELECT '
446 'cbr.id,'
447 'time,'
448 'cpu,'
449 'cbr,'
450 'mhz,'
451 'percent'
452 ' FROM cbr'
453 ' INNER JOIN samples ON samples.id = cbr.id')
454
455do_query(query, 'CREATE VIEW mwait_view AS '
456 'SELECT '
457 'mwait.id,'
458 'time,'
459 'cpu,'
460 + emit_to_hex('hints') + ' AS hints_hex,'
461 + emit_to_hex('extensions') + ' AS extensions_hex'
462 ' FROM mwait'
463 ' INNER JOIN samples ON samples.id = mwait.id')
464
465do_query(query, 'CREATE VIEW pwre_view AS '
466 'SELECT '
467 'pwre.id,'
468 'time,'
469 'cpu,'
470 'cstate,'
471 'subcstate,'
472 'CASE WHEN hw=0 THEN \'False\' ELSE \'True\' END AS hw'
473 ' FROM pwre'
474 ' INNER JOIN samples ON samples.id = pwre.id')
475
476do_query(query, 'CREATE VIEW exstop_view AS '
477 'SELECT '
478 'exstop.id,'
479 'time,'
480 'cpu,'
481 'CASE WHEN exact_ip=0 THEN \'False\' ELSE \'True\' END AS exact_ip'
482 ' FROM exstop'
483 ' INNER JOIN samples ON samples.id = exstop.id')
484
485do_query(query, 'CREATE VIEW pwrx_view AS '
486 'SELECT '
487 'pwrx.id,'
488 'time,'
489 'cpu,'
490 'deepest_cstate,'
491 'last_cstate,'
492 'CASE WHEN wake_reason=1 THEN \'Interrupt\''
493 ' WHEN wake_reason=2 THEN \'Timer Deadline\''
494 ' WHEN wake_reason=4 THEN \'Monitored Address\''
495 ' WHEN wake_reason=8 THEN \'HW\''
496 ' ELSE wake_reason '
497 'END AS wake_reason'
498 ' FROM pwrx'
499 ' INNER JOIN samples ON samples.id = pwrx.id')
500
501do_query(query, 'CREATE VIEW power_events_view AS '
502 'SELECT '
503 'samples.id,'
504 'time,'
505 'cpu,'
506 'selected_events.name AS event,'
507 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT cbr FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS cbr,'
508 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT mhz FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS mhz,'
509 'CASE WHEN selected_events.name=\'cbr\' THEN (SELECT percent FROM cbr WHERE cbr.id = samples.id) ELSE "" END AS percent,'
510 'CASE WHEN selected_events.name=\'mwait\' THEN (SELECT ' + emit_to_hex('hints') + ' FROM mwait WHERE mwait.id = samples.id) ELSE "" END AS hints_hex,'
511 'CASE WHEN selected_events.name=\'mwait\' THEN (SELECT ' + emit_to_hex('extensions') + ' FROM mwait WHERE mwait.id = samples.id) ELSE "" END AS extensions_hex,'
512 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT cstate FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS cstate,'
513 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT subcstate FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS subcstate,'
514 'CASE WHEN selected_events.name=\'pwre\' THEN (SELECT hw FROM pwre WHERE pwre.id = samples.id) ELSE "" END AS hw,'
515 'CASE WHEN selected_events.name=\'exstop\' THEN (SELECT exact_ip FROM exstop WHERE exstop.id = samples.id) ELSE "" END AS exact_ip,'
516 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT deepest_cstate FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS deepest_cstate,'
517 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT last_cstate FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS last_cstate,'
518 'CASE WHEN selected_events.name=\'pwrx\' THEN (SELECT '
519 'CASE WHEN wake_reason=1 THEN \'Interrupt\''
520 ' WHEN wake_reason=2 THEN \'Timer Deadline\''
521 ' WHEN wake_reason=4 THEN \'Monitored Address\''
522 ' WHEN wake_reason=8 THEN \'HW\''
523 ' ELSE wake_reason '
524 'END'
525 ' FROM pwrx WHERE pwrx.id = samples.id) ELSE "" END AS wake_reason'
526 ' FROM samples'
527 ' INNER JOIN selected_events ON selected_events.id = evsel_id'
528 ' WHERE selected_events.name IN (\'cbr\',\'mwait\',\'exstop\',\'pwre\',\'pwrx\')')
529
358do_query(query, 'END TRANSACTION') 530do_query(query, 'END TRANSACTION')
359 531
360evsel_query = QSqlQuery(db) 532evsel_query = QSqlQuery(db)
@@ -375,15 +547,27 @@ branch_type_query = QSqlQuery(db)
375branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)") 547branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)")
376sample_query = QSqlQuery(db) 548sample_query = QSqlQuery(db)
377if branches: 549if branches:
378 sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 550 sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
379else: 551else:
380 sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 552 sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
381if perf_db_export_calls or perf_db_export_callchains: 553if perf_db_export_calls or perf_db_export_callchains:
382 call_path_query = QSqlQuery(db) 554 call_path_query = QSqlQuery(db)
383 call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)") 555 call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)")
384if perf_db_export_calls: 556if perf_db_export_calls:
385 call_query = QSqlQuery(db) 557 call_query = QSqlQuery(db)
386 call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") 558 call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
559ptwrite_query = QSqlQuery(db)
560ptwrite_query.prepare("INSERT INTO ptwrite VALUES (?, ?, ?)")
561cbr_query = QSqlQuery(db)
562cbr_query.prepare("INSERT INTO cbr VALUES (?, ?, ?, ?)")
563mwait_query = QSqlQuery(db)
564mwait_query.prepare("INSERT INTO mwait VALUES (?, ?, ?)")
565pwre_query = QSqlQuery(db)
566pwre_query.prepare("INSERT INTO pwre VALUES (?, ?, ?, ?)")
567exstop_query = QSqlQuery(db)
568exstop_query.prepare("INSERT INTO exstop VALUES (?, ?)")
569pwrx_query = QSqlQuery(db)
570pwrx_query.prepare("INSERT INTO pwrx VALUES (?, ?, ?, ?)")
387 571
388def trace_begin(): 572def trace_begin():
389 printdate("Writing records...") 573 printdate("Writing records...")
@@ -395,13 +579,23 @@ def trace_begin():
395 comm_table(0, "unknown") 579 comm_table(0, "unknown")
396 dso_table(0, 0, "unknown", "unknown", "") 580 dso_table(0, 0, "unknown", "unknown", "")
397 symbol_table(0, 0, 0, 0, 0, "unknown") 581 symbol_table(0, 0, 0, 0, 0, "unknown")
398 sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 582 sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
399 if perf_db_export_calls or perf_db_export_callchains: 583 if perf_db_export_calls or perf_db_export_callchains:
400 call_path_table(0, 0, 0, 0) 584 call_path_table(0, 0, 0, 0)
401 call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 585 call_return_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
402 586
403unhandled_count = 0 587unhandled_count = 0
404 588
589def is_table_empty(table_name):
590 do_query(query, 'SELECT * FROM ' + table_name + ' LIMIT 1');
591 if query.next():
592 return False
593 return True
594
595def drop(table_name):
596 do_query(query, 'DROP VIEW ' + table_name + '_view');
597 do_query(query, 'DROP TABLE ' + table_name);
598
405def trace_end(): 599def trace_end():
406 do_query(query, 'END TRANSACTION') 600 do_query(query, 'END TRANSACTION')
407 601
@@ -410,6 +604,18 @@ def trace_end():
410 do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') 604 do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
411 do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') 605 do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)')
412 606
607 printdate("Dropping unused tables")
608 if is_table_empty("ptwrite"):
609 drop("ptwrite")
610 if is_table_empty("mwait") and is_table_empty("pwre") and is_table_empty("exstop") and is_table_empty("pwrx"):
611 drop("mwait")
612 drop("pwre")
613 drop("exstop")
614 drop("pwrx")
615 do_query(query, 'DROP VIEW power_events_view');
616 if is_table_empty("cbr"):
617 drop("cbr")
618
413 if (unhandled_count): 619 if (unhandled_count):
414 printdate("Warning: ", unhandled_count, " unhandled events") 620 printdate("Warning: ", unhandled_count, " unhandled events")
415 printdate("Done") 621 printdate("Done")
@@ -454,14 +660,91 @@ def sample_table(*x):
454 if branches: 660 if branches:
455 for xx in x[0:15]: 661 for xx in x[0:15]:
456 sample_query.addBindValue(str(xx)) 662 sample_query.addBindValue(str(xx))
457 for xx in x[19:22]: 663 for xx in x[19:24]:
458 sample_query.addBindValue(str(xx)) 664 sample_query.addBindValue(str(xx))
459 do_query_(sample_query) 665 do_query_(sample_query)
460 else: 666 else:
461 bind_exec(sample_query, 22, x) 667 bind_exec(sample_query, 24, x)
462 668
463def call_path_table(*x): 669def call_path_table(*x):
464 bind_exec(call_path_query, 4, x) 670 bind_exec(call_path_query, 4, x)
465 671
466def call_return_table(*x): 672def call_return_table(*x):
467 bind_exec(call_query, 12, x) 673 bind_exec(call_query, 14, x)
674
675def ptwrite(id, raw_buf):
676 data = struct.unpack_from("<IQ", raw_buf)
677 flags = data[0]
678 payload = data[1]
679 exact_ip = flags & 1
680 ptwrite_query.addBindValue(str(id))
681 ptwrite_query.addBindValue(str(payload))
682 ptwrite_query.addBindValue(str(exact_ip))
683 do_query_(ptwrite_query)
684
685def cbr(id, raw_buf):
686 data = struct.unpack_from("<BBBBII", raw_buf)
687 cbr = data[0]
688 MHz = (data[4] + 500) / 1000
689 percent = ((cbr * 1000 / data[2]) + 5) / 10
690 cbr_query.addBindValue(str(id))
691 cbr_query.addBindValue(str(cbr))
692 cbr_query.addBindValue(str(MHz))
693 cbr_query.addBindValue(str(percent))
694 do_query_(cbr_query)
695
696def mwait(id, raw_buf):
697 data = struct.unpack_from("<IQ", raw_buf)
698 payload = data[1]
699 hints = payload & 0xff
700 extensions = (payload >> 32) & 0x3
701 mwait_query.addBindValue(str(id))
702 mwait_query.addBindValue(str(hints))
703 mwait_query.addBindValue(str(extensions))
704 do_query_(mwait_query)
705
706def pwre(id, raw_buf):
707 data = struct.unpack_from("<IQ", raw_buf)
708 payload = data[1]
709 hw = (payload >> 7) & 1
710 cstate = (payload >> 12) & 0xf
711 subcstate = (payload >> 8) & 0xf
712 pwre_query.addBindValue(str(id))
713 pwre_query.addBindValue(str(cstate))
714 pwre_query.addBindValue(str(subcstate))
715 pwre_query.addBindValue(str(hw))
716 do_query_(pwre_query)
717
718def exstop(id, raw_buf):
719 data = struct.unpack_from("<I", raw_buf)
720 flags = data[0]
721 exact_ip = flags & 1
722 exstop_query.addBindValue(str(id))
723 exstop_query.addBindValue(str(exact_ip))
724 do_query_(exstop_query)
725
726def pwrx(id, raw_buf):
727 data = struct.unpack_from("<IQ", raw_buf)
728 payload = data[1]
729 deepest_cstate = payload & 0xf
730 last_cstate = (payload >> 4) & 0xf
731 wake_reason = (payload >> 8) & 0xf
732 pwrx_query.addBindValue(str(id))
733 pwrx_query.addBindValue(str(deepest_cstate))
734 pwrx_query.addBindValue(str(last_cstate))
735 pwrx_query.addBindValue(str(wake_reason))
736 do_query_(pwrx_query)
737
738def synth_data(id, config, raw_buf, *x):
739 if config == 0:
740 ptwrite(id, raw_buf)
741 elif config == 1:
742 mwait(id, raw_buf)
743 elif config == 2:
744 pwre(id, raw_buf)
745 elif config == 3:
746 exstop(id, raw_buf)
747 elif config == 4:
748 pwrx(id, raw_buf)
749 elif config == 5:
750 cbr(id, raw_buf)
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index affed7d149be..6e7934f2ac9a 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -1,4 +1,4 @@
1#!/usr/bin/env python2 1#!/usr/bin/env python
2# SPDX-License-Identifier: GPL-2.0 2# SPDX-License-Identifier: GPL-2.0
3# exported-sql-viewer.py: view data from sql database 3# exported-sql-viewer.py: view data from sql database
4# Copyright (c) 2014-2018, Intel Corporation. 4# Copyright (c) 2014-2018, Intel Corporation.
@@ -91,6 +91,7 @@
91from __future__ import print_function 91from __future__ import print_function
92 92
93import sys 93import sys
94import argparse
94import weakref 95import weakref
95import threading 96import threading
96import string 97import string
@@ -104,10 +105,23 @@ except ImportError:
104 glb_nsz = 16 105 glb_nsz = 16
105import re 106import re
106import os 107import os
107from PySide.QtCore import * 108
108from PySide.QtGui import *
109from PySide.QtSql import *
110pyside_version_1 = True 109pyside_version_1 = True
110if not "--pyside-version-1" in sys.argv:
111 try:
112 from PySide2.QtCore import *
113 from PySide2.QtGui import *
114 from PySide2.QtSql import *
115 from PySide2.QtWidgets import *
116 pyside_version_1 = False
117 except:
118 pass
119
120if pyside_version_1:
121 from PySide.QtCore import *
122 from PySide.QtGui import *
123 from PySide.QtSql import *
124
111from decimal import * 125from decimal import *
112from ctypes import * 126from ctypes import *
113from multiprocessing import Process, Array, Value, Event 127from multiprocessing import Process, Array, Value, Event
@@ -186,9 +200,10 @@ class Thread(QThread):
186 200
187class TreeModel(QAbstractItemModel): 201class TreeModel(QAbstractItemModel):
188 202
189 def __init__(self, glb, parent=None): 203 def __init__(self, glb, params, parent=None):
190 super(TreeModel, self).__init__(parent) 204 super(TreeModel, self).__init__(parent)
191 self.glb = glb 205 self.glb = glb
206 self.params = params
192 self.root = self.GetRoot() 207 self.root = self.GetRoot()
193 self.last_row_read = 0 208 self.last_row_read = 0
194 209
@@ -385,6 +400,7 @@ class FindBar():
385 400
386 def Activate(self): 401 def Activate(self):
387 self.bar.show() 402 self.bar.show()
403 self.textbox.lineEdit().selectAll()
388 self.textbox.setFocus() 404 self.textbox.setFocus()
389 405
390 def Deactivate(self): 406 def Deactivate(self):
@@ -449,8 +465,9 @@ class FindBar():
449 465
450class CallGraphLevelItemBase(object): 466class CallGraphLevelItemBase(object):
451 467
452 def __init__(self, glb, row, parent_item): 468 def __init__(self, glb, params, row, parent_item):
453 self.glb = glb 469 self.glb = glb
470 self.params = params
454 self.row = row 471 self.row = row
455 self.parent_item = parent_item 472 self.parent_item = parent_item
456 self.query_done = False; 473 self.query_done = False;
@@ -489,18 +506,24 @@ class CallGraphLevelItemBase(object):
489 506
490class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase): 507class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
491 508
492 def __init__(self, glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item): 509 def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item):
493 super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) 510 super(CallGraphLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item)
494 self.comm_id = comm_id 511 self.comm_id = comm_id
495 self.thread_id = thread_id 512 self.thread_id = thread_id
496 self.call_path_id = call_path_id 513 self.call_path_id = call_path_id
514 self.insn_cnt = insn_cnt
515 self.cyc_cnt = cyc_cnt
497 self.branch_count = branch_count 516 self.branch_count = branch_count
498 self.time = time 517 self.time = time
499 518
500 def Select(self): 519 def Select(self):
501 self.query_done = True; 520 self.query_done = True;
502 query = QSqlQuery(self.glb.db) 521 query = QSqlQuery(self.glb.db)
503 QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time), SUM(branch_count)" 522 if self.params.have_ipc:
523 ipc_str = ", SUM(insn_count), SUM(cyc_count)"
524 else:
525 ipc_str = ""
526 QueryExec(query, "SELECT call_path_id, name, short_name, COUNT(calls.id), SUM(return_time - call_time)" + ipc_str + ", SUM(branch_count)"
504 " FROM calls" 527 " FROM calls"
505 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" 528 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
506 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" 529 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
@@ -511,7 +534,15 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
511 " GROUP BY call_path_id, name, short_name" 534 " GROUP BY call_path_id, name, short_name"
512 " ORDER BY call_path_id") 535 " ORDER BY call_path_id")
513 while query.next(): 536 while query.next():
514 child_item = CallGraphLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) 537 if self.params.have_ipc:
538 insn_cnt = int(query.value(5))
539 cyc_cnt = int(query.value(6))
540 branch_count = int(query.value(7))
541 else:
542 insn_cnt = 0
543 cyc_cnt = 0
544 branch_count = int(query.value(5))
545 child_item = CallGraphLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self)
515 self.child_items.append(child_item) 546 self.child_items.append(child_item)
516 self.child_count += 1 547 self.child_count += 1
517 548
@@ -519,37 +550,57 @@ class CallGraphLevelTwoPlusItemBase(CallGraphLevelItemBase):
519 550
520class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase): 551class CallGraphLevelThreeItem(CallGraphLevelTwoPlusItemBase):
521 552
522 def __init__(self, glb, row, comm_id, thread_id, call_path_id, name, dso, count, time, branch_count, parent_item): 553 def __init__(self, glb, params, row, comm_id, thread_id, call_path_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item):
523 super(CallGraphLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, call_path_id, time, branch_count, parent_item) 554 super(CallGraphLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, call_path_id, time, insn_cnt, cyc_cnt, branch_count, parent_item)
524 dso = dsoname(dso) 555 dso = dsoname(dso)
525 self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] 556 if self.params.have_ipc:
557 insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt)
558 cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt)
559 br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count)
560 ipc = CalcIPC(cyc_cnt, insn_cnt)
561 self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
562 else:
563 self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
526 self.dbid = call_path_id 564 self.dbid = call_path_id
527 565
528# Context-sensitive call graph data model level two item 566# Context-sensitive call graph data model level two item
529 567
530class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase): 568class CallGraphLevelTwoItem(CallGraphLevelTwoPlusItemBase):
531 569
532 def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): 570 def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item):
533 super(CallGraphLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 1, 0, 0, parent_item) 571 super(CallGraphLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 1, 0, 0, 0, 0, parent_item)
534 self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] 572 if self.params.have_ipc:
573 self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""]
574 else:
575 self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
535 self.dbid = thread_id 576 self.dbid = thread_id
536 577
537 def Select(self): 578 def Select(self):
538 super(CallGraphLevelTwoItem, self).Select() 579 super(CallGraphLevelTwoItem, self).Select()
539 for child_item in self.child_items: 580 for child_item in self.child_items:
540 self.time += child_item.time 581 self.time += child_item.time
582 self.insn_cnt += child_item.insn_cnt
583 self.cyc_cnt += child_item.cyc_cnt
541 self.branch_count += child_item.branch_count 584 self.branch_count += child_item.branch_count
542 for child_item in self.child_items: 585 for child_item in self.child_items:
543 child_item.data[4] = PercentToOneDP(child_item.time, self.time) 586 child_item.data[4] = PercentToOneDP(child_item.time, self.time)
544 child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) 587 if self.params.have_ipc:
588 child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt)
589 child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt)
590 child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count)
591 else:
592 child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
545 593
546# Context-sensitive call graph data model level one item 594# Context-sensitive call graph data model level one item
547 595
548class CallGraphLevelOneItem(CallGraphLevelItemBase): 596class CallGraphLevelOneItem(CallGraphLevelItemBase):
549 597
550 def __init__(self, glb, row, comm_id, comm, parent_item): 598 def __init__(self, glb, params, row, comm_id, comm, parent_item):
551 super(CallGraphLevelOneItem, self).__init__(glb, row, parent_item) 599 super(CallGraphLevelOneItem, self).__init__(glb, params, row, parent_item)
552 self.data = [comm, "", "", "", "", "", ""] 600 if self.params.have_ipc:
601 self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""]
602 else:
603 self.data = [comm, "", "", "", "", "", ""]
553 self.dbid = comm_id 604 self.dbid = comm_id
554 605
555 def Select(self): 606 def Select(self):
@@ -560,7 +611,7 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase):
560 " INNER JOIN threads ON thread_id = threads.id" 611 " INNER JOIN threads ON thread_id = threads.id"
561 " WHERE comm_id = " + str(self.dbid)) 612 " WHERE comm_id = " + str(self.dbid))
562 while query.next(): 613 while query.next():
563 child_item = CallGraphLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) 614 child_item = CallGraphLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
564 self.child_items.append(child_item) 615 self.child_items.append(child_item)
565 self.child_count += 1 616 self.child_count += 1
566 617
@@ -568,8 +619,8 @@ class CallGraphLevelOneItem(CallGraphLevelItemBase):
568 619
569class CallGraphRootItem(CallGraphLevelItemBase): 620class CallGraphRootItem(CallGraphLevelItemBase):
570 621
571 def __init__(self, glb): 622 def __init__(self, glb, params):
572 super(CallGraphRootItem, self).__init__(glb, 0, None) 623 super(CallGraphRootItem, self).__init__(glb, params, 0, None)
573 self.dbid = 0 624 self.dbid = 0
574 self.query_done = True; 625 self.query_done = True;
575 query = QSqlQuery(glb.db) 626 query = QSqlQuery(glb.db)
@@ -577,16 +628,23 @@ class CallGraphRootItem(CallGraphLevelItemBase):
577 while query.next(): 628 while query.next():
578 if not query.value(0): 629 if not query.value(0):
579 continue 630 continue
580 child_item = CallGraphLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) 631 child_item = CallGraphLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self)
581 self.child_items.append(child_item) 632 self.child_items.append(child_item)
582 self.child_count += 1 633 self.child_count += 1
583 634
635# Call graph model parameters
636
637class CallGraphModelParams():
638
639 def __init__(self, glb, parent=None):
640 self.have_ipc = IsSelectable(glb.db, "calls", columns = "insn_count, cyc_count")
641
584# Context-sensitive call graph data model base 642# Context-sensitive call graph data model base
585 643
586class CallGraphModelBase(TreeModel): 644class CallGraphModelBase(TreeModel):
587 645
588 def __init__(self, glb, parent=None): 646 def __init__(self, glb, parent=None):
589 super(CallGraphModelBase, self).__init__(glb, parent) 647 super(CallGraphModelBase, self).__init__(glb, CallGraphModelParams(glb), parent)
590 648
591 def FindSelect(self, value, pattern, query): 649 def FindSelect(self, value, pattern, query):
592 if pattern: 650 if pattern:
@@ -668,17 +726,26 @@ class CallGraphModel(CallGraphModelBase):
668 super(CallGraphModel, self).__init__(glb, parent) 726 super(CallGraphModel, self).__init__(glb, parent)
669 727
670 def GetRoot(self): 728 def GetRoot(self):
671 return CallGraphRootItem(self.glb) 729 return CallGraphRootItem(self.glb, self.params)
672 730
673 def columnCount(self, parent=None): 731 def columnCount(self, parent=None):
674 return 7 732 if self.params.have_ipc:
733 return 12
734 else:
735 return 7
675 736
676 def columnHeader(self, column): 737 def columnHeader(self, column):
677 headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] 738 if self.params.have_ipc:
739 headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "]
740 else:
741 headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
678 return headers[column] 742 return headers[column]
679 743
680 def columnAlignment(self, column): 744 def columnAlignment(self, column):
681 alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 745 if self.params.have_ipc:
746 alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
747 else:
748 alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
682 return alignment[column] 749 return alignment[column]
683 750
684 def DoFindSelect(self, query, match): 751 def DoFindSelect(self, query, match):
@@ -715,11 +782,13 @@ class CallGraphModel(CallGraphModelBase):
715 782
716class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase): 783class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
717 784
718 def __init__(self, glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item): 785 def __init__(self, glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item):
719 super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, row, parent_item) 786 super(CallTreeLevelTwoPlusItemBase, self).__init__(glb, params, row, parent_item)
720 self.comm_id = comm_id 787 self.comm_id = comm_id
721 self.thread_id = thread_id 788 self.thread_id = thread_id
722 self.calls_id = calls_id 789 self.calls_id = calls_id
790 self.insn_cnt = insn_cnt
791 self.cyc_cnt = cyc_cnt
723 self.branch_count = branch_count 792 self.branch_count = branch_count
724 self.time = time 793 self.time = time
725 794
@@ -729,8 +798,12 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
729 comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id) 798 comm_thread = " AND comm_id = " + str(self.comm_id) + " AND thread_id = " + str(self.thread_id)
730 else: 799 else:
731 comm_thread = "" 800 comm_thread = ""
801 if self.params.have_ipc:
802 ipc_str = ", insn_count, cyc_count"
803 else:
804 ipc_str = ""
732 query = QSqlQuery(self.glb.db) 805 query = QSqlQuery(self.glb.db)
733 QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time, branch_count" 806 QueryExec(query, "SELECT calls.id, name, short_name, call_time, return_time - call_time" + ipc_str + ", branch_count"
734 " FROM calls" 807 " FROM calls"
735 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id" 808 " INNER JOIN call_paths ON calls.call_path_id = call_paths.id"
736 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id" 809 " INNER JOIN symbols ON call_paths.symbol_id = symbols.id"
@@ -738,7 +811,15 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
738 " WHERE calls.parent_id = " + str(self.calls_id) + comm_thread + 811 " WHERE calls.parent_id = " + str(self.calls_id) + comm_thread +
739 " ORDER BY call_time, calls.id") 812 " ORDER BY call_time, calls.id")
740 while query.next(): 813 while query.next():
741 child_item = CallTreeLevelThreeItem(self.glb, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), int(query.value(5)), self) 814 if self.params.have_ipc:
815 insn_cnt = int(query.value(5))
816 cyc_cnt = int(query.value(6))
817 branch_count = int(query.value(7))
818 else:
819 insn_cnt = 0
820 cyc_cnt = 0
821 branch_count = int(query.value(5))
822 child_item = CallTreeLevelThreeItem(self.glb, self.params, self.child_count, self.comm_id, self.thread_id, query.value(0), query.value(1), query.value(2), query.value(3), int(query.value(4)), insn_cnt, cyc_cnt, branch_count, self)
742 self.child_items.append(child_item) 823 self.child_items.append(child_item)
743 self.child_count += 1 824 self.child_count += 1
744 825
@@ -746,37 +827,57 @@ class CallTreeLevelTwoPlusItemBase(CallGraphLevelItemBase):
746 827
747class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase): 828class CallTreeLevelThreeItem(CallTreeLevelTwoPlusItemBase):
748 829
749 def __init__(self, glb, row, comm_id, thread_id, calls_id, name, dso, count, time, branch_count, parent_item): 830 def __init__(self, glb, params, row, comm_id, thread_id, calls_id, name, dso, count, time, insn_cnt, cyc_cnt, branch_count, parent_item):
750 super(CallTreeLevelThreeItem, self).__init__(glb, row, comm_id, thread_id, calls_id, time, branch_count, parent_item) 831 super(CallTreeLevelThreeItem, self).__init__(glb, params, row, comm_id, thread_id, calls_id, time, insn_cnt, cyc_cnt, branch_count, parent_item)
751 dso = dsoname(dso) 832 dso = dsoname(dso)
752 self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ] 833 if self.params.have_ipc:
834 insn_pcnt = PercentToOneDP(insn_cnt, parent_item.insn_cnt)
835 cyc_pcnt = PercentToOneDP(cyc_cnt, parent_item.cyc_cnt)
836 br_pcnt = PercentToOneDP(branch_count, parent_item.branch_count)
837 ipc = CalcIPC(cyc_cnt, insn_cnt)
838 self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(insn_cnt), insn_pcnt, str(cyc_cnt), cyc_pcnt, ipc, str(branch_count), br_pcnt ]
839 else:
840 self.data = [ name, dso, str(count), str(time), PercentToOneDP(time, parent_item.time), str(branch_count), PercentToOneDP(branch_count, parent_item.branch_count) ]
753 self.dbid = calls_id 841 self.dbid = calls_id
754 842
755# Call tree data model level two item 843# Call tree data model level two item
756 844
757class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase): 845class CallTreeLevelTwoItem(CallTreeLevelTwoPlusItemBase):
758 846
759 def __init__(self, glb, row, comm_id, thread_id, pid, tid, parent_item): 847 def __init__(self, glb, params, row, comm_id, thread_id, pid, tid, parent_item):
760 super(CallTreeLevelTwoItem, self).__init__(glb, row, comm_id, thread_id, 0, 0, 0, parent_item) 848 super(CallTreeLevelTwoItem, self).__init__(glb, params, row, comm_id, thread_id, 0, 0, 0, 0, 0, parent_item)
761 self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""] 849 if self.params.have_ipc:
850 self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", "", "", "", "", "", ""]
851 else:
852 self.data = [str(pid) + ":" + str(tid), "", "", "", "", "", ""]
762 self.dbid = thread_id 853 self.dbid = thread_id
763 854
764 def Select(self): 855 def Select(self):
765 super(CallTreeLevelTwoItem, self).Select() 856 super(CallTreeLevelTwoItem, self).Select()
766 for child_item in self.child_items: 857 for child_item in self.child_items:
767 self.time += child_item.time 858 self.time += child_item.time
859 self.insn_cnt += child_item.insn_cnt
860 self.cyc_cnt += child_item.cyc_cnt
768 self.branch_count += child_item.branch_count 861 self.branch_count += child_item.branch_count
769 for child_item in self.child_items: 862 for child_item in self.child_items:
770 child_item.data[4] = PercentToOneDP(child_item.time, self.time) 863 child_item.data[4] = PercentToOneDP(child_item.time, self.time)
771 child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count) 864 if self.params.have_ipc:
865 child_item.data[6] = PercentToOneDP(child_item.insn_cnt, self.insn_cnt)
866 child_item.data[8] = PercentToOneDP(child_item.cyc_cnt, self.cyc_cnt)
867 child_item.data[11] = PercentToOneDP(child_item.branch_count, self.branch_count)
868 else:
869 child_item.data[6] = PercentToOneDP(child_item.branch_count, self.branch_count)
772 870
773# Call tree data model level one item 871# Call tree data model level one item
774 872
775class CallTreeLevelOneItem(CallGraphLevelItemBase): 873class CallTreeLevelOneItem(CallGraphLevelItemBase):
776 874
777 def __init__(self, glb, row, comm_id, comm, parent_item): 875 def __init__(self, glb, params, row, comm_id, comm, parent_item):
778 super(CallTreeLevelOneItem, self).__init__(glb, row, parent_item) 876 super(CallTreeLevelOneItem, self).__init__(glb, params, row, parent_item)
779 self.data = [comm, "", "", "", "", "", ""] 877 if self.params.have_ipc:
878 self.data = [comm, "", "", "", "", "", "", "", "", "", "", ""]
879 else:
880 self.data = [comm, "", "", "", "", "", ""]
780 self.dbid = comm_id 881 self.dbid = comm_id
781 882
782 def Select(self): 883 def Select(self):
@@ -787,7 +888,7 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase):
787 " INNER JOIN threads ON thread_id = threads.id" 888 " INNER JOIN threads ON thread_id = threads.id"
788 " WHERE comm_id = " + str(self.dbid)) 889 " WHERE comm_id = " + str(self.dbid))
789 while query.next(): 890 while query.next():
790 child_item = CallTreeLevelTwoItem(self.glb, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self) 891 child_item = CallTreeLevelTwoItem(self.glb, self.params, self.child_count, self.dbid, query.value(0), query.value(1), query.value(2), self)
791 self.child_items.append(child_item) 892 self.child_items.append(child_item)
792 self.child_count += 1 893 self.child_count += 1
793 894
@@ -795,8 +896,8 @@ class CallTreeLevelOneItem(CallGraphLevelItemBase):
795 896
796class CallTreeRootItem(CallGraphLevelItemBase): 897class CallTreeRootItem(CallGraphLevelItemBase):
797 898
798 def __init__(self, glb): 899 def __init__(self, glb, params):
799 super(CallTreeRootItem, self).__init__(glb, 0, None) 900 super(CallTreeRootItem, self).__init__(glb, params, 0, None)
800 self.dbid = 0 901 self.dbid = 0
801 self.query_done = True; 902 self.query_done = True;
802 query = QSqlQuery(glb.db) 903 query = QSqlQuery(glb.db)
@@ -804,7 +905,7 @@ class CallTreeRootItem(CallGraphLevelItemBase):
804 while query.next(): 905 while query.next():
805 if not query.value(0): 906 if not query.value(0):
806 continue 907 continue
807 child_item = CallTreeLevelOneItem(glb, self.child_count, query.value(0), query.value(1), self) 908 child_item = CallTreeLevelOneItem(glb, params, self.child_count, query.value(0), query.value(1), self)
808 self.child_items.append(child_item) 909 self.child_items.append(child_item)
809 self.child_count += 1 910 self.child_count += 1
810 911
@@ -816,17 +917,26 @@ class CallTreeModel(CallGraphModelBase):
816 super(CallTreeModel, self).__init__(glb, parent) 917 super(CallTreeModel, self).__init__(glb, parent)
817 918
818 def GetRoot(self): 919 def GetRoot(self):
819 return CallTreeRootItem(self.glb) 920 return CallTreeRootItem(self.glb, self.params)
820 921
821 def columnCount(self, parent=None): 922 def columnCount(self, parent=None):
822 return 7 923 if self.params.have_ipc:
924 return 12
925 else:
926 return 7
823 927
824 def columnHeader(self, column): 928 def columnHeader(self, column):
825 headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] 929 if self.params.have_ipc:
930 headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Insn Cnt", "Insn Cnt (%)", "Cyc Cnt", "Cyc Cnt (%)", "IPC", "Branch Count ", "Branch Count (%) "]
931 else:
932 headers = ["Call Path", "Object", "Call Time", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
826 return headers[column] 933 return headers[column]
827 934
828 def columnAlignment(self, column): 935 def columnAlignment(self, column):
829 alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ] 936 if self.params.have_ipc:
937 alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
938 else:
939 alignment = [ Qt.AlignLeft, Qt.AlignLeft, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight, Qt.AlignRight ]
830 return alignment[column] 940 return alignment[column]
831 941
832 def DoFindSelect(self, query, match): 942 def DoFindSelect(self, query, match):
@@ -1355,11 +1465,11 @@ class FetchMoreRecordsBar():
1355 1465
1356class BranchLevelTwoItem(): 1466class BranchLevelTwoItem():
1357 1467
1358 def __init__(self, row, text, parent_item): 1468 def __init__(self, row, col, text, parent_item):
1359 self.row = row 1469 self.row = row
1360 self.parent_item = parent_item 1470 self.parent_item = parent_item
1361 self.data = [""] * 8 1471 self.data = [""] * (col + 1)
1362 self.data[7] = text 1472 self.data[col] = text
1363 self.level = 2 1473 self.level = 2
1364 1474
1365 def getParentItem(self): 1475 def getParentItem(self):
@@ -1391,6 +1501,7 @@ class BranchLevelOneItem():
1391 self.dbid = data[0] 1501 self.dbid = data[0]
1392 self.level = 1 1502 self.level = 1
1393 self.query_done = False 1503 self.query_done = False
1504 self.br_col = len(self.data) - 1
1394 1505
1395 def getChildItem(self, row): 1506 def getChildItem(self, row):
1396 return self.child_items[row] 1507 return self.child_items[row]
@@ -1471,7 +1582,7 @@ class BranchLevelOneItem():
1471 while k < 15: 1582 while k < 15:
1472 byte_str += " " 1583 byte_str += " "
1473 k += 1 1584 k += 1
1474 self.child_items.append(BranchLevelTwoItem(0, byte_str + " " + text, self)) 1585 self.child_items.append(BranchLevelTwoItem(0, self.br_col, byte_str + " " + text, self))
1475 self.child_count += 1 1586 self.child_count += 1
1476 else: 1587 else:
1477 return 1588 return
@@ -1522,16 +1633,37 @@ class BranchRootItem():
1522 def getData(self, column): 1633 def getData(self, column):
1523 return "" 1634 return ""
1524 1635
1636# Calculate instructions per cycle
1637
1638def CalcIPC(cyc_cnt, insn_cnt):
1639 if cyc_cnt and insn_cnt:
1640 ipc = Decimal(float(insn_cnt) / cyc_cnt)
1641 ipc = str(ipc.quantize(Decimal(".01"), rounding=ROUND_HALF_UP))
1642 else:
1643 ipc = "0"
1644 return ipc
1645
1525# Branch data preparation 1646# Branch data preparation
1526 1647
1527def BranchDataPrep(query): 1648def BranchDataPrepBr(query, data):
1528 data = []
1529 for i in xrange(0, 8):
1530 data.append(query.value(i))
1531 data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + 1649 data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) +
1532 " (" + dsoname(query.value(11)) + ")" + " -> " + 1650 " (" + dsoname(query.value(11)) + ")" + " -> " +
1533 tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + 1651 tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) +
1534 " (" + dsoname(query.value(15)) + ")") 1652 " (" + dsoname(query.value(15)) + ")")
1653
1654def BranchDataPrepIPC(query, data):
1655 insn_cnt = query.value(16)
1656 cyc_cnt = query.value(17)
1657 ipc = CalcIPC(cyc_cnt, insn_cnt)
1658 data.append(insn_cnt)
1659 data.append(cyc_cnt)
1660 data.append(ipc)
1661
1662def BranchDataPrep(query):
1663 data = []
1664 for i in xrange(0, 8):
1665 data.append(query.value(i))
1666 BranchDataPrepBr(query, data)
1535 return data 1667 return data
1536 1668
1537def BranchDataPrepWA(query): 1669def BranchDataPrepWA(query):
@@ -1541,10 +1673,26 @@ def BranchDataPrepWA(query):
1541 data.append("{:>19}".format(query.value(1))) 1673 data.append("{:>19}".format(query.value(1)))
1542 for i in xrange(2, 8): 1674 for i in xrange(2, 8):
1543 data.append(query.value(i)) 1675 data.append(query.value(i))
1544 data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + 1676 BranchDataPrepBr(query, data)
1545 " (" + dsoname(query.value(11)) + ")" + " -> " + 1677 return data
1546 tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + 1678
1547 " (" + dsoname(query.value(15)) + ")") 1679def BranchDataWithIPCPrep(query):
1680 data = []
1681 for i in xrange(0, 8):
1682 data.append(query.value(i))
1683 BranchDataPrepIPC(query, data)
1684 BranchDataPrepBr(query, data)
1685 return data
1686
1687def BranchDataWithIPCPrepWA(query):
1688 data = []
1689 data.append(query.value(0))
1690 # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string
1691 data.append("{:>19}".format(query.value(1)))
1692 for i in xrange(2, 8):
1693 data.append(query.value(i))
1694 BranchDataPrepIPC(query, data)
1695 BranchDataPrepBr(query, data)
1548 return data 1696 return data
1549 1697
1550# Branch data model 1698# Branch data model
@@ -1554,14 +1702,24 @@ class BranchModel(TreeModel):
1554 progress = Signal(object) 1702 progress = Signal(object)
1555 1703
1556 def __init__(self, glb, event_id, where_clause, parent=None): 1704 def __init__(self, glb, event_id, where_clause, parent=None):
1557 super(BranchModel, self).__init__(glb, parent) 1705 super(BranchModel, self).__init__(glb, None, parent)
1558 self.event_id = event_id 1706 self.event_id = event_id
1559 self.more = True 1707 self.more = True
1560 self.populated = 0 1708 self.populated = 0
1709 self.have_ipc = IsSelectable(glb.db, "samples", columns = "insn_count, cyc_count")
1710 if self.have_ipc:
1711 select_ipc = ", insn_count, cyc_count"
1712 prep_fn = BranchDataWithIPCPrep
1713 prep_wa_fn = BranchDataWithIPCPrepWA
1714 else:
1715 select_ipc = ""
1716 prep_fn = BranchDataPrep
1717 prep_wa_fn = BranchDataPrepWA
1561 sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name," 1718 sql = ("SELECT samples.id, time, cpu, comm, pid, tid, branch_types.name,"
1562 " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END," 1719 " CASE WHEN in_tx = '0' THEN 'No' ELSE 'Yes' END,"
1563 " ip, symbols.name, sym_offset, dsos.short_name," 1720 " ip, symbols.name, sym_offset, dsos.short_name,"
1564 " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name" 1721 " to_ip, to_symbols.name, to_sym_offset, to_dsos.short_name"
1722 + select_ipc +
1565 " FROM samples" 1723 " FROM samples"
1566 " INNER JOIN comms ON comm_id = comms.id" 1724 " INNER JOIN comms ON comm_id = comms.id"
1567 " INNER JOIN threads ON thread_id = threads.id" 1725 " INNER JOIN threads ON thread_id = threads.id"
@@ -1575,9 +1733,9 @@ class BranchModel(TreeModel):
1575 " ORDER BY samples.id" 1733 " ORDER BY samples.id"
1576 " LIMIT " + str(glb_chunk_sz)) 1734 " LIMIT " + str(glb_chunk_sz))
1577 if pyside_version_1 and sys.version_info[0] == 3: 1735 if pyside_version_1 and sys.version_info[0] == 3:
1578 prep = BranchDataPrepWA 1736 prep = prep_fn
1579 else: 1737 else:
1580 prep = BranchDataPrep 1738 prep = prep_wa_fn
1581 self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) 1739 self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample)
1582 self.fetcher.done.connect(self.Update) 1740 self.fetcher.done.connect(self.Update)
1583 self.fetcher.Fetch(glb_chunk_sz) 1741 self.fetcher.Fetch(glb_chunk_sz)
@@ -1586,13 +1744,23 @@ class BranchModel(TreeModel):
1586 return BranchRootItem() 1744 return BranchRootItem()
1587 1745
1588 def columnCount(self, parent=None): 1746 def columnCount(self, parent=None):
1589 return 8 1747 if self.have_ipc:
1748 return 11
1749 else:
1750 return 8
1590 1751
1591 def columnHeader(self, column): 1752 def columnHeader(self, column):
1592 return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column] 1753 if self.have_ipc:
1754 return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Insn Cnt", "Cyc Cnt", "IPC", "Branch")[column]
1755 else:
1756 return ("Time", "CPU", "Command", "PID", "TID", "Branch Type", "In Tx", "Branch")[column]
1593 1757
1594 def columnFont(self, column): 1758 def columnFont(self, column):
1595 if column != 7: 1759 if self.have_ipc:
1760 br_col = 10
1761 else:
1762 br_col = 7
1763 if column != br_col:
1596 return None 1764 return None
1597 return QFont("Monospace") 1765 return QFont("Monospace")
1598 1766
@@ -2100,10 +2268,10 @@ def GetEventList(db):
2100 2268
2101# Is a table selectable 2269# Is a table selectable
2102 2270
2103def IsSelectable(db, table, sql = ""): 2271def IsSelectable(db, table, sql = "", columns = "*"):
2104 query = QSqlQuery(db) 2272 query = QSqlQuery(db)
2105 try: 2273 try:
2106 QueryExec(query, "SELECT * FROM " + table + " " + sql + " LIMIT 1") 2274 QueryExec(query, "SELECT " + columns + " FROM " + table + " " + sql + " LIMIT 1")
2107 except: 2275 except:
2108 return False 2276 return False
2109 return True 2277 return True
@@ -2754,7 +2922,7 @@ class WindowMenu():
2754 action = self.window_menu.addAction(label) 2922 action = self.window_menu.addAction(label)
2755 action.setCheckable(True) 2923 action.setCheckable(True)
2756 action.setChecked(sub_window == self.mdi_area.activeSubWindow()) 2924 action.setChecked(sub_window == self.mdi_area.activeSubWindow())
2757 action.triggered.connect(lambda x=nr: self.setActiveSubWindow(x)) 2925 action.triggered.connect(lambda a=None,x=nr: self.setActiveSubWindow(x))
2758 self.window_menu.addAction(action) 2926 self.window_menu.addAction(action)
2759 nr += 1 2927 nr += 1
2760 2928
@@ -2840,6 +3008,12 @@ cd xed
2840sudo ./mfile.py --prefix=/usr/local install 3008sudo ./mfile.py --prefix=/usr/local install
2841sudo ldconfig 3009sudo ldconfig
2842</pre> 3010</pre>
3011<h3>Instructions per Cycle (IPC)</h3>
3012If available, IPC information is displayed in columns 'insn_cnt', 'cyc_cnt' and 'IPC'.
3013<p><b>Intel PT note:</b> The information applies to the blocks of code ending with, and including, that branch.
3014Due to the granularity of timing information, the number of cycles for some code blocks will not be known.
3015In that case, 'insn_cnt', 'cyc_cnt' and 'IPC' are zero, but when 'IPC' is displayed it covers the period
3016since the previous displayed 'IPC'.
2843<h3>Find</h3> 3017<h3>Find</h3>
2844Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match. 3018Ctrl-F displays a Find bar which finds substrings by either an exact match or a regular expression match.
2845Refer to Python documentation for the regular expression syntax. 3019Refer to Python documentation for the regular expression syntax.
@@ -3114,14 +3288,14 @@ class MainWindow(QMainWindow):
3114 event = event.split(":")[0] 3288 event = event.split(":")[0]
3115 if event == "branches": 3289 if event == "branches":
3116 label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")" 3290 label = "All branches" if branches_events == 1 else "All branches " + "(id=" + dbid + ")"
3117 reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewBranchView(x), self)) 3291 reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewBranchView(x), self))
3118 label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")" 3292 label = "Selected branches" if branches_events == 1 else "Selected branches " + "(id=" + dbid + ")"
3119 reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda x=dbid: self.NewSelectedBranchView(x), self)) 3293 reports_menu.addAction(CreateAction(label, "Create a new window displaying branch events", lambda a=None,x=dbid: self.NewSelectedBranchView(x), self))
3120 3294
3121 def TableMenu(self, tables, menu): 3295 def TableMenu(self, tables, menu):
3122 table_menu = menu.addMenu("&Tables") 3296 table_menu = menu.addMenu("&Tables")
3123 for table in tables: 3297 for table in tables:
3124 table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda t=table: self.NewTableView(t), self)) 3298 table_menu.addAction(CreateAction(table, "Create a new window containing a table view", lambda a=None,t=table: self.NewTableView(t), self))
3125 3299
3126 def NewCallGraph(self): 3300 def NewCallGraph(self):
3127 CallGraphWindow(self.glb, self) 3301 CallGraphWindow(self.glb, self)
@@ -3361,18 +3535,27 @@ class DBRef():
3361# Main 3535# Main
3362 3536
3363def Main(): 3537def Main():
3364 if (len(sys.argv) < 2): 3538 usage_str = "exported-sql-viewer.py [--pyside-version-1] <database name>\n" \
3365 printerr("Usage is: exported-sql-viewer.py {<database name> | --help-only}"); 3539 " or: exported-sql-viewer.py --help-only"
3366 raise Exception("Too few arguments") 3540 ap = argparse.ArgumentParser(usage = usage_str, add_help = False)
3367 3541 ap.add_argument("--pyside-version-1", action='store_true')
3368 dbname = sys.argv[1] 3542 ap.add_argument("dbname", nargs="?")
3369 if dbname == "--help-only": 3543 ap.add_argument("--help-only", action='store_true')
3544 args = ap.parse_args()
3545
3546 if args.help_only:
3370 app = QApplication(sys.argv) 3547 app = QApplication(sys.argv)
3371 mainwindow = HelpOnlyWindow() 3548 mainwindow = HelpOnlyWindow()
3372 mainwindow.show() 3549 mainwindow.show()
3373 err = app.exec_() 3550 err = app.exec_()
3374 sys.exit(err) 3551 sys.exit(err)
3375 3552
3553 dbname = args.dbname
3554 if dbname is None:
3555 ap.print_usage()
3556 print("Too few arguments")
3557 sys.exit(1)
3558
3376 is_sqlite3 = False 3559 is_sqlite3 = False
3377 try: 3560 try:
3378 f = open(dbname, "rb") 3561 f = open(dbname, "rb")
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 0b2b8305c965..e72accefd669 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -1,3 +1,5 @@
1# SPDX-License-Identifier: GPL-2.0
2
1perf-y += builtin-test.o 3perf-y += builtin-test.o
2perf-y += parse-events.o 4perf-y += parse-events.o
3perf-y += dso-data.o 5perf-y += dso-data.o
@@ -50,6 +52,8 @@ perf-y += perf-hooks.o
50perf-y += clang.o 52perf-y += clang.o
51perf-y += unit_number__scnprintf.o 53perf-y += unit_number__scnprintf.o
52perf-y += mem2node.o 54perf-y += mem2node.o
55perf-y += map_groups.o
56perf-y += time-utils-test.o
53 57
54$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build 58$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
55 $(call rule_mkdir) 59 $(call rule_mkdir)
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
index 57fc544aedb0..153624e2d0f5 100644
--- a/tools/perf/tests/bp_account.c
+++ b/tools/perf/tests/bp_account.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select 3 * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
3 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. 4 * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
index 1ca5106df5f1..ab4b98b3165d 100644
--- a/tools/perf/tests/bpf-script-example.c
+++ b/tools/perf/tests/bpf-script-example.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * bpf-script-example.c 3 * bpf-script-example.c
3 * Test basic LLVM building 4 * Test basic LLVM building
diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c
index ff3ec8337f0a..219673aa278f 100644
--- a/tools/perf/tests/bpf-script-test-kbuild.c
+++ b/tools/perf/tests/bpf-script-test-kbuild.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * bpf-script-test-kbuild.c 3 * bpf-script-test-kbuild.c
3 * Test include from kernel header 4 * Test include from kernel header
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
index 43f1e16486f4..bd83d364cf30 100644
--- a/tools/perf/tests/bpf-script-test-prologue.c
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * bpf-script-test-prologue.c 3 * bpf-script-test-prologue.c
3 * Test BPF prologue 4 * Test BPF prologue
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
index 93af77421816..74006e4b2d24 100644
--- a/tools/perf/tests/bpf-script-test-relocation.c
+++ b/tools/perf/tests/bpf-script-test-relocation.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1/* 2/*
2 * bpf-script-test-relocation.c 3 * bpf-script-test-relocation.c
3 * Test BPF loader checking relocation 4 * Test BPF loader checking relocation
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 79b54f8ddebf..c9e4cdc4c9c8 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <errno.h> 2#include <errno.h>
2#include <stdio.h> 3#include <stdio.h>
3#include <sys/epoll.h> 4#include <sys/epoll.h>
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9852b5d624a5..66a82badc1d1 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -22,6 +22,7 @@
22#include "string2.h" 22#include "string2.h"
23#include "symbol.h" 23#include "symbol.h"
24#include <linux/kernel.h> 24#include <linux/kernel.h>
25#include <linux/string.h>
25#include <subcmd/exec-cmd.h> 26#include <subcmd/exec-cmd.h>
26 27
27static bool dont_fork; 28static bool dont_fork;
@@ -290,6 +291,14 @@ static struct test generic_tests[] = {
290 .func = test__mem2node, 291 .func = test__mem2node,
291 }, 292 },
292 { 293 {
294 .desc = "time utils",
295 .func = test__time_utils,
296 },
297 {
298 .desc = "map_groups__merge_in",
299 .func = test__map_groups__merge_in,
300 },
301 {
293 .func = NULL, 302 .func = NULL,
294 }, 303 },
295}; 304};
@@ -430,7 +439,7 @@ static const char *shell_test__description(char *description, size_t size,
430 description = fgets(description, size, fp); 439 description = fgets(description, size, fp);
431 fclose(fp); 440 fclose(fp);
432 441
433 return description ? trim(description + 1) : NULL; 442 return description ? strim(description + 1) : NULL;
434} 443}
435 444
436#define for_each_shell_test(dir, base, ent) \ 445#define for_each_shell_test(dir, base, ent) \
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 4ebd2681e760..aa6df122b175 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -22,7 +22,7 @@
22 22
23#include "tests.h" 23#include "tests.h"
24 24
25#include "sane_ctype.h" 25#include <linux/ctype.h>
26 26
27#define BUFSZ 1024 27#define BUFSZ 1024
28#define READLEN 128 28#define READLEN 128
diff --git a/tools/perf/tests/map_groups.c b/tools/perf/tests/map_groups.c
new file mode 100644
index 000000000000..594fdaca4f71
--- /dev/null
+++ b/tools/perf/tests/map_groups.c
@@ -0,0 +1,121 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/compiler.h>
3#include <linux/kernel.h>
4#include "tests.h"
5#include "map.h"
6#include "map_groups.h"
7#include "dso.h"
8#include "debug.h"
9
10struct map_def {
11 const char *name;
12 u64 start;
13 u64 end;
14};
15
16static int check_maps(struct map_def *merged, unsigned int size, struct map_groups *mg)
17{
18 struct map *map;
19 unsigned int i = 0;
20
21 map = map_groups__first(mg);
22 while (map) {
23 TEST_ASSERT_VAL("wrong map start", map->start == merged[i].start);
24 TEST_ASSERT_VAL("wrong map end", map->end == merged[i].end);
25 TEST_ASSERT_VAL("wrong map name", !strcmp(map->dso->name, merged[i].name));
26 TEST_ASSERT_VAL("wrong map refcnt", refcount_read(&map->refcnt) == 2);
27
28 i++;
29 map = map_groups__next(map);
30
31 TEST_ASSERT_VAL("less maps expected", (map && i < size) || (!map && i == size));
32 }
33
34 return TEST_OK;
35}
36
37int test__map_groups__merge_in(struct test *t __maybe_unused, int subtest __maybe_unused)
38{
39 struct map_groups mg;
40 unsigned int i;
41 struct map_def bpf_progs[] = {
42 { "bpf_prog_1", 200, 300 },
43 { "bpf_prog_2", 500, 600 },
44 { "bpf_prog_3", 800, 900 },
45 };
46 struct map_def merged12[] = {
47 { "kcore1", 100, 200 },
48 { "bpf_prog_1", 200, 300 },
49 { "kcore1", 300, 500 },
50 { "bpf_prog_2", 500, 600 },
51 { "kcore1", 600, 800 },
52 { "bpf_prog_3", 800, 900 },
53 { "kcore1", 900, 1000 },
54 };
55 struct map_def merged3[] = {
56 { "kcore1", 100, 200 },
57 { "bpf_prog_1", 200, 300 },
58 { "kcore1", 300, 500 },
59 { "bpf_prog_2", 500, 600 },
60 { "kcore1", 600, 800 },
61 { "bpf_prog_3", 800, 900 },
62 { "kcore1", 900, 1000 },
63 { "kcore3", 1000, 1100 },
64 };
65 struct map *map_kcore1, *map_kcore2, *map_kcore3;
66 int ret;
67
68 map_groups__init(&mg, NULL);
69
70 for (i = 0; i < ARRAY_SIZE(bpf_progs); i++) {
71 struct map *map;
72
73 map = dso__new_map(bpf_progs[i].name);
74 TEST_ASSERT_VAL("failed to create map", map);
75
76 map->start = bpf_progs[i].start;
77 map->end = bpf_progs[i].end;
78 map_groups__insert(&mg, map);
79 map__put(map);
80 }
81
82 map_kcore1 = dso__new_map("kcore1");
83 TEST_ASSERT_VAL("failed to create map", map_kcore1);
84
85 map_kcore2 = dso__new_map("kcore2");
86 TEST_ASSERT_VAL("failed to create map", map_kcore2);
87
88 map_kcore3 = dso__new_map("kcore3");
89 TEST_ASSERT_VAL("failed to create map", map_kcore3);
90
91 /* kcore1 map overlaps over all bpf maps */
92 map_kcore1->start = 100;
93 map_kcore1->end = 1000;
94
95 /* kcore2 map hides behind bpf_prog_2 */
96 map_kcore2->start = 550;
97 map_kcore2->end = 570;
98
99 /* kcore3 map hides behind bpf_prog_3, kcore1 and adds new map */
100 map_kcore3->start = 880;
101 map_kcore3->end = 1100;
102
103 ret = map_groups__merge_in(&mg, map_kcore1);
104 TEST_ASSERT_VAL("failed to merge map", !ret);
105
106 ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg);
107 TEST_ASSERT_VAL("merge check failed", !ret);
108
109 ret = map_groups__merge_in(&mg, map_kcore2);
110 TEST_ASSERT_VAL("failed to merge map", !ret);
111
112 ret = check_maps(merged12, ARRAY_SIZE(merged12), &mg);
113 TEST_ASSERT_VAL("merge check failed", !ret);
114
115 ret = map_groups__merge_in(&mg, map_kcore3);
116 TEST_ASSERT_VAL("failed to merge map", !ret);
117
118 ret = check_maps(merged3, ARRAY_SIZE(merged3), &mg);
119 TEST_ASSERT_VAL("merge check failed", !ret);
120 return TEST_OK;
121}
diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c
index 0f82ee9fd3f7..efe3397824d2 100644
--- a/tools/perf/tests/mem.c
+++ b/tools/perf/tests/mem.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include "util/mem-events.h" 2#include "util/mem-events.h"
2#include "util/symbol.h" 3#include "util/symbol.h"
3#include "linux/perf_event.h" 4#include "linux/perf_event.h"
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index 9e9e4d37cc77..d23ff1b68eba 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -1,3 +1,4 @@
1// SPDX-License-Identifier: GPL-2.0
1#include <linux/compiler.h> 2#include <linux/compiler.h>
2#include <linux/bitmap.h> 3#include <linux/bitmap.h>
3#include "cpumap.h" 4#include "cpumap.h"
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 4a69c07f4101..8f3c80e13584 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -18,6 +18,32 @@
18#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ 18#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
19 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD) 19 PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
20 20
21#if defined(__s390x__)
22/* Return true if kvm module is available and loaded. Test this
 23 * and return success when trace point kvm_s390_create_vm
24 * exists. Otherwise this test always fails.
25 */
26static bool kvm_s390_create_vm_valid(void)
27{
28 char *eventfile;
29 bool rc = false;
30
31 eventfile = get_events_file("kvm-s390");
32
33 if (eventfile) {
34 DIR *mydir = opendir(eventfile);
35
36 if (mydir) {
37 rc = true;
38 closedir(mydir);
39 }
40 put_events_file(eventfile);
41 }
42
43 return rc;
44}
45#endif
46
21static int test__checkevent_tracepoint(struct perf_evlist *evlist) 47static int test__checkevent_tracepoint(struct perf_evlist *evlist)
22{ 48{
23 struct perf_evsel *evsel = perf_evlist__first(evlist); 49 struct perf_evsel *evsel = perf_evlist__first(evlist);
@@ -1642,6 +1668,7 @@ static struct evlist_test test__events[] = {
1642 { 1668 {
1643 .name = "kvm-s390:kvm_s390_create_vm", 1669 .name = "kvm-s390:kvm_s390_create_vm",
1644 .check = test__checkevent_tracepoint, 1670 .check = test__checkevent_tracepoint,
1671 .valid = kvm_s390_create_vm_valid,
1645 .id = 100, 1672 .id = 100,
1646 }, 1673 },
1647#endif 1674#endif
diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh
index e37787be672b..51e3f60baba0 100644
--- a/tools/perf/tests/shell/lib/probe.sh
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -1,3 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0
1# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 2# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
2 3
3skip_if_no_perf_probe() { 4skip_if_no_perf_probe() {
diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh
index 46e076e3c537..5d1b63d3f3e1 100755
--- a/tools/perf/tests/shell/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/probe_vfs_getname.sh
@@ -1,6 +1,7 @@
1#!/bin/sh 1#!/bin/sh
2# Add vfs_getname probe to get syscall args filenames 2# Add vfs_getname probe to get syscall args filenames
3# 3
4# SPDX-License-Identifier: GPL-2.0
4# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 5# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
5 6
6. $(dirname $0)/lib/probe.sh 7. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index 58a99a292930..f12a4e217968 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -7,6 +7,7 @@
7# This needs no debuginfo package, all is done using the libc ELF symtab 7# This needs no debuginfo package, all is done using the libc ELF symtab
8# and the CFI info in the binaries. 8# and the CFI info in the binaries.
9 9
10# SPDX-License-Identifier: GPL-2.0
10# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 11# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
11 12
12. $(dirname $0)/lib/probe.sh 13. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 9b073e7fa88c..54030c18bfc2 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -6,6 +6,7 @@
6# checks that that was captured by the vfs_getname probe in the generated 6# checks that that was captured by the vfs_getname probe in the generated
7# perf.data file, with the temp file name as the pathname argument. 7# perf.data file, with the temp file name as the pathname argument.
8 8
9# SPDX-License-Identifier: GPL-2.0
9# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 10# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
10 11
11. $(dirname $0)/lib/probe.sh 12. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
index 5dcba800109f..899604d17b85 100755
--- a/tools/perf/tests/shell/record+zstd_comp_decomp.sh
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -1,6 +1,8 @@
1#!/bin/sh 1#!/bin/sh
2# Zstd perf.data compression/decompression 2# Zstd perf.data compression/decompression
3 3
4# SPDX-License-Identifier: GPL-2.0
5
4trace_file=$(mktemp /tmp/perf.data.XXX) 6trace_file=$(mktemp /tmp/perf.data.XXX)
5perf_tool=perf 7perf_tool=perf
6 8
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 147efeb6b195..45d269b0157e 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -7,6 +7,7 @@
7# that already handles "probe:vfs_getname" if present, and used in the 7# that already handles "probe:vfs_getname" if present, and used in the
8# "open" syscall "filename" argument beautifier. 8# "open" syscall "filename" argument beautifier.
9 9
10# SPDX-License-Identifier: GPL-2.0
10# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 11# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
11 12
12. $(dirname $0)/lib/probe.sh 13. $(dirname $0)/lib/probe.sh
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 399f18ca71a3..72912eb473cb 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -107,6 +107,8 @@ const char *test__clang_subtest_get_desc(int subtest);
107int test__clang_subtest_get_nr(void); 107int test__clang_subtest_get_nr(void);
108int test__unit_number__scnprint(struct test *test, int subtest); 108int test__unit_number__scnprint(struct test *test, int subtest);
109int test__mem2node(struct test *t, int subtest); 109int test__mem2node(struct test *t, int subtest);
110int test__map_groups__merge_in(struct test *t, int subtest);
111int test__time_utils(struct test *t, int subtest);
110 112
111bool test__bp_signal_is_supported(void); 113bool test__bp_signal_is_supported(void);
112bool test__wp_is_supported(void); 114bool test__wp_is_supported(void);
diff --git a/tools/perf/tests/time-utils-test.c b/tools/perf/tests/time-utils-test.c
new file mode 100644
index 000000000000..4f53006233a1
--- /dev/null
+++ b/tools/perf/tests/time-utils-test.c
@@ -0,0 +1,251 @@
1// SPDX-License-Identifier: GPL-2.0
2#include <linux/compiler.h>
3#include <linux/time64.h>
4#include <inttypes.h>
5#include <string.h>
6#include "time-utils.h"
7#include "evlist.h"
8#include "session.h"
9#include "debug.h"
10#include "tests.h"
11
12static bool test__parse_nsec_time(const char *str, u64 expected)
13{
14 u64 ptime;
15 int err;
16
17 pr_debug("\nparse_nsec_time(\"%s\")\n", str);
18
19 err = parse_nsec_time(str, &ptime);
20 if (err) {
21 pr_debug("error %d\n", err);
22 return false;
23 }
24
25 if (ptime != expected) {
26 pr_debug("Failed. ptime %" PRIu64 " expected %" PRIu64 "\n",
27 ptime, expected);
28 return false;
29 }
30
31 pr_debug("%" PRIu64 "\n", ptime);
32
33 return true;
34}
35
36static bool test__perf_time__parse_str(const char *ostr, u64 start, u64 end)
37{
38 struct perf_time_interval ptime;
39 int err;
40
41 pr_debug("\nperf_time__parse_str(\"%s\")\n", ostr);
42
43 err = perf_time__parse_str(&ptime, ostr);
44 if (err) {
45 pr_debug("Error %d\n", err);
46 return false;
47 }
48
49 if (ptime.start != start || ptime.end != end) {
50 pr_debug("Failed. Expected %" PRIu64 " to %" PRIu64 "\n",
51 start, end);
52 return false;
53 }
54
55 return true;
56}
57
58#define TEST_MAX 64
59
60struct test_data {
61 const char *str;
62 u64 first;
63 u64 last;
64 struct perf_time_interval ptime[TEST_MAX];
65 int num;
66 u64 skip[TEST_MAX];
67 u64 noskip[TEST_MAX];
68};
69
70static bool test__perf_time__parse_for_ranges(struct test_data *d)
71{
72 struct perf_evlist evlist = {
73 .first_sample_time = d->first,
74 .last_sample_time = d->last,
75 };
76 struct perf_session session = { .evlist = &evlist };
77 struct perf_time_interval *ptime = NULL;
78 int range_size, range_num;
79 bool pass = false;
80 int i, err;
81
82 pr_debug("\nperf_time__parse_for_ranges(\"%s\")\n", d->str);
83
84 if (strchr(d->str, '%'))
85 pr_debug("first_sample_time %" PRIu64 " last_sample_time %" PRIu64 "\n",
86 d->first, d->last);
87
88 err = perf_time__parse_for_ranges(d->str, &session, &ptime, &range_size,
89 &range_num);
90 if (err) {
91 pr_debug("error %d\n", err);
92 goto out;
93 }
94
95 if (range_size < d->num || range_num != d->num) {
96 pr_debug("bad size: range_size %d range_num %d expected num %d\n",
97 range_size, range_num, d->num);
98 goto out;
99 }
100
101 for (i = 0; i < d->num; i++) {
102 if (ptime[i].start != d->ptime[i].start ||
103 ptime[i].end != d->ptime[i].end) {
104 pr_debug("bad range %d expected %" PRIu64 " to %" PRIu64 "\n",
105 i, d->ptime[i].start, d->ptime[i].end);
106 goto out;
107 }
108 }
109
110 if (perf_time__ranges_skip_sample(ptime, d->num, 0)) {
111 pr_debug("failed to keep 0\n");
112 goto out;
113 }
114
115 for (i = 0; i < TEST_MAX; i++) {
116 if (d->skip[i] &&
117 !perf_time__ranges_skip_sample(ptime, d->num, d->skip[i])) {
118 pr_debug("failed to skip %" PRIu64 "\n", d->skip[i]);
119 goto out;
120 }
121 if (d->noskip[i] &&
122 perf_time__ranges_skip_sample(ptime, d->num, d->noskip[i])) {
123 pr_debug("failed to keep %" PRIu64 "\n", d->noskip[i]);
124 goto out;
125 }
126 }
127
128 pass = true;
129out:
130 free(ptime);
131 return pass;
132}
133
134int test__time_utils(struct test *t __maybe_unused, int subtest __maybe_unused)
135{
136 bool pass = true;
137
138 pass &= test__parse_nsec_time("0", 0);
139 pass &= test__parse_nsec_time("1", 1000000000ULL);
140 pass &= test__parse_nsec_time("0.000000001", 1);
141 pass &= test__parse_nsec_time("1.000000001", 1000000001ULL);
142 pass &= test__parse_nsec_time("123456.123456", 123456123456000ULL);
143 pass &= test__parse_nsec_time("1234567.123456789", 1234567123456789ULL);
144 pass &= test__parse_nsec_time("18446744073.709551615",
145 0xFFFFFFFFFFFFFFFFULL);
146
147 pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456789",
148 1234567123456789ULL, 1234567123456789ULL);
149 pass &= test__perf_time__parse_str("1234567.123456789,1234567.123456790",
150 1234567123456789ULL, 1234567123456790ULL);
151 pass &= test__perf_time__parse_str("1234567.123456789,",
152 1234567123456789ULL, 0);
153 pass &= test__perf_time__parse_str(",1234567.123456789",
154 0, 1234567123456789ULL);
155 pass &= test__perf_time__parse_str("0,1234567.123456789",
156 0, 1234567123456789ULL);
157
158 {
159 u64 b = 1234567123456789ULL;
160 struct test_data d = {
161 .str = "1234567.123456789,1234567.123456790",
162 .ptime = { {b, b + 1}, },
163 .num = 1,
164 .skip = { b - 1, b + 2, },
165 .noskip = { b, b + 1, },
166 };
167
168 pass &= test__perf_time__parse_for_ranges(&d);
169 }
170
171 {
172 u64 b = 1234567123456789ULL;
173 u64 c = 7654321987654321ULL;
174 u64 e = 8000000000000000ULL;
175 struct test_data d = {
176 .str = "1234567.123456789,1234567.123456790 "
177 "7654321.987654321,7654321.987654444 "
178 "8000000,8000000.000000005",
179 .ptime = { {b, b + 1}, {c, c + 123}, {e, e + 5}, },
180 .num = 3,
181 .skip = { b - 1, b + 2, c - 1, c + 124, e - 1, e + 6 },
182 .noskip = { b, b + 1, c, c + 123, e, e + 5 },
183 };
184
185 pass &= test__perf_time__parse_for_ranges(&d);
186 }
187
188 {
189 u64 b = 7654321ULL * NSEC_PER_SEC;
190 struct test_data d = {
191 .str = "10%/1",
192 .first = b,
193 .last = b + 100,
194 .ptime = { {b, b + 9}, },
195 .num = 1,
196 .skip = { b - 1, b + 10, },
197 .noskip = { b, b + 9, },
198 };
199
200 pass &= test__perf_time__parse_for_ranges(&d);
201 }
202
203 {
204 u64 b = 7654321ULL * NSEC_PER_SEC;
205 struct test_data d = {
206 .str = "10%/2",
207 .first = b,
208 .last = b + 100,
209 .ptime = { {b + 10, b + 19}, },
210 .num = 1,
211 .skip = { b + 9, b + 20, },
212 .noskip = { b + 10, b + 19, },
213 };
214
215 pass &= test__perf_time__parse_for_ranges(&d);
216 }
217
218 {
219 u64 b = 11223344ULL * NSEC_PER_SEC;
220 struct test_data d = {
221 .str = "10%/1,10%/2",
222 .first = b,
223 .last = b + 100,
224 .ptime = { {b, b + 9}, {b + 10, b + 19}, },
225 .num = 2,
226 .skip = { b - 1, b + 20, },
227 .noskip = { b, b + 8, b + 9, b + 10, b + 11, b + 12, b + 19, },
228 };
229
230 pass &= test__perf_time__parse_for_ranges(&d);
231 }
232
233 {
234 u64 b = 11223344ULL * NSEC_PER_SEC;
235 struct test_data d = {
236 .str = "10%/1,10%/3,10%/10",
237 .first = b,
238 .last = b + 100,
239 .ptime = { {b, b + 9}, {b + 20, b + 29}, { b + 90, b + 100}, },
240 .num = 3,
241 .skip = { b - 1, b + 10, b + 19, b + 30, b + 89, b + 101 },
242 .noskip = { b, b + 9, b + 20, b + 29, b + 90, b + 100},
243 };
244
245 pass &= test__perf_time__parse_for_ranges(&d);
246 }
247
248 pr_debug("\n");
249
250 return pass ? 0 : TEST_FAIL;
251}
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 85f328ddf897..afa75a76f6b8 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -1,11 +1,14 @@
1perf-y += clone.o 1perf-y += clone.o
2perf-y += fcntl.o 2perf-y += fcntl.o
3perf-y += flock.o 3perf-y += flock.o
4perf-y += fsmount.o
5perf-y += fspick.o
4ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) 6ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
5perf-y += ioctl.o 7perf-y += ioctl.o
6endif 8endif
7perf-y += kcmp.o 9perf-y += kcmp.o
8perf-y += mount_flags.o 10perf-y += mount_flags.o
11perf-y += move_mount.o
9perf-y += pkey_alloc.o 12perf-y += pkey_alloc.o
10perf-y += arch_prctl.o 13perf-y += arch_prctl.o
11perf-y += prctl.o 14perf-y += prctl.o
@@ -13,3 +16,4 @@ perf-y += renameat.o
13perf-y += sockaddr.o 16perf-y += sockaddr.o
14perf-y += socket.o 17perf-y += socket.o
15perf-y += statx.o 18perf-y += statx.o
19perf-y += sync_file_range.o
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index 139d485a6f16..7e06605f7c76 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -108,6 +108,9 @@ struct syscall_arg {
108 108
109unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx); 109unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx);
110 110
111size_t syscall_arg__scnprintf_strarray_flags(char *bf, size_t size, struct syscall_arg *arg);
112#define SCA_STRARRAY_FLAGS syscall_arg__scnprintf_strarray_flags
113
111size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg); 114size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
112#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays 115#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
113 116
@@ -141,6 +144,12 @@ size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_ar
141size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg); 144size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg);
142#define SCA_FLOCK syscall_arg__scnprintf_flock 145#define SCA_FLOCK syscall_arg__scnprintf_flock
143 146
147size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg);
148#define SCA_FSMOUNT_ATTR_FLAGS syscall_arg__scnprintf_fsmount_attr_flags
149
150size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg);
151#define SCA_FSPICK_FLAGS syscall_arg__scnprintf_fspick_flags
152
144size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); 153size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg);
145#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd 154#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd
146 155
@@ -156,6 +165,9 @@ unsigned long syscall_arg__mask_val_mount_flags(struct syscall_arg *arg, unsigne
156size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg); 165size_t syscall_arg__scnprintf_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
157#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags 166#define SCA_MOUNT_FLAGS syscall_arg__scnprintf_mount_flags
158 167
168size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg);
169#define SCA_MOVE_MOUNT_FLAGS syscall_arg__scnprintf_move_mount_flags
170
159size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg); 171size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
160#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights 172#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
161 173
@@ -189,6 +201,9 @@ size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_
189size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg); 201size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg);
190#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask 202#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask
191 203
204size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg);
205#define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags
206
192size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix); 207size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix);
193 208
194void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, 209void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index 6eb9a6636171..1a8d3be2030e 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -25,6 +25,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size,
25 P_FLAG(FS); 25 P_FLAG(FS);
26 P_FLAG(FILES); 26 P_FLAG(FILES);
27 P_FLAG(SIGHAND); 27 P_FLAG(SIGHAND);
28 P_FLAG(PIDFD);
28 P_FLAG(PTRACE); 29 P_FLAG(PTRACE);
29 P_FLAG(VFORK); 30 P_FLAG(VFORK);
30 P_FLAG(PARENT); 31 P_FLAG(PARENT);
diff --git a/tools/perf/trace/beauty/fsconfig.sh b/tools/perf/trace/beauty/fsconfig.sh
new file mode 100755
index 000000000000..83fb24df05c9
--- /dev/null
+++ b/tools/perf/trace/beauty/fsconfig.sh
@@ -0,0 +1,17 @@
1#!/bin/sh
2# SPDX-License-Identifier: LGPL-2.1
3
4if [ $# -ne 1 ] ; then
5 linux_header_dir=tools/include/uapi/linux
6else
7 linux_header_dir=$1
8fi
9
10linux_mount=${linux_header_dir}/mount.h
11
12printf "static const char *fsconfig_cmds[] = {\n"
13regex='^[[:space:]]*+FSCONFIG_([[:alnum:]_]+)[[:space:]]*=[[:space:]]*([[:digit:]]+)[[:space:]]*,[[:space:]]*.*'
14egrep $regex ${linux_mount} | \
15 sed -r "s/$regex/\2 \1/g" | \
16 xargs printf "\t[%s] = \"%s\",\n"
17printf "};\n"
diff --git a/tools/perf/trace/beauty/fsmount.c b/tools/perf/trace/beauty/fsmount.c
new file mode 100644
index 000000000000..30c8c082a3c3
--- /dev/null
+++ b/tools/perf/trace/beauty/fsmount.c
@@ -0,0 +1,34 @@
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * trace/beauty/fsmount.c
4 *
5 * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
6 */
7
8#include "trace/beauty/beauty.h"
9#include <linux/log2.h>
10#include <uapi/linux/mount.h>
11
12static size_t fsmount__scnprintf_attr_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
13{
14#include "trace/beauty/generated/fsmount_arrays.c"
15 static DEFINE_STRARRAY(fsmount_attr_flags, "MOUNT_ATTR_");
16 size_t printed = 0;
17
18 if ((flags & ~MOUNT_ATTR__ATIME) != 0)
19 printed += strarray__scnprintf_flags(&strarray__fsmount_attr_flags, bf, size, show_prefix, flags);
20
21 if ((flags & MOUNT_ATTR__ATIME) == MOUNT_ATTR_RELATIME) {
22 printed += scnprintf(bf + printed, size - printed, "%s%s%s",
23 printed ? "|" : "", show_prefix ? "MOUNT_ATTR_" : "", "RELATIME");
24 }
25
26 return printed;
27}
28
29size_t syscall_arg__scnprintf_fsmount_attr_flags(char *bf, size_t size, struct syscall_arg *arg)
30{
31 unsigned long flags = arg->val;
32
33 return fsmount__scnprintf_attr_flags(flags, bf, size, arg->show_string_prefix);
34}
diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh
new file mode 100755
index 000000000000..615cc0fcf4f9
--- /dev/null
+++ b/tools/perf/trace/beauty/fsmount.sh
@@ -0,0 +1,22 @@
1#!/bin/sh
2# SPDX-License-Identifier: LGPL-2.1
3
4if [ $# -ne 1 ] ; then
5 linux_header_dir=tools/include/uapi/linux
6else
7 linux_header_dir=$1
8fi
9
10linux_mount=${linux_header_dir}/mount.h
11
 12# Remove MOUNT_ATTR_RELATIME as it is zero, handle it in a special way in the beautifier
13# Only handle MOUNT_ATTR_ followed by a capital letter/num as __ is special case
14# for things like MOUNT_ATTR__ATIME that is a mask for the possible ATIME handling
15# bits. Special case it as well in the beautifier
16
17printf "static const char *fsmount_attr_flags[] = {\n"
18regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
19egrep $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \
20 sed -r "s/$regex/\2 \1/g" | \
21 xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
22printf "};\n"
diff --git a/tools/perf/trace/beauty/fspick.c b/tools/perf/trace/beauty/fspick.c
new file mode 100644
index 000000000000..c402479c96f0
--- /dev/null
+++ b/tools/perf/trace/beauty/fspick.c
@@ -0,0 +1,24 @@
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * trace/beauty/fspick.c
4 *
5 * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
6 */
7
8#include "trace/beauty/beauty.h"
9#include <linux/log2.h>
10
11static size_t fspick__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
12{
13#include "trace/beauty/generated/fspick_arrays.c"
14 static DEFINE_STRARRAY(fspick_flags, "FSPICK_");
15
16 return strarray__scnprintf_flags(&strarray__fspick_flags, bf, size, show_prefix, flags);
17}
18
19size_t syscall_arg__scnprintf_fspick_flags(char *bf, size_t size, struct syscall_arg *arg)
20{
21 unsigned long flags = arg->val;
22
23 return fspick__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
24}
diff --git a/tools/perf/trace/beauty/fspick.sh b/tools/perf/trace/beauty/fspick.sh
new file mode 100755
index 000000000000..b220e07ef452
--- /dev/null
+++ b/tools/perf/trace/beauty/fspick.sh
@@ -0,0 +1,17 @@
1#!/bin/sh
2# SPDX-License-Identifier: LGPL-2.1
3
4if [ $# -ne 1 ] ; then
5 linux_header_dir=tools/include/uapi/linux
6else
7 linux_header_dir=$1
8fi
9
10linux_mount=${linux_header_dir}/mount.h
11
12printf "static const char *fspick_flags[] = {\n"
13regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSPICK_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
14egrep $regex ${linux_mount} | \
15 sed -r "s/$regex/\2 \1/g" | \
16 xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
17printf "};\n"
diff --git a/tools/perf/trace/beauty/move_mount.c b/tools/perf/trace/beauty/move_mount.c
new file mode 100644
index 000000000000..78ed80395406
--- /dev/null
+++ b/tools/perf/trace/beauty/move_mount.c
@@ -0,0 +1,24 @@
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * trace/beauty/move_mount.c
4 *
5 * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
6 */
7
8#include "trace/beauty/beauty.h"
9#include <linux/log2.h>
10
11static size_t move_mount__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
12{
13#include "trace/beauty/generated/move_mount_flags_array.c"
14 static DEFINE_STRARRAY(move_mount_flags, "MOVE_MOUNT_");
15
16 return strarray__scnprintf_flags(&strarray__move_mount_flags, bf, size, show_prefix, flags);
17}
18
19size_t syscall_arg__scnprintf_move_mount_flags(char *bf, size_t size, struct syscall_arg *arg)
20{
21 unsigned long flags = arg->val;
22
23 return move_mount__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
24}
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
new file mode 100755
index 000000000000..55e59241daa4
--- /dev/null
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -0,0 +1,17 @@
1#!/bin/sh
2# SPDX-License-Identifier: LGPL-2.1
3
4if [ $# -ne 1 ] ; then
5 linux_header_dir=tools/include/uapi/linux
6else
7 linux_header_dir=$1
8fi
9
10linux_mount=${linux_header_dir}/mount.h
11
12printf "static const char *move_mount_flags[] = {\n"
13regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
14egrep $regex ${linux_mount} | \
15 sed -r "s/$regex/\2 \1/g" | \
16 xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
17printf "};\n"
diff --git a/tools/perf/trace/beauty/sync_file_range.c b/tools/perf/trace/beauty/sync_file_range.c
new file mode 100644
index 000000000000..1c425f04047d
--- /dev/null
+++ b/tools/perf/trace/beauty/sync_file_range.c
@@ -0,0 +1,31 @@
1// SPDX-License-Identifier: LGPL-2.1
2/*
3 * trace/beauty/sync_file_range.c
4 *
5 * Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
6 */
7
8#include "trace/beauty/beauty.h"
9#include <linux/log2.h>
10#include <uapi/linux/fs.h>
11
12static size_t sync_file_range__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix)
13{
14#include "trace/beauty/generated/sync_file_range_arrays.c"
15 static DEFINE_STRARRAY(sync_file_range_flags, "SYNC_FILE_RANGE_");
16 size_t printed = 0;
17
18 if ((flags & SYNC_FILE_RANGE_WRITE_AND_WAIT) == SYNC_FILE_RANGE_WRITE_AND_WAIT) {
19 printed += scnprintf(bf + printed, size - printed, "%s%s", show_prefix ? "SYNC_FILE_RANGE_" : "", "WRITE_AND_WAIT");
20 flags &= ~SYNC_FILE_RANGE_WRITE_AND_WAIT;
21 }
22
23 return printed + strarray__scnprintf_flags(&strarray__sync_file_range_flags, bf + printed, size - printed, show_prefix, flags);
24}
25
26size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg)
27{
28 unsigned long flags = arg->val;
29
30 return sync_file_range__scnprintf_flags(flags, bf, size, arg->show_string_prefix);
31}
diff --git a/tools/perf/trace/beauty/sync_file_range.sh b/tools/perf/trace/beauty/sync_file_range.sh
new file mode 100755
index 000000000000..7a9282d04e44
--- /dev/null
+++ b/tools/perf/trace/beauty/sync_file_range.sh
@@ -0,0 +1,17 @@
1#!/bin/sh
2# SPDX-License-Identifier: LGPL-2.1
3
4if [ $# -ne 1 ] ; then
5 linux_header_dir=tools/include/uapi/linux
6else
7 linux_header_dir=$1
8fi
9
10linux_fs=${linux_header_dir}/fs.h
11
12printf "static const char *sync_file_range_flags[] = {\n"
13regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+SYNC_FILE_RANGE_([[:alnum:]_]+)[[:space:]]+([[:xdigit:]]+)[[:space:]]*.*'
14egrep $regex ${linux_fs} | \
15 sed -r "s/$regex/\2 \1/g" | \
16 xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
17printf "};\n"
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
index 4ad37d8c7d6a..55ff05a46e0b 100644
--- a/tools/perf/ui/browser.c
+++ b/tools/perf/ui/browser.c
@@ -16,7 +16,7 @@
16#include "helpline.h" 16#include "helpline.h"
17#include "keysyms.h" 17#include "keysyms.h"
18#include "../color.h" 18#include "../color.h"
19#include "sane_ctype.h" 19#include <linux/ctype.h>
20 20
21static int ui_browser__percent_color(struct ui_browser *browser, 21static int ui_browser__percent_color(struct ui_browser *browser,
22 double percent, bool current) 22 double percent, bool current)
@@ -594,7 +594,7 @@ static int ui_browser__color_config(const char *var, const char *value,
594 break; 594 break;
595 595
596 *bg = '\0'; 596 *bg = '\0';
597 bg = ltrim(++bg); 597 bg = skip_spaces(bg + 1);
598 ui_browser__colorsets[i].bg = bg; 598 ui_browser__colorsets[i].bg = bg;
599 ui_browser__colorsets[i].fg = fg; 599 ui_browser__colorsets[i].fg = fg;
600 return 0; 600 return 0;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 3421ecbdd3f0..33e67aa91347 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -6,6 +6,7 @@
6#include <stdlib.h> 6#include <stdlib.h>
7#include <string.h> 7#include <string.h>
8#include <linux/rbtree.h> 8#include <linux/rbtree.h>
9#include <linux/string.h>
9#include <sys/ttydefaults.h> 10#include <sys/ttydefaults.h>
10#include <linux/time64.h> 11#include <linux/time64.h>
11 12
@@ -33,7 +34,7 @@
33#include "units.h" 34#include "units.h"
34#include "time-utils.h" 35#include "time-utils.h"
35 36
36#include "sane_ctype.h" 37#include <linux/ctype.h>
37 38
38extern void hist_browser__init_hpp(void); 39extern void hist_browser__init_hpp(void);
39 40
@@ -1470,7 +1471,7 @@ static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
1470 int i = 0; 1471 int i = 0;
1471 1472
1472 width -= fmt->entry(fmt, &hpp, entry); 1473 width -= fmt->entry(fmt, &hpp, entry);
1473 ui_browser__printf(&browser->b, "%s", ltrim(s)); 1474 ui_browser__printf(&browser->b, "%s", skip_spaces(s));
1474 1475
1475 while (isspace(s[i++])) 1476 while (isspace(s[i++]))
1476 width++; 1477 width++;
@@ -1686,7 +1687,7 @@ static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *brows
1686 ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL); 1687 ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
1687 dummy_hpp.buf[ret] = '\0'; 1688 dummy_hpp.buf[ret] = '\0';
1688 1689
1689 start = trim(dummy_hpp.buf); 1690 start = strim(dummy_hpp.buf);
1690 ret = strlen(start); 1691 ret = strlen(start);
1691 1692
1692 if (start != dummy_hpp.buf) 1693 if (start != dummy_hpp.buf)
@@ -2070,7 +2071,8 @@ static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
2070 advance_hpp(&hpp, ret); 2071 advance_hpp(&hpp, ret);
2071 } 2072 }
2072 2073
2073 printed += fprintf(fp, "%s\n", rtrim(s)); 2074 strim(s);
2075 printed += fprintf(fp, "%s\n", s);
2074 2076
2075 if (he->leaf && folded_sign == '-') { 2077 if (he->leaf && folded_sign == '-') {
2076 printed += hist_browser__fprintf_callchain(browser, he, fp, 2078 printed += hist_browser__fprintf_callchain(browser, he, fp,
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
index c70d9337405b..5f6529c9eb8e 100644
--- a/tools/perf/ui/browsers/map.c
+++ b/tools/perf/ui/browsers/map.c
@@ -13,7 +13,7 @@
13#include "../keysyms.h" 13#include "../keysyms.h"
14#include "map.h" 14#include "map.h"
15 15
16#include "sane_ctype.h" 16#include <linux/ctype.h>
17 17
18struct map_browser { 18struct map_browser {
19 struct ui_browser b; 19 struct ui_browser b;
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index 0c08890f006a..3955ed1d1bd9 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -9,6 +9,7 @@
9#include "../string2.h" 9#include "../string2.h"
10#include "gtk.h" 10#include "gtk.h"
11#include <signal.h> 11#include <signal.h>
12#include <linux/string.h>
12 13
13#define MAX_COLUMNS 32 14#define MAX_COLUMNS 32
14 15
@@ -459,7 +460,7 @@ static void perf_gtk__add_hierarchy_entries(struct hists *hists,
459 advance_hpp(hpp, ret + 2); 460 advance_hpp(hpp, ret + 2);
460 } 461 }
461 462
462 gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1); 463 gtk_tree_store_set(store, &iter, col_idx, strim(bf), -1);
463 464
464 if (!he->leaf) { 465 if (!he->leaf) {
465 hpp->buf = bf; 466 hpp->buf = bf;
@@ -555,7 +556,7 @@ static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
555 first_col = false; 556 first_col = false;
556 557
557 fmt->header(fmt, &hpp, hists, 0, NULL); 558 fmt->header(fmt, &hpp, hists, 0, NULL);
558 strcat(buf, ltrim(rtrim(hpp.buf))); 559 strcat(buf, strim(hpp.buf));
559 } 560 }
560 } 561 }
561 562
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h
index c0686cda39a5..991e692b9b46 100644
--- a/tools/perf/ui/libslang.h
+++ b/tools/perf/ui/libslang.h
@@ -10,7 +10,12 @@
10#ifndef HAVE_LONG_LONG 10#ifndef HAVE_LONG_LONG
11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG 11#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
12#endif 12#endif
13
14#ifdef HAVE_SLANG_INCLUDE_SUBDIR
15#include <slang/slang.h>
16#else
13#include <slang.h> 17#include <slang.h>
18#endif
14 19
15#if SLANG_VERSION < 20104 20#if SLANG_VERSION < 20104
16#define slsmg_printf(msg, args...) \ 21#define slsmg_printf(msg, args...) \
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
index bbfbc91a0fa4..8cd3b64c6893 100644
--- a/tools/perf/ui/progress.c
+++ b/tools/perf/ui/progress.c
@@ -1,6 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/kernel.h> 2#include <linux/kernel.h>
3#include "../cache.h" 3#include "../util/cache.h"
4#include "progress.h" 4#include "progress.h"
5 5
6static void null_progress__update(struct ui_progress *p __maybe_unused) 6static void null_progress__update(struct ui_progress *p __maybe_unused)
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
index a60f2993d390..89393c79d870 100644
--- a/tools/perf/ui/stdio/hist.c
+++ b/tools/perf/ui/stdio/hist.c
@@ -13,7 +13,7 @@
13#include "../../util/srcline.h" 13#include "../../util/srcline.h"
14#include "../../util/string2.h" 14#include "../../util/string2.h"
15#include "../../util/thread.h" 15#include "../../util/thread.h"
16#include "../../util/sane_ctype.h" 16#include <linux/ctype.h>
17 17
18static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin) 18static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
19{ 19{
@@ -516,7 +516,7 @@ static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
516 * dynamic entries are right-aligned but we want left-aligned 516 * dynamic entries are right-aligned but we want left-aligned
517 * in the hierarchy mode 517 * in the hierarchy mode
518 */ 518 */
519 printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf)); 519 printed += fprintf(fp, "%s%s", sep ?: " ", skip_spaces(buf));
520 } 520 }
521 printed += putc('\n', fp); 521 printed += putc('\n', fp);
522 522
@@ -531,6 +531,30 @@ out:
531 return printed; 531 return printed;
532} 532}
533 533
534static int hist_entry__block_fprintf(struct hist_entry *he,
535 char *bf, size_t size,
536 FILE *fp)
537{
538 struct block_hist *bh = container_of(he, struct block_hist, he);
539 int ret = 0;
540
541 for (unsigned int i = 0; i < bh->block_hists.nr_entries; i++) {
542 struct perf_hpp hpp = {
543 .buf = bf,
544 .size = size,
545 .skip = false,
546 };
547
548 bh->block_idx = i;
549 hist_entry__snprintf(he, &hpp);
550
551 if (!hpp.skip)
552 ret += fprintf(fp, "%s\n", bf);
553 }
554
555 return ret;
556}
557
534static int hist_entry__fprintf(struct hist_entry *he, size_t size, 558static int hist_entry__fprintf(struct hist_entry *he, size_t size,
535 char *bf, size_t bfsz, FILE *fp, 559 char *bf, size_t bfsz, FILE *fp,
536 bool ignore_callchains) 560 bool ignore_callchains)
@@ -550,6 +574,9 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
550 if (symbol_conf.report_hierarchy) 574 if (symbol_conf.report_hierarchy)
551 return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp); 575 return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
552 576
577 if (symbol_conf.report_block)
578 return hist_entry__block_fprintf(he, bf, size, fp);
579
553 hist_entry__snprintf(he, &hpp); 580 hist_entry__snprintf(he, &hpp);
554 581
555 ret = fprintf(fp, "%s\n", bf); 582 ret = fprintf(fp, "%s\n", bf);
@@ -566,10 +593,14 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
566static int print_hierarchy_indent(const char *sep, int indent, 593static int print_hierarchy_indent(const char *sep, int indent,
567 const char *line, FILE *fp) 594 const char *line, FILE *fp)
568{ 595{
596 int width;
597
569 if (sep != NULL || indent < 2) 598 if (sep != NULL || indent < 2)
570 return 0; 599 return 0;
571 600
572 return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line); 601 width = (indent - 2) * HIERARCHY_INDENT;
602
603 return fprintf(fp, "%-*.*s", width, width, line);
573} 604}
574 605
575static int hists__fprintf_hierarchy_headers(struct hists *hists, 606static int hists__fprintf_hierarchy_headers(struct hists *hists,
@@ -587,7 +618,7 @@ static int hists__fprintf_hierarchy_headers(struct hists *hists,
587 indent = hists->nr_hpp_node; 618 indent = hists->nr_hpp_node;
588 619
589 /* preserve max indent depth for column headers */ 620 /* preserve max indent depth for column headers */
590 print_hierarchy_indent(sep, indent, spaces, fp); 621 print_hierarchy_indent(sep, indent, " ", fp);
591 622
592 /* the first hpp_list_node is for overhead columns */ 623 /* the first hpp_list_node is for overhead columns */
593 fmt_node = list_first_entry(&hists->hpp_formats, 624 fmt_node = list_first_entry(&hists->hpp_formats,
@@ -616,7 +647,7 @@ static int hists__fprintf_hierarchy_headers(struct hists *hists,
616 647
617 fmt->header(fmt, hpp, hists, 0, NULL); 648 fmt->header(fmt, hpp, hists, 0, NULL);
618 649
619 header_width += fprintf(fp, "%s", trim(hpp->buf)); 650 header_width += fprintf(fp, "%s", strim(hpp->buf));
620 } 651 }
621 } 652 }
622 653
@@ -816,7 +847,7 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
816 if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) { 847 if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
817 int depth = hists->nr_hpp_node + h->depth + 1; 848 int depth = hists->nr_hpp_node + h->depth + 1;
818 849
819 print_hierarchy_indent(sep, depth, spaces, fp); 850 print_hierarchy_indent(sep, depth, " ", fp);
820 fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt); 851 fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
821 852
822 if (max_rows && ++nr_rows >= max_rows) 853 if (max_rows && ++nr_rows >= max_rows)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 6d5bbc8b589b..d3408a463060 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -20,6 +20,7 @@ perf-y += parse-events.o
20perf-y += perf_regs.o 20perf-y += perf_regs.o
21perf-y += path.o 21perf-y += path.o
22perf-y += print_binary.o 22perf-y += print_binary.o
23perf-y += argv_split.o
23perf-y += rbtree.o 24perf-y += rbtree.o
24perf-y += libstring.o 25perf-y += libstring.o
25perf-y += bitmap.o 26perf-y += bitmap.o
@@ -209,10 +210,18 @@ $(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
209 $(call rule_mkdir) 210 $(call rule_mkdir)
210 $(call if_changed_dep,cc_o_c) 211 $(call if_changed_dep,cc_o_c)
211 212
213$(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
214 $(call rule_mkdir)
215 $(call if_changed_dep,cc_o_c)
216
212$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE 217$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
213 $(call rule_mkdir) 218 $(call rule_mkdir)
214 $(call if_changed_dep,cc_o_c) 219 $(call if_changed_dep,cc_o_c)
215 220
221$(OUTPUT)util/ctype.o: ../lib/ctype.c FORCE
222 $(call rule_mkdir)
223 $(call if_changed_dep,cc_o_c)
224
216$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE 225$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
217 $(call rule_mkdir) 226 $(call rule_mkdir)
218 $(call if_changed_dep,cc_o_c) 227 $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
index 3802cee5e188..59241ff342be 100755
--- a/tools/perf/util/PERF-VERSION-GEN
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -19,7 +19,7 @@ TAG=
19if test -d ../../.git -o -f ../../.git 19if test -d ../../.git -o -f ../../.git
20then 20then
21 TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null ) 21 TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null )
22 CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID" 22 CID=$(git log -1 --abbrev=12 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
23elif test -f ../../PERF-VERSION-FILE 23elif test -f ../../PERF-VERSION-FILE
24then 24then
25 TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g') 25 TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 79db038b56f2..ec7aaf31c2b2 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -35,6 +35,7 @@
35#include <pthread.h> 35#include <pthread.h>
36#include <linux/bitops.h> 36#include <linux/bitops.h>
37#include <linux/kernel.h> 37#include <linux/kernel.h>
38#include <linux/string.h>
38#include <bpf/libbpf.h> 39#include <bpf/libbpf.h>
39 40
40/* FIXME: For the HE_COLORSET */ 41/* FIXME: For the HE_COLORSET */
@@ -49,7 +50,7 @@
49#define DARROW_CHAR ((unsigned char)'.') 50#define DARROW_CHAR ((unsigned char)'.')
50#define UARROW_CHAR ((unsigned char)'-') 51#define UARROW_CHAR ((unsigned char)'-')
51 52
52#include "sane_ctype.h" 53#include <linux/ctype.h>
53 54
54struct annotation_options annotation__default_options = { 55struct annotation_options annotation__default_options = {
55 .use_offset = true, 56 .use_offset = true,
@@ -144,6 +145,7 @@ static int arch__associate_ins_ops(struct arch* arch, const char *name, struct i
144#include "arch/arc/annotate/instructions.c" 145#include "arch/arc/annotate/instructions.c"
145#include "arch/arm/annotate/instructions.c" 146#include "arch/arm/annotate/instructions.c"
146#include "arch/arm64/annotate/instructions.c" 147#include "arch/arm64/annotate/instructions.c"
148#include "arch/csky/annotate/instructions.c"
147#include "arch/x86/annotate/instructions.c" 149#include "arch/x86/annotate/instructions.c"
148#include "arch/powerpc/annotate/instructions.c" 150#include "arch/powerpc/annotate/instructions.c"
149#include "arch/s390/annotate/instructions.c" 151#include "arch/s390/annotate/instructions.c"
@@ -163,6 +165,10 @@ static struct arch architectures[] = {
163 .init = arm64__annotate_init, 165 .init = arm64__annotate_init,
164 }, 166 },
165 { 167 {
168 .name = "csky",
169 .init = csky__annotate_init,
170 },
171 {
166 .name = "x86", 172 .name = "x86",
167 .init = x86__annotate_init, 173 .init = x86__annotate_init,
168 .instructions = x86__instructions, 174 .instructions = x86__instructions,
@@ -557,7 +563,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy
557 if (comment == NULL) 563 if (comment == NULL)
558 return 0; 564 return 0;
559 565
560 comment = ltrim(comment); 566 comment = skip_spaces(comment);
561 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); 567 comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
562 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 568 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
563 569
@@ -602,7 +608,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops
602 if (comment == NULL) 608 if (comment == NULL)
603 return 0; 609 return 0;
604 610
605 comment = ltrim(comment); 611 comment = skip_spaces(comment);
606 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); 612 comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
607 613
608 return 0; 614 return 0;
@@ -931,9 +937,8 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
931 if (sym == NULL) 937 if (sym == NULL)
932 return 0; 938 return 0;
933 src = symbol__hists(sym, evsel->evlist->nr_entries); 939 src = symbol__hists(sym, evsel->evlist->nr_entries);
934 if (src == NULL) 940 return (src) ? __symbol__inc_addr_samples(sym, map, src, evsel->idx,
935 return -ENOMEM; 941 addr, sample) : 0;
936 return __symbol__inc_addr_samples(sym, map, src, evsel->idx, addr, sample);
937} 942}
938 943
939static int symbol__account_cycles(u64 addr, u64 start, 944static int symbol__account_cycles(u64 addr, u64 start,
@@ -1099,7 +1104,7 @@ static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, str
1099 1104
1100static int disasm_line__parse(char *line, const char **namep, char **rawp) 1105static int disasm_line__parse(char *line, const char **namep, char **rawp)
1101{ 1106{
1102 char tmp, *name = ltrim(line); 1107 char tmp, *name = skip_spaces(line);
1103 1108
1104 if (name[0] == '\0') 1109 if (name[0] == '\0')
1105 return -1; 1110 return -1;
@@ -1117,7 +1122,7 @@ static int disasm_line__parse(char *line, const char **namep, char **rawp)
1117 goto out_free_name; 1122 goto out_free_name;
1118 1123
1119 (*rawp)[0] = tmp; 1124 (*rawp)[0] = tmp;
1120 *rawp = ltrim(*rawp); 1125 *rawp = skip_spaces(*rawp);
1121 1126
1122 return 0; 1127 return 0;
1123 1128
@@ -1496,7 +1501,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file,
1496 return -1; 1501 return -1;
1497 1502
1498 line_ip = -1; 1503 line_ip = -1;
1499 parsed_line = rtrim(line); 1504 parsed_line = strim(line);
1500 1505
1501 /* /filename:linenr ? Save line number and ignore. */ 1506 /* /filename:linenr ? Save line number and ignore. */
1502 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { 1507 if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
@@ -1504,7 +1509,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file,
1504 return 0; 1509 return 0;
1505 } 1510 }
1506 1511
1507 tmp = ltrim(parsed_line); 1512 tmp = skip_spaces(parsed_line);
1508 if (*tmp) { 1513 if (*tmp) {
1509 /* 1514 /*
1510 * Parse hexa addresses followed by ':' 1515 * Parse hexa addresses followed by ':'
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 66e82bd0683e..bc215fe0b4b4 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -51,7 +51,7 @@
51#include "arm-spe.h" 51#include "arm-spe.h"
52#include "s390-cpumsf.h" 52#include "s390-cpumsf.h"
53 53
54#include "sane_ctype.h" 54#include <linux/ctype.h>
55#include "symbol/kallsyms.h" 55#include "symbol/kallsyms.h"
56 56
57static bool auxtrace__dont_decode(struct perf_session *session) 57static bool auxtrace__dont_decode(struct perf_session *session)
@@ -1001,7 +1001,8 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
1001 } 1001 }
1002 1002
1003 if (!str) { 1003 if (!str) {
1004 itrace_synth_opts__set_default(synth_opts, false); 1004 itrace_synth_opts__set_default(synth_opts,
1005 synth_opts->default_no_sample);
1005 return 0; 1006 return 0;
1006 } 1007 }
1007 1008
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index d62f60eb5df4..e9b4c5edf78b 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -74,6 +74,8 @@ enum itrace_period_type {
74 * @period_type: 'instructions' events period type 74 * @period_type: 'instructions' events period type
75 * @initial_skip: skip N events at the beginning. 75 * @initial_skip: skip N events at the beginning.
76 * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all 76 * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
77 * @ptime_range: time intervals to trace or NULL
78 * @range_num: number of time intervals to trace
77 */ 79 */
78struct itrace_synth_opts { 80struct itrace_synth_opts {
79 bool set; 81 bool set;
@@ -98,6 +100,8 @@ struct itrace_synth_opts {
98 enum itrace_period_type period_type; 100 enum itrace_period_type period_type;
99 unsigned long initial_skip; 101 unsigned long initial_skip;
100 unsigned long *cpu_bitmap; 102 unsigned long *cpu_bitmap;
103 struct perf_time_interval *ptime_range;
104 int range_num;
101}; 105};
102 106
103/** 107/**
@@ -590,6 +594,21 @@ static inline void auxtrace__free(struct perf_session *session)
590" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \ 594" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
591" concatenate multiple options. Default is ibxwpe or cewp\n" 595" concatenate multiple options. Default is ibxwpe or cewp\n"
592 596
597static inline
598void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts,
599 struct perf_time_interval *ptime_range,
600 int range_num)
601{
602 opts->ptime_range = ptime_range;
603 opts->range_num = range_num;
604}
605
606static inline
607void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts)
608{
609 opts->ptime_range = NULL;
610 opts->range_num = 0;
611}
593 612
594#else 613#else
595 614
@@ -733,6 +752,21 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
733 752
734#define ITRACE_HELP "" 753#define ITRACE_HELP ""
735 754
755static inline
756void itrace_synth_opts__set_time_range(struct itrace_synth_opts *opts
757 __maybe_unused,
758 struct perf_time_interval *ptime_range
759 __maybe_unused,
760 int range_num __maybe_unused)
761{
762}
763
764static inline
765void itrace_synth_opts__clear_time_range(struct itrace_synth_opts *opts
766 __maybe_unused)
767{
768}
769
736#endif 770#endif
737 771
738#endif 772#endif
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 0c5517a8d0b7..89c6913dfc25 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -29,7 +29,7 @@
29#include "probe-file.h" 29#include "probe-file.h"
30#include "strlist.h" 30#include "strlist.h"
31 31
32#include "sane_ctype.h" 32#include <linux/ctype.h>
33 33
34static bool no_buildid_cache; 34static bool no_buildid_cache;
35 35
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 7e3c1b60120c..752cce853e51 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -24,7 +24,7 @@
24#include <unistd.h> 24#include <unistd.h>
25#include <linux/string.h> 25#include <linux/string.h>
26 26
27#include "sane_ctype.h" 27#include <linux/ctype.h>
28 28
29#define MAXNAME (256) 29#define MAXNAME (256)
30 30
@@ -739,11 +739,15 @@ int perf_config(config_fn_t fn, void *data)
739 if (ret < 0) { 739 if (ret < 0) {
740 pr_err("Error: wrong config key-value pair %s=%s\n", 740 pr_err("Error: wrong config key-value pair %s=%s\n",
741 key, value); 741 key, value);
742 break; 742 /*
743 * Can't be just a 'break', as perf_config_set__for_each_entry()
744 * expands to two nested for() loops.
745 */
746 goto out;
743 } 747 }
744 } 748 }
745 } 749 }
746 750out:
747 return ret; 751 return ret;
748} 752}
749 753
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 0b599229bc7e..0d8fbedf7bd5 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -10,7 +10,7 @@
10#include <linux/bitmap.h> 10#include <linux/bitmap.h>
11#include "asm/bug.h" 11#include "asm/bug.h"
12 12
13#include "sane_ctype.h" 13#include <linux/ctype.h>
14 14
15static int max_cpu_num; 15static int max_cpu_num;
16static int max_present_cpu_num; 16static int max_present_cpu_num;
@@ -373,6 +373,46 @@ int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
373 return 0; 373 return 0;
374} 374}
375 375
376int cpu_map__get_die_id(int cpu)
377{
378 int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
379
380 return ret ?: value;
381}
382
383int cpu_map__get_die(struct cpu_map *map, int idx, void *data)
384{
385 int cpu, die_id, s;
386
387 if (idx > map->nr)
388 return -1;
389
390 cpu = map->map[idx];
391
392 die_id = cpu_map__get_die_id(cpu);
393 /* There is no die_id on legacy system. */
394 if (die_id == -1)
395 die_id = 0;
396
397 s = cpu_map__get_socket(map, idx, data);
398 if (s == -1)
399 return -1;
400
401 /*
402 * Encode socket in bit range 15:8
403 * die_id is relative to socket, and
404 * we need a global id. So we combine
405 * socket + die id
406 */
407 if (WARN_ONCE(die_id >> 8, "The die id number is too big.\n"))
408 return -1;
409
410 if (WARN_ONCE(s >> 8, "The socket id number is too big.\n"))
411 return -1;
412
413 return (s << 8) | (die_id & 0xff);
414}
415
376int cpu_map__get_core_id(int cpu) 416int cpu_map__get_core_id(int cpu)
377{ 417{
378 int value, ret = cpu__get_topology_int(cpu, "core_id", &value); 418 int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
@@ -381,7 +421,7 @@ int cpu_map__get_core_id(int cpu)
381 421
382int cpu_map__get_core(struct cpu_map *map, int idx, void *data) 422int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
383{ 423{
384 int cpu, s; 424 int cpu, s_die;
385 425
386 if (idx > map->nr) 426 if (idx > map->nr)
387 return -1; 427 return -1;
@@ -390,17 +430,22 @@ int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
390 430
391 cpu = cpu_map__get_core_id(cpu); 431 cpu = cpu_map__get_core_id(cpu);
392 432
393 s = cpu_map__get_socket(map, idx, data); 433 /* s_die is the combination of socket + die id */
394 if (s == -1) 434 s_die = cpu_map__get_die(map, idx, data);
435 if (s_die == -1)
395 return -1; 436 return -1;
396 437
397 /* 438 /*
398 * encode socket in upper 16 bits 439 * encode socket in bit range 31:24
399 * core_id is relative to socket, and 440 * encode die id in bit range 23:16
441 * core_id is relative to socket and die,
400 * we need a global id. So we combine 442 * we need a global id. So we combine
401 * socket+ core id 443 * socket + die id + core id
402 */ 444 */
403 return (s << 16) | (cpu & 0xffff); 445 if (WARN_ONCE(cpu >> 16, "The core id number is too big.\n"))
446 return -1;
447
448 return (s_die << 16) | (cpu & 0xffff);
404} 449}
405 450
406int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp) 451int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
@@ -408,6 +453,11 @@ int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
408 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL); 453 return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
409} 454}
410 455
456int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep)
457{
458 return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
459}
460
411int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep) 461int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
412{ 462{
413 return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL); 463 return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index f00ce624b9f7..1265f0e33920 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -25,9 +25,12 @@ size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size);
25size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); 25size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
26int cpu_map__get_socket_id(int cpu); 26int cpu_map__get_socket_id(int cpu);
27int cpu_map__get_socket(struct cpu_map *map, int idx, void *data); 27int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
28int cpu_map__get_die_id(int cpu);
29int cpu_map__get_die(struct cpu_map *map, int idx, void *data);
28int cpu_map__get_core_id(int cpu); 30int cpu_map__get_core_id(int cpu);
29int cpu_map__get_core(struct cpu_map *map, int idx, void *data); 31int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
30int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); 32int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
33int cpu_map__build_die_map(struct cpu_map *cpus, struct cpu_map **diep);
31int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); 34int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
32const struct cpu_map *cpu_map__online(void); /* thread unsafe */ 35const struct cpu_map *cpu_map__online(void); /* thread unsafe */
33 36
@@ -43,7 +46,12 @@ static inline int cpu_map__socket(struct cpu_map *sock, int s)
43 46
44static inline int cpu_map__id_to_socket(int id) 47static inline int cpu_map__id_to_socket(int id)
45{ 48{
46 return id >> 16; 49 return id >> 24;
50}
51
52static inline int cpu_map__id_to_die(int id)
53{
54 return (id >> 16) & 0xff;
47} 55}
48 56
49static inline int cpu_map__id_to_cpu(int id) 57static inline int cpu_map__id_to_cpu(int id)
diff --git a/tools/perf/util/cputopo.c b/tools/perf/util/cputopo.c
index ece0710249d4..26e73a4bd4fe 100644
--- a/tools/perf/util/cputopo.c
+++ b/tools/perf/util/cputopo.c
@@ -1,5 +1,6 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <sys/param.h> 2#include <sys/param.h>
3#include <sys/utsname.h>
3#include <inttypes.h> 4#include <inttypes.h>
4#include <api/fs/fs.h> 5#include <api/fs/fs.h>
5 6
@@ -8,11 +9,14 @@
8#include "util.h" 9#include "util.h"
9#include "env.h" 10#include "env.h"
10 11
11
12#define CORE_SIB_FMT \ 12#define CORE_SIB_FMT \
13 "%s/devices/system/cpu/cpu%d/topology/core_siblings_list" 13 "%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
14#define DIE_SIB_FMT \
15 "%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
14#define THRD_SIB_FMT \ 16#define THRD_SIB_FMT \
15 "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list" 17 "%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
18#define THRD_SIB_FMT_NEW \
19 "%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
16#define NODE_ONLINE_FMT \ 20#define NODE_ONLINE_FMT \
17 "%s/devices/system/node/online" 21 "%s/devices/system/node/online"
18#define NODE_MEMINFO_FMT \ 22#define NODE_MEMINFO_FMT \
@@ -34,12 +38,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
34 sysfs__mountpoint(), cpu); 38 sysfs__mountpoint(), cpu);
35 fp = fopen(filename, "r"); 39 fp = fopen(filename, "r");
36 if (!fp) 40 if (!fp)
37 goto try_threads; 41 goto try_dies;
38 42
39 sret = getline(&buf, &len, fp); 43 sret = getline(&buf, &len, fp);
40 fclose(fp); 44 fclose(fp);
41 if (sret <= 0) 45 if (sret <= 0)
42 goto try_threads; 46 goto try_dies;
43 47
44 p = strchr(buf, '\n'); 48 p = strchr(buf, '\n');
45 if (p) 49 if (p)
@@ -57,9 +61,44 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
57 } 61 }
58 ret = 0; 62 ret = 0;
59 63
64try_dies:
65 if (!tp->die_siblings)
66 goto try_threads;
67
68 scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
69 sysfs__mountpoint(), cpu);
70 fp = fopen(filename, "r");
71 if (!fp)
72 goto try_threads;
73
74 sret = getline(&buf, &len, fp);
75 fclose(fp);
76 if (sret <= 0)
77 goto try_threads;
78
79 p = strchr(buf, '\n');
80 if (p)
81 *p = '\0';
82
83 for (i = 0; i < tp->die_sib; i++) {
84 if (!strcmp(buf, tp->die_siblings[i]))
85 break;
86 }
87 if (i == tp->die_sib) {
88 tp->die_siblings[i] = buf;
89 tp->die_sib++;
90 buf = NULL;
91 len = 0;
92 }
93 ret = 0;
94
60try_threads: 95try_threads:
61 scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT, 96 scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW,
62 sysfs__mountpoint(), cpu); 97 sysfs__mountpoint(), cpu);
98 if (access(filename, F_OK) == -1) {
99 scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
100 sysfs__mountpoint(), cpu);
101 }
63 fp = fopen(filename, "r"); 102 fp = fopen(filename, "r");
64 if (!fp) 103 if (!fp)
65 goto done; 104 goto done;
@@ -98,21 +137,46 @@ void cpu_topology__delete(struct cpu_topology *tp)
98 for (i = 0 ; i < tp->core_sib; i++) 137 for (i = 0 ; i < tp->core_sib; i++)
99 zfree(&tp->core_siblings[i]); 138 zfree(&tp->core_siblings[i]);
100 139
140 if (tp->die_sib) {
141 for (i = 0 ; i < tp->die_sib; i++)
142 zfree(&tp->die_siblings[i]);
143 }
144
101 for (i = 0 ; i < tp->thread_sib; i++) 145 for (i = 0 ; i < tp->thread_sib; i++)
102 zfree(&tp->thread_siblings[i]); 146 zfree(&tp->thread_siblings[i]);
103 147
104 free(tp); 148 free(tp);
105} 149}
106 150
151static bool has_die_topology(void)
152{
153 char filename[MAXPATHLEN];
154 struct utsname uts;
155
156 if (uname(&uts) < 0)
157 return false;
158
159 if (strncmp(uts.machine, "x86_64", 6))
160 return false;
161
162 scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
163 sysfs__mountpoint(), 0);
164 if (access(filename, F_OK) == -1)
165 return false;
166
167 return true;
168}
169
107struct cpu_topology *cpu_topology__new(void) 170struct cpu_topology *cpu_topology__new(void)
108{ 171{
109 struct cpu_topology *tp = NULL; 172 struct cpu_topology *tp = NULL;
110 void *addr; 173 void *addr;
111 u32 nr, i; 174 u32 nr, i, nr_addr;
112 size_t sz; 175 size_t sz;
113 long ncpus; 176 long ncpus;
114 int ret = -1; 177 int ret = -1;
115 struct cpu_map *map; 178 struct cpu_map *map;
179 bool has_die = has_die_topology();
116 180
117 ncpus = cpu__max_present_cpu(); 181 ncpus = cpu__max_present_cpu();
118 182
@@ -126,7 +190,11 @@ struct cpu_topology *cpu_topology__new(void)
126 nr = (u32)(ncpus & UINT_MAX); 190 nr = (u32)(ncpus & UINT_MAX);
127 191
128 sz = nr * sizeof(char *); 192 sz = nr * sizeof(char *);
129 addr = calloc(1, sizeof(*tp) + 2 * sz); 193 if (has_die)
194 nr_addr = 3;
195 else
196 nr_addr = 2;
197 addr = calloc(1, sizeof(*tp) + nr_addr * sz);
130 if (!addr) 198 if (!addr)
131 goto out_free; 199 goto out_free;
132 200
@@ -134,6 +202,10 @@ struct cpu_topology *cpu_topology__new(void)
134 addr += sizeof(*tp); 202 addr += sizeof(*tp);
135 tp->core_siblings = addr; 203 tp->core_siblings = addr;
136 addr += sz; 204 addr += sz;
205 if (has_die) {
206 tp->die_siblings = addr;
207 addr += sz;
208 }
137 tp->thread_siblings = addr; 209 tp->thread_siblings = addr;
138 210
139 for (i = 0; i < nr; i++) { 211 for (i = 0; i < nr; i++) {
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 47a97e71acdf..bae2f1d41856 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -7,8 +7,10 @@
7 7
8struct cpu_topology { 8struct cpu_topology {
9 u32 core_sib; 9 u32 core_sib;
10 u32 die_sib;
10 u32 thread_sib; 11 u32 thread_sib;
11 char **core_siblings; 12 char **core_siblings;
13 char **die_siblings;
12 char **thread_siblings; 14 char **thread_siblings;
13}; 15};
14 16
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 39fe21e1cf93..bb45e23018ee 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -18,8 +18,6 @@
18#include "intlist.h" 18#include "intlist.h"
19#include "util.h" 19#include "util.h"
20 20
21#define MAX_BUFFER 1024
22
23/* use raw logging */ 21/* use raw logging */
24#ifdef CS_DEBUG_RAW 22#ifdef CS_DEBUG_RAW
25#define CS_LOG_RAW_FRAMES 23#define CS_LOG_RAW_FRAMES
@@ -31,33 +29,26 @@
31#endif 29#endif
32#endif 30#endif
33 31
34#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
35
36struct cs_etm_decoder { 32struct cs_etm_decoder {
37 void *data; 33 void *data;
38 void (*packet_printer)(const char *msg); 34 void (*packet_printer)(const char *msg);
39 dcd_tree_handle_t dcd_tree; 35 dcd_tree_handle_t dcd_tree;
40 cs_etm_mem_cb_type mem_access; 36 cs_etm_mem_cb_type mem_access;
41 ocsd_datapath_resp_t prev_return; 37 ocsd_datapath_resp_t prev_return;
42 u32 packet_count;
43 u32 head;
44 u32 tail;
45 struct cs_etm_packet packet_buffer[MAX_BUFFER];
46}; 38};
47 39
48static u32 40static u32
49cs_etm_decoder__mem_access(const void *context, 41cs_etm_decoder__mem_access(const void *context,
50 const ocsd_vaddr_t address, 42 const ocsd_vaddr_t address,
51 const ocsd_mem_space_acc_t mem_space __maybe_unused, 43 const ocsd_mem_space_acc_t mem_space __maybe_unused,
44 const u8 trace_chan_id,
52 const u32 req_size, 45 const u32 req_size,
53 u8 *buffer) 46 u8 *buffer)
54{ 47{
55 struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; 48 struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
56 49
57 return decoder->mem_access(decoder->data, 50 return decoder->mem_access(decoder->data, trace_chan_id,
58 address, 51 address, req_size, buffer);
59 req_size,
60 buffer);
61} 52}
62 53
63int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, 54int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
@@ -66,9 +57,10 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
66{ 57{
67 decoder->mem_access = cb_func; 58 decoder->mem_access = cb_func;
68 59
69 if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, 60 if (ocsd_dt_add_callback_trcid_mem_acc(decoder->dcd_tree, start, end,
70 OCSD_MEM_SPACE_ANY, 61 OCSD_MEM_SPACE_ANY,
71 cs_etm_decoder__mem_access, decoder)) 62 cs_etm_decoder__mem_access,
63 decoder))
72 return -1; 64 return -1;
73 65
74 return 0; 66 return 0;
@@ -88,14 +80,14 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
88 return 0; 80 return 0;
89} 81}
90 82
91int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, 83int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
92 struct cs_etm_packet *packet) 84 struct cs_etm_packet *packet)
93{ 85{
94 if (!decoder || !packet) 86 if (!packet_queue || !packet)
95 return -EINVAL; 87 return -EINVAL;
96 88
97 /* Nothing to do, might as well just return */ 89 /* Nothing to do, might as well just return */
98 if (decoder->packet_count == 0) 90 if (packet_queue->packet_count == 0)
99 return 0; 91 return 0;
100 /* 92 /*
101 * The queueing process in function cs_etm_decoder__buffer_packet() 93 * The queueing process in function cs_etm_decoder__buffer_packet()
@@ -106,11 +98,12 @@ int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
106 * value. Otherwise the first element of the packet queue is not 98 * value. Otherwise the first element of the packet queue is not
107 * used. 99 * used.
108 */ 100 */
109 decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); 101 packet_queue->head = (packet_queue->head + 1) &
102 (CS_ETM_PACKET_MAX_BUFFER - 1);
110 103
111 *packet = decoder->packet_buffer[decoder->head]; 104 *packet = packet_queue->packet_buffer[packet_queue->head];
112 105
113 decoder->packet_count--; 106 packet_queue->packet_count--;
114 107
115 return 1; 108 return 1;
116} 109}
@@ -276,84 +269,130 @@ cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
276 trace_config); 269 trace_config);
277} 270}
278 271
279static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) 272static ocsd_datapath_resp_t
273cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
274 struct cs_etm_packet_queue *packet_queue,
275 const uint8_t trace_chan_id)
280{ 276{
281 int i; 277 /* No timestamp packet has been received, nothing to do */
282 278 if (!packet_queue->timestamp)
283 decoder->head = 0; 279 return OCSD_RESP_CONT;
284 decoder->tail = 0; 280
285 decoder->packet_count = 0; 281 packet_queue->timestamp = packet_queue->next_timestamp;
286 for (i = 0; i < MAX_BUFFER; i++) { 282
287 decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN; 283 /* Estimate the timestamp for the next range packet */
288 decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR; 284 packet_queue->next_timestamp += packet_queue->instr_count;
289 decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR; 285 packet_queue->instr_count = 0;
290 decoder->packet_buffer[i].instr_count = 0; 286
291 decoder->packet_buffer[i].last_instr_taken_branch = false; 287 /* Tell the front end which traceid_queue needs attention */
292 decoder->packet_buffer[i].last_instr_size = 0; 288 cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
293 decoder->packet_buffer[i].last_instr_type = 0; 289
294 decoder->packet_buffer[i].last_instr_subtype = 0; 290 return OCSD_RESP_WAIT;
295 decoder->packet_buffer[i].last_instr_cond = 0; 291}
296 decoder->packet_buffer[i].flags = 0; 292
297 decoder->packet_buffer[i].exception_number = UINT32_MAX; 293static ocsd_datapath_resp_t
298 decoder->packet_buffer[i].trace_chan_id = UINT8_MAX; 294cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
299 decoder->packet_buffer[i].cpu = INT_MIN; 295 const ocsd_generic_trace_elem *elem,
296 const uint8_t trace_chan_id)
297{
298 struct cs_etm_packet_queue *packet_queue;
299
300 /* First get the packet queue for this traceID */
301 packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
302 if (!packet_queue)
303 return OCSD_RESP_FATAL_SYS_ERR;
304
305 /*
306 * We've seen a timestamp packet before - simply record the new value.
307 * Function do_soft_timestamp() will report the value to the front end,
308 * hence asking the decoder to keep decoding rather than stopping.
309 */
310 if (packet_queue->timestamp) {
311 packet_queue->next_timestamp = elem->timestamp;
312 return OCSD_RESP_CONT;
300 } 313 }
314
315 /*
316 * This is the first timestamp we've seen since the beginning of traces
317 * or a discontinuity. Since timestamps packets are generated *after*
318 * range packets have been generated, we need to estimate the time at
319 * which instructions started by substracting the number of instructions
320 * executed to the timestamp.
321 */
322 packet_queue->timestamp = elem->timestamp - packet_queue->instr_count;
323 packet_queue->next_timestamp = elem->timestamp;
324 packet_queue->instr_count = 0;
325
326 /* Tell the front end which traceid_queue needs attention */
327 cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
328
329 /* Halt processing until we are being told to proceed */
330 return OCSD_RESP_WAIT;
331}
332
333static void
334cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
335{
336 packet_queue->timestamp = 0;
337 packet_queue->next_timestamp = 0;
338 packet_queue->instr_count = 0;
301} 339}
302 340
303static ocsd_datapath_resp_t 341static ocsd_datapath_resp_t
304cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, 342cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
305 const u8 trace_chan_id, 343 const u8 trace_chan_id,
306 enum cs_etm_sample_type sample_type) 344 enum cs_etm_sample_type sample_type)
307{ 345{
308 u32 et = 0; 346 u32 et = 0;
309 int cpu; 347 int cpu;
310 348
311 if (decoder->packet_count >= MAX_BUFFER - 1) 349 if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
312 return OCSD_RESP_FATAL_SYS_ERR; 350 return OCSD_RESP_FATAL_SYS_ERR;
313 351
314 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0) 352 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
315 return OCSD_RESP_FATAL_SYS_ERR; 353 return OCSD_RESP_FATAL_SYS_ERR;
316 354
317 et = decoder->tail; 355 et = packet_queue->tail;
318 et = (et + 1) & (MAX_BUFFER - 1); 356 et = (et + 1) & (CS_ETM_PACKET_MAX_BUFFER - 1);
319 decoder->tail = et; 357 packet_queue->tail = et;
320 decoder->packet_count++; 358 packet_queue->packet_count++;
321 359
322 decoder->packet_buffer[et].sample_type = sample_type; 360 packet_queue->packet_buffer[et].sample_type = sample_type;
323 decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN; 361 packet_queue->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
324 decoder->packet_buffer[et].cpu = cpu; 362 packet_queue->packet_buffer[et].cpu = cpu;
325 decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR; 363 packet_queue->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
326 decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR; 364 packet_queue->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
327 decoder->packet_buffer[et].instr_count = 0; 365 packet_queue->packet_buffer[et].instr_count = 0;
328 decoder->packet_buffer[et].last_instr_taken_branch = false; 366 packet_queue->packet_buffer[et].last_instr_taken_branch = false;
329 decoder->packet_buffer[et].last_instr_size = 0; 367 packet_queue->packet_buffer[et].last_instr_size = 0;
330 decoder->packet_buffer[et].last_instr_type = 0; 368 packet_queue->packet_buffer[et].last_instr_type = 0;
331 decoder->packet_buffer[et].last_instr_subtype = 0; 369 packet_queue->packet_buffer[et].last_instr_subtype = 0;
332 decoder->packet_buffer[et].last_instr_cond = 0; 370 packet_queue->packet_buffer[et].last_instr_cond = 0;
333 decoder->packet_buffer[et].flags = 0; 371 packet_queue->packet_buffer[et].flags = 0;
334 decoder->packet_buffer[et].exception_number = UINT32_MAX; 372 packet_queue->packet_buffer[et].exception_number = UINT32_MAX;
335 decoder->packet_buffer[et].trace_chan_id = trace_chan_id; 373 packet_queue->packet_buffer[et].trace_chan_id = trace_chan_id;
336 374
337 if (decoder->packet_count == MAX_BUFFER - 1) 375 if (packet_queue->packet_count == CS_ETM_PACKET_MAX_BUFFER - 1)
338 return OCSD_RESP_WAIT; 376 return OCSD_RESP_WAIT;
339 377
340 return OCSD_RESP_CONT; 378 return OCSD_RESP_CONT;
341} 379}
342 380
343static ocsd_datapath_resp_t 381static ocsd_datapath_resp_t
344cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, 382cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
383 struct cs_etm_packet_queue *packet_queue,
345 const ocsd_generic_trace_elem *elem, 384 const ocsd_generic_trace_elem *elem,
346 const uint8_t trace_chan_id) 385 const uint8_t trace_chan_id)
347{ 386{
348 int ret = 0; 387 int ret = 0;
349 struct cs_etm_packet *packet; 388 struct cs_etm_packet *packet;
350 389
351 ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 390 ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
352 CS_ETM_RANGE); 391 CS_ETM_RANGE);
353 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) 392 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
354 return ret; 393 return ret;
355 394
356 packet = &decoder->packet_buffer[decoder->tail]; 395 packet = &packet_queue->packet_buffer[packet_queue->tail];
357 396
358 switch (elem->isa) { 397 switch (elem->isa) {
359 case ocsd_isa_aarch64: 398 case ocsd_isa_aarch64:
@@ -396,43 +435,90 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
396 435
397 packet->last_instr_size = elem->last_instr_sz; 436 packet->last_instr_size = elem->last_instr_sz;
398 437
438 /* per-thread scenario, no need to generate a timestamp */
439 if (cs_etm__etmq_is_timeless(etmq))
440 goto out;
441
442 /*
443 * The packet queue is full and we haven't seen a timestamp (had we
444 * seen one the packet queue wouldn't be full). Let the front end
445 * deal with it.
446 */
447 if (ret == OCSD_RESP_WAIT)
448 goto out;
449
450 packet_queue->instr_count += elem->num_instr_range;
451 /* Tell the front end we have a new timestamp to process */
452 ret = cs_etm_decoder__do_soft_timestamp(etmq, packet_queue,
453 trace_chan_id);
454out:
399 return ret; 455 return ret;
400} 456}
401 457
402static ocsd_datapath_resp_t 458static ocsd_datapath_resp_t
403cs_etm_decoder__buffer_discontinuity(struct cs_etm_decoder *decoder, 459cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
404 const uint8_t trace_chan_id) 460 const uint8_t trace_chan_id)
405{ 461{
406 return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 462 /*
463 * Something happened and who knows when we'll get new traces so
464 * reset time statistics.
465 */
466 cs_etm_decoder__reset_timestamp(queue);
467 return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
407 CS_ETM_DISCONTINUITY); 468 CS_ETM_DISCONTINUITY);
408} 469}
409 470
410static ocsd_datapath_resp_t 471static ocsd_datapath_resp_t
411cs_etm_decoder__buffer_exception(struct cs_etm_decoder *decoder, 472cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
412 const ocsd_generic_trace_elem *elem, 473 const ocsd_generic_trace_elem *elem,
413 const uint8_t trace_chan_id) 474 const uint8_t trace_chan_id)
414{ int ret = 0; 475{ int ret = 0;
415 struct cs_etm_packet *packet; 476 struct cs_etm_packet *packet;
416 477
417 ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 478 ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
418 CS_ETM_EXCEPTION); 479 CS_ETM_EXCEPTION);
419 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) 480 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
420 return ret; 481 return ret;
421 482
422 packet = &decoder->packet_buffer[decoder->tail]; 483 packet = &queue->packet_buffer[queue->tail];
423 packet->exception_number = elem->exception_number; 484 packet->exception_number = elem->exception_number;
424 485
425 return ret; 486 return ret;
426} 487}
427 488
428static ocsd_datapath_resp_t 489static ocsd_datapath_resp_t
429cs_etm_decoder__buffer_exception_ret(struct cs_etm_decoder *decoder, 490cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
430 const uint8_t trace_chan_id) 491 const uint8_t trace_chan_id)
431{ 492{
432 return cs_etm_decoder__buffer_packet(decoder, trace_chan_id, 493 return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
433 CS_ETM_EXCEPTION_RET); 494 CS_ETM_EXCEPTION_RET);
434} 495}
435 496
497static ocsd_datapath_resp_t
498cs_etm_decoder__set_tid(struct cs_etm_queue *etmq,
499 struct cs_etm_packet_queue *packet_queue,
500 const ocsd_generic_trace_elem *elem,
501 const uint8_t trace_chan_id)
502{
503 pid_t tid;
504
505 /* Ignore PE_CONTEXT packets that don't have a valid contextID */
506 if (!elem->context.ctxt_id_valid)
507 return OCSD_RESP_CONT;
508
509 tid = elem->context.context_id;
510 if (cs_etm__etmq_set_tid(etmq, tid, trace_chan_id))
511 return OCSD_RESP_FATAL_SYS_ERR;
512
513 /*
514 * A timestamp is generated after a PE_CONTEXT element so make sure
515 * to rely on that coming one.
516 */
517 cs_etm_decoder__reset_timestamp(packet_queue);
518
519 return OCSD_RESP_CONT;
520}
521
436static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( 522static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
437 const void *context, 523 const void *context,
438 const ocsd_trc_index_t indx __maybe_unused, 524 const ocsd_trc_index_t indx __maybe_unused,
@@ -441,6 +527,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
441{ 527{
442 ocsd_datapath_resp_t resp = OCSD_RESP_CONT; 528 ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
443 struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; 529 struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
530 struct cs_etm_queue *etmq = decoder->data;
531 struct cs_etm_packet_queue *packet_queue;
532
533 /* First get the packet queue for this traceID */
534 packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
535 if (!packet_queue)
536 return OCSD_RESP_FATAL_SYS_ERR;
444 537
445 switch (elem->elem_type) { 538 switch (elem->elem_type) {
446 case OCSD_GEN_TRC_ELEM_UNKNOWN: 539 case OCSD_GEN_TRC_ELEM_UNKNOWN:
@@ -448,24 +541,30 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
448 case OCSD_GEN_TRC_ELEM_EO_TRACE: 541 case OCSD_GEN_TRC_ELEM_EO_TRACE:
449 case OCSD_GEN_TRC_ELEM_NO_SYNC: 542 case OCSD_GEN_TRC_ELEM_NO_SYNC:
450 case OCSD_GEN_TRC_ELEM_TRACE_ON: 543 case OCSD_GEN_TRC_ELEM_TRACE_ON:
451 resp = cs_etm_decoder__buffer_discontinuity(decoder, 544 resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
452 trace_chan_id); 545 trace_chan_id);
453 break; 546 break;
454 case OCSD_GEN_TRC_ELEM_INSTR_RANGE: 547 case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
455 resp = cs_etm_decoder__buffer_range(decoder, elem, 548 resp = cs_etm_decoder__buffer_range(etmq, packet_queue, elem,
456 trace_chan_id); 549 trace_chan_id);
457 break; 550 break;
458 case OCSD_GEN_TRC_ELEM_EXCEPTION: 551 case OCSD_GEN_TRC_ELEM_EXCEPTION:
459 resp = cs_etm_decoder__buffer_exception(decoder, elem, 552 resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
460 trace_chan_id); 553 trace_chan_id);
461 break; 554 break;
462 case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: 555 case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
463 resp = cs_etm_decoder__buffer_exception_ret(decoder, 556 resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
464 trace_chan_id); 557 trace_chan_id);
465 break; 558 break;
559 case OCSD_GEN_TRC_ELEM_TIMESTAMP:
560 resp = cs_etm_decoder__do_hard_timestamp(etmq, elem,
561 trace_chan_id);
562 break;
466 case OCSD_GEN_TRC_ELEM_PE_CONTEXT: 563 case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
564 resp = cs_etm_decoder__set_tid(etmq, packet_queue,
565 elem, trace_chan_id);
566 break;
467 case OCSD_GEN_TRC_ELEM_ADDR_NACC: 567 case OCSD_GEN_TRC_ELEM_ADDR_NACC:
468 case OCSD_GEN_TRC_ELEM_TIMESTAMP:
469 case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: 568 case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
470 case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: 569 case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
471 case OCSD_GEN_TRC_ELEM_EVENT: 570 case OCSD_GEN_TRC_ELEM_EVENT:
@@ -554,7 +653,6 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
554 653
555 decoder->data = d_params->data; 654 decoder->data = d_params->data;
556 decoder->prev_return = OCSD_RESP_CONT; 655 decoder->prev_return = OCSD_RESP_CONT;
557 cs_etm_decoder__clear_buffer(decoder);
558 format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : 656 format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED :
559 OCSD_TRC_SRC_SINGLE); 657 OCSD_TRC_SRC_SINGLE);
560 flags = 0; 658 flags = 0;
@@ -577,7 +675,7 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
577 /* init library print logging support */ 675 /* init library print logging support */
578 ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); 676 ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder);
579 if (ret != 0) 677 if (ret != 0)
580 goto err_free_decoder_tree; 678 goto err_free_decoder;
581 679
582 /* init raw frame logging if required */ 680 /* init raw frame logging if required */
583 cs_etm_decoder__init_raw_frame_logging(d_params, decoder); 681 cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
@@ -587,15 +685,13 @@ cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
587 &t_params[i], 685 &t_params[i],
588 decoder); 686 decoder);
589 if (ret != 0) 687 if (ret != 0)
590 goto err_free_decoder_tree; 688 goto err_free_decoder;
591 } 689 }
592 690
593 return decoder; 691 return decoder;
594 692
595err_free_decoder_tree:
596 ocsd_destroy_dcd_tree(decoder->dcd_tree);
597err_free_decoder: 693err_free_decoder:
598 free(decoder); 694 cs_etm_decoder__free(decoder);
599 return NULL; 695 return NULL;
600} 696}
601 697
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 3ab11dfa92ae..11f3391d06f2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -14,43 +14,12 @@
14#include <stdio.h> 14#include <stdio.h>
15 15
16struct cs_etm_decoder; 16struct cs_etm_decoder;
17 17struct cs_etm_packet;
18enum cs_etm_sample_type { 18struct cs_etm_packet_queue;
19 CS_ETM_EMPTY,
20 CS_ETM_RANGE,
21 CS_ETM_DISCONTINUITY,
22 CS_ETM_EXCEPTION,
23 CS_ETM_EXCEPTION_RET,
24};
25
26enum cs_etm_isa {
27 CS_ETM_ISA_UNKNOWN,
28 CS_ETM_ISA_A64,
29 CS_ETM_ISA_A32,
30 CS_ETM_ISA_T32,
31};
32
33struct cs_etm_packet {
34 enum cs_etm_sample_type sample_type;
35 enum cs_etm_isa isa;
36 u64 start_addr;
37 u64 end_addr;
38 u32 instr_count;
39 u32 last_instr_type;
40 u32 last_instr_subtype;
41 u32 flags;
42 u32 exception_number;
43 u8 last_instr_cond;
44 u8 last_instr_taken_branch;
45 u8 last_instr_size;
46 u8 trace_chan_id;
47 int cpu;
48};
49 19
50struct cs_etm_queue; 20struct cs_etm_queue;
51 21
52typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, 22typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u8, u64, size_t, u8 *);
53 size_t, u8 *);
54 23
55struct cs_etmv3_trace_params { 24struct cs_etmv3_trace_params {
56 u32 reg_ctrl; 25 u32 reg_ctrl;
@@ -119,7 +88,7 @@ int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
119 u64 start, u64 end, 88 u64 start, u64 end,
120 cs_etm_mem_cb_type cb_func); 89 cs_etm_mem_cb_type cb_func);
121 90
122int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, 91int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
123 struct cs_etm_packet *packet); 92 struct cs_etm_packet *packet);
124 93
125int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); 94int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index de488b43f440..0c7776b51045 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -29,6 +29,7 @@
29#include "thread.h" 29#include "thread.h"
30#include "thread_map.h" 30#include "thread_map.h"
31#include "thread-stack.h" 31#include "thread-stack.h"
32#include <tools/libc_compat.h>
32#include "util.h" 33#include "util.h"
33 34
34#define MAX_TIMESTAMP (~0ULL) 35#define MAX_TIMESTAMP (~0ULL)
@@ -60,33 +61,55 @@ struct cs_etm_auxtrace {
60 unsigned int pmu_type; 61 unsigned int pmu_type;
61}; 62};
62 63
63struct cs_etm_queue { 64struct cs_etm_traceid_queue {
64 struct cs_etm_auxtrace *etm; 65 u8 trace_chan_id;
65 struct thread *thread;
66 struct cs_etm_decoder *decoder;
67 struct auxtrace_buffer *buffer;
68 union perf_event *event_buf;
69 unsigned int queue_nr;
70 pid_t pid, tid; 66 pid_t pid, tid;
71 int cpu;
72 u64 offset;
73 u64 period_instructions; 67 u64 period_instructions;
68 size_t last_branch_pos;
69 union perf_event *event_buf;
70 struct thread *thread;
74 struct branch_stack *last_branch; 71 struct branch_stack *last_branch;
75 struct branch_stack *last_branch_rb; 72 struct branch_stack *last_branch_rb;
76 size_t last_branch_pos;
77 struct cs_etm_packet *prev_packet; 73 struct cs_etm_packet *prev_packet;
78 struct cs_etm_packet *packet; 74 struct cs_etm_packet *packet;
75 struct cs_etm_packet_queue packet_queue;
76};
77
78struct cs_etm_queue {
79 struct cs_etm_auxtrace *etm;
80 struct cs_etm_decoder *decoder;
81 struct auxtrace_buffer *buffer;
82 unsigned int queue_nr;
83 u8 pending_timestamp;
84 u64 offset;
79 const unsigned char *buf; 85 const unsigned char *buf;
80 size_t buf_len, buf_used; 86 size_t buf_len, buf_used;
87 /* Conversion between traceID and index in traceid_queues array */
88 struct intlist *traceid_queues_list;
89 struct cs_etm_traceid_queue **traceid_queues;
81}; 90};
82 91
83static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 92static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
93static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
84static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, 94static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
85 pid_t tid); 95 pid_t tid);
96static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
97static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
86 98
87/* PTMs ETMIDR [11:8] set to b0011 */ 99/* PTMs ETMIDR [11:8] set to b0011 */
88#define ETMIDR_PTM_VERSION 0x00000300 100#define ETMIDR_PTM_VERSION 0x00000300
89 101
102/*
103 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
104 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
105 * encode the etm queue number as the upper 16 bit and the channel as
106 * the lower 16 bit.
107 */
108#define TO_CS_QUEUE_NR(queue_nr, trace_id_chan) \
109 (queue_nr << 16 | trace_chan_id)
110#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
111#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
112
90static u32 cs_etm__get_v7_protocol_version(u32 etmidr) 113static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
91{ 114{
92 etmidr &= ETMIDR_PTM_VERSION; 115 etmidr &= ETMIDR_PTM_VERSION;
@@ -125,6 +148,216 @@ int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
125 return 0; 148 return 0;
126} 149}
127 150
151void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
152 u8 trace_chan_id)
153{
154 /*
155 * Wnen a timestamp packet is encountered the backend code
156 * is stopped so that the front end has time to process packets
157 * that were accumulated in the traceID queue. Since there can
158 * be more than one channel per cs_etm_queue, we need to specify
159 * what traceID queue needs servicing.
160 */
161 etmq->pending_timestamp = trace_chan_id;
162}
163
164static u64 cs_etm__etmq_get_timestamp(struct cs_etm_queue *etmq,
165 u8 *trace_chan_id)
166{
167 struct cs_etm_packet_queue *packet_queue;
168
169 if (!etmq->pending_timestamp)
170 return 0;
171
172 if (trace_chan_id)
173 *trace_chan_id = etmq->pending_timestamp;
174
175 packet_queue = cs_etm__etmq_get_packet_queue(etmq,
176 etmq->pending_timestamp);
177 if (!packet_queue)
178 return 0;
179
180 /* Acknowledge pending status */
181 etmq->pending_timestamp = 0;
182
183 /* See function cs_etm_decoder__do_{hard|soft}_timestamp() */
184 return packet_queue->timestamp;
185}
186
187static void cs_etm__clear_packet_queue(struct cs_etm_packet_queue *queue)
188{
189 int i;
190
191 queue->head = 0;
192 queue->tail = 0;
193 queue->packet_count = 0;
194 for (i = 0; i < CS_ETM_PACKET_MAX_BUFFER; i++) {
195 queue->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
196 queue->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
197 queue->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
198 queue->packet_buffer[i].instr_count = 0;
199 queue->packet_buffer[i].last_instr_taken_branch = false;
200 queue->packet_buffer[i].last_instr_size = 0;
201 queue->packet_buffer[i].last_instr_type = 0;
202 queue->packet_buffer[i].last_instr_subtype = 0;
203 queue->packet_buffer[i].last_instr_cond = 0;
204 queue->packet_buffer[i].flags = 0;
205 queue->packet_buffer[i].exception_number = UINT32_MAX;
206 queue->packet_buffer[i].trace_chan_id = UINT8_MAX;
207 queue->packet_buffer[i].cpu = INT_MIN;
208 }
209}
210
211static void cs_etm__clear_all_packet_queues(struct cs_etm_queue *etmq)
212{
213 int idx;
214 struct int_node *inode;
215 struct cs_etm_traceid_queue *tidq;
216 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
217
218 intlist__for_each_entry(inode, traceid_queues_list) {
219 idx = (int)(intptr_t)inode->priv;
220 tidq = etmq->traceid_queues[idx];
221 cs_etm__clear_packet_queue(&tidq->packet_queue);
222 }
223}
224
225static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
226 struct cs_etm_traceid_queue *tidq,
227 u8 trace_chan_id)
228{
229 int rc = -ENOMEM;
230 struct auxtrace_queue *queue;
231 struct cs_etm_auxtrace *etm = etmq->etm;
232
233 cs_etm__clear_packet_queue(&tidq->packet_queue);
234
235 queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
236 tidq->tid = queue->tid;
237 tidq->pid = -1;
238 tidq->trace_chan_id = trace_chan_id;
239
240 tidq->packet = zalloc(sizeof(struct cs_etm_packet));
241 if (!tidq->packet)
242 goto out;
243
244 tidq->prev_packet = zalloc(sizeof(struct cs_etm_packet));
245 if (!tidq->prev_packet)
246 goto out_free;
247
248 if (etm->synth_opts.last_branch) {
249 size_t sz = sizeof(struct branch_stack);
250
251 sz += etm->synth_opts.last_branch_sz *
252 sizeof(struct branch_entry);
253 tidq->last_branch = zalloc(sz);
254 if (!tidq->last_branch)
255 goto out_free;
256 tidq->last_branch_rb = zalloc(sz);
257 if (!tidq->last_branch_rb)
258 goto out_free;
259 }
260
261 tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
262 if (!tidq->event_buf)
263 goto out_free;
264
265 return 0;
266
267out_free:
268 zfree(&tidq->last_branch_rb);
269 zfree(&tidq->last_branch);
270 zfree(&tidq->prev_packet);
271 zfree(&tidq->packet);
272out:
273 return rc;
274}
275
276static struct cs_etm_traceid_queue
277*cs_etm__etmq_get_traceid_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
278{
279 int idx;
280 struct int_node *inode;
281 struct intlist *traceid_queues_list;
282 struct cs_etm_traceid_queue *tidq, **traceid_queues;
283 struct cs_etm_auxtrace *etm = etmq->etm;
284
285 if (etm->timeless_decoding)
286 trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
287
288 traceid_queues_list = etmq->traceid_queues_list;
289
290 /*
291 * Check if the traceid_queue exist for this traceID by looking
292 * in the queue list.
293 */
294 inode = intlist__find(traceid_queues_list, trace_chan_id);
295 if (inode) {
296 idx = (int)(intptr_t)inode->priv;
297 return etmq->traceid_queues[idx];
298 }
299
300 /* We couldn't find a traceid_queue for this traceID, allocate one */
301 tidq = malloc(sizeof(*tidq));
302 if (!tidq)
303 return NULL;
304
305 memset(tidq, 0, sizeof(*tidq));
306
307 /* Get a valid index for the new traceid_queue */
308 idx = intlist__nr_entries(traceid_queues_list);
309 /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
310 inode = intlist__findnew(traceid_queues_list, trace_chan_id);
311 if (!inode)
312 goto out_free;
313
314 /* Associate this traceID with this index */
315 inode->priv = (void *)(intptr_t)idx;
316
317 if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id))
318 goto out_free;
319
320 /* Grow the traceid_queues array by one unit */
321 traceid_queues = etmq->traceid_queues;
322 traceid_queues = reallocarray(traceid_queues,
323 idx + 1,
324 sizeof(*traceid_queues));
325
326 /*
327 * On failure reallocarray() returns NULL and the original block of
328 * memory is left untouched.
329 */
330 if (!traceid_queues)
331 goto out_free;
332
333 traceid_queues[idx] = tidq;
334 etmq->traceid_queues = traceid_queues;
335
336 return etmq->traceid_queues[idx];
337
338out_free:
339 /*
340 * Function intlist__remove() removes the inode from the list
341 * and delete the memory associated to it.
342 */
343 intlist__remove(traceid_queues_list, inode);
344 free(tidq);
345
346 return NULL;
347}
348
349struct cs_etm_packet_queue
350*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id)
351{
352 struct cs_etm_traceid_queue *tidq;
353
354 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
355 if (tidq)
356 return &tidq->packet_queue;
357
358 return NULL;
359}
360
128static void cs_etm__packet_dump(const char *pkt_string) 361static void cs_etm__packet_dump(const char *pkt_string)
129{ 362{
130 const char *color = PERF_COLOR_BLUE; 363 const char *color = PERF_COLOR_BLUE;
@@ -276,15 +509,53 @@ static int cs_etm__flush_events(struct perf_session *session,
276 if (!tool->ordered_events) 509 if (!tool->ordered_events)
277 return -EINVAL; 510 return -EINVAL;
278 511
279 if (!etm->timeless_decoding)
280 return -EINVAL;
281
282 ret = cs_etm__update_queues(etm); 512 ret = cs_etm__update_queues(etm);
283 513
284 if (ret < 0) 514 if (ret < 0)
285 return ret; 515 return ret;
286 516
287 return cs_etm__process_timeless_queues(etm, -1); 517 if (etm->timeless_decoding)
518 return cs_etm__process_timeless_queues(etm, -1);
519
520 return cs_etm__process_queues(etm);
521}
522
523static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
524{
525 int idx;
526 uintptr_t priv;
527 struct int_node *inode, *tmp;
528 struct cs_etm_traceid_queue *tidq;
529 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
530
531 intlist__for_each_entry_safe(inode, tmp, traceid_queues_list) {
532 priv = (uintptr_t)inode->priv;
533 idx = priv;
534
535 /* Free this traceid_queue from the array */
536 tidq = etmq->traceid_queues[idx];
537 thread__zput(tidq->thread);
538 zfree(&tidq->event_buf);
539 zfree(&tidq->last_branch);
540 zfree(&tidq->last_branch_rb);
541 zfree(&tidq->prev_packet);
542 zfree(&tidq->packet);
543 zfree(&tidq);
544
545 /*
546 * Function intlist__remove() removes the inode from the list
547 * and delete the memory associated to it.
548 */
549 intlist__remove(traceid_queues_list, inode);
550 }
551
552 /* Then the RB tree itself */
553 intlist__delete(traceid_queues_list);
554 etmq->traceid_queues_list = NULL;
555
556 /* finally free the traceid_queues array */
557 free(etmq->traceid_queues);
558 etmq->traceid_queues = NULL;
288} 559}
289 560
290static void cs_etm__free_queue(void *priv) 561static void cs_etm__free_queue(void *priv)
@@ -294,13 +565,8 @@ static void cs_etm__free_queue(void *priv)
294 if (!etmq) 565 if (!etmq)
295 return; 566 return;
296 567
297 thread__zput(etmq->thread);
298 cs_etm_decoder__free(etmq->decoder); 568 cs_etm_decoder__free(etmq->decoder);
299 zfree(&etmq->event_buf); 569 cs_etm__free_traceid_queues(etmq);
300 zfree(&etmq->last_branch);
301 zfree(&etmq->last_branch_rb);
302 zfree(&etmq->prev_packet);
303 zfree(&etmq->packet);
304 free(etmq); 570 free(etmq);
305} 571}
306 572
@@ -365,23 +631,27 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
365 } 631 }
366} 632}
367 633
368static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, 634static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
369 size_t size, u8 *buffer) 635 u64 address, size_t size, u8 *buffer)
370{ 636{
371 u8 cpumode; 637 u8 cpumode;
372 u64 offset; 638 u64 offset;
373 int len; 639 int len;
374 struct thread *thread; 640 struct thread *thread;
375 struct machine *machine; 641 struct machine *machine;
376 struct addr_location al; 642 struct addr_location al;
643 struct cs_etm_traceid_queue *tidq;
377 644
378 if (!etmq) 645 if (!etmq)
379 return 0; 646 return 0;
380 647
381 machine = etmq->etm->machine; 648 machine = etmq->etm->machine;
382 cpumode = cs_etm__cpu_mode(etmq, address); 649 cpumode = cs_etm__cpu_mode(etmq, address);
650 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
651 if (!tidq)
652 return 0;
383 653
384 thread = etmq->thread; 654 thread = tidq->thread;
385 if (!thread) { 655 if (!thread) {
386 if (cpumode != PERF_RECORD_MISC_KERNEL) 656 if (cpumode != PERF_RECORD_MISC_KERNEL)
387 return 0; 657 return 0;
@@ -412,35 +682,13 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
412 struct cs_etm_decoder_params d_params; 682 struct cs_etm_decoder_params d_params;
413 struct cs_etm_trace_params *t_params = NULL; 683 struct cs_etm_trace_params *t_params = NULL;
414 struct cs_etm_queue *etmq; 684 struct cs_etm_queue *etmq;
415 size_t szp = sizeof(struct cs_etm_packet);
416 685
417 etmq = zalloc(sizeof(*etmq)); 686 etmq = zalloc(sizeof(*etmq));
418 if (!etmq) 687 if (!etmq)
419 return NULL; 688 return NULL;
420 689
421 etmq->packet = zalloc(szp); 690 etmq->traceid_queues_list = intlist__new(NULL);
422 if (!etmq->packet) 691 if (!etmq->traceid_queues_list)
423 goto out_free;
424
425 etmq->prev_packet = zalloc(szp);
426 if (!etmq->prev_packet)
427 goto out_free;
428
429 if (etm->synth_opts.last_branch) {
430 size_t sz = sizeof(struct branch_stack);
431
432 sz += etm->synth_opts.last_branch_sz *
433 sizeof(struct branch_entry);
434 etmq->last_branch = zalloc(sz);
435 if (!etmq->last_branch)
436 goto out_free;
437 etmq->last_branch_rb = zalloc(sz);
438 if (!etmq->last_branch_rb)
439 goto out_free;
440 }
441
442 etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
443 if (!etmq->event_buf)
444 goto out_free; 692 goto out_free;
445 693
446 /* Use metadata to fill in trace parameters for trace decoder */ 694 /* Use metadata to fill in trace parameters for trace decoder */
@@ -477,12 +725,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm)
477out_free_decoder: 725out_free_decoder:
478 cs_etm_decoder__free(etmq->decoder); 726 cs_etm_decoder__free(etmq->decoder);
479out_free: 727out_free:
480 zfree(&t_params); 728 intlist__delete(etmq->traceid_queues_list);
481 zfree(&etmq->event_buf);
482 zfree(&etmq->last_branch);
483 zfree(&etmq->last_branch_rb);
484 zfree(&etmq->prev_packet);
485 zfree(&etmq->packet);
486 free(etmq); 729 free(etmq);
487 730
488 return NULL; 731 return NULL;
@@ -493,6 +736,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
493 unsigned int queue_nr) 736 unsigned int queue_nr)
494{ 737{
495 int ret = 0; 738 int ret = 0;
739 unsigned int cs_queue_nr;
740 u8 trace_chan_id;
741 u64 timestamp;
496 struct cs_etm_queue *etmq = queue->priv; 742 struct cs_etm_queue *etmq = queue->priv;
497 743
498 if (list_empty(&queue->head) || etmq) 744 if (list_empty(&queue->head) || etmq)
@@ -508,12 +754,69 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
508 queue->priv = etmq; 754 queue->priv = etmq;
509 etmq->etm = etm; 755 etmq->etm = etm;
510 etmq->queue_nr = queue_nr; 756 etmq->queue_nr = queue_nr;
511 etmq->cpu = queue->cpu;
512 etmq->tid = queue->tid;
513 etmq->pid = -1;
514 etmq->offset = 0; 757 etmq->offset = 0;
515 etmq->period_instructions = 0;
516 758
759 if (etm->timeless_decoding)
760 goto out;
761
762 /*
763 * We are under a CPU-wide trace scenario. As such we need to know
764 * when the code that generated the traces started to execute so that
765 * it can be correlated with execution on other CPUs. So we get a
766 * handle on the beginning of traces and decode until we find a
767 * timestamp. The timestamp is then added to the auxtrace min heap
768 * in order to know what nibble (of all the etmqs) to decode first.
769 */
770 while (1) {
771 /*
772 * Fetch an aux_buffer from this etmq. Bail if no more
773 * blocks or an error has been encountered.
774 */
775 ret = cs_etm__get_data_block(etmq);
776 if (ret <= 0)
777 goto out;
778
779 /*
780 * Run decoder on the trace block. The decoder will stop when
781 * encountering a timestamp, a full packet queue or the end of
782 * trace for that block.
783 */
784 ret = cs_etm__decode_data_block(etmq);
785 if (ret)
786 goto out;
787
788 /*
789 * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all
790 * the timestamp calculation for us.
791 */
792 timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
793
794 /* We found a timestamp, no need to continue. */
795 if (timestamp)
796 break;
797
798 /*
799 * We didn't find a timestamp so empty all the traceid packet
800 * queues before looking for another timestamp packet, either
801 * in the current data block or a new one. Packets that were
802 * just decoded are useless since no timestamp has been
803 * associated with them. As such simply discard them.
804 */
805 cs_etm__clear_all_packet_queues(etmq);
806 }
807
808 /*
809 * We have a timestamp. Add it to the min heap to reflect when
810 * instructions conveyed by the range packets of this traceID queue
811 * started to execute. Once the same has been done for all the traceID
812 * queues of each etmq, redenring and decoding can start in
813 * chronological order.
814 *
815 * Note that packets decoded above are still in the traceID's packet
816 * queue and will be processed in cs_etm__process_queues().
817 */
818 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_id_chan);
819 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
517out: 820out:
518 return ret; 821 return ret;
519} 822}
@@ -545,10 +848,12 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
545 return 0; 848 return 0;
546} 849}
547 850
548static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq) 851static inline
852void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq,
853 struct cs_etm_traceid_queue *tidq)
549{ 854{
550 struct branch_stack *bs_src = etmq->last_branch_rb; 855 struct branch_stack *bs_src = tidq->last_branch_rb;
551 struct branch_stack *bs_dst = etmq->last_branch; 856 struct branch_stack *bs_dst = tidq->last_branch;
552 size_t nr = 0; 857 size_t nr = 0;
553 858
554 /* 859 /*
@@ -568,9 +873,9 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
568 * two steps. First, copy the branches from the most recently inserted 873 * two steps. First, copy the branches from the most recently inserted
569 * branch ->last_branch_pos until the end of bs_src->entries buffer. 874 * branch ->last_branch_pos until the end of bs_src->entries buffer.
570 */ 875 */
571 nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos; 876 nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
572 memcpy(&bs_dst->entries[0], 877 memcpy(&bs_dst->entries[0],
573 &bs_src->entries[etmq->last_branch_pos], 878 &bs_src->entries[tidq->last_branch_pos],
574 sizeof(struct branch_entry) * nr); 879 sizeof(struct branch_entry) * nr);
575 880
576 /* 881 /*
@@ -583,21 +888,24 @@ static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
583 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { 888 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
584 memcpy(&bs_dst->entries[nr], 889 memcpy(&bs_dst->entries[nr],
585 &bs_src->entries[0], 890 &bs_src->entries[0],
586 sizeof(struct branch_entry) * etmq->last_branch_pos); 891 sizeof(struct branch_entry) * tidq->last_branch_pos);
587 } 892 }
588} 893}
589 894
590static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq) 895static inline
896void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq)
591{ 897{
592 etmq->last_branch_pos = 0; 898 tidq->last_branch_pos = 0;
593 etmq->last_branch_rb->nr = 0; 899 tidq->last_branch_rb->nr = 0;
594} 900}
595 901
596static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, 902static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
597 u64 addr) { 903 u8 trace_chan_id, u64 addr)
904{
598 u8 instrBytes[2]; 905 u8 instrBytes[2];
599 906
600 cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes); 907 cs_etm__mem_access(etmq, trace_chan_id, addr,
908 ARRAY_SIZE(instrBytes), instrBytes);
601 /* 909 /*
602 * T32 instruction size is indicated by bits[15:11] of the first 910 * T32 instruction size is indicated by bits[15:11] of the first
603 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 911 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
@@ -626,6 +934,7 @@ u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
626} 934}
627 935
628static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, 936static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
937 u64 trace_chan_id,
629 const struct cs_etm_packet *packet, 938 const struct cs_etm_packet *packet,
630 u64 offset) 939 u64 offset)
631{ 940{
@@ -633,7 +942,8 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
633 u64 addr = packet->start_addr; 942 u64 addr = packet->start_addr;
634 943
635 while (offset > 0) { 944 while (offset > 0) {
636 addr += cs_etm__t32_instr_size(etmq, addr); 945 addr += cs_etm__t32_instr_size(etmq,
946 trace_chan_id, addr);
637 offset--; 947 offset--;
638 } 948 }
639 return addr; 949 return addr;
@@ -643,9 +953,10 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
643 return packet->start_addr + offset * 4; 953 return packet->start_addr + offset * 4;
644} 954}
645 955
646static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq) 956static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq,
957 struct cs_etm_traceid_queue *tidq)
647{ 958{
648 struct branch_stack *bs = etmq->last_branch_rb; 959 struct branch_stack *bs = tidq->last_branch_rb;
649 struct branch_entry *be; 960 struct branch_entry *be;
650 961
651 /* 962 /*
@@ -654,14 +965,14 @@ static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
654 * buffer down. After writing the first element of the stack, move the 965 * buffer down. After writing the first element of the stack, move the
655 * insert position back to the end of the buffer. 966 * insert position back to the end of the buffer.
656 */ 967 */
657 if (!etmq->last_branch_pos) 968 if (!tidq->last_branch_pos)
658 etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; 969 tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
659 970
660 etmq->last_branch_pos -= 1; 971 tidq->last_branch_pos -= 1;
661 972
662 be = &bs->entries[etmq->last_branch_pos]; 973 be = &bs->entries[tidq->last_branch_pos];
663 be->from = cs_etm__last_executed_instr(etmq->prev_packet); 974 be->from = cs_etm__last_executed_instr(tidq->prev_packet);
664 be->to = cs_etm__first_executed_instr(etmq->packet); 975 be->to = cs_etm__first_executed_instr(tidq->packet);
665 /* No support for mispredict */ 976 /* No support for mispredict */
666 be->flags.mispred = 0; 977 be->flags.mispred = 0;
667 be->flags.predicted = 1; 978 be->flags.predicted = 1;
@@ -725,31 +1036,53 @@ cs_etm__get_trace(struct cs_etm_queue *etmq)
725} 1036}
726 1037
727static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, 1038static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
728 struct auxtrace_queue *queue) 1039 struct cs_etm_traceid_queue *tidq)
729{ 1040{
730 struct cs_etm_queue *etmq = queue->priv; 1041 if ((!tidq->thread) && (tidq->tid != -1))
1042 tidq->thread = machine__find_thread(etm->machine, -1,
1043 tidq->tid);
731 1044
732 /* CPU-wide tracing isn't supported yet */ 1045 if (tidq->thread)
733 if (queue->tid == -1) 1046 tidq->pid = tidq->thread->pid_;
734 return; 1047}
735 1048
736 if ((!etmq->thread) && (etmq->tid != -1)) 1049int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
737 etmq->thread = machine__find_thread(etm->machine, -1, 1050 pid_t tid, u8 trace_chan_id)
738 etmq->tid); 1051{
1052 int cpu, err = -EINVAL;
1053 struct cs_etm_auxtrace *etm = etmq->etm;
1054 struct cs_etm_traceid_queue *tidq;
739 1055
740 if (etmq->thread) { 1056 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
741 etmq->pid = etmq->thread->pid_; 1057 if (!tidq)
742 if (queue->cpu == -1) 1058 return err;
743 etmq->cpu = etmq->thread->cpu; 1059
744 } 1060 if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
1061 return err;
1062
1063 err = machine__set_current_tid(etm->machine, cpu, tid, tid);
1064 if (err)
1065 return err;
1066
1067 tidq->tid = tid;
1068 thread__zput(tidq->thread);
1069
1070 cs_etm__set_pid_tid_cpu(etm, tidq);
1071 return 0;
1072}
1073
1074bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq)
1075{
1076 return !!etmq->etm->timeless_decoding;
745} 1077}
746 1078
747static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, 1079static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
1080 struct cs_etm_traceid_queue *tidq,
748 u64 addr, u64 period) 1081 u64 addr, u64 period)
749{ 1082{
750 int ret = 0; 1083 int ret = 0;
751 struct cs_etm_auxtrace *etm = etmq->etm; 1084 struct cs_etm_auxtrace *etm = etmq->etm;
752 union perf_event *event = etmq->event_buf; 1085 union perf_event *event = tidq->event_buf;
753 struct perf_sample sample = {.ip = 0,}; 1086 struct perf_sample sample = {.ip = 0,};
754 1087
755 event->sample.header.type = PERF_RECORD_SAMPLE; 1088 event->sample.header.type = PERF_RECORD_SAMPLE;
@@ -757,19 +1090,19 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
757 event->sample.header.size = sizeof(struct perf_event_header); 1090 event->sample.header.size = sizeof(struct perf_event_header);
758 1091
759 sample.ip = addr; 1092 sample.ip = addr;
760 sample.pid = etmq->pid; 1093 sample.pid = tidq->pid;
761 sample.tid = etmq->tid; 1094 sample.tid = tidq->tid;
762 sample.id = etmq->etm->instructions_id; 1095 sample.id = etmq->etm->instructions_id;
763 sample.stream_id = etmq->etm->instructions_id; 1096 sample.stream_id = etmq->etm->instructions_id;
764 sample.period = period; 1097 sample.period = period;
765 sample.cpu = etmq->packet->cpu; 1098 sample.cpu = tidq->packet->cpu;
766 sample.flags = etmq->prev_packet->flags; 1099 sample.flags = tidq->prev_packet->flags;
767 sample.insn_len = 1; 1100 sample.insn_len = 1;
768 sample.cpumode = event->sample.header.misc; 1101 sample.cpumode = event->sample.header.misc;
769 1102
770 if (etm->synth_opts.last_branch) { 1103 if (etm->synth_opts.last_branch) {
771 cs_etm__copy_last_branch_rb(etmq); 1104 cs_etm__copy_last_branch_rb(etmq, tidq);
772 sample.branch_stack = etmq->last_branch; 1105 sample.branch_stack = tidq->last_branch;
773 } 1106 }
774 1107
775 if (etm->synth_opts.inject) { 1108 if (etm->synth_opts.inject) {
@@ -787,7 +1120,7 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
787 ret); 1120 ret);
788 1121
789 if (etm->synth_opts.last_branch) 1122 if (etm->synth_opts.last_branch)
790 cs_etm__reset_last_branch_rb(etmq); 1123 cs_etm__reset_last_branch_rb(tidq);
791 1124
792 return ret; 1125 return ret;
793} 1126}
@@ -796,33 +1129,34 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
796 * The cs etm packet encodes an instruction range between a branch target 1129 * The cs etm packet encodes an instruction range between a branch target
797 * and the next taken branch. Generate sample accordingly. 1130 * and the next taken branch. Generate sample accordingly.
798 */ 1131 */
799static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq) 1132static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
1133 struct cs_etm_traceid_queue *tidq)
800{ 1134{
801 int ret = 0; 1135 int ret = 0;
802 struct cs_etm_auxtrace *etm = etmq->etm; 1136 struct cs_etm_auxtrace *etm = etmq->etm;
803 struct perf_sample sample = {.ip = 0,}; 1137 struct perf_sample sample = {.ip = 0,};
804 union perf_event *event = etmq->event_buf; 1138 union perf_event *event = tidq->event_buf;
805 struct dummy_branch_stack { 1139 struct dummy_branch_stack {
806 u64 nr; 1140 u64 nr;
807 struct branch_entry entries; 1141 struct branch_entry entries;
808 } dummy_bs; 1142 } dummy_bs;
809 u64 ip; 1143 u64 ip;
810 1144
811 ip = cs_etm__last_executed_instr(etmq->prev_packet); 1145 ip = cs_etm__last_executed_instr(tidq->prev_packet);
812 1146
813 event->sample.header.type = PERF_RECORD_SAMPLE; 1147 event->sample.header.type = PERF_RECORD_SAMPLE;
814 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip); 1148 event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
815 event->sample.header.size = sizeof(struct perf_event_header); 1149 event->sample.header.size = sizeof(struct perf_event_header);
816 1150
817 sample.ip = ip; 1151 sample.ip = ip;
818 sample.pid = etmq->pid; 1152 sample.pid = tidq->pid;
819 sample.tid = etmq->tid; 1153 sample.tid = tidq->tid;
820 sample.addr = cs_etm__first_executed_instr(etmq->packet); 1154 sample.addr = cs_etm__first_executed_instr(tidq->packet);
821 sample.id = etmq->etm->branches_id; 1155 sample.id = etmq->etm->branches_id;
822 sample.stream_id = etmq->etm->branches_id; 1156 sample.stream_id = etmq->etm->branches_id;
823 sample.period = 1; 1157 sample.period = 1;
824 sample.cpu = etmq->packet->cpu; 1158 sample.cpu = tidq->packet->cpu;
825 sample.flags = etmq->prev_packet->flags; 1159 sample.flags = tidq->prev_packet->flags;
826 sample.cpumode = event->sample.header.misc; 1160 sample.cpumode = event->sample.header.misc;
827 1161
828 /* 1162 /*
@@ -965,33 +1299,35 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
965 return 0; 1299 return 0;
966} 1300}
967 1301
968static int cs_etm__sample(struct cs_etm_queue *etmq) 1302static int cs_etm__sample(struct cs_etm_queue *etmq,
1303 struct cs_etm_traceid_queue *tidq)
969{ 1304{
970 struct cs_etm_auxtrace *etm = etmq->etm; 1305 struct cs_etm_auxtrace *etm = etmq->etm;
971 struct cs_etm_packet *tmp; 1306 struct cs_etm_packet *tmp;
972 int ret; 1307 int ret;
973 u64 instrs_executed = etmq->packet->instr_count; 1308 u8 trace_chan_id = tidq->trace_chan_id;
1309 u64 instrs_executed = tidq->packet->instr_count;
974 1310
975 etmq->period_instructions += instrs_executed; 1311 tidq->period_instructions += instrs_executed;
976 1312
977 /* 1313 /*
978 * Record a branch when the last instruction in 1314 * Record a branch when the last instruction in
979 * PREV_PACKET is a branch. 1315 * PREV_PACKET is a branch.
980 */ 1316 */
981 if (etm->synth_opts.last_branch && 1317 if (etm->synth_opts.last_branch &&
982 etmq->prev_packet->sample_type == CS_ETM_RANGE && 1318 tidq->prev_packet->sample_type == CS_ETM_RANGE &&
983 etmq->prev_packet->last_instr_taken_branch) 1319 tidq->prev_packet->last_instr_taken_branch)
984 cs_etm__update_last_branch_rb(etmq); 1320 cs_etm__update_last_branch_rb(etmq, tidq);
985 1321
986 if (etm->sample_instructions && 1322 if (etm->sample_instructions &&
987 etmq->period_instructions >= etm->instructions_sample_period) { 1323 tidq->period_instructions >= etm->instructions_sample_period) {
988 /* 1324 /*
989 * Emit instruction sample periodically 1325 * Emit instruction sample periodically
990 * TODO: allow period to be defined in cycles and clock time 1326 * TODO: allow period to be defined in cycles and clock time
991 */ 1327 */
992 1328
993 /* Get number of instructions executed after the sample point */ 1329 /* Get number of instructions executed after the sample point */
994 u64 instrs_over = etmq->period_instructions - 1330 u64 instrs_over = tidq->period_instructions -
995 etm->instructions_sample_period; 1331 etm->instructions_sample_period;
996 1332
997 /* 1333 /*
@@ -1000,31 +1336,32 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
1000 * executed, but PC has not advanced to next instruction) 1336 * executed, but PC has not advanced to next instruction)
1001 */ 1337 */
1002 u64 offset = (instrs_executed - instrs_over - 1); 1338 u64 offset = (instrs_executed - instrs_over - 1);
1003 u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset); 1339 u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
1340 tidq->packet, offset);
1004 1341
1005 ret = cs_etm__synth_instruction_sample( 1342 ret = cs_etm__synth_instruction_sample(
1006 etmq, addr, etm->instructions_sample_period); 1343 etmq, tidq, addr, etm->instructions_sample_period);
1007 if (ret) 1344 if (ret)
1008 return ret; 1345 return ret;
1009 1346
1010 /* Carry remaining instructions into next sample period */ 1347 /* Carry remaining instructions into next sample period */
1011 etmq->period_instructions = instrs_over; 1348 tidq->period_instructions = instrs_over;
1012 } 1349 }
1013 1350
1014 if (etm->sample_branches) { 1351 if (etm->sample_branches) {
1015 bool generate_sample = false; 1352 bool generate_sample = false;
1016 1353
1017 /* Generate sample for tracing on packet */ 1354 /* Generate sample for tracing on packet */
1018 if (etmq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) 1355 if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
1019 generate_sample = true; 1356 generate_sample = true;
1020 1357
1021 /* Generate sample for branch taken packet */ 1358 /* Generate sample for branch taken packet */
1022 if (etmq->prev_packet->sample_type == CS_ETM_RANGE && 1359 if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
1023 etmq->prev_packet->last_instr_taken_branch) 1360 tidq->prev_packet->last_instr_taken_branch)
1024 generate_sample = true; 1361 generate_sample = true;
1025 1362
1026 if (generate_sample) { 1363 if (generate_sample) {
1027 ret = cs_etm__synth_branch_sample(etmq); 1364 ret = cs_etm__synth_branch_sample(etmq, tidq);
1028 if (ret) 1365 if (ret)
1029 return ret; 1366 return ret;
1030 } 1367 }
@@ -1035,15 +1372,15 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
1035 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1372 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
1036 * the next incoming packet. 1373 * the next incoming packet.
1037 */ 1374 */
1038 tmp = etmq->packet; 1375 tmp = tidq->packet;
1039 etmq->packet = etmq->prev_packet; 1376 tidq->packet = tidq->prev_packet;
1040 etmq->prev_packet = tmp; 1377 tidq->prev_packet = tmp;
1041 } 1378 }
1042 1379
1043 return 0; 1380 return 0;
1044} 1381}
1045 1382
1046static int cs_etm__exception(struct cs_etm_queue *etmq) 1383static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
1047{ 1384{
1048 /* 1385 /*
1049 * When the exception packet is inserted, whether the last instruction 1386 * When the exception packet is inserted, whether the last instruction
@@ -1056,24 +1393,25 @@ static int cs_etm__exception(struct cs_etm_queue *etmq)
1056 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful 1393 * swap PACKET with PREV_PACKET. This keeps PREV_PACKET to be useful
1057 * for generating instruction and branch samples. 1394 * for generating instruction and branch samples.
1058 */ 1395 */
1059 if (etmq->prev_packet->sample_type == CS_ETM_RANGE) 1396 if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
1060 etmq->prev_packet->last_instr_taken_branch = true; 1397 tidq->prev_packet->last_instr_taken_branch = true;
1061 1398
1062 return 0; 1399 return 0;
1063} 1400}
1064 1401
1065static int cs_etm__flush(struct cs_etm_queue *etmq) 1402static int cs_etm__flush(struct cs_etm_queue *etmq,
1403 struct cs_etm_traceid_queue *tidq)
1066{ 1404{
1067 int err = 0; 1405 int err = 0;
1068 struct cs_etm_auxtrace *etm = etmq->etm; 1406 struct cs_etm_auxtrace *etm = etmq->etm;
1069 struct cs_etm_packet *tmp; 1407 struct cs_etm_packet *tmp;
1070 1408
1071 /* Handle start tracing packet */ 1409 /* Handle start tracing packet */
1072 if (etmq->prev_packet->sample_type == CS_ETM_EMPTY) 1410 if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
1073 goto swap_packet; 1411 goto swap_packet;
1074 1412
1075 if (etmq->etm->synth_opts.last_branch && 1413 if (etmq->etm->synth_opts.last_branch &&
1076 etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1414 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1077 /* 1415 /*
1078 * Generate a last branch event for the branches left in the 1416 * Generate a last branch event for the branches left in the
1079 * circular buffer at the end of the trace. 1417 * circular buffer at the end of the trace.
@@ -1081,21 +1419,21 @@ static int cs_etm__flush(struct cs_etm_queue *etmq)
1081 * Use the address of the end of the last reported execution 1419 * Use the address of the end of the last reported execution
1082 * range 1420 * range
1083 */ 1421 */
1084 u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); 1422 u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1085 1423
1086 err = cs_etm__synth_instruction_sample( 1424 err = cs_etm__synth_instruction_sample(
1087 etmq, addr, 1425 etmq, tidq, addr,
1088 etmq->period_instructions); 1426 tidq->period_instructions);
1089 if (err) 1427 if (err)
1090 return err; 1428 return err;
1091 1429
1092 etmq->period_instructions = 0; 1430 tidq->period_instructions = 0;
1093 1431
1094 } 1432 }
1095 1433
1096 if (etm->sample_branches && 1434 if (etm->sample_branches &&
1097 etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1435 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1098 err = cs_etm__synth_branch_sample(etmq); 1436 err = cs_etm__synth_branch_sample(etmq, tidq);
1099 if (err) 1437 if (err)
1100 return err; 1438 return err;
1101 } 1439 }
@@ -1106,15 +1444,16 @@ swap_packet:
1106 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for 1444 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
1107 * the next incoming packet. 1445 * the next incoming packet.
1108 */ 1446 */
1109 tmp = etmq->packet; 1447 tmp = tidq->packet;
1110 etmq->packet = etmq->prev_packet; 1448 tidq->packet = tidq->prev_packet;
1111 etmq->prev_packet = tmp; 1449 tidq->prev_packet = tmp;
1112 } 1450 }
1113 1451
1114 return err; 1452 return err;
1115} 1453}
1116 1454
1117static int cs_etm__end_block(struct cs_etm_queue *etmq) 1455static int cs_etm__end_block(struct cs_etm_queue *etmq,
1456 struct cs_etm_traceid_queue *tidq)
1118{ 1457{
1119 int err; 1458 int err;
1120 1459
@@ -1128,20 +1467,20 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq)
1128 * the trace. 1467 * the trace.
1129 */ 1468 */
1130 if (etmq->etm->synth_opts.last_branch && 1469 if (etmq->etm->synth_opts.last_branch &&
1131 etmq->prev_packet->sample_type == CS_ETM_RANGE) { 1470 tidq->prev_packet->sample_type == CS_ETM_RANGE) {
1132 /* 1471 /*
1133 * Use the address of the end of the last reported execution 1472 * Use the address of the end of the last reported execution
1134 * range. 1473 * range.
1135 */ 1474 */
1136 u64 addr = cs_etm__last_executed_instr(etmq->prev_packet); 1475 u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
1137 1476
1138 err = cs_etm__synth_instruction_sample( 1477 err = cs_etm__synth_instruction_sample(
1139 etmq, addr, 1478 etmq, tidq, addr,
1140 etmq->period_instructions); 1479 tidq->period_instructions);
1141 if (err) 1480 if (err)
1142 return err; 1481 return err;
1143 1482
1144 etmq->period_instructions = 0; 1483 tidq->period_instructions = 0;
1145 } 1484 }
1146 1485
1147 return 0; 1486 return 0;
@@ -1173,12 +1512,13 @@ static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
1173 return etmq->buf_len; 1512 return etmq->buf_len;
1174} 1513}
1175 1514
1176static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, 1515static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id,
1177 struct cs_etm_packet *packet, 1516 struct cs_etm_packet *packet,
1178 u64 end_addr) 1517 u64 end_addr)
1179{ 1518{
1180 u16 instr16; 1519 /* Initialise to keep compiler happy */
1181 u32 instr32; 1520 u16 instr16 = 0;
1521 u32 instr32 = 0;
1182 u64 addr; 1522 u64 addr;
1183 1523
1184 switch (packet->isa) { 1524 switch (packet->isa) {
@@ -1196,7 +1536,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
1196 * so below only read 2 bytes as instruction size for T32. 1536 * so below only read 2 bytes as instruction size for T32.
1197 */ 1537 */
1198 addr = end_addr - 2; 1538 addr = end_addr - 2;
1199 cs_etm__mem_access(etmq, addr, sizeof(instr16), (u8 *)&instr16); 1539 cs_etm__mem_access(etmq, trace_chan_id, addr,
1540 sizeof(instr16), (u8 *)&instr16);
1200 if ((instr16 & 0xFF00) == 0xDF00) 1541 if ((instr16 & 0xFF00) == 0xDF00)
1201 return true; 1542 return true;
1202 1543
@@ -1211,7 +1552,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
1211 * +---------+---------+-------------------------+ 1552 * +---------+---------+-------------------------+
1212 */ 1553 */
1213 addr = end_addr - 4; 1554 addr = end_addr - 4;
1214 cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1555 cs_etm__mem_access(etmq, trace_chan_id, addr,
1556 sizeof(instr32), (u8 *)&instr32);
1215 if ((instr32 & 0x0F000000) == 0x0F000000 && 1557 if ((instr32 & 0x0F000000) == 0x0F000000 &&
1216 (instr32 & 0xF0000000) != 0xF0000000) 1558 (instr32 & 0xF0000000) != 0xF0000000)
1217 return true; 1559 return true;
@@ -1227,7 +1569,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
1227 * +-----------------------+---------+-----------+ 1569 * +-----------------------+---------+-----------+
1228 */ 1570 */
1229 addr = end_addr - 4; 1571 addr = end_addr - 4;
1230 cs_etm__mem_access(etmq, addr, sizeof(instr32), (u8 *)&instr32); 1572 cs_etm__mem_access(etmq, trace_chan_id, addr,
1573 sizeof(instr32), (u8 *)&instr32);
1231 if ((instr32 & 0xFFE0001F) == 0xd4000001) 1574 if ((instr32 & 0xFFE0001F) == 0xd4000001)
1232 return true; 1575 return true;
1233 1576
@@ -1240,10 +1583,12 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq,
1240 return false; 1583 return false;
1241} 1584}
1242 1585
1243static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic) 1586static bool cs_etm__is_syscall(struct cs_etm_queue *etmq,
1587 struct cs_etm_traceid_queue *tidq, u64 magic)
1244{ 1588{
1245 struct cs_etm_packet *packet = etmq->packet; 1589 u8 trace_chan_id = tidq->trace_chan_id;
1246 struct cs_etm_packet *prev_packet = etmq->prev_packet; 1590 struct cs_etm_packet *packet = tidq->packet;
1591 struct cs_etm_packet *prev_packet = tidq->prev_packet;
1247 1592
1248 if (magic == __perf_cs_etmv3_magic) 1593 if (magic == __perf_cs_etmv3_magic)
1249 if (packet->exception_number == CS_ETMV3_EXC_SVC) 1594 if (packet->exception_number == CS_ETMV3_EXC_SVC)
@@ -1256,7 +1601,7 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
1256 */ 1601 */
1257 if (magic == __perf_cs_etmv4_magic) { 1602 if (magic == __perf_cs_etmv4_magic) {
1258 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1603 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
1259 cs_etm__is_svc_instr(etmq, prev_packet, 1604 cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
1260 prev_packet->end_addr)) 1605 prev_packet->end_addr))
1261 return true; 1606 return true;
1262 } 1607 }
@@ -1264,9 +1609,10 @@ static bool cs_etm__is_syscall(struct cs_etm_queue *etmq, u64 magic)
1264 return false; 1609 return false;
1265} 1610}
1266 1611
1267static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic) 1612static bool cs_etm__is_async_exception(struct cs_etm_traceid_queue *tidq,
1613 u64 magic)
1268{ 1614{
1269 struct cs_etm_packet *packet = etmq->packet; 1615 struct cs_etm_packet *packet = tidq->packet;
1270 1616
1271 if (magic == __perf_cs_etmv3_magic) 1617 if (magic == __perf_cs_etmv3_magic)
1272 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT || 1618 if (packet->exception_number == CS_ETMV3_EXC_DEBUG_HALT ||
@@ -1289,10 +1635,13 @@ static bool cs_etm__is_async_exception(struct cs_etm_queue *etmq, u64 magic)
1289 return false; 1635 return false;
1290} 1636}
1291 1637
1292static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic) 1638static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq,
1639 struct cs_etm_traceid_queue *tidq,
1640 u64 magic)
1293{ 1641{
1294 struct cs_etm_packet *packet = etmq->packet; 1642 u8 trace_chan_id = tidq->trace_chan_id;
1295 struct cs_etm_packet *prev_packet = etmq->prev_packet; 1643 struct cs_etm_packet *packet = tidq->packet;
1644 struct cs_etm_packet *prev_packet = tidq->prev_packet;
1296 1645
1297 if (magic == __perf_cs_etmv3_magic) 1646 if (magic == __perf_cs_etmv3_magic)
1298 if (packet->exception_number == CS_ETMV3_EXC_SMC || 1647 if (packet->exception_number == CS_ETMV3_EXC_SMC ||
@@ -1316,7 +1665,7 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
1316 * (SMC, HVC) are taken as sync exceptions. 1665 * (SMC, HVC) are taken as sync exceptions.
1317 */ 1666 */
1318 if (packet->exception_number == CS_ETMV4_EXC_CALL && 1667 if (packet->exception_number == CS_ETMV4_EXC_CALL &&
1319 !cs_etm__is_svc_instr(etmq, prev_packet, 1668 !cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
1320 prev_packet->end_addr)) 1669 prev_packet->end_addr))
1321 return true; 1670 return true;
1322 1671
@@ -1335,10 +1684,12 @@ static bool cs_etm__is_sync_exception(struct cs_etm_queue *etmq, u64 magic)
1335 return false; 1684 return false;
1336} 1685}
1337 1686
1338static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq) 1687static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
1688 struct cs_etm_traceid_queue *tidq)
1339{ 1689{
1340 struct cs_etm_packet *packet = etmq->packet; 1690 struct cs_etm_packet *packet = tidq->packet;
1341 struct cs_etm_packet *prev_packet = etmq->prev_packet; 1691 struct cs_etm_packet *prev_packet = tidq->prev_packet;
1692 u8 trace_chan_id = tidq->trace_chan_id;
1342 u64 magic; 1693 u64 magic;
1343 int ret; 1694 int ret;
1344 1695
@@ -1419,7 +1770,8 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
1419 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH | 1770 if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
1420 PERF_IP_FLAG_RETURN | 1771 PERF_IP_FLAG_RETURN |
1421 PERF_IP_FLAG_INTERRUPT) && 1772 PERF_IP_FLAG_INTERRUPT) &&
1422 cs_etm__is_svc_instr(etmq, packet, packet->start_addr)) 1773 cs_etm__is_svc_instr(etmq, trace_chan_id,
1774 packet, packet->start_addr))
1423 prev_packet->flags = PERF_IP_FLAG_BRANCH | 1775 prev_packet->flags = PERF_IP_FLAG_BRANCH |
1424 PERF_IP_FLAG_RETURN | 1776 PERF_IP_FLAG_RETURN |
1425 PERF_IP_FLAG_SYSCALLRET; 1777 PERF_IP_FLAG_SYSCALLRET;
@@ -1440,7 +1792,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
1440 return ret; 1792 return ret;
1441 1793
1442 /* The exception is for system call. */ 1794 /* The exception is for system call. */
1443 if (cs_etm__is_syscall(etmq, magic)) 1795 if (cs_etm__is_syscall(etmq, tidq, magic))
1444 packet->flags = PERF_IP_FLAG_BRANCH | 1796 packet->flags = PERF_IP_FLAG_BRANCH |
1445 PERF_IP_FLAG_CALL | 1797 PERF_IP_FLAG_CALL |
1446 PERF_IP_FLAG_SYSCALLRET; 1798 PERF_IP_FLAG_SYSCALLRET;
@@ -1448,7 +1800,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
1448 * The exceptions are triggered by external signals from bus, 1800 * The exceptions are triggered by external signals from bus,
1449 * interrupt controller, debug module, PE reset or halt. 1801 * interrupt controller, debug module, PE reset or halt.
1450 */ 1802 */
1451 else if (cs_etm__is_async_exception(etmq, magic)) 1803 else if (cs_etm__is_async_exception(tidq, magic))
1452 packet->flags = PERF_IP_FLAG_BRANCH | 1804 packet->flags = PERF_IP_FLAG_BRANCH |
1453 PERF_IP_FLAG_CALL | 1805 PERF_IP_FLAG_CALL |
1454 PERF_IP_FLAG_ASYNC | 1806 PERF_IP_FLAG_ASYNC |
@@ -1457,7 +1809,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq)
1457 * Otherwise, exception is caused by trap, instruction & 1809 * Otherwise, exception is caused by trap, instruction &
1458 * data fault, or alignment errors. 1810 * data fault, or alignment errors.
1459 */ 1811 */
1460 else if (cs_etm__is_sync_exception(etmq, magic)) 1812 else if (cs_etm__is_sync_exception(etmq, tidq, magic))
1461 packet->flags = PERF_IP_FLAG_BRANCH | 1813 packet->flags = PERF_IP_FLAG_BRANCH |
1462 PERF_IP_FLAG_CALL | 1814 PERF_IP_FLAG_CALL |
1463 PERF_IP_FLAG_INTERRUPT; 1815 PERF_IP_FLAG_INTERRUPT;
@@ -1539,75 +1891,106 @@ out:
1539 return ret; 1891 return ret;
1540} 1892}
1541 1893
1542static int cs_etm__process_decoder_queue(struct cs_etm_queue *etmq) 1894static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq,
1895 struct cs_etm_traceid_queue *tidq)
1543{ 1896{
1544 int ret; 1897 int ret;
1898 struct cs_etm_packet_queue *packet_queue;
1545 1899
1546 /* Process each packet in this chunk */ 1900 packet_queue = &tidq->packet_queue;
1547 while (1) {
1548 ret = cs_etm_decoder__get_packet(etmq->decoder,
1549 etmq->packet);
1550 if (ret <= 0)
1551 /*
1552 * Stop processing this chunk on
1553 * end of data or error
1554 */
1555 break;
1556 1901
1902 /* Process each packet in this chunk */
1903 while (1) {
1904 ret = cs_etm_decoder__get_packet(packet_queue,
1905 tidq->packet);
1906 if (ret <= 0)
1557 /* 1907 /*
1558 * Since packet addresses are swapped in packet 1908 * Stop processing this chunk on
1559 * handling within below switch() statements, 1909 * end of data or error
1560 * thus setting sample flags must be called
1561 * prior to switch() statement to use address
1562 * information before packets swapping.
1563 */ 1910 */
1564 ret = cs_etm__set_sample_flags(etmq); 1911 break;
1565 if (ret < 0) 1912
1566 break; 1913 /*
1567 1914 * Since packet addresses are swapped in packet
1568 switch (etmq->packet->sample_type) { 1915 * handling within below switch() statements,
1569 case CS_ETM_RANGE: 1916 * thus setting sample flags must be called
1570 /* 1917 * prior to switch() statement to use address
1571 * If the packet contains an instruction 1918 * information before packets swapping.
1572 * range, generate instruction sequence 1919 */
1573 * events. 1920 ret = cs_etm__set_sample_flags(etmq, tidq);
1574 */ 1921 if (ret < 0)
1575 cs_etm__sample(etmq); 1922 break;
1576 break; 1923
1577 case CS_ETM_EXCEPTION: 1924 switch (tidq->packet->sample_type) {
1578 case CS_ETM_EXCEPTION_RET: 1925 case CS_ETM_RANGE:
1579 /* 1926 /*
1580 * If the exception packet is coming, 1927 * If the packet contains an instruction
1581 * make sure the previous instruction 1928 * range, generate instruction sequence
1582 * range packet to be handled properly. 1929 * events.
1583 */ 1930 */
1584 cs_etm__exception(etmq); 1931 cs_etm__sample(etmq, tidq);
1585 break; 1932 break;
1586 case CS_ETM_DISCONTINUITY: 1933 case CS_ETM_EXCEPTION:
1587 /* 1934 case CS_ETM_EXCEPTION_RET:
1588 * Discontinuity in trace, flush 1935 /*
1589 * previous branch stack 1936 * If the exception packet is coming,
1590 */ 1937 * make sure the previous instruction
1591 cs_etm__flush(etmq); 1938 * range packet to be handled properly.
1592 break; 1939 */
1593 case CS_ETM_EMPTY: 1940 cs_etm__exception(tidq);
1594 /* 1941 break;
1595 * Should not receive empty packet, 1942 case CS_ETM_DISCONTINUITY:
1596 * report error. 1943 /*
1597 */ 1944 * Discontinuity in trace, flush
1598 pr_err("CS ETM Trace: empty packet\n"); 1945 * previous branch stack
1599 return -EINVAL; 1946 */
1600 default: 1947 cs_etm__flush(etmq, tidq);
1601 break; 1948 break;
1602 } 1949 case CS_ETM_EMPTY:
1950 /*
1951 * Should not receive empty packet,
1952 * report error.
1953 */
1954 pr_err("CS ETM Trace: empty packet\n");
1955 return -EINVAL;
1956 default:
1957 break;
1603 } 1958 }
1959 }
1604 1960
1605 return ret; 1961 return ret;
1606} 1962}
1607 1963
1964static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq)
1965{
1966 int idx;
1967 struct int_node *inode;
1968 struct cs_etm_traceid_queue *tidq;
1969 struct intlist *traceid_queues_list = etmq->traceid_queues_list;
1970
1971 intlist__for_each_entry(inode, traceid_queues_list) {
1972 idx = (int)(intptr_t)inode->priv;
1973 tidq = etmq->traceid_queues[idx];
1974
1975 /* Ignore return value */
1976 cs_etm__process_traceid_queue(etmq, tidq);
1977
1978 /*
1979 * Generate an instruction sample with the remaining
1980 * branchstack entries.
1981 */
1982 cs_etm__flush(etmq, tidq);
1983 }
1984}
1985
1608static int cs_etm__run_decoder(struct cs_etm_queue *etmq) 1986static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
1609{ 1987{
1610 int err = 0; 1988 int err = 0;
1989 struct cs_etm_traceid_queue *tidq;
1990
1991 tidq = cs_etm__etmq_get_traceid_queue(etmq, CS_ETM_PER_THREAD_TRACEID);
1992 if (!tidq)
1993 return -EINVAL;
1611 1994
1612 /* Go through each buffer in the queue and decode them one by one */ 1995 /* Go through each buffer in the queue and decode them one by one */
1613 while (1) { 1996 while (1) {
@@ -1626,13 +2009,13 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
1626 * an error occurs other than hoping the next one will 2009 * an error occurs other than hoping the next one will
1627 * be better. 2010 * be better.
1628 */ 2011 */
1629 err = cs_etm__process_decoder_queue(etmq); 2012 err = cs_etm__process_traceid_queue(etmq, tidq);
1630 2013
1631 } while (etmq->buf_len); 2014 } while (etmq->buf_len);
1632 2015
1633 if (err == 0) 2016 if (err == 0)
1634 /* Flush any remaining branch stack entries */ 2017 /* Flush any remaining branch stack entries */
1635 err = cs_etm__end_block(etmq); 2018 err = cs_etm__end_block(etmq, tidq);
1636 } 2019 }
1637 2020
1638 return err; 2021 return err;
@@ -1647,9 +2030,19 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
1647 for (i = 0; i < queues->nr_queues; i++) { 2030 for (i = 0; i < queues->nr_queues; i++) {
1648 struct auxtrace_queue *queue = &etm->queues.queue_array[i]; 2031 struct auxtrace_queue *queue = &etm->queues.queue_array[i];
1649 struct cs_etm_queue *etmq = queue->priv; 2032 struct cs_etm_queue *etmq = queue->priv;
2033 struct cs_etm_traceid_queue *tidq;
2034
2035 if (!etmq)
2036 continue;
2037
2038 tidq = cs_etm__etmq_get_traceid_queue(etmq,
2039 CS_ETM_PER_THREAD_TRACEID);
2040
2041 if (!tidq)
2042 continue;
1650 2043
1651 if (etmq && ((tid == -1) || (etmq->tid == tid))) { 2044 if ((tid == -1) || (tidq->tid == tid)) {
1652 cs_etm__set_pid_tid_cpu(etm, queue); 2045 cs_etm__set_pid_tid_cpu(etm, tidq);
1653 cs_etm__run_decoder(etmq); 2046 cs_etm__run_decoder(etmq);
1654 } 2047 }
1655 } 2048 }
@@ -1657,6 +2050,164 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
1657 return 0; 2050 return 0;
1658} 2051}
1659 2052
2053static int cs_etm__process_queues(struct cs_etm_auxtrace *etm)
2054{
2055 int ret = 0;
2056 unsigned int cs_queue_nr, queue_nr;
2057 u8 trace_chan_id;
2058 u64 timestamp;
2059 struct auxtrace_queue *queue;
2060 struct cs_etm_queue *etmq;
2061 struct cs_etm_traceid_queue *tidq;
2062
2063 while (1) {
2064 if (!etm->heap.heap_cnt)
2065 goto out;
2066
2067 /* Take the entry at the top of the min heap */
2068 cs_queue_nr = etm->heap.heap_array[0].queue_nr;
2069 queue_nr = TO_QUEUE_NR(cs_queue_nr);
2070 trace_chan_id = TO_TRACE_CHAN_ID(cs_queue_nr);
2071 queue = &etm->queues.queue_array[queue_nr];
2072 etmq = queue->priv;
2073
2074 /*
2075 * Remove the top entry from the heap since we are about
2076 * to process it.
2077 */
2078 auxtrace_heap__pop(&etm->heap);
2079
2080 tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id);
2081 if (!tidq) {
2082 /*
2083 * No traceID queue has been allocated for this traceID,
2084 * which means something somewhere went very wrong. No
2085 * other choice than simply exit.
2086 */
2087 ret = -EINVAL;
2088 goto out;
2089 }
2090
2091 /*
2092 * Packets associated with this timestamp are already in
2093 * the etmq's traceID queue, so process them.
2094 */
2095 ret = cs_etm__process_traceid_queue(etmq, tidq);
2096 if (ret < 0)
2097 goto out;
2098
2099 /*
2100 * Packets for this timestamp have been processed, time to
2101 * move on to the next timestamp, fetching a new auxtrace_buffer
2102 * if need be.
2103 */
2104refetch:
2105 ret = cs_etm__get_data_block(etmq);
2106 if (ret < 0)
2107 goto out;
2108
2109 /*
2110 * No more auxtrace_buffers to process in this etmq, simply
2111 * move on to another entry in the auxtrace_heap.
2112 */
2113 if (!ret)
2114 continue;
2115
2116 ret = cs_etm__decode_data_block(etmq);
2117 if (ret)
2118 goto out;
2119
2120 timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
2121
2122 if (!timestamp) {
2123 /*
2124 * Function cs_etm__decode_data_block() returns when
2125 * there is no more traces to decode in the current
2126 * auxtrace_buffer OR when a timestamp has been
2127 * encountered on any of the traceID queues. Since we
2128 * did not get a timestamp, there is no more traces to
2129 * process in this auxtrace_buffer. As such empty and
2130 * flush all traceID queues.
2131 */
2132 cs_etm__clear_all_traceid_queues(etmq);
2133
2134 /* Fetch another auxtrace_buffer for this etmq */
2135 goto refetch;
2136 }
2137
2138 /*
2139 * Add to the min heap the timestamp for packets that have
2140 * just been decoded. They will be processed and synthesized
2141 * during the next call to cs_etm__process_traceid_queue() for
2142 * this queue/traceID.
2143 */
2144 cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
2145 ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, timestamp);
2146 }
2147
2148out:
2149 return ret;
2150}
2151
2152static int cs_etm__process_itrace_start(struct cs_etm_auxtrace *etm,
2153 union perf_event *event)
2154{
2155 struct thread *th;
2156
2157 if (etm->timeless_decoding)
2158 return 0;
2159
2160 /*
2161 * Add the tid/pid to the log so that we can get a match when
2162 * we get a contextID from the decoder.
2163 */
2164 th = machine__findnew_thread(etm->machine,
2165 event->itrace_start.pid,
2166 event->itrace_start.tid);
2167 if (!th)
2168 return -ENOMEM;
2169
2170 thread__put(th);
2171
2172 return 0;
2173}
2174
2175static int cs_etm__process_switch_cpu_wide(struct cs_etm_auxtrace *etm,
2176 union perf_event *event)
2177{
2178 struct thread *th;
2179 bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
2180
2181 /*
2182 * Context switch in per-thread mode are irrelevant since perf
2183 * will start/stop tracing as the process is scheduled.
2184 */
2185 if (etm->timeless_decoding)
2186 return 0;
2187
2188 /*
2189 * SWITCH_IN events carry the next process to be switched out while
2190 * SWITCH_OUT events carry the process to be switched in. As such
2191 * we don't care about IN events.
2192 */
2193 if (!out)
2194 return 0;
2195
2196 /*
2197 * Add the tid/pid to the log so that we can get a match when
2198 * we get a contextID from the decoder.
2199 */
2200 th = machine__findnew_thread(etm->machine,
2201 event->context_switch.next_prev_pid,
2202 event->context_switch.next_prev_tid);
2203 if (!th)
2204 return -ENOMEM;
2205
2206 thread__put(th);
2207
2208 return 0;
2209}
2210
1660static int cs_etm__process_event(struct perf_session *session, 2211static int cs_etm__process_event(struct perf_session *session,
1661 union perf_event *event, 2212 union perf_event *event,
1662 struct perf_sample *sample, 2213 struct perf_sample *sample,
@@ -1676,9 +2227,6 @@ static int cs_etm__process_event(struct perf_session *session,
1676 return -EINVAL; 2227 return -EINVAL;
1677 } 2228 }
1678 2229
1679 if (!etm->timeless_decoding)
1680 return -EINVAL;
1681
1682 if (sample->time && (sample->time != (u64) -1)) 2230 if (sample->time && (sample->time != (u64) -1))
1683 timestamp = sample->time; 2231 timestamp = sample->time;
1684 else 2232 else
@@ -1690,10 +2238,20 @@ static int cs_etm__process_event(struct perf_session *session,
1690 return err; 2238 return err;
1691 } 2239 }
1692 2240
1693 if (event->header.type == PERF_RECORD_EXIT) 2241 if (etm->timeless_decoding &&
2242 event->header.type == PERF_RECORD_EXIT)
1694 return cs_etm__process_timeless_queues(etm, 2243 return cs_etm__process_timeless_queues(etm,
1695 event->fork.tid); 2244 event->fork.tid);
1696 2245
2246 if (event->header.type == PERF_RECORD_ITRACE_START)
2247 return cs_etm__process_itrace_start(etm, event);
2248 else if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
2249 return cs_etm__process_switch_cpu_wide(etm, event);
2250
2251 if (!etm->timeless_decoding &&
2252 event->header.type == PERF_RECORD_AUX)
2253 return cs_etm__process_queues(etm);
2254
1697 return 0; 2255 return 0;
1698} 2256}
1699 2257
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 0e97c196147a..bc848fd095f4 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -9,6 +9,7 @@
9 9
10#include "util/event.h" 10#include "util/event.h"
11#include "util/session.h" 11#include "util/session.h"
12#include <linux/bits.h>
12 13
13/* Versionning header in case things need tro change in the future. That way 14/* Versionning header in case things need tro change in the future. That way
14 * decoding of old snapshot is still possible. 15 * decoding of old snapshot is still possible.
@@ -97,12 +98,72 @@ enum {
97 CS_ETMV4_EXC_END = 31, 98 CS_ETMV4_EXC_END = 31,
98}; 99};
99 100
101enum cs_etm_sample_type {
102 CS_ETM_EMPTY,
103 CS_ETM_RANGE,
104 CS_ETM_DISCONTINUITY,
105 CS_ETM_EXCEPTION,
106 CS_ETM_EXCEPTION_RET,
107};
108
109enum cs_etm_isa {
110 CS_ETM_ISA_UNKNOWN,
111 CS_ETM_ISA_A64,
112 CS_ETM_ISA_A32,
113 CS_ETM_ISA_T32,
114};
115
100/* RB tree for quick conversion between traceID and metadata pointers */ 116/* RB tree for quick conversion between traceID and metadata pointers */
101struct intlist *traceid_list; 117struct intlist *traceid_list;
102 118
119struct cs_etm_queue;
120
121struct cs_etm_packet {
122 enum cs_etm_sample_type sample_type;
123 enum cs_etm_isa isa;
124 u64 start_addr;
125 u64 end_addr;
126 u32 instr_count;
127 u32 last_instr_type;
128 u32 last_instr_subtype;
129 u32 flags;
130 u32 exception_number;
131 u8 last_instr_cond;
132 u8 last_instr_taken_branch;
133 u8 last_instr_size;
134 u8 trace_chan_id;
135 int cpu;
136};
137
138#define CS_ETM_PACKET_MAX_BUFFER 1024
139
140/*
141 * When working with per-thread scenarios the process under trace can
142 * be scheduled on any CPU and as such, more than one traceID may be
143 * associated with the same process. Since a traceID of '0' is illegal
144 * as per the CoreSight architecture, use that specific value to
145 * identify the queue where all packets (with any traceID) are
146 * aggregated.
147 */
148#define CS_ETM_PER_THREAD_TRACEID 0
149
150struct cs_etm_packet_queue {
151 u32 packet_count;
152 u32 head;
153 u32 tail;
154 u32 instr_count;
155 u64 timestamp;
156 u64 next_timestamp;
157 struct cs_etm_packet packet_buffer[CS_ETM_PACKET_MAX_BUFFER];
158};
159
103#define KiB(x) ((x) * 1024) 160#define KiB(x) ((x) * 1024)
104#define MiB(x) ((x) * 1024 * 1024) 161#define MiB(x) ((x) * 1024 * 1024)
105 162
163#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
164
165#define BMVAL(val, lsb, msb) ((val & GENMASK(msb, lsb)) >> lsb)
166
106#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64)) 167#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
107 168
108#define __perf_cs_etmv3_magic 0x3030303030303030ULL 169#define __perf_cs_etmv3_magic 0x3030303030303030ULL
@@ -114,6 +175,13 @@ struct intlist *traceid_list;
114int cs_etm__process_auxtrace_info(union perf_event *event, 175int cs_etm__process_auxtrace_info(union perf_event *event,
115 struct perf_session *session); 176 struct perf_session *session);
116int cs_etm__get_cpu(u8 trace_chan_id, int *cpu); 177int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
178int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
179 pid_t tid, u8 trace_chan_id);
180bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq);
181void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
182 u8 trace_chan_id);
183struct cs_etm_packet_queue
184*cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
117#else 185#else
118static inline int 186static inline int
119cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, 187cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
@@ -127,6 +195,32 @@ static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
127{ 195{
128 return -1; 196 return -1;
129} 197}
198
199static inline int cs_etm__etmq_set_tid(
200 struct cs_etm_queue *etmq __maybe_unused,
201 pid_t tid __maybe_unused,
202 u8 trace_chan_id __maybe_unused)
203{
204 return -1;
205}
206
207static inline bool cs_etm__etmq_is_timeless(
208 struct cs_etm_queue *etmq __maybe_unused)
209{
210 /* What else to return? */
211 return true;
212}
213
214static inline void cs_etm__etmq_set_traceid_queue_timestamp(
215 struct cs_etm_queue *etmq __maybe_unused,
216 u8 trace_chan_id __maybe_unused) {}
217
218static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(
219 struct cs_etm_queue *etmq __maybe_unused,
220 u8 trace_chan_id __maybe_unused)
221{
222 return NULL;
223}
130#endif 224#endif
131 225
132#endif 226#endif
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
deleted file mode 100644
index ee4c1e8ed54b..000000000000
--- a/tools/perf/util/ctype.c
+++ /dev/null
@@ -1,49 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Sane locale-independent, ASCII ctype.
4 *
5 * No surprises, and works with signed and unsigned chars.
6 */
7#include "sane_ctype.h"
8
9enum {
10 S = GIT_SPACE,
11 A = GIT_ALPHA,
12 D = GIT_DIGIT,
13 G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
14 R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */
15 P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */
16
17 PS = GIT_SPACE | GIT_PRINT_EXTRA,
18};
19
20unsigned char sane_ctype[256] = {
21/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
22
23 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
24 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
25 PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
26 D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
27 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
28 A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */
29 P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
30 A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */
31 /* Nothing in the 128.. range */
32};
33
34const char *graph_line =
35 "_____________________________________________________________________"
36 "_____________________________________________________________________"
37 "_____________________________________________________________________";
38const char *graph_dotted_line =
39 "---------------------------------------------------------------------"
40 "---------------------------------------------------------------------"
41 "---------------------------------------------------------------------";
42const char *spaces =
43 " "
44 " "
45 " ";
46const char *dots =
47 "....................................................................."
48 "....................................................................."
49 ".....................................................................";
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index b79e1d6839ed..7b06e7373b9e 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -29,7 +29,7 @@
29#include "evsel.h" 29#include "evsel.h"
30#include "machine.h" 30#include "machine.h"
31#include "config.h" 31#include "config.h"
32#include "sane_ctype.h" 32#include <linux/ctype.h>
33 33
34#define pr_N(n, fmt, ...) \ 34#define pr_N(n, fmt, ...) \
35 eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) 35 eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index 3d6459626c2a..3cc578343f48 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -21,7 +21,7 @@
21#include "util.h" 21#include "util.h"
22#include "target.h" 22#include "target.h"
23 23
24#include "sane_ctype.h" 24#include <linux/ctype.h>
25 25
26int verbose; 26int verbose;
27bool dump_trace = false, quiet = false; 27bool dump_trace = false, quiet = false;
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
index e4c486756053..5b4900d67c80 100644
--- a/tools/perf/util/demangle-java.c
+++ b/tools/perf/util/demangle-java.c
@@ -8,7 +8,7 @@
8 8
9#include "demangle-java.h" 9#include "demangle-java.h"
10 10
11#include "sane_ctype.h" 11#include <linux/ctype.h>
12 12
13enum { 13enum {
14 MODE_PREFIX = 0, 14 MODE_PREFIX = 0,
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e059976d9d93..c7fde04400f7 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <asm/bug.h> 2#include <asm/bug.h>
3#include <linux/kernel.h> 3#include <linux/kernel.h>
4#include <linux/string.h>
4#include <sys/time.h> 5#include <sys/time.h>
5#include <sys/resource.h> 6#include <sys/resource.h>
6#include <sys/types.h> 7#include <sys/types.h>
@@ -9,6 +10,8 @@
9#include <errno.h> 10#include <errno.h>
10#include <fcntl.h> 11#include <fcntl.h>
11#include <libgen.h> 12#include <libgen.h>
13#include <bpf/libbpf.h>
14#include "bpf-event.h"
12#include "compress.h" 15#include "compress.h"
13#include "namespaces.h" 16#include "namespaces.h"
14#include "path.h" 17#include "path.h"
@@ -392,7 +395,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
392 return -ENOMEM; 395 return -ENOMEM;
393 } 396 }
394 397
395 strxfrchar(m->name, '-', '_'); 398 strreplace(m->name, '-', '_');
396 } 399 }
397 400
398 return 0; 401 return 0;
@@ -706,6 +709,44 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
706 return false; 709 return false;
707} 710}
708 711
712static ssize_t bpf_read(struct dso *dso, u64 offset, char *data)
713{
714 struct bpf_prog_info_node *node;
715 ssize_t size = DSO__DATA_CACHE_SIZE;
716 u64 len;
717 u8 *buf;
718
719 node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
720 if (!node || !node->info_linear) {
721 dso->data.status = DSO_DATA_STATUS_ERROR;
722 return -1;
723 }
724
725 len = node->info_linear->info.jited_prog_len;
726 buf = (u8 *)(uintptr_t)node->info_linear->info.jited_prog_insns;
727
728 if (offset >= len)
729 return -1;
730
731 size = (ssize_t)min(len - offset, (u64)size);
732 memcpy(data, buf + offset, size);
733 return size;
734}
735
736static int bpf_size(struct dso *dso)
737{
738 struct bpf_prog_info_node *node;
739
740 node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, dso->bpf_prog.id);
741 if (!node || !node->info_linear) {
742 dso->data.status = DSO_DATA_STATUS_ERROR;
743 return -1;
744 }
745
746 dso->data.file_size = node->info_linear->info.jited_prog_len;
747 return 0;
748}
749
709static void 750static void
710dso_cache__free(struct dso *dso) 751dso_cache__free(struct dso *dso)
711{ 752{
@@ -794,48 +835,53 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
794 return cache_size; 835 return cache_size;
795} 836}
796 837
797static ssize_t 838static ssize_t file_read(struct dso *dso, struct machine *machine,
798dso_cache__read(struct dso *dso, struct machine *machine, 839 u64 offset, char *data)
799 u64 offset, u8 *data, ssize_t size)
800{ 840{
801 struct dso_cache *cache;
802 struct dso_cache *old;
803 ssize_t ret; 841 ssize_t ret;
804 842
805 do { 843 pthread_mutex_lock(&dso__data_open_lock);
806 u64 cache_offset;
807 844
808 cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); 845 /*
809 if (!cache) 846 * dso->data.fd might be closed if other thread opened another
810 return -ENOMEM; 847 * file (dso) due to open file limit (RLIMIT_NOFILE).
848 */
849 try_to_open_dso(dso, machine);
811 850
812 pthread_mutex_lock(&dso__data_open_lock); 851 if (dso->data.fd < 0) {
852 dso->data.status = DSO_DATA_STATUS_ERROR;
853 ret = -errno;
854 goto out;
855 }
813 856
814 /* 857 ret = pread(dso->data.fd, data, DSO__DATA_CACHE_SIZE, offset);
815 * dso->data.fd might be closed if other thread opened another 858out:
816 * file (dso) due to open file limit (RLIMIT_NOFILE). 859 pthread_mutex_unlock(&dso__data_open_lock);
817 */ 860 return ret;
818 try_to_open_dso(dso, machine); 861}
819 862
820 if (dso->data.fd < 0) { 863static ssize_t
821 ret = -errno; 864dso_cache__read(struct dso *dso, struct machine *machine,
822 dso->data.status = DSO_DATA_STATUS_ERROR; 865 u64 offset, u8 *data, ssize_t size)
823 break; 866{
824 } 867 u64 cache_offset = offset & DSO__DATA_CACHE_MASK;
868 struct dso_cache *cache;
869 struct dso_cache *old;
870 ssize_t ret;
825 871
826 cache_offset = offset & DSO__DATA_CACHE_MASK; 872 cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
873 if (!cache)
874 return -ENOMEM;
827 875
828 ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset); 876 if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
829 if (ret <= 0) 877 ret = bpf_read(dso, cache_offset, cache->data);
830 break; 878 else
879 ret = file_read(dso, machine, cache_offset, cache->data);
831 880
881 if (ret > 0) {
832 cache->offset = cache_offset; 882 cache->offset = cache_offset;
833 cache->size = ret; 883 cache->size = ret;
834 } while (0);
835
836 pthread_mutex_unlock(&dso__data_open_lock);
837 884
838 if (ret > 0) {
839 old = dso_cache__insert(dso, cache); 885 old = dso_cache__insert(dso, cache);
840 if (old) { 886 if (old) {
841 /* we lose the race */ 887 /* we lose the race */
@@ -898,18 +944,12 @@ static ssize_t cached_read(struct dso *dso, struct machine *machine,
898 return r; 944 return r;
899} 945}
900 946
901int dso__data_file_size(struct dso *dso, struct machine *machine) 947static int file_size(struct dso *dso, struct machine *machine)
902{ 948{
903 int ret = 0; 949 int ret = 0;
904 struct stat st; 950 struct stat st;
905 char sbuf[STRERR_BUFSIZE]; 951 char sbuf[STRERR_BUFSIZE];
906 952
907 if (dso->data.file_size)
908 return 0;
909
910 if (dso->data.status == DSO_DATA_STATUS_ERROR)
911 return -1;
912
913 pthread_mutex_lock(&dso__data_open_lock); 953 pthread_mutex_lock(&dso__data_open_lock);
914 954
915 /* 955 /*
@@ -938,6 +978,20 @@ out:
938 return ret; 978 return ret;
939} 979}
940 980
981int dso__data_file_size(struct dso *dso, struct machine *machine)
982{
983 if (dso->data.file_size)
984 return 0;
985
986 if (dso->data.status == DSO_DATA_STATUS_ERROR)
987 return -1;
988
989 if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
990 return bpf_size(dso);
991
992 return file_size(dso, machine);
993}
994
941/** 995/**
942 * dso__data_size - Return dso data size 996 * dso__data_size - Return dso data size
943 * @dso: dso object 997 * @dso: dso object
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 6a3eaf7d9353..22eee8942527 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -1,7 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "cpumap.h" 2#include "cpumap.h"
3#include "env.h" 3#include "env.h"
4#include "sane_ctype.h" 4#include <linux/ctype.h>
5#include "util.h" 5#include "util.h"
6#include "bpf-event.h" 6#include "bpf-event.h"
7#include <errno.h> 7#include <errno.h>
@@ -246,6 +246,7 @@ int perf_env__read_cpu_topology_map(struct perf_env *env)
246 for (cpu = 0; cpu < nr_cpus; ++cpu) { 246 for (cpu = 0; cpu < nr_cpus; ++cpu) {
247 env->cpu[cpu].core_id = cpu_map__get_core_id(cpu); 247 env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
248 env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu); 248 env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
249 env->cpu[cpu].die_id = cpu_map__get_die_id(cpu);
249 } 250 }
250 251
251 env->nr_cpus_avail = nr_cpus; 252 env->nr_cpus_avail = nr_cpus;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 271a90b326c4..d5d9865aa812 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -9,6 +9,7 @@
9 9
10struct cpu_topology_map { 10struct cpu_topology_map {
11 int socket_id; 11 int socket_id;
12 int die_id;
12 int core_id; 13 int core_id;
13}; 14};
14 15
@@ -49,6 +50,7 @@ struct perf_env {
49 50
50 int nr_cmdline; 51 int nr_cmdline;
51 int nr_sibling_cores; 52 int nr_sibling_cores;
53 int nr_sibling_dies;
52 int nr_sibling_threads; 54 int nr_sibling_threads;
53 int nr_numa_nodes; 55 int nr_numa_nodes;
54 int nr_memory_nodes; 56 int nr_memory_nodes;
@@ -57,6 +59,7 @@ struct perf_env {
57 char *cmdline; 59 char *cmdline;
58 const char **cmdline_argv; 60 const char **cmdline_argv;
59 char *sibling_cores; 61 char *sibling_cores;
62 char *sibling_dies;
60 char *sibling_threads; 63 char *sibling_threads;
61 char *pmu_mappings; 64 char *pmu_mappings;
62 struct cpu_topology_map *cpu; 65 struct cpu_topology_map *cpu;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index d1ad6c419724..e1d0c5ba1f92 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -20,7 +20,7 @@
20#include "strlist.h" 20#include "strlist.h"
21#include "thread.h" 21#include "thread.h"
22#include "thread_map.h" 22#include "thread_map.h"
23#include "sane_ctype.h" 23#include <linux/ctype.h>
24#include "map.h" 24#include "map.h"
25#include "symbol.h" 25#include "symbol.h"
26#include "symbol/kallsyms.h" 26#include "symbol/kallsyms.h"
@@ -158,9 +158,7 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
158 if (name) { 158 if (name) {
159 char *nl; 159 char *nl;
160 160
161 name += 5; /* strlen("Name:") */ 161 name = skip_spaces(name + 5); /* strlen("Name:") */
162 name = ltrim(name);
163
164 nl = strchr(name, '\n'); 162 nl = strchr(name, '\n');
165 if (nl) 163 if (nl)
166 *nl = '\0'; 164 *nl = '\0';
@@ -1486,7 +1484,7 @@ static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
1486 1484
1487size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp) 1485size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
1488{ 1486{
1489 return fprintf(fp, " ksymbol event with addr %" PRIx64 " len %u type %u flags 0x%x name %s\n", 1487 return fprintf(fp, " addr %" PRIx64 " len %u type %u flags 0x%x name %s\n",
1490 event->ksymbol_event.addr, event->ksymbol_event.len, 1488 event->ksymbol_event.addr, event->ksymbol_event.len,
1491 event->ksymbol_event.ksym_type, 1489 event->ksymbol_event.ksym_type,
1492 event->ksymbol_event.flags, event->ksymbol_event.name); 1490 event->ksymbol_event.flags, event->ksymbol_event.name);
@@ -1494,7 +1492,7 @@ size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp)
1494 1492
1495size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp) 1493size_t perf_event__fprintf_bpf_event(union perf_event *event, FILE *fp)
1496{ 1494{
1497 return fprintf(fp, " bpf event with type %u, flags %u, id %u\n", 1495 return fprintf(fp, " type %u, flags %u, id %u\n",
1498 event->bpf_event.type, event->bpf_event.flags, 1496 event->bpf_event.type, event->bpf_event.flags,
1499 event->bpf_event.id); 1497 event->bpf_event.id);
1500} 1498}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 9e999550f247..1f1da6082806 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -204,6 +204,8 @@ struct perf_sample {
204 u64 period; 204 u64 period;
205 u64 weight; 205 u64 weight;
206 u64 transaction; 206 u64 transaction;
207 u64 insn_cnt;
208 u64 cyc_cnt;
207 u32 cpu; 209 u32 cpu;
208 u32 raw_size; 210 u32 raw_size;
209 u64 data_src; 211 u64 data_src;
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 69beb9f80f07..7fb4ae82f34c 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -35,10 +35,11 @@
35#include "debug.h" 35#include "debug.h"
36#include "trace-event.h" 36#include "trace-event.h"
37#include "stat.h" 37#include "stat.h"
38#include "string2.h"
38#include "memswap.h" 39#include "memswap.h"
39#include "util/parse-branch-options.h" 40#include "util/parse-branch-options.h"
40 41
41#include "sane_ctype.h" 42#include <linux/ctype.h>
42 43
43struct perf_missing_features perf_missing_features; 44struct perf_missing_features perf_missing_features;
44 45
@@ -589,6 +590,9 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
589{ 590{
590 char bf[128]; 591 char bf[128];
591 592
593 if (!evsel)
594 goto out_unknown;
595
592 if (evsel->name) 596 if (evsel->name)
593 return evsel->name; 597 return evsel->name;
594 598
@@ -628,7 +632,10 @@ const char *perf_evsel__name(struct perf_evsel *evsel)
628 632
629 evsel->name = strdup(bf); 633 evsel->name = strdup(bf);
630 634
631 return evsel->name ?: "unknown"; 635 if (evsel->name)
636 return evsel->name;
637out_unknown:
638 return "unknown";
632} 639}
633 640
634const char *perf_evsel__group_name(struct perf_evsel *evsel) 641const char *perf_evsel__group_name(struct perf_evsel *evsel)
@@ -679,6 +686,10 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
679 686
680 attr->sample_max_stack = param->max_stack; 687 attr->sample_max_stack = param->max_stack;
681 688
689 if (opts->kernel_callchains)
690 attr->exclude_callchain_user = 1;
691 if (opts->user_callchains)
692 attr->exclude_callchain_kernel = 1;
682 if (param->record_mode == CALLCHAIN_LBR) { 693 if (param->record_mode == CALLCHAIN_LBR) {
683 if (!opts->branch_stack) { 694 if (!opts->branch_stack) {
684 if (attr->exclude_user) { 695 if (attr->exclude_user) {
@@ -701,7 +712,14 @@ static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
701 if (!function) { 712 if (!function) {
702 perf_evsel__set_sample_bit(evsel, REGS_USER); 713 perf_evsel__set_sample_bit(evsel, REGS_USER);
703 perf_evsel__set_sample_bit(evsel, STACK_USER); 714 perf_evsel__set_sample_bit(evsel, STACK_USER);
704 attr->sample_regs_user |= PERF_REGS_MASK; 715 if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
716 attr->sample_regs_user |= DWARF_MINIMAL_REGS;
717 pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
718 "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
719 "so the minimal registers set (IP, SP) is explicitly forced.\n");
720 } else {
721 attr->sample_regs_user |= PERF_REGS_MASK;
722 }
705 attr->sample_stack_user = param->dump_size; 723 attr->sample_stack_user = param->dump_size;
706 attr->exclude_callchain_user = 1; 724 attr->exclude_callchain_user = 1;
707 } else { 725 } else {
@@ -1136,9 +1154,6 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
1136 1154
1137static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) 1155static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
1138{ 1156{
1139 if (evsel->system_wide)
1140 nthreads = 1;
1141
1142 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int)); 1157 evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
1143 1158
1144 if (evsel->fd) { 1159 if (evsel->fd) {
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index fb0aa661644b..6a93ff5d8db5 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -13,6 +13,7 @@
13#include <linux/list.h> 13#include <linux/list.h>
14#include <linux/kernel.h> 14#include <linux/kernel.h>
15#include <linux/bitops.h> 15#include <linux/bitops.h>
16#include <linux/string.h>
16#include <linux/stringify.h> 17#include <linux/stringify.h>
17#include <sys/stat.h> 18#include <sys/stat.h>
18#include <sys/utsname.h> 19#include <sys/utsname.h>
@@ -43,7 +44,7 @@
43#include "cputopo.h" 44#include "cputopo.h"
44#include "bpf-event.h" 45#include "bpf-event.h"
45 46
46#include "sane_ctype.h" 47#include <linux/ctype.h>
47 48
48/* 49/*
49 * magic2 = "PERFILE2" 50 * magic2 = "PERFILE2"
@@ -416,10 +417,8 @@ static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc)
416 while (*p) { 417 while (*p) {
417 if (isspace(*p)) { 418 if (isspace(*p)) {
418 char *r = p + 1; 419 char *r = p + 1;
419 char *q = r; 420 char *q = skip_spaces(r);
420 *p = ' '; 421 *p = ' ';
421 while (*q && isspace(*q))
422 q++;
423 if (q != (p+1)) 422 if (q != (p+1))
424 while ((*r++ = *q++)); 423 while ((*r++ = *q++));
425 } 424 }
@@ -599,6 +598,27 @@ static int write_cpu_topology(struct feat_fd *ff,
599 if (ret < 0) 598 if (ret < 0)
600 return ret; 599 return ret;
601 } 600 }
601
602 if (!tp->die_sib)
603 goto done;
604
605 ret = do_write(ff, &tp->die_sib, sizeof(tp->die_sib));
606 if (ret < 0)
607 goto done;
608
609 for (i = 0; i < tp->die_sib; i++) {
610 ret = do_write_string(ff, tp->die_siblings[i]);
611 if (ret < 0)
612 goto done;
613 }
614
615 for (j = 0; j < perf_env.nr_cpus_avail; j++) {
616 ret = do_write(ff, &perf_env.cpu[j].die_id,
617 sizeof(perf_env.cpu[j].die_id));
618 if (ret < 0)
619 return ret;
620 }
621
602done: 622done:
603 cpu_topology__delete(tp); 623 cpu_topology__delete(tp);
604 return ret; 624 return ret;
@@ -1028,7 +1048,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev
1028 return -1; 1048 return -1;
1029 1049
1030 cache->type[len] = 0; 1050 cache->type[len] = 0;
1031 cache->type = rtrim(cache->type); 1051 cache->type = strim(cache->type);
1032 1052
1033 scnprintf(file, PATH_MAX, "%s/size", path); 1053 scnprintf(file, PATH_MAX, "%s/size", path);
1034 if (sysfs__read_str(file, &cache->size, &len)) { 1054 if (sysfs__read_str(file, &cache->size, &len)) {
@@ -1037,7 +1057,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev
1037 } 1057 }
1038 1058
1039 cache->size[len] = 0; 1059 cache->size[len] = 0;
1040 cache->size = rtrim(cache->size); 1060 cache->size = strim(cache->size);
1041 1061
1042 scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path); 1062 scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
1043 if (sysfs__read_str(file, &cache->map, &len)) { 1063 if (sysfs__read_str(file, &cache->map, &len)) {
@@ -1047,7 +1067,7 @@ static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 lev
1047 } 1067 }
1048 1068
1049 cache->map[len] = 0; 1069 cache->map[len] = 0;
1050 cache->map = rtrim(cache->map); 1070 cache->map = strim(cache->map);
1051 return 0; 1071 return 0;
1052} 1072}
1053 1073
@@ -1100,7 +1120,7 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
1100 return 0; 1120 return 0;
1101} 1121}
1102 1122
1103#define MAX_CACHES 2000 1123#define MAX_CACHES (MAX_NR_CPUS * 4)
1104 1124
1105static int write_cache(struct feat_fd *ff, 1125static int write_cache(struct feat_fd *ff,
1106 struct perf_evlist *evlist __maybe_unused) 1126 struct perf_evlist *evlist __maybe_unused)
@@ -1439,10 +1459,20 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
1439 str = ph->env.sibling_cores; 1459 str = ph->env.sibling_cores;
1440 1460
1441 for (i = 0; i < nr; i++) { 1461 for (i = 0; i < nr; i++) {
1442 fprintf(fp, "# sibling cores : %s\n", str); 1462 fprintf(fp, "# sibling sockets : %s\n", str);
1443 str += strlen(str) + 1; 1463 str += strlen(str) + 1;
1444 } 1464 }
1445 1465
1466 if (ph->env.nr_sibling_dies) {
1467 nr = ph->env.nr_sibling_dies;
1468 str = ph->env.sibling_dies;
1469
1470 for (i = 0; i < nr; i++) {
1471 fprintf(fp, "# sibling dies : %s\n", str);
1472 str += strlen(str) + 1;
1473 }
1474 }
1475
1446 nr = ph->env.nr_sibling_threads; 1476 nr = ph->env.nr_sibling_threads;
1447 str = ph->env.sibling_threads; 1477 str = ph->env.sibling_threads;
1448 1478
@@ -1451,12 +1481,28 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
1451 str += strlen(str) + 1; 1481 str += strlen(str) + 1;
1452 } 1482 }
1453 1483
1454 if (ph->env.cpu != NULL) { 1484 if (ph->env.nr_sibling_dies) {
1455 for (i = 0; i < cpu_nr; i++) 1485 if (ph->env.cpu != NULL) {
1456 fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i, 1486 for (i = 0; i < cpu_nr; i++)
1457 ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id); 1487 fprintf(fp, "# CPU %d: Core ID %d, "
1458 } else 1488 "Die ID %d, Socket ID %d\n",
1459 fprintf(fp, "# Core ID and Socket ID information is not available\n"); 1489 i, ph->env.cpu[i].core_id,
1490 ph->env.cpu[i].die_id,
1491 ph->env.cpu[i].socket_id);
1492 } else
1493 fprintf(fp, "# Core ID, Die ID and Socket ID "
1494 "information is not available\n");
1495 } else {
1496 if (ph->env.cpu != NULL) {
1497 for (i = 0; i < cpu_nr; i++)
1498 fprintf(fp, "# CPU %d: Core ID %d, "
1499 "Socket ID %d\n",
1500 i, ph->env.cpu[i].core_id,
1501 ph->env.cpu[i].socket_id);
1502 } else
1503 fprintf(fp, "# Core ID and Socket ID "
1504 "information is not available\n");
1505 }
1460} 1506}
1461 1507
1462static void print_clockid(struct feat_fd *ff, FILE *fp) 1508static void print_clockid(struct feat_fd *ff, FILE *fp)
@@ -2214,6 +2260,7 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
2214 goto free_cpu; 2260 goto free_cpu;
2215 2261
2216 ph->env.cpu[i].core_id = nr; 2262 ph->env.cpu[i].core_id = nr;
2263 size += sizeof(u32);
2217 2264
2218 if (do_read_u32(ff, &nr)) 2265 if (do_read_u32(ff, &nr))
2219 goto free_cpu; 2266 goto free_cpu;
@@ -2225,6 +2272,40 @@ static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
2225 } 2272 }
2226 2273
2227 ph->env.cpu[i].socket_id = nr; 2274 ph->env.cpu[i].socket_id = nr;
2275 size += sizeof(u32);
2276 }
2277
2278 /*
2279 * The header may be from old perf,
2280 * which doesn't include die information.
2281 */
2282 if (ff->size <= size)
2283 return 0;
2284
2285 if (do_read_u32(ff, &nr))
2286 return -1;
2287
2288 ph->env.nr_sibling_dies = nr;
2289 size += sizeof(u32);
2290
2291 for (i = 0; i < nr; i++) {
2292 str = do_read_string(ff);
2293 if (!str)
2294 goto error;
2295
2296 /* include a NULL character at the end */
2297 if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
2298 goto error;
2299 size += string_size(str);
2300 free(str);
2301 }
2302 ph->env.sibling_dies = strbuf_detach(&sb, NULL);
2303
2304 for (i = 0; i < (u32)cpu_nr; i++) {
2305 if (do_read_u32(ff, &nr))
2306 goto free_cpu;
2307
2308 ph->env.cpu[i].die_id = nr;
2228 } 2309 }
2229 2310
2230 return 0; 2311 return 0;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 7ace7a10054d..27cecb59f866 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -376,6 +376,24 @@ void hists__delete_entries(struct hists *hists)
376 } 376 }
377} 377}
378 378
379struct hist_entry *hists__get_entry(struct hists *hists, int idx)
380{
381 struct rb_node *next = rb_first_cached(&hists->entries);
382 struct hist_entry *n;
383 int i = 0;
384
385 while (next) {
386 n = rb_entry(next, struct hist_entry, rb_node);
387 if (i == idx)
388 return n;
389
390 next = rb_next(&n->rb_node);
391 i++;
392 }
393
394 return NULL;
395}
396
379/* 397/*
380 * histogram, sorted on item, collects periods 398 * histogram, sorted on item, collects periods
381 */ 399 */
@@ -574,6 +592,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
574 */ 592 */
575 mem_info__zput(entry->mem_info); 593 mem_info__zput(entry->mem_info);
576 594
595 block_info__zput(entry->block_info);
596
577 /* If the map of an existing hist_entry has 597 /* If the map of an existing hist_entry has
578 * become out-of-date due to an exec() or 598 * become out-of-date due to an exec() or
579 * similar, update it. Otherwise we will 599 * similar, update it. Otherwise we will
@@ -645,6 +665,7 @@ __hists__add_entry(struct hists *hists,
645 struct symbol *sym_parent, 665 struct symbol *sym_parent,
646 struct branch_info *bi, 666 struct branch_info *bi,
647 struct mem_info *mi, 667 struct mem_info *mi,
668 struct block_info *block_info,
648 struct perf_sample *sample, 669 struct perf_sample *sample,
649 bool sample_self, 670 bool sample_self,
650 struct hist_entry_ops *ops) 671 struct hist_entry_ops *ops)
@@ -677,6 +698,7 @@ __hists__add_entry(struct hists *hists,
677 .hists = hists, 698 .hists = hists,
678 .branch_info = bi, 699 .branch_info = bi,
679 .mem_info = mi, 700 .mem_info = mi,
701 .block_info = block_info,
680 .transaction = sample->transaction, 702 .transaction = sample->transaction,
681 .raw_data = sample->raw_data, 703 .raw_data = sample->raw_data,
682 .raw_size = sample->raw_size, 704 .raw_size = sample->raw_size,
@@ -699,7 +721,7 @@ struct hist_entry *hists__add_entry(struct hists *hists,
699 struct perf_sample *sample, 721 struct perf_sample *sample,
700 bool sample_self) 722 bool sample_self)
701{ 723{
702 return __hists__add_entry(hists, al, sym_parent, bi, mi, 724 return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL,
703 sample, sample_self, NULL); 725 sample, sample_self, NULL);
704} 726}
705 727
@@ -712,10 +734,22 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists,
712 struct perf_sample *sample, 734 struct perf_sample *sample,
713 bool sample_self) 735 bool sample_self)
714{ 736{
715 return __hists__add_entry(hists, al, sym_parent, bi, mi, 737 return __hists__add_entry(hists, al, sym_parent, bi, mi, NULL,
716 sample, sample_self, ops); 738 sample, sample_self, ops);
717} 739}
718 740
741struct hist_entry *hists__add_entry_block(struct hists *hists,
742 struct addr_location *al,
743 struct block_info *block_info)
744{
745 struct hist_entry entry = {
746 .block_info = block_info,
747 .hists = hists,
748 }, *he = hists__findnew_entry(hists, &entry, al, false);
749
750 return he;
751}
752
719static int 753static int
720iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused, 754iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
721 struct addr_location *al __maybe_unused) 755 struct addr_location *al __maybe_unused)
@@ -1213,6 +1247,9 @@ void hist_entry__delete(struct hist_entry *he)
1213 mem_info__zput(he->mem_info); 1247 mem_info__zput(he->mem_info);
1214 } 1248 }
1215 1249
1250 if (he->block_info)
1251 block_info__zput(he->block_info);
1252
1216 zfree(&he->res_samples); 1253 zfree(&he->res_samples);
1217 zfree(&he->stat_acc); 1254 zfree(&he->stat_acc);
1218 free_srcline(he->srcline); 1255 free_srcline(he->srcline);
@@ -2561,7 +2598,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
2561 char unit; 2598 char unit;
2562 int printed; 2599 int printed;
2563 const struct dso *dso = hists->dso_filter; 2600 const struct dso *dso = hists->dso_filter;
2564 const struct thread *thread = hists->thread_filter; 2601 struct thread *thread = hists->thread_filter;
2565 int socket_id = hists->socket_filter; 2602 int socket_id = hists->socket_filter;
2566 unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE]; 2603 unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
2567 u64 nr_events = hists->stats.total_period; 2604 u64 nr_events = hists->stats.total_period;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 76ff6c6d03b8..24635f36148d 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -16,6 +16,7 @@ struct addr_location;
16struct map_symbol; 16struct map_symbol;
17struct mem_info; 17struct mem_info;
18struct branch_info; 18struct branch_info;
19struct block_info;
19struct symbol; 20struct symbol;
20 21
21enum hist_filter { 22enum hist_filter {
@@ -149,6 +150,10 @@ struct hist_entry *hists__add_entry_ops(struct hists *hists,
149 struct perf_sample *sample, 150 struct perf_sample *sample,
150 bool sample_self); 151 bool sample_self);
151 152
153struct hist_entry *hists__add_entry_block(struct hists *hists,
154 struct addr_location *al,
155 struct block_info *bi);
156
152int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, 157int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
153 int max_stack_depth, void *arg); 158 int max_stack_depth, void *arg);
154 159
@@ -178,6 +183,8 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
178void hists__delete_entries(struct hists *hists); 183void hists__delete_entries(struct hists *hists);
179void hists__output_recalc_col_len(struct hists *hists, int max_rows); 184void hists__output_recalc_col_len(struct hists *hists, int max_rows);
180 185
186struct hist_entry *hists__get_entry(struct hists *hists, int idx);
187
181u64 hists__total_period(struct hists *hists); 188u64 hists__total_period(struct hists *hists);
182void hists__reset_stats(struct hists *hists); 189void hists__reset_stats(struct hists *hists);
183void hists__inc_stats(struct hists *hists, struct hist_entry *h); 190void hists__inc_stats(struct hists *hists, struct hist_entry *h);
@@ -243,6 +250,7 @@ struct perf_hpp {
243 size_t size; 250 size_t size;
244 const char *sep; 251 const char *sep;
245 void *ptr; 252 void *ptr;
253 bool skip;
246}; 254};
247 255
248struct perf_hpp_fmt { 256struct perf_hpp_fmt {
diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h
deleted file mode 100644
index a53d4ee1e0b7..000000000000
--- a/tools/perf/util/include/linux/ctype.h
+++ /dev/null
@@ -1 +0,0 @@
1#include "../util.h"
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 9d189e90fbdc..4d14e78c5927 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -95,6 +95,7 @@ struct intel_pt_decoder {
95 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, 95 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
96 uint64_t max_insn_cnt, void *data); 96 uint64_t max_insn_cnt, void *data);
97 bool (*pgd_ip)(uint64_t ip, void *data); 97 bool (*pgd_ip)(uint64_t ip, void *data);
98 int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
98 void *data; 99 void *data;
99 struct intel_pt_state state; 100 struct intel_pt_state state;
100 const unsigned char *buf; 101 const unsigned char *buf;
@@ -107,6 +108,7 @@ struct intel_pt_decoder {
107 bool have_cyc; 108 bool have_cyc;
108 bool fixup_last_mtc; 109 bool fixup_last_mtc;
109 bool have_last_ip; 110 bool have_last_ip;
111 bool in_psb;
110 enum intel_pt_param_flags flags; 112 enum intel_pt_param_flags flags;
111 uint64_t pos; 113 uint64_t pos;
112 uint64_t last_ip; 114 uint64_t last_ip;
@@ -115,6 +117,7 @@ struct intel_pt_decoder {
115 uint64_t timestamp; 117 uint64_t timestamp;
116 uint64_t tsc_timestamp; 118 uint64_t tsc_timestamp;
117 uint64_t ref_timestamp; 119 uint64_t ref_timestamp;
120 uint64_t buf_timestamp;
118 uint64_t sample_timestamp; 121 uint64_t sample_timestamp;
119 uint64_t ret_addr; 122 uint64_t ret_addr;
120 uint64_t ctc_timestamp; 123 uint64_t ctc_timestamp;
@@ -130,6 +133,10 @@ struct intel_pt_decoder {
130 int mtc_shift; 133 int mtc_shift;
131 struct intel_pt_stack stack; 134 struct intel_pt_stack stack;
132 enum intel_pt_pkt_state pkt_state; 135 enum intel_pt_pkt_state pkt_state;
136 enum intel_pt_pkt_ctx pkt_ctx;
137 enum intel_pt_pkt_ctx prev_pkt_ctx;
138 enum intel_pt_blk_type blk_type;
139 int blk_type_pos;
133 struct intel_pt_pkt packet; 140 struct intel_pt_pkt packet;
134 struct intel_pt_pkt tnt; 141 struct intel_pt_pkt tnt;
135 int pkt_step; 142 int pkt_step;
@@ -151,6 +158,11 @@ struct intel_pt_decoder {
151 uint64_t period_mask; 158 uint64_t period_mask;
152 uint64_t period_ticks; 159 uint64_t period_ticks;
153 uint64_t last_masked_timestamp; 160 uint64_t last_masked_timestamp;
161 uint64_t tot_cyc_cnt;
162 uint64_t sample_tot_cyc_cnt;
163 uint64_t base_cyc_cnt;
164 uint64_t cyc_cnt_timestamp;
165 double tsc_to_cyc;
154 bool continuous_period; 166 bool continuous_period;
155 bool overflow; 167 bool overflow;
156 bool set_fup_tx_flags; 168 bool set_fup_tx_flags;
@@ -158,6 +170,8 @@ struct intel_pt_decoder {
158 bool set_fup_mwait; 170 bool set_fup_mwait;
159 bool set_fup_pwre; 171 bool set_fup_pwre;
160 bool set_fup_exstop; 172 bool set_fup_exstop;
173 bool set_fup_bep;
174 bool sample_cyc;
161 unsigned int fup_tx_flags; 175 unsigned int fup_tx_flags;
162 unsigned int tx_flags; 176 unsigned int tx_flags;
163 uint64_t fup_ptw_payload; 177 uint64_t fup_ptw_payload;
@@ -217,6 +231,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
217 decoder->get_trace = params->get_trace; 231 decoder->get_trace = params->get_trace;
218 decoder->walk_insn = params->walk_insn; 232 decoder->walk_insn = params->walk_insn;
219 decoder->pgd_ip = params->pgd_ip; 233 decoder->pgd_ip = params->pgd_ip;
234 decoder->lookahead = params->lookahead;
220 decoder->data = params->data; 235 decoder->data = params->data;
221 decoder->return_compression = params->return_compression; 236 decoder->return_compression = params->return_compression;
222 decoder->branch_enable = params->branch_enable; 237 decoder->branch_enable = params->branch_enable;
@@ -470,7 +485,21 @@ static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
470 return -EBADMSG; 485 return -EBADMSG;
471} 486}
472 487
473static int intel_pt_get_data(struct intel_pt_decoder *decoder) 488static inline void intel_pt_update_sample_time(struct intel_pt_decoder *decoder)
489{
490 decoder->sample_timestamp = decoder->timestamp;
491 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
492}
493
494static void intel_pt_reposition(struct intel_pt_decoder *decoder)
495{
496 decoder->ip = 0;
497 decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
498 decoder->timestamp = 0;
499 decoder->have_tma = false;
500}
501
502static int intel_pt_get_data(struct intel_pt_decoder *decoder, bool reposition)
474{ 503{
475 struct intel_pt_buffer buffer = { .buf = 0, }; 504 struct intel_pt_buffer buffer = { .buf = 0, };
476 int ret; 505 int ret;
@@ -487,12 +516,10 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
487 intel_pt_log("No more data\n"); 516 intel_pt_log("No more data\n");
488 return -ENODATA; 517 return -ENODATA;
489 } 518 }
490 if (!buffer.consecutive) { 519 decoder->buf_timestamp = buffer.ref_timestamp;
491 decoder->ip = 0; 520 if (!buffer.consecutive || reposition) {
492 decoder->pkt_state = INTEL_PT_STATE_NO_PSB; 521 intel_pt_reposition(decoder);
493 decoder->ref_timestamp = buffer.ref_timestamp; 522 decoder->ref_timestamp = buffer.ref_timestamp;
494 decoder->timestamp = 0;
495 decoder->have_tma = false;
496 decoder->state.trace_nr = buffer.trace_nr; 523 decoder->state.trace_nr = buffer.trace_nr;
497 intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", 524 intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
498 decoder->ref_timestamp); 525 decoder->ref_timestamp);
@@ -502,10 +529,11 @@ static int intel_pt_get_data(struct intel_pt_decoder *decoder)
502 return 0; 529 return 0;
503} 530}
504 531
505static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) 532static int intel_pt_get_next_data(struct intel_pt_decoder *decoder,
533 bool reposition)
506{ 534{
507 if (!decoder->next_buf) 535 if (!decoder->next_buf)
508 return intel_pt_get_data(decoder); 536 return intel_pt_get_data(decoder, reposition);
509 537
510 decoder->buf = decoder->next_buf; 538 decoder->buf = decoder->next_buf;
511 decoder->len = decoder->next_len; 539 decoder->len = decoder->next_len;
@@ -524,7 +552,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
524 len = decoder->len; 552 len = decoder->len;
525 memcpy(buf, decoder->buf, len); 553 memcpy(buf, decoder->buf, len);
526 554
527 ret = intel_pt_get_data(decoder); 555 ret = intel_pt_get_data(decoder, false);
528 if (ret) { 556 if (ret) {
529 decoder->pos += old_len; 557 decoder->pos += old_len;
530 return ret < 0 ? ret : -EINVAL; 558 return ret < 0 ? ret : -EINVAL;
@@ -536,7 +564,8 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
536 memcpy(buf + len, decoder->buf, n); 564 memcpy(buf + len, decoder->buf, n);
537 len += n; 565 len += n;
538 566
539 ret = intel_pt_get_packet(buf, len, &decoder->packet); 567 decoder->prev_pkt_ctx = decoder->pkt_ctx;
568 ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
540 if (ret < (int)old_len) { 569 if (ret < (int)old_len) {
541 decoder->next_buf = decoder->buf; 570 decoder->next_buf = decoder->buf;
542 decoder->next_len = decoder->len; 571 decoder->next_len = decoder->len;
@@ -571,6 +600,7 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
571{ 600{
572 struct intel_pt_pkt_info pkt_info; 601 struct intel_pt_pkt_info pkt_info;
573 const unsigned char *buf = decoder->buf; 602 const unsigned char *buf = decoder->buf;
603 enum intel_pt_pkt_ctx pkt_ctx = decoder->pkt_ctx;
574 size_t len = decoder->len; 604 size_t len = decoder->len;
575 int ret; 605 int ret;
576 606
@@ -589,7 +619,8 @@ static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
589 if (!len) 619 if (!len)
590 return INTEL_PT_NEED_MORE_BYTES; 620 return INTEL_PT_NEED_MORE_BYTES;
591 621
592 ret = intel_pt_get_packet(buf, len, &pkt_info.packet); 622 ret = intel_pt_get_packet(buf, len, &pkt_info.packet,
623 &pkt_ctx);
593 if (!ret) 624 if (!ret)
594 return INTEL_PT_NEED_MORE_BYTES; 625 return INTEL_PT_NEED_MORE_BYTES;
595 if (ret < 0) 626 if (ret < 0)
@@ -664,6 +695,10 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
664 case INTEL_PT_MNT: 695 case INTEL_PT_MNT:
665 case INTEL_PT_PTWRITE: 696 case INTEL_PT_PTWRITE:
666 case INTEL_PT_PTWRITE_IP: 697 case INTEL_PT_PTWRITE_IP:
698 case INTEL_PT_BBP:
699 case INTEL_PT_BIP:
700 case INTEL_PT_BEP:
701 case INTEL_PT_BEP_IP:
667 return 0; 702 return 0;
668 703
669 case INTEL_PT_MTC: 704 case INTEL_PT_MTC:
@@ -850,13 +885,14 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
850 decoder->len -= decoder->pkt_step; 885 decoder->len -= decoder->pkt_step;
851 886
852 if (!decoder->len) { 887 if (!decoder->len) {
853 ret = intel_pt_get_next_data(decoder); 888 ret = intel_pt_get_next_data(decoder, false);
854 if (ret) 889 if (ret)
855 return ret; 890 return ret;
856 } 891 }
857 892
893 decoder->prev_pkt_ctx = decoder->pkt_ctx;
858 ret = intel_pt_get_packet(decoder->buf, decoder->len, 894 ret = intel_pt_get_packet(decoder->buf, decoder->len,
859 &decoder->packet); 895 &decoder->packet, &decoder->pkt_ctx);
860 if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 && 896 if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
861 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { 897 decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
862 ret = intel_pt_get_split_packet(decoder); 898 ret = intel_pt_get_split_packet(decoder);
@@ -1094,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
1094 decoder->state.to_ip = 0; 1130 decoder->state.to_ip = 0;
1095 ret = true; 1131 ret = true;
1096 } 1132 }
1133 if (decoder->set_fup_bep) {
1134 decoder->set_fup_bep = false;
1135 decoder->state.type |= INTEL_PT_BLK_ITEMS;
1136 decoder->state.type &= ~INTEL_PT_BRANCH;
1137 decoder->state.from_ip = decoder->ip;
1138 decoder->state.to_ip = 0;
1139 ret = true;
1140 }
1097 return ret; 1141 return ret;
1098} 1142}
1099 1143
@@ -1308,10 +1352,10 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1308 decoder->ip += intel_pt_insn.length; 1352 decoder->ip += intel_pt_insn.length;
1309 return 0; 1353 return 0;
1310 } 1354 }
1355 decoder->sample_cyc = false;
1311 decoder->ip += intel_pt_insn.length; 1356 decoder->ip += intel_pt_insn.length;
1312 if (!decoder->tnt.count) { 1357 if (!decoder->tnt.count) {
1313 decoder->sample_timestamp = decoder->timestamp; 1358 intel_pt_update_sample_time(decoder);
1314 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
1315 return -EAGAIN; 1359 return -EAGAIN;
1316 } 1360 }
1317 decoder->tnt.payload <<= 1; 1361 decoder->tnt.payload <<= 1;
@@ -1345,6 +1389,21 @@ static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
1345 return 0; 1389 return 0;
1346} 1390}
1347 1391
1392static uint64_t intel_pt_8b_tsc(uint64_t timestamp, uint64_t ref_timestamp)
1393{
1394 timestamp |= (ref_timestamp & (0xffULL << 56));
1395
1396 if (timestamp < ref_timestamp) {
1397 if (ref_timestamp - timestamp > (1ULL << 55))
1398 timestamp += (1ULL << 56);
1399 } else {
1400 if (timestamp - ref_timestamp > (1ULL << 55))
1401 timestamp -= (1ULL << 56);
1402 }
1403
1404 return timestamp;
1405}
1406
1348static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) 1407static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
1349{ 1408{
1350 uint64_t timestamp; 1409 uint64_t timestamp;
@@ -1352,15 +1411,8 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
1352 decoder->have_tma = false; 1411 decoder->have_tma = false;
1353 1412
1354 if (decoder->ref_timestamp) { 1413 if (decoder->ref_timestamp) {
1355 timestamp = decoder->packet.payload | 1414 timestamp = intel_pt_8b_tsc(decoder->packet.payload,
1356 (decoder->ref_timestamp & (0xffULL << 56)); 1415 decoder->ref_timestamp);
1357 if (timestamp < decoder->ref_timestamp) {
1358 if (decoder->ref_timestamp - timestamp > (1ULL << 55))
1359 timestamp += (1ULL << 56);
1360 } else {
1361 if (timestamp - decoder->ref_timestamp > (1ULL << 55))
1362 timestamp -= (1ULL << 56);
1363 }
1364 decoder->tsc_timestamp = timestamp; 1416 decoder->tsc_timestamp = timestamp;
1365 decoder->timestamp = timestamp; 1417 decoder->timestamp = timestamp;
1366 decoder->ref_timestamp = 0; 1418 decoder->ref_timestamp = 0;
@@ -1404,6 +1456,42 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
1404 return -EOVERFLOW; 1456 return -EOVERFLOW;
1405} 1457}
1406 1458
1459static inline void intel_pt_mtc_cyc_cnt_pge(struct intel_pt_decoder *decoder)
1460{
1461 if (decoder->have_cyc)
1462 return;
1463
1464 decoder->cyc_cnt_timestamp = decoder->timestamp;
1465 decoder->base_cyc_cnt = decoder->tot_cyc_cnt;
1466}
1467
1468static inline void intel_pt_mtc_cyc_cnt_cbr(struct intel_pt_decoder *decoder)
1469{
1470 decoder->tsc_to_cyc = decoder->cbr / decoder->max_non_turbo_ratio_fp;
1471
1472 if (decoder->pge)
1473 intel_pt_mtc_cyc_cnt_pge(decoder);
1474}
1475
1476static inline void intel_pt_mtc_cyc_cnt_upd(struct intel_pt_decoder *decoder)
1477{
1478 uint64_t tot_cyc_cnt, tsc_delta;
1479
1480 if (decoder->have_cyc)
1481 return;
1482
1483 decoder->sample_cyc = true;
1484
1485 if (!decoder->pge || decoder->timestamp <= decoder->cyc_cnt_timestamp)
1486 return;
1487
1488 tsc_delta = decoder->timestamp - decoder->cyc_cnt_timestamp;
1489 tot_cyc_cnt = tsc_delta * decoder->tsc_to_cyc + decoder->base_cyc_cnt;
1490
1491 if (tot_cyc_cnt > decoder->tot_cyc_cnt)
1492 decoder->tot_cyc_cnt = tot_cyc_cnt;
1493}
1494
1407static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) 1495static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
1408{ 1496{
1409 uint32_t ctc = decoder->packet.payload; 1497 uint32_t ctc = decoder->packet.payload;
@@ -1413,6 +1501,11 @@ static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
1413 if (!decoder->tsc_ctc_ratio_d) 1501 if (!decoder->tsc_ctc_ratio_d)
1414 return; 1502 return;
1415 1503
1504 if (decoder->pge && !decoder->in_psb)
1505 intel_pt_mtc_cyc_cnt_pge(decoder);
1506 else
1507 intel_pt_mtc_cyc_cnt_upd(decoder);
1508
1416 decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; 1509 decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
1417 decoder->ctc_timestamp = decoder->tsc_timestamp - fc; 1510 decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
1418 if (decoder->tsc_ctc_mult) { 1511 if (decoder->tsc_ctc_mult) {
@@ -1468,6 +1561,8 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
1468 else 1561 else
1469 decoder->timestamp = timestamp; 1562 decoder->timestamp = timestamp;
1470 1563
1564 intel_pt_mtc_cyc_cnt_upd(decoder);
1565
1471 decoder->timestamp_insn_cnt = 0; 1566 decoder->timestamp_insn_cnt = 0;
1472 decoder->last_mtc = mtc; 1567 decoder->last_mtc = mtc;
1473 1568
@@ -1492,6 +1587,8 @@ static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
1492 1587
1493 decoder->cbr = cbr; 1588 decoder->cbr = cbr;
1494 decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; 1589 decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
1590
1591 intel_pt_mtc_cyc_cnt_cbr(decoder);
1495} 1592}
1496 1593
1497static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) 1594static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
@@ -1501,6 +1598,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1501 decoder->have_cyc = true; 1598 decoder->have_cyc = true;
1502 1599
1503 decoder->cycle_cnt += decoder->packet.payload; 1600 decoder->cycle_cnt += decoder->packet.payload;
1601 if (decoder->pge)
1602 decoder->tot_cyc_cnt += decoder->packet.payload;
1603 decoder->sample_cyc = true;
1504 1604
1505 if (!decoder->cyc_ref_timestamp) 1605 if (!decoder->cyc_ref_timestamp)
1506 return; 1606 return;
@@ -1523,19 +1623,62 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
1523 intel_pt_log_to("Setting timestamp", decoder->timestamp); 1623 intel_pt_log_to("Setting timestamp", decoder->timestamp);
1524} 1624}
1525 1625
1626static void intel_pt_bbp(struct intel_pt_decoder *decoder)
1627{
1628 if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
1629 memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
1630 decoder->state.items.is_32_bit = false;
1631 }
1632 decoder->blk_type = decoder->packet.payload;
1633 decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
1634 if (decoder->blk_type == INTEL_PT_GP_REGS)
1635 decoder->state.items.is_32_bit = decoder->packet.count;
1636 if (decoder->blk_type_pos < 0) {
1637 intel_pt_log("WARNING: Unknown block type %u\n",
1638 decoder->blk_type);
1639 } else if (decoder->state.items.mask[decoder->blk_type_pos]) {
1640 intel_pt_log("WARNING: Duplicate block type %u\n",
1641 decoder->blk_type);
1642 }
1643}
1644
1645static void intel_pt_bip(struct intel_pt_decoder *decoder)
1646{
1647 uint32_t id = decoder->packet.count;
1648 uint32_t bit = 1 << id;
1649 int pos = decoder->blk_type_pos;
1650
1651 if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
1652 intel_pt_log("WARNING: Unknown block item %u type %d\n",
1653 id, decoder->blk_type);
1654 return;
1655 }
1656
1657 if (decoder->state.items.mask[pos] & bit) {
1658 intel_pt_log("WARNING: Duplicate block item %u type %d\n",
1659 id, decoder->blk_type);
1660 }
1661
1662 decoder->state.items.mask[pos] |= bit;
1663 decoder->state.items.val[pos][id] = decoder->packet.payload;
1664}
1665
1526/* Walk PSB+ packets when already in sync. */ 1666/* Walk PSB+ packets when already in sync. */
1527static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) 1667static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1528{ 1668{
1529 int err; 1669 int err;
1530 1670
1671 decoder->in_psb = true;
1672
1531 while (1) { 1673 while (1) {
1532 err = intel_pt_get_next_packet(decoder); 1674 err = intel_pt_get_next_packet(decoder);
1533 if (err) 1675 if (err)
1534 return err; 1676 goto out;
1535 1677
1536 switch (decoder->packet.type) { 1678 switch (decoder->packet.type) {
1537 case INTEL_PT_PSBEND: 1679 case INTEL_PT_PSBEND:
1538 return 0; 1680 err = 0;
1681 goto out;
1539 1682
1540 case INTEL_PT_TIP_PGD: 1683 case INTEL_PT_TIP_PGD:
1541 case INTEL_PT_TIP_PGE: 1684 case INTEL_PT_TIP_PGE:
@@ -1551,12 +1694,18 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1551 case INTEL_PT_MWAIT: 1694 case INTEL_PT_MWAIT:
1552 case INTEL_PT_PWRE: 1695 case INTEL_PT_PWRE:
1553 case INTEL_PT_PWRX: 1696 case INTEL_PT_PWRX:
1697 case INTEL_PT_BBP:
1698 case INTEL_PT_BIP:
1699 case INTEL_PT_BEP:
1700 case INTEL_PT_BEP_IP:
1554 decoder->have_tma = false; 1701 decoder->have_tma = false;
1555 intel_pt_log("ERROR: Unexpected packet\n"); 1702 intel_pt_log("ERROR: Unexpected packet\n");
1556 return -EAGAIN; 1703 err = -EAGAIN;
1704 goto out;
1557 1705
1558 case INTEL_PT_OVF: 1706 case INTEL_PT_OVF:
1559 return intel_pt_overflow(decoder); 1707 err = intel_pt_overflow(decoder);
1708 goto out;
1560 1709
1561 case INTEL_PT_TSC: 1710 case INTEL_PT_TSC:
1562 intel_pt_calc_tsc_timestamp(decoder); 1711 intel_pt_calc_tsc_timestamp(decoder);
@@ -1602,6 +1751,10 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1602 break; 1751 break;
1603 } 1752 }
1604 } 1753 }
1754out:
1755 decoder->in_psb = false;
1756
1757 return err;
1605} 1758}
1606 1759
1607static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) 1760static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
@@ -1638,6 +1791,10 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1638 case INTEL_PT_MWAIT: 1791 case INTEL_PT_MWAIT:
1639 case INTEL_PT_PWRE: 1792 case INTEL_PT_PWRE:
1640 case INTEL_PT_PWRX: 1793 case INTEL_PT_PWRX:
1794 case INTEL_PT_BBP:
1795 case INTEL_PT_BIP:
1796 case INTEL_PT_BEP:
1797 case INTEL_PT_BEP_IP:
1641 intel_pt_log("ERROR: Missing TIP after FUP\n"); 1798 intel_pt_log("ERROR: Missing TIP after FUP\n");
1642 decoder->pkt_state = INTEL_PT_STATE_ERR3; 1799 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1643 decoder->pkt_step = 0; 1800 decoder->pkt_step = 0;
@@ -1675,6 +1832,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1675 decoder->state.to_ip = decoder->ip; 1832 decoder->state.to_ip = decoder->ip;
1676 } 1833 }
1677 decoder->state.type |= INTEL_PT_TRACE_BEGIN; 1834 decoder->state.type |= INTEL_PT_TRACE_BEGIN;
1835 intel_pt_mtc_cyc_cnt_pge(decoder);
1678 return 0; 1836 return 0;
1679 1837
1680 case INTEL_PT_TIP: 1838 case INTEL_PT_TIP:
@@ -1745,6 +1903,7 @@ next:
1745 1903
1746 case INTEL_PT_TIP_PGE: { 1904 case INTEL_PT_TIP_PGE: {
1747 decoder->pge = true; 1905 decoder->pge = true;
1906 intel_pt_mtc_cyc_cnt_pge(decoder);
1748 if (decoder->packet.count == 0) { 1907 if (decoder->packet.count == 0) {
1749 intel_pt_log_at("Skipping zero TIP.PGE", 1908 intel_pt_log_at("Skipping zero TIP.PGE",
1750 decoder->pos); 1909 decoder->pos);
@@ -1816,6 +1975,13 @@ next:
1816 goto next; 1975 goto next;
1817 if (err) 1976 if (err)
1818 return err; 1977 return err;
1978 /*
1979 * PSB+ CBR will not have changed but cater for the
1980 * possibility of another CBR change that gets caught up
1981 * in the PSB+.
1982 */
1983 if (decoder->cbr != decoder->cbr_seen)
1984 return 0;
1819 break; 1985 break;
1820 1986
1821 case INTEL_PT_PIP: 1987 case INTEL_PT_PIP:
@@ -1856,16 +2022,8 @@ next:
1856 2022
1857 case INTEL_PT_CBR: 2023 case INTEL_PT_CBR:
1858 intel_pt_calc_cbr(decoder); 2024 intel_pt_calc_cbr(decoder);
1859 if (!decoder->branch_enable && 2025 if (decoder->cbr != decoder->cbr_seen)
1860 decoder->cbr != decoder->cbr_seen) {
1861 decoder->cbr_seen = decoder->cbr;
1862 decoder->state.type = INTEL_PT_CBR_CHG;
1863 decoder->state.from_ip = decoder->ip;
1864 decoder->state.to_ip = 0;
1865 decoder->state.cbr_payload =
1866 decoder->packet.payload;
1867 return 0; 2026 return 0;
1868 }
1869 break; 2027 break;
1870 2028
1871 case INTEL_PT_MODE_EXEC: 2029 case INTEL_PT_MODE_EXEC:
@@ -1957,6 +2115,33 @@ next:
1957 decoder->state.pwrx_payload = decoder->packet.payload; 2115 decoder->state.pwrx_payload = decoder->packet.payload;
1958 return 0; 2116 return 0;
1959 2117
2118 case INTEL_PT_BBP:
2119 intel_pt_bbp(decoder);
2120 break;
2121
2122 case INTEL_PT_BIP:
2123 intel_pt_bip(decoder);
2124 break;
2125
2126 case INTEL_PT_BEP:
2127 decoder->state.type = INTEL_PT_BLK_ITEMS;
2128 decoder->state.from_ip = decoder->ip;
2129 decoder->state.to_ip = 0;
2130 return 0;
2131
2132 case INTEL_PT_BEP_IP:
2133 err = intel_pt_get_next_packet(decoder);
2134 if (err)
2135 return err;
2136 if (decoder->packet.type == INTEL_PT_FUP) {
2137 decoder->set_fup_bep = true;
2138 no_tip = true;
2139 } else {
2140 intel_pt_log_at("ERROR: Missing FUP after BEP",
2141 decoder->pos);
2142 }
2143 goto next;
2144
1960 default: 2145 default:
1961 return intel_pt_bug(decoder); 2146 return intel_pt_bug(decoder);
1962 } 2147 }
@@ -1975,10 +2160,12 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
1975{ 2160{
1976 int err; 2161 int err;
1977 2162
2163 decoder->in_psb = true;
2164
1978 while (1) { 2165 while (1) {
1979 err = intel_pt_get_next_packet(decoder); 2166 err = intel_pt_get_next_packet(decoder);
1980 if (err) 2167 if (err)
1981 return err; 2168 goto out;
1982 2169
1983 switch (decoder->packet.type) { 2170 switch (decoder->packet.type) {
1984 case INTEL_PT_TIP_PGD: 2171 case INTEL_PT_TIP_PGD:
@@ -1993,8 +2180,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
1993 case INTEL_PT_MWAIT: 2180 case INTEL_PT_MWAIT:
1994 case INTEL_PT_PWRE: 2181 case INTEL_PT_PWRE:
1995 case INTEL_PT_PWRX: 2182 case INTEL_PT_PWRX:
2183 case INTEL_PT_BBP:
2184 case INTEL_PT_BIP:
2185 case INTEL_PT_BEP:
2186 case INTEL_PT_BEP_IP:
1996 intel_pt_log("ERROR: Unexpected packet\n"); 2187 intel_pt_log("ERROR: Unexpected packet\n");
1997 return -ENOENT; 2188 err = -ENOENT;
2189 goto out;
1998 2190
1999 case INTEL_PT_FUP: 2191 case INTEL_PT_FUP:
2000 decoder->pge = true; 2192 decoder->pge = true;
@@ -2053,16 +2245,20 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
2053 decoder->pkt_state = INTEL_PT_STATE_ERR4; 2245 decoder->pkt_state = INTEL_PT_STATE_ERR4;
2054 else 2246 else
2055 decoder->pkt_state = INTEL_PT_STATE_ERR3; 2247 decoder->pkt_state = INTEL_PT_STATE_ERR3;
2056 return -ENOENT; 2248 err = -ENOENT;
2249 goto out;
2057 2250
2058 case INTEL_PT_BAD: /* Does not happen */ 2251 case INTEL_PT_BAD: /* Does not happen */
2059 return intel_pt_bug(decoder); 2252 err = intel_pt_bug(decoder);
2253 goto out;
2060 2254
2061 case INTEL_PT_OVF: 2255 case INTEL_PT_OVF:
2062 return intel_pt_overflow(decoder); 2256 err = intel_pt_overflow(decoder);
2257 goto out;
2063 2258
2064 case INTEL_PT_PSBEND: 2259 case INTEL_PT_PSBEND:
2065 return 0; 2260 err = 0;
2261 goto out;
2066 2262
2067 case INTEL_PT_PSB: 2263 case INTEL_PT_PSB:
2068 case INTEL_PT_VMCS: 2264 case INTEL_PT_VMCS:
@@ -2072,6 +2268,10 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
2072 break; 2268 break;
2073 } 2269 }
2074 } 2270 }
2271out:
2272 decoder->in_psb = false;
2273
2274 return err;
2075} 2275}
2076 2276
2077static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) 2277static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
@@ -2086,18 +2286,30 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
2086 switch (decoder->packet.type) { 2286 switch (decoder->packet.type) {
2087 case INTEL_PT_TIP_PGD: 2287 case INTEL_PT_TIP_PGD:
2088 decoder->continuous_period = false; 2288 decoder->continuous_period = false;
2089 __fallthrough; 2289 decoder->pge = false;
2290 if (intel_pt_have_ip(decoder))
2291 intel_pt_set_ip(decoder);
2292 if (!decoder->ip)
2293 break;
2294 decoder->state.type |= INTEL_PT_TRACE_END;
2295 return 0;
2296
2090 case INTEL_PT_TIP_PGE: 2297 case INTEL_PT_TIP_PGE:
2298 decoder->pge = true;
2299 intel_pt_mtc_cyc_cnt_pge(decoder);
2300 if (intel_pt_have_ip(decoder))
2301 intel_pt_set_ip(decoder);
2302 if (!decoder->ip)
2303 break;
2304 decoder->state.type |= INTEL_PT_TRACE_BEGIN;
2305 return 0;
2306
2091 case INTEL_PT_TIP: 2307 case INTEL_PT_TIP:
2092 decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; 2308 decoder->pge = true;
2093 if (intel_pt_have_ip(decoder)) 2309 if (intel_pt_have_ip(decoder))
2094 intel_pt_set_ip(decoder); 2310 intel_pt_set_ip(decoder);
2095 if (!decoder->ip) 2311 if (!decoder->ip)
2096 break; 2312 break;
2097 if (decoder->packet.type == INTEL_PT_TIP_PGE)
2098 decoder->state.type |= INTEL_PT_TRACE_BEGIN;
2099 if (decoder->packet.type == INTEL_PT_TIP_PGD)
2100 decoder->state.type |= INTEL_PT_TRACE_END;
2101 return 0; 2313 return 0;
2102 2314
2103 case INTEL_PT_FUP: 2315 case INTEL_PT_FUP:
@@ -2178,6 +2390,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
2178 case INTEL_PT_MWAIT: 2390 case INTEL_PT_MWAIT:
2179 case INTEL_PT_PWRE: 2391 case INTEL_PT_PWRE:
2180 case INTEL_PT_PWRX: 2392 case INTEL_PT_PWRX:
2393 case INTEL_PT_BBP:
2394 case INTEL_PT_BIP:
2395 case INTEL_PT_BEP:
2396 case INTEL_PT_BEP_IP:
2181 default: 2397 default:
2182 break; 2398 break;
2183 } 2399 }
@@ -2193,6 +2409,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
2193 decoder->set_fup_mwait = false; 2409 decoder->set_fup_mwait = false;
2194 decoder->set_fup_pwre = false; 2410 decoder->set_fup_pwre = false;
2195 decoder->set_fup_exstop = false; 2411 decoder->set_fup_exstop = false;
2412 decoder->set_fup_bep = false;
2196 2413
2197 if (!decoder->branch_enable) { 2414 if (!decoder->branch_enable) {
2198 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 2415 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
@@ -2250,7 +2467,7 @@ static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
2250 decoder->pos += decoder->len; 2467 decoder->pos += decoder->len;
2251 decoder->len = 0; 2468 decoder->len = 0;
2252 2469
2253 ret = intel_pt_get_next_data(decoder); 2470 ret = intel_pt_get_next_data(decoder, false);
2254 if (ret) 2471 if (ret)
2255 return ret; 2472 return ret;
2256 2473
@@ -2276,7 +2493,7 @@ static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
2276 intel_pt_log("Scanning for PSB\n"); 2493 intel_pt_log("Scanning for PSB\n");
2277 while (1) { 2494 while (1) {
2278 if (!decoder->len) { 2495 if (!decoder->len) {
2279 ret = intel_pt_get_next_data(decoder); 2496 ret = intel_pt_get_next_data(decoder, false);
2280 if (ret) 2497 if (ret)
2281 return ret; 2498 return ret;
2282 } 2499 }
@@ -2404,18 +2621,24 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2404 if (err) { 2621 if (err) {
2405 decoder->state.err = intel_pt_ext_err(err); 2622 decoder->state.err = intel_pt_ext_err(err);
2406 decoder->state.from_ip = decoder->ip; 2623 decoder->state.from_ip = decoder->ip;
2407 decoder->sample_timestamp = decoder->timestamp; 2624 intel_pt_update_sample_time(decoder);
2408 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; 2625 decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2409 } else { 2626 } else {
2410 decoder->state.err = 0; 2627 decoder->state.err = 0;
2411 if (decoder->cbr != decoder->cbr_seen && decoder->state.type) { 2628 if (decoder->cbr != decoder->cbr_seen) {
2412 decoder->cbr_seen = decoder->cbr; 2629 decoder->cbr_seen = decoder->cbr;
2630 if (!decoder->state.type) {
2631 decoder->state.from_ip = decoder->ip;
2632 decoder->state.to_ip = 0;
2633 }
2413 decoder->state.type |= INTEL_PT_CBR_CHG; 2634 decoder->state.type |= INTEL_PT_CBR_CHG;
2414 decoder->state.cbr_payload = decoder->cbr_payload; 2635 decoder->state.cbr_payload = decoder->cbr_payload;
2636 decoder->state.cbr = decoder->cbr;
2415 } 2637 }
2416 if (intel_pt_sample_time(decoder->pkt_state)) { 2638 if (intel_pt_sample_time(decoder->pkt_state)) {
2417 decoder->sample_timestamp = decoder->timestamp; 2639 intel_pt_update_sample_time(decoder);
2418 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; 2640 if (decoder->sample_cyc)
2641 decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
2419 } 2642 }
2420 } 2643 }
2421 2644
@@ -2423,6 +2646,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2423 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); 2646 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
2424 decoder->state.cr3 = decoder->cr3; 2647 decoder->state.cr3 = decoder->cr3;
2425 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; 2648 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
2649 decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
2426 2650
2427 return &decoder->state; 2651 return &decoder->state;
2428} 2652}
@@ -2526,11 +2750,12 @@ static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
2526static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc, 2750static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
2527 size_t *rem) 2751 size_t *rem)
2528{ 2752{
2753 enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
2529 struct intel_pt_pkt packet; 2754 struct intel_pt_pkt packet;
2530 int ret; 2755 int ret;
2531 2756
2532 while (len) { 2757 while (len) {
2533 ret = intel_pt_get_packet(buf, len, &packet); 2758 ret = intel_pt_get_packet(buf, len, &packet, &ctx);
2534 if (ret <= 0) 2759 if (ret <= 0)
2535 return false; 2760 return false;
2536 if (packet.type == INTEL_PT_TSC) { 2761 if (packet.type == INTEL_PT_TSC) {
@@ -2732,3 +2957,131 @@ unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
2732 return buf_b; /* No overlap */ 2957 return buf_b; /* No overlap */
2733 } 2958 }
2734} 2959}
2960
2961/**
2962 * struct fast_forward_data - data used by intel_pt_ff_cb().
2963 * @timestamp: timestamp to fast forward towards
2964 * @buf_timestamp: buffer timestamp of last buffer with trace data earlier than
2965 * the fast forward timestamp.
2966 */
2967struct fast_forward_data {
2968 uint64_t timestamp;
2969 uint64_t buf_timestamp;
2970};
2971
2972/**
2973 * intel_pt_ff_cb - fast forward lookahead callback.
2974 * @buffer: Intel PT trace buffer
2975 * @data: opaque pointer to fast forward data (struct fast_forward_data)
2976 *
2977 * Determine if @buffer trace is past the fast forward timestamp.
2978 *
2979 * Return: 1 (stop lookahead) if @buffer trace is past the fast forward
2980 * timestamp, and 0 otherwise.
2981 */
2982static int intel_pt_ff_cb(struct intel_pt_buffer *buffer, void *data)
2983{
2984 struct fast_forward_data *d = data;
2985 unsigned char *buf;
2986 uint64_t tsc;
2987 size_t rem;
2988 size_t len;
2989
2990 buf = (unsigned char *)buffer->buf;
2991 len = buffer->len;
2992
2993 if (!intel_pt_next_psb(&buf, &len) ||
2994 !intel_pt_next_tsc(buf, len, &tsc, &rem))
2995 return 0;
2996
2997 tsc = intel_pt_8b_tsc(tsc, buffer->ref_timestamp);
2998
2999 intel_pt_log("Buffer 1st timestamp " x64_fmt " ref timestamp " x64_fmt "\n",
3000 tsc, buffer->ref_timestamp);
3001
3002 /*
3003 * If the buffer contains a timestamp earlier that the fast forward
3004 * timestamp, then record it, else stop.
3005 */
3006 if (tsc < d->timestamp)
3007 d->buf_timestamp = buffer->ref_timestamp;
3008 else
3009 return 1;
3010
3011 return 0;
3012}
3013
3014/**
3015 * intel_pt_fast_forward - reposition decoder forwards.
3016 * @decoder: Intel PT decoder
3017 * @timestamp: timestamp to fast forward towards
3018 *
3019 * Reposition decoder at the last PSB with a timestamp earlier than @timestamp.
3020 *
3021 * Return: 0 on success or negative error code on failure.
3022 */
3023int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp)
3024{
3025 struct fast_forward_data d = { .timestamp = timestamp };
3026 unsigned char *buf;
3027 size_t len;
3028 int err;
3029
3030 intel_pt_log("Fast forward towards timestamp " x64_fmt "\n", timestamp);
3031
3032 /* Find buffer timestamp of buffer to fast forward to */
3033 err = decoder->lookahead(decoder->data, intel_pt_ff_cb, &d);
3034 if (err < 0)
3035 return err;
3036
3037 /* Walk to buffer with same buffer timestamp */
3038 if (d.buf_timestamp) {
3039 do {
3040 decoder->pos += decoder->len;
3041 decoder->len = 0;
3042 err = intel_pt_get_next_data(decoder, true);
3043 /* -ENOLINK means non-consecutive trace */
3044 if (err && err != -ENOLINK)
3045 return err;
3046 } while (decoder->buf_timestamp != d.buf_timestamp);
3047 }
3048
3049 if (!decoder->buf)
3050 return 0;
3051
3052 buf = (unsigned char *)decoder->buf;
3053 len = decoder->len;
3054
3055 if (!intel_pt_next_psb(&buf, &len))
3056 return 0;
3057
3058 /*
3059 * Walk PSBs while the PSB timestamp is less than the fast forward
3060 * timestamp.
3061 */
3062 do {
3063 uint64_t tsc;
3064 size_t rem;
3065
3066 if (!intel_pt_next_tsc(buf, len, &tsc, &rem))
3067 break;
3068 tsc = intel_pt_8b_tsc(tsc, decoder->buf_timestamp);
3069 /*
3070 * A TSC packet can slip past MTC packets but, after fast
3071 * forward, decoding starts at the TSC timestamp. That means
3072 * the timestamps may not be exactly the same as the timestamps
3073 * that would have been decoded without fast forward.
3074 */
3075 if (tsc < timestamp) {
3076 intel_pt_log("Fast forward to next PSB timestamp " x64_fmt "\n", tsc);
3077 decoder->pos += decoder->len - len;
3078 decoder->buf = buf;
3079 decoder->len = len;
3080 intel_pt_reposition(decoder);
3081 } else {
3082 break;
3083 }
3084 } while (intel_pt_step_psb(&buf, &len));
3085
3086 return 0;
3087}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 1e8cfdc7bfab..e289e463d635 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -30,6 +30,7 @@ enum intel_pt_sample_type {
30 INTEL_PT_CBR_CHG = 1 << 8, 30 INTEL_PT_CBR_CHG = 1 << 8,
31 INTEL_PT_TRACE_BEGIN = 1 << 9, 31 INTEL_PT_TRACE_BEGIN = 1 << 9,
32 INTEL_PT_TRACE_END = 1 << 10, 32 INTEL_PT_TRACE_END = 1 << 10,
33 INTEL_PT_BLK_ITEMS = 1 << 11,
33}; 34};
34 35
35enum intel_pt_period_type { 36enum intel_pt_period_type {
@@ -61,6 +62,141 @@ enum intel_pt_param_flags {
61 INTEL_PT_FUP_WITH_NLIP = 1 << 0, 62 INTEL_PT_FUP_WITH_NLIP = 1 << 0,
62}; 63};
63 64
65enum intel_pt_blk_type {
66 INTEL_PT_GP_REGS = 1,
67 INTEL_PT_PEBS_BASIC = 4,
68 INTEL_PT_PEBS_MEM = 5,
69 INTEL_PT_LBR_0 = 8,
70 INTEL_PT_LBR_1 = 9,
71 INTEL_PT_LBR_2 = 10,
72 INTEL_PT_XMM = 16,
73 INTEL_PT_BLK_TYPE_MAX
74};
75
76/*
77 * The block type numbers are not sequential but here they are given sequential
78 * positions to avoid wasting space for array placement.
79 */
80enum intel_pt_blk_type_pos {
81 INTEL_PT_GP_REGS_POS,
82 INTEL_PT_PEBS_BASIC_POS,
83 INTEL_PT_PEBS_MEM_POS,
84 INTEL_PT_LBR_0_POS,
85 INTEL_PT_LBR_1_POS,
86 INTEL_PT_LBR_2_POS,
87 INTEL_PT_XMM_POS,
88 INTEL_PT_BLK_TYPE_CNT
89};
90
91/* Get the array position for a block type */
92static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
93{
94#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
95 const int map[INTEL_PT_BLK_TYPE_MAX] = {
96 BLK_TYPE(GP_REGS),
97 BLK_TYPE(PEBS_BASIC),
98 BLK_TYPE(PEBS_MEM),
99 BLK_TYPE(LBR_0),
100 BLK_TYPE(LBR_1),
101 BLK_TYPE(LBR_2),
102 BLK_TYPE(XMM),
103 };
104#undef BLK_TYPE
105
106 return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1;
107}
108
109#define INTEL_PT_BLK_ITEM_ID_CNT 32
110
111/*
112 * Use unions so that the block items can be accessed by name or by array index.
113 * There is an array of 32-bit masks for each block type, which indicate which
114 * values are present. Then arrays of 32 64-bit values for each block type.
115 */
116struct intel_pt_blk_items {
117 union {
118 uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
119 struct {
120 uint32_t has_rflags:1;
121 uint32_t has_rip:1;
122 uint32_t has_rax:1;
123 uint32_t has_rcx:1;
124 uint32_t has_rdx:1;
125 uint32_t has_rbx:1;
126 uint32_t has_rsp:1;
127 uint32_t has_rbp:1;
128 uint32_t has_rsi:1;
129 uint32_t has_rdi:1;
130 uint32_t has_r8:1;
131 uint32_t has_r9:1;
132 uint32_t has_r10:1;
133 uint32_t has_r11:1;
134 uint32_t has_r12:1;
135 uint32_t has_r13:1;
136 uint32_t has_r14:1;
137 uint32_t has_r15:1;
138 uint32_t has_unused_0:14;
139 uint32_t has_ip:1;
140 uint32_t has_applicable_counters:1;
141 uint32_t has_timestamp:1;
142 uint32_t has_unused_1:29;
143 uint32_t has_mem_access_address:1;
144 uint32_t has_mem_aux_info:1;
145 uint32_t has_mem_access_latency:1;
146 uint32_t has_tsx_aux_info:1;
147 uint32_t has_unused_2:28;
148 uint32_t has_lbr_0;
149 uint32_t has_lbr_1;
150 uint32_t has_lbr_2;
151 uint32_t has_xmm;
152 };
153 };
154 union {
155 uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
156 struct {
157 struct {
158 uint64_t rflags;
159 uint64_t rip;
160 uint64_t rax;
161 uint64_t rcx;
162 uint64_t rdx;
163 uint64_t rbx;
164 uint64_t rsp;
165 uint64_t rbp;
166 uint64_t rsi;
167 uint64_t rdi;
168 uint64_t r8;
169 uint64_t r9;
170 uint64_t r10;
171 uint64_t r11;
172 uint64_t r12;
173 uint64_t r13;
174 uint64_t r14;
175 uint64_t r15;
176 uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
177 };
178 struct {
179 uint64_t ip;
180 uint64_t applicable_counters;
181 uint64_t timestamp;
182 uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
183 };
184 struct {
185 uint64_t mem_access_address;
186 uint64_t mem_aux_info;
187 uint64_t mem_access_latency;
188 uint64_t tsx_aux_info;
189 uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
190 };
191 uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
192 uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
193 uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
194 uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
195 };
196 };
197 bool is_32_bit;
198};
199
64struct intel_pt_state { 200struct intel_pt_state {
65 enum intel_pt_sample_type type; 201 enum intel_pt_sample_type type;
66 int err; 202 int err;
@@ -68,6 +204,7 @@ struct intel_pt_state {
68 uint64_t to_ip; 204 uint64_t to_ip;
69 uint64_t cr3; 205 uint64_t cr3;
70 uint64_t tot_insn_cnt; 206 uint64_t tot_insn_cnt;
207 uint64_t tot_cyc_cnt;
71 uint64_t timestamp; 208 uint64_t timestamp;
72 uint64_t est_timestamp; 209 uint64_t est_timestamp;
73 uint64_t trace_nr; 210 uint64_t trace_nr;
@@ -76,10 +213,12 @@ struct intel_pt_state {
76 uint64_t pwre_payload; 213 uint64_t pwre_payload;
77 uint64_t pwrx_payload; 214 uint64_t pwrx_payload;
78 uint64_t cbr_payload; 215 uint64_t cbr_payload;
216 uint32_t cbr;
79 uint32_t flags; 217 uint32_t flags;
80 enum intel_pt_insn_op insn_op; 218 enum intel_pt_insn_op insn_op;
81 int insn_len; 219 int insn_len;
82 char insn[INTEL_PT_INSN_BUF_SZ]; 220 char insn[INTEL_PT_INSN_BUF_SZ];
221 struct intel_pt_blk_items items;
83}; 222};
84 223
85struct intel_pt_insn; 224struct intel_pt_insn;
@@ -92,12 +231,15 @@ struct intel_pt_buffer {
92 uint64_t trace_nr; 231 uint64_t trace_nr;
93}; 232};
94 233
234typedef int (*intel_pt_lookahead_cb_t)(struct intel_pt_buffer *, void *);
235
95struct intel_pt_params { 236struct intel_pt_params {
96 int (*get_trace)(struct intel_pt_buffer *buffer, void *data); 237 int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
97 int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, 238 int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
98 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, 239 uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
99 uint64_t max_insn_cnt, void *data); 240 uint64_t max_insn_cnt, void *data);
100 bool (*pgd_ip)(uint64_t ip, void *data); 241 bool (*pgd_ip)(uint64_t ip, void *data);
242 int (*lookahead)(void *data, intel_pt_lookahead_cb_t cb, void *cb_data);
101 void *data; 243 void *data;
102 bool return_compression; 244 bool return_compression;
103 bool branch_enable; 245 bool branch_enable;
@@ -117,6 +259,8 @@ void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
117 259
118const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); 260const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
119 261
262int intel_pt_fast_forward(struct intel_pt_decoder *decoder, uint64_t timestamp);
263
120unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, 264unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
121 unsigned char *buf_b, size_t len_b, 265 unsigned char *buf_b, size_t len_b,
122 bool have_tsc, bool *consecutive); 266 bool have_tsc, bool *consecutive);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 605fce537d80..0ccf10a0bf44 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -62,6 +62,10 @@ static const char * const packet_name[] = {
62 [INTEL_PT_MWAIT] = "MWAIT", 62 [INTEL_PT_MWAIT] = "MWAIT",
63 [INTEL_PT_PWRE] = "PWRE", 63 [INTEL_PT_PWRE] = "PWRE",
64 [INTEL_PT_PWRX] = "PWRX", 64 [INTEL_PT_PWRX] = "PWRX",
65 [INTEL_PT_BBP] = "BBP",
66 [INTEL_PT_BIP] = "BIP",
67 [INTEL_PT_BEP] = "BEP",
68 [INTEL_PT_BEP_IP] = "BEP",
65}; 69};
66 70
67const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) 71const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -280,6 +284,55 @@ static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
280 return 7; 284 return 7;
281} 285}
282 286
287static int intel_pt_get_bbp(const unsigned char *buf, size_t len,
288 struct intel_pt_pkt *packet)
289{
290 if (len < 3)
291 return INTEL_PT_NEED_MORE_BYTES;
292 packet->type = INTEL_PT_BBP;
293 packet->count = buf[2] >> 7;
294 packet->payload = buf[2] & 0x1f;
295 return 3;
296}
297
298static int intel_pt_get_bip_4(const unsigned char *buf, size_t len,
299 struct intel_pt_pkt *packet)
300{
301 if (len < 5)
302 return INTEL_PT_NEED_MORE_BYTES;
303 packet->type = INTEL_PT_BIP;
304 packet->count = buf[0] >> 3;
305 memcpy_le64(&packet->payload, buf + 1, 4);
306 return 5;
307}
308
309static int intel_pt_get_bip_8(const unsigned char *buf, size_t len,
310 struct intel_pt_pkt *packet)
311{
312 if (len < 9)
313 return INTEL_PT_NEED_MORE_BYTES;
314 packet->type = INTEL_PT_BIP;
315 packet->count = buf[0] >> 3;
316 memcpy_le64(&packet->payload, buf + 1, 8);
317 return 9;
318}
319
320static int intel_pt_get_bep(size_t len, struct intel_pt_pkt *packet)
321{
322 if (len < 2)
323 return INTEL_PT_NEED_MORE_BYTES;
324 packet->type = INTEL_PT_BEP;
325 return 2;
326}
327
328static int intel_pt_get_bep_ip(size_t len, struct intel_pt_pkt *packet)
329{
330 if (len < 2)
331 return INTEL_PT_NEED_MORE_BYTES;
332 packet->type = INTEL_PT_BEP_IP;
333 return 2;
334}
335
283static int intel_pt_get_ext(const unsigned char *buf, size_t len, 336static int intel_pt_get_ext(const unsigned char *buf, size_t len,
284 struct intel_pt_pkt *packet) 337 struct intel_pt_pkt *packet)
285{ 338{
@@ -320,6 +373,12 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
320 return intel_pt_get_pwre(buf, len, packet); 373 return intel_pt_get_pwre(buf, len, packet);
321 case 0xA2: /* PWRX */ 374 case 0xA2: /* PWRX */
322 return intel_pt_get_pwrx(buf, len, packet); 375 return intel_pt_get_pwrx(buf, len, packet);
376 case 0x63: /* BBP */
377 return intel_pt_get_bbp(buf, len, packet);
378 case 0x33: /* BEP no IP */
379 return intel_pt_get_bep(len, packet);
380 case 0xb3: /* BEP with IP */
381 return intel_pt_get_bep_ip(len, packet);
323 default: 382 default:
324 return INTEL_PT_BAD_PACKET; 383 return INTEL_PT_BAD_PACKET;
325 } 384 }
@@ -468,7 +527,8 @@ static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
468} 527}
469 528
470static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, 529static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
471 struct intel_pt_pkt *packet) 530 struct intel_pt_pkt *packet,
531 enum intel_pt_pkt_ctx ctx)
472{ 532{
473 unsigned int byte; 533 unsigned int byte;
474 534
@@ -478,6 +538,22 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
478 return INTEL_PT_NEED_MORE_BYTES; 538 return INTEL_PT_NEED_MORE_BYTES;
479 539
480 byte = buf[0]; 540 byte = buf[0];
541
542 switch (ctx) {
543 case INTEL_PT_NO_CTX:
544 break;
545 case INTEL_PT_BLK_4_CTX:
546 if ((byte & 0x7) == 4)
547 return intel_pt_get_bip_4(buf, len, packet);
548 break;
549 case INTEL_PT_BLK_8_CTX:
550 if ((byte & 0x7) == 4)
551 return intel_pt_get_bip_8(buf, len, packet);
552 break;
553 default:
554 break;
555 };
556
481 if (!(byte & BIT(0))) { 557 if (!(byte & BIT(0))) {
482 if (byte == 0) 558 if (byte == 0)
483 return intel_pt_get_pad(packet); 559 return intel_pt_get_pad(packet);
@@ -516,15 +592,65 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
516 } 592 }
517} 593}
518 594
595void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
596 enum intel_pt_pkt_ctx *ctx)
597{
598 switch (packet->type) {
599 case INTEL_PT_BAD:
600 case INTEL_PT_PAD:
601 case INTEL_PT_TSC:
602 case INTEL_PT_TMA:
603 case INTEL_PT_MTC:
604 case INTEL_PT_FUP:
605 case INTEL_PT_CYC:
606 case INTEL_PT_CBR:
607 case INTEL_PT_MNT:
608 case INTEL_PT_EXSTOP:
609 case INTEL_PT_EXSTOP_IP:
610 case INTEL_PT_PWRE:
611 case INTEL_PT_PWRX:
612 case INTEL_PT_BIP:
613 break;
614 case INTEL_PT_TNT:
615 case INTEL_PT_TIP:
616 case INTEL_PT_TIP_PGD:
617 case INTEL_PT_TIP_PGE:
618 case INTEL_PT_MODE_EXEC:
619 case INTEL_PT_MODE_TSX:
620 case INTEL_PT_PIP:
621 case INTEL_PT_OVF:
622 case INTEL_PT_VMCS:
623 case INTEL_PT_TRACESTOP:
624 case INTEL_PT_PSB:
625 case INTEL_PT_PSBEND:
626 case INTEL_PT_PTWRITE:
627 case INTEL_PT_PTWRITE_IP:
628 case INTEL_PT_MWAIT:
629 case INTEL_PT_BEP:
630 case INTEL_PT_BEP_IP:
631 *ctx = INTEL_PT_NO_CTX;
632 break;
633 case INTEL_PT_BBP:
634 if (packet->count)
635 *ctx = INTEL_PT_BLK_4_CTX;
636 else
637 *ctx = INTEL_PT_BLK_8_CTX;
638 break;
639 default:
640 break;
641 }
642}
643
519int intel_pt_get_packet(const unsigned char *buf, size_t len, 644int intel_pt_get_packet(const unsigned char *buf, size_t len,
520 struct intel_pt_pkt *packet) 645 struct intel_pt_pkt *packet, enum intel_pt_pkt_ctx *ctx)
521{ 646{
522 int ret; 647 int ret;
523 648
524 ret = intel_pt_do_get_packet(buf, len, packet); 649 ret = intel_pt_do_get_packet(buf, len, packet, *ctx);
525 if (ret > 0) { 650 if (ret > 0) {
526 while (ret < 8 && len > (size_t)ret && !buf[ret]) 651 while (ret < 8 && len > (size_t)ret && !buf[ret])
527 ret += 1; 652 ret += 1;
653 intel_pt_upd_pkt_ctx(packet, ctx);
528 } 654 }
529 return ret; 655 return ret;
530} 656}
@@ -602,8 +728,10 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
602 return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload); 728 return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
603 case INTEL_PT_PTWRITE_IP: 729 case INTEL_PT_PTWRITE_IP:
604 return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload); 730 return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
731 case INTEL_PT_BEP:
605 case INTEL_PT_EXSTOP: 732 case INTEL_PT_EXSTOP:
606 return snprintf(buf, buf_len, "%s IP:0", name); 733 return snprintf(buf, buf_len, "%s IP:0", name);
734 case INTEL_PT_BEP_IP:
607 case INTEL_PT_EXSTOP_IP: 735 case INTEL_PT_EXSTOP_IP:
608 return snprintf(buf, buf_len, "%s IP:1", name); 736 return snprintf(buf, buf_len, "%s IP:1", name);
609 case INTEL_PT_MWAIT: 737 case INTEL_PT_MWAIT:
@@ -621,6 +749,12 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
621 (unsigned int)((payload >> 4) & 0xf), 749 (unsigned int)((payload >> 4) & 0xf),
622 (unsigned int)(payload & 0xf), 750 (unsigned int)(payload & 0xf),
623 (unsigned int)((payload >> 8) & 0xf)); 751 (unsigned int)((payload >> 8) & 0xf));
752 case INTEL_PT_BBP:
753 return snprintf(buf, buf_len, "%s SZ %s-byte Type 0x%llx",
754 name, packet->count ? "4" : "8", payload);
755 case INTEL_PT_BIP:
756 return snprintf(buf, buf_len, "%s ID 0x%02x Value 0x%llx",
757 name, packet->count, payload);
624 default: 758 default:
625 break; 759 break;
626 } 760 }
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index a7aefaa08588..17ca9b56d72f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -50,6 +50,10 @@ enum intel_pt_pkt_type {
50 INTEL_PT_MWAIT, 50 INTEL_PT_MWAIT,
51 INTEL_PT_PWRE, 51 INTEL_PT_PWRE,
52 INTEL_PT_PWRX, 52 INTEL_PT_PWRX,
53 INTEL_PT_BBP,
54 INTEL_PT_BIP,
55 INTEL_PT_BEP,
56 INTEL_PT_BEP_IP,
53}; 57};
54 58
55struct intel_pt_pkt { 59struct intel_pt_pkt {
@@ -58,10 +62,25 @@ struct intel_pt_pkt {
58 uint64_t payload; 62 uint64_t payload;
59}; 63};
60 64
65/*
66 * Decoding of BIP packets conflicts with single-byte TNT packets. Since BIP
67 * packets only occur in the context of a block (i.e. between BBP and BEP), that
68 * context must be recorded and passed to the packet decoder.
69 */
70enum intel_pt_pkt_ctx {
71 INTEL_PT_NO_CTX, /* BIP packets are invalid */
72 INTEL_PT_BLK_4_CTX, /* 4-byte BIP packets */
73 INTEL_PT_BLK_8_CTX, /* 8-byte BIP packets */
74};
75
61const char *intel_pt_pkt_name(enum intel_pt_pkt_type); 76const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
62 77
63int intel_pt_get_packet(const unsigned char *buf, size_t len, 78int intel_pt_get_packet(const unsigned char *buf, size_t len,
64 struct intel_pt_pkt *packet); 79 struct intel_pt_pkt *packet,
80 enum intel_pt_pkt_ctx *ctx);
81
82void intel_pt_upd_pkt_ctx(const struct intel_pt_pkt *packet,
83 enum intel_pt_pkt_ctx *ctx);
65 84
66int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len); 85int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
67 86
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index d6f1b2a03f9b..470aaae9d930 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,9 @@
33#include "tsc.h" 33#include "tsc.h"
34#include "intel-pt.h" 34#include "intel-pt.h"
35#include "config.h" 35#include "config.h"
36#include "time-utils.h"
37
38#include "../arch/x86/include/uapi/asm/perf_regs.h"
36 39
37#include "intel-pt-decoder/intel-pt-log.h" 40#include "intel-pt-decoder/intel-pt-log.h"
38#include "intel-pt-decoder/intel-pt-decoder.h" 41#include "intel-pt-decoder/intel-pt-decoder.h"
@@ -41,6 +44,11 @@
41 44
42#define MAX_TIMESTAMP (~0ULL) 45#define MAX_TIMESTAMP (~0ULL)
43 46
47struct range {
48 u64 start;
49 u64 end;
50};
51
44struct intel_pt { 52struct intel_pt {
45 struct auxtrace auxtrace; 53 struct auxtrace auxtrace;
46 struct auxtrace_queues queues; 54 struct auxtrace_queues queues;
@@ -95,6 +103,9 @@ struct intel_pt {
95 u64 pwrx_id; 103 u64 pwrx_id;
96 u64 cbr_id; 104 u64 cbr_id;
97 105
106 bool sample_pebs;
107 struct perf_evsel *pebs_evsel;
108
98 u64 tsc_bit; 109 u64 tsc_bit;
99 u64 mtc_bit; 110 u64 mtc_bit;
100 u64 mtc_freq_bits; 111 u64 mtc_freq_bits;
@@ -109,6 +120,9 @@ struct intel_pt {
109 120
110 char *filter; 121 char *filter;
111 struct addr_filters filts; 122 struct addr_filters filts;
123
124 struct range *time_ranges;
125 unsigned int range_cnt;
112}; 126};
113 127
114enum switch_state { 128enum switch_state {
@@ -145,9 +159,19 @@ struct intel_pt_queue {
145 bool have_sample; 159 bool have_sample;
146 u64 time; 160 u64 time;
147 u64 timestamp; 161 u64 timestamp;
162 u64 sel_timestamp;
163 bool sel_start;
164 unsigned int sel_idx;
148 u32 flags; 165 u32 flags;
149 u16 insn_len; 166 u16 insn_len;
150 u64 last_insn_cnt; 167 u64 last_insn_cnt;
168 u64 ipc_insn_cnt;
169 u64 ipc_cyc_cnt;
170 u64 last_in_insn_cnt;
171 u64 last_in_cyc_cnt;
172 u64 last_br_insn_cnt;
173 u64 last_br_cyc_cnt;
174 unsigned int cbr_seen;
151 char insn[INTEL_PT_INSN_BUF_SZ]; 175 char insn[INTEL_PT_INSN_BUF_SZ];
152}; 176};
153 177
@@ -159,13 +183,14 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
159 int ret, pkt_len, i; 183 int ret, pkt_len, i;
160 char desc[INTEL_PT_PKT_DESC_MAX]; 184 char desc[INTEL_PT_PKT_DESC_MAX];
161 const char *color = PERF_COLOR_BLUE; 185 const char *color = PERF_COLOR_BLUE;
186 enum intel_pt_pkt_ctx ctx = INTEL_PT_NO_CTX;
162 187
163 color_fprintf(stdout, color, 188 color_fprintf(stdout, color,
164 ". ... Intel Processor Trace data: size %zu bytes\n", 189 ". ... Intel Processor Trace data: size %zu bytes\n",
165 len); 190 len);
166 191
167 while (len) { 192 while (len) {
168 ret = intel_pt_get_packet(buf, len, &packet); 193 ret = intel_pt_get_packet(buf, len, &packet, &ctx);
169 if (ret > 0) 194 if (ret > 0)
170 pkt_len = ret; 195 pkt_len = ret;
171 else 196 else
@@ -224,32 +249,13 @@ static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *
224 return 0; 249 return 0;
225} 250}
226 251
227/* This function assumes data is processed sequentially only */ 252static int intel_pt_get_buffer(struct intel_pt_queue *ptq,
228static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) 253 struct auxtrace_buffer *buffer,
254 struct auxtrace_buffer *old_buffer,
255 struct intel_pt_buffer *b)
229{ 256{
230 struct intel_pt_queue *ptq = data;
231 struct auxtrace_buffer *buffer = ptq->buffer;
232 struct auxtrace_buffer *old_buffer = ptq->old_buffer;
233 struct auxtrace_queue *queue;
234 bool might_overlap; 257 bool might_overlap;
235 258
236 if (ptq->stop) {
237 b->len = 0;
238 return 0;
239 }
240
241 queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
242
243 buffer = auxtrace_buffer__next(queue, buffer);
244 if (!buffer) {
245 if (old_buffer)
246 auxtrace_buffer__drop_data(old_buffer);
247 b->len = 0;
248 return 0;
249 }
250
251 ptq->buffer = buffer;
252
253 if (!buffer->data) { 259 if (!buffer->data) {
254 int fd = perf_data__fd(ptq->pt->session->data); 260 int fd = perf_data__fd(ptq->pt->session->data);
255 261
@@ -279,6 +285,95 @@ static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
279 b->consecutive = true; 285 b->consecutive = true;
280 } 286 }
281 287
288 return 0;
289}
290
291/* Do not drop buffers with references - refer intel_pt_get_trace() */
292static void intel_pt_lookahead_drop_buffer(struct intel_pt_queue *ptq,
293 struct auxtrace_buffer *buffer)
294{
295 if (!buffer || buffer == ptq->buffer || buffer == ptq->old_buffer)
296 return;
297
298 auxtrace_buffer__drop_data(buffer);
299}
300
301/* Must be serialized with respect to intel_pt_get_trace() */
302static int intel_pt_lookahead(void *data, intel_pt_lookahead_cb_t cb,
303 void *cb_data)
304{
305 struct intel_pt_queue *ptq = data;
306 struct auxtrace_buffer *buffer = ptq->buffer;
307 struct auxtrace_buffer *old_buffer = ptq->old_buffer;
308 struct auxtrace_queue *queue;
309 int err = 0;
310
311 queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
312
313 while (1) {
314 struct intel_pt_buffer b = { .len = 0 };
315
316 buffer = auxtrace_buffer__next(queue, buffer);
317 if (!buffer)
318 break;
319
320 err = intel_pt_get_buffer(ptq, buffer, old_buffer, &b);
321 if (err)
322 break;
323
324 if (b.len) {
325 intel_pt_lookahead_drop_buffer(ptq, old_buffer);
326 old_buffer = buffer;
327 } else {
328 intel_pt_lookahead_drop_buffer(ptq, buffer);
329 continue;
330 }
331
332 err = cb(&b, cb_data);
333 if (err)
334 break;
335 }
336
337 if (buffer != old_buffer)
338 intel_pt_lookahead_drop_buffer(ptq, buffer);
339 intel_pt_lookahead_drop_buffer(ptq, old_buffer);
340
341 return err;
342}
343
344/*
345 * This function assumes data is processed sequentially only.
346 * Must be serialized with respect to intel_pt_lookahead()
347 */
348static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
349{
350 struct intel_pt_queue *ptq = data;
351 struct auxtrace_buffer *buffer = ptq->buffer;
352 struct auxtrace_buffer *old_buffer = ptq->old_buffer;
353 struct auxtrace_queue *queue;
354 int err;
355
356 if (ptq->stop) {
357 b->len = 0;
358 return 0;
359 }
360
361 queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
362
363 buffer = auxtrace_buffer__next(queue, buffer);
364 if (!buffer) {
365 if (old_buffer)
366 auxtrace_buffer__drop_data(old_buffer);
367 b->len = 0;
368 return 0;
369 }
370
371 ptq->buffer = buffer;
372
373 err = intel_pt_get_buffer(ptq, buffer, old_buffer, b);
374 if (err)
375 return err;
376
282 if (ptq->step_through_buffers) 377 if (ptq->step_through_buffers)
283 ptq->stop = true; 378 ptq->stop = true;
284 379
@@ -798,6 +893,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
798 893
799 params.get_trace = intel_pt_get_trace; 894 params.get_trace = intel_pt_get_trace;
800 params.walk_insn = intel_pt_walk_next_insn; 895 params.walk_insn = intel_pt_walk_next_insn;
896 params.lookahead = intel_pt_lookahead;
801 params.data = ptq; 897 params.data = ptq;
802 params.return_compression = intel_pt_return_compression(pt); 898 params.return_compression = intel_pt_return_compression(pt);
803 params.branch_enable = intel_pt_branch_enable(pt); 899 params.branch_enable = intel_pt_branch_enable(pt);
@@ -921,6 +1017,23 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
921 ptq->flags |= PERF_IP_FLAG_TRACE_END; 1017 ptq->flags |= PERF_IP_FLAG_TRACE_END;
922} 1018}
923 1019
1020static void intel_pt_setup_time_range(struct intel_pt *pt,
1021 struct intel_pt_queue *ptq)
1022{
1023 if (!pt->range_cnt)
1024 return;
1025
1026 ptq->sel_timestamp = pt->time_ranges[0].start;
1027 ptq->sel_idx = 0;
1028
1029 if (ptq->sel_timestamp) {
1030 ptq->sel_start = true;
1031 } else {
1032 ptq->sel_timestamp = pt->time_ranges[0].end;
1033 ptq->sel_start = false;
1034 }
1035}
1036
924static int intel_pt_setup_queue(struct intel_pt *pt, 1037static int intel_pt_setup_queue(struct intel_pt *pt,
925 struct auxtrace_queue *queue, 1038 struct auxtrace_queue *queue,
926 unsigned int queue_nr) 1039 unsigned int queue_nr)
@@ -940,11 +1053,15 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
940 ptq->cpu = queue->cpu; 1053 ptq->cpu = queue->cpu;
941 ptq->tid = queue->tid; 1054 ptq->tid = queue->tid;
942 1055
1056 ptq->cbr_seen = UINT_MAX;
1057
943 if (pt->sampling_mode && !pt->snapshot_mode && 1058 if (pt->sampling_mode && !pt->snapshot_mode &&
944 pt->timeless_decoding) 1059 pt->timeless_decoding)
945 ptq->step_through_buffers = true; 1060 ptq->step_through_buffers = true;
946 1061
947 ptq->sync_switch = pt->sync_switch; 1062 ptq->sync_switch = pt->sync_switch;
1063
1064 intel_pt_setup_time_range(pt, ptq);
948 } 1065 }
949 1066
950 if (!ptq->on_heap && 1067 if (!ptq->on_heap &&
@@ -959,6 +1076,14 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
959 intel_pt_log("queue %u getting timestamp\n", queue_nr); 1076 intel_pt_log("queue %u getting timestamp\n", queue_nr);
960 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", 1077 intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
961 queue_nr, ptq->cpu, ptq->pid, ptq->tid); 1078 queue_nr, ptq->cpu, ptq->pid, ptq->tid);
1079
1080 if (ptq->sel_start && ptq->sel_timestamp) {
1081 ret = intel_pt_fast_forward(ptq->decoder,
1082 ptq->sel_timestamp);
1083 if (ret)
1084 return ret;
1085 }
1086
962 while (1) { 1087 while (1) {
963 state = intel_pt_decode(ptq->decoder); 1088 state = intel_pt_decode(ptq->decoder);
964 if (state->err) { 1089 if (state->err) {
@@ -978,6 +1103,9 @@ static int intel_pt_setup_queue(struct intel_pt *pt,
978 queue_nr, ptq->timestamp); 1103 queue_nr, ptq->timestamp);
979 ptq->state = state; 1104 ptq->state = state;
980 ptq->have_sample = true; 1105 ptq->have_sample = true;
1106 if (ptq->sel_start && ptq->sel_timestamp &&
1107 ptq->timestamp < ptq->sel_timestamp)
1108 ptq->have_sample = false;
981 intel_pt_sample_flags(ptq); 1109 intel_pt_sample_flags(ptq);
982 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); 1110 ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
983 if (ret) 1111 if (ret)
@@ -1059,28 +1187,48 @@ static inline bool intel_pt_skip_event(struct intel_pt *pt)
1059 pt->num_events++ < pt->synth_opts.initial_skip; 1187 pt->num_events++ < pt->synth_opts.initial_skip;
1060} 1188}
1061 1189
1190/*
1191 * Cannot count CBR as skipped because it won't go away until cbr == cbr_seen.
1192 * Also ensure CBR is first non-skipped event by allowing for 4 more samples
1193 * from this decoder state.
1194 */
1195static inline bool intel_pt_skip_cbr_event(struct intel_pt *pt)
1196{
1197 return pt->synth_opts.initial_skip &&
1198 pt->num_events + 4 < pt->synth_opts.initial_skip;
1199}
1200
1201static void intel_pt_prep_a_sample(struct intel_pt_queue *ptq,
1202 union perf_event *event,
1203 struct perf_sample *sample)
1204{
1205 event->sample.header.type = PERF_RECORD_SAMPLE;
1206 event->sample.header.size = sizeof(struct perf_event_header);
1207
1208 sample->pid = ptq->pid;
1209 sample->tid = ptq->tid;
1210 sample->cpu = ptq->cpu;
1211 sample->insn_len = ptq->insn_len;
1212 memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1213}
1214
1062static void intel_pt_prep_b_sample(struct intel_pt *pt, 1215static void intel_pt_prep_b_sample(struct intel_pt *pt,
1063 struct intel_pt_queue *ptq, 1216 struct intel_pt_queue *ptq,
1064 union perf_event *event, 1217 union perf_event *event,
1065 struct perf_sample *sample) 1218 struct perf_sample *sample)
1066{ 1219{
1220 intel_pt_prep_a_sample(ptq, event, sample);
1221
1067 if (!pt->timeless_decoding) 1222 if (!pt->timeless_decoding)
1068 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc); 1223 sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
1069 1224
1070 sample->ip = ptq->state->from_ip; 1225 sample->ip = ptq->state->from_ip;
1071 sample->cpumode = intel_pt_cpumode(pt, sample->ip); 1226 sample->cpumode = intel_pt_cpumode(pt, sample->ip);
1072 sample->pid = ptq->pid;
1073 sample->tid = ptq->tid;
1074 sample->addr = ptq->state->to_ip; 1227 sample->addr = ptq->state->to_ip;
1075 sample->period = 1; 1228 sample->period = 1;
1076 sample->cpu = ptq->cpu;
1077 sample->flags = ptq->flags; 1229 sample->flags = ptq->flags;
1078 sample->insn_len = ptq->insn_len;
1079 memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
1080 1230
1081 event->sample.header.type = PERF_RECORD_SAMPLE;
1082 event->sample.header.misc = sample->cpumode; 1231 event->sample.header.misc = sample->cpumode;
1083 event->sample.header.size = sizeof(struct perf_event_header);
1084} 1232}
1085 1233
1086static int intel_pt_inject_event(union perf_event *event, 1234static int intel_pt_inject_event(union perf_event *event,
@@ -1153,6 +1301,13 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
1153 sample.branch_stack = (struct branch_stack *)&dummy_bs; 1301 sample.branch_stack = (struct branch_stack *)&dummy_bs;
1154 } 1302 }
1155 1303
1304 sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt;
1305 if (sample.cyc_cnt) {
1306 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt;
1307 ptq->last_br_insn_cnt = ptq->ipc_insn_cnt;
1308 ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
1309 }
1310
1156 return intel_pt_deliver_synth_b_event(pt, event, &sample, 1311 return intel_pt_deliver_synth_b_event(pt, event, &sample,
1157 pt->branches_sample_type); 1312 pt->branches_sample_type);
1158} 1313}
@@ -1208,6 +1363,13 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
1208 sample.stream_id = ptq->pt->instructions_id; 1363 sample.stream_id = ptq->pt->instructions_id;
1209 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; 1364 sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
1210 1365
1366 sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
1367 if (sample.cyc_cnt) {
1368 sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt;
1369 ptq->last_in_insn_cnt = ptq->ipc_insn_cnt;
1370 ptq->last_in_cyc_cnt = ptq->ipc_cyc_cnt;
1371 }
1372
1211 ptq->last_insn_cnt = ptq->state->tot_insn_cnt; 1373 ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
1212 1374
1213 return intel_pt_deliver_synth_event(pt, ptq, event, &sample, 1375 return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
@@ -1281,9 +1443,11 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
1281 struct perf_synth_intel_cbr raw; 1443 struct perf_synth_intel_cbr raw;
1282 u32 flags; 1444 u32 flags;
1283 1445
1284 if (intel_pt_skip_event(pt)) 1446 if (intel_pt_skip_cbr_event(pt))
1285 return 0; 1447 return 0;
1286 1448
1449 ptq->cbr_seen = ptq->state->cbr;
1450
1287 intel_pt_prep_p_sample(pt, ptq, event, &sample); 1451 intel_pt_prep_p_sample(pt, ptq, event, &sample);
1288 1452
1289 sample.id = ptq->pt->cbr_id; 1453 sample.id = ptq->pt->cbr_id;
@@ -1401,6 +1565,261 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
1401 pt->pwr_events_sample_type); 1565 pt->pwr_events_sample_type);
1402} 1566}
1403 1567
1568/*
1569 * PEBS gp_regs array indexes plus 1 so that 0 means not present. Refer
1570 * intel_pt_add_gp_regs().
1571 */
1572static const int pebs_gp_regs[] = {
1573 [PERF_REG_X86_FLAGS] = 1,
1574 [PERF_REG_X86_IP] = 2,
1575 [PERF_REG_X86_AX] = 3,
1576 [PERF_REG_X86_CX] = 4,
1577 [PERF_REG_X86_DX] = 5,
1578 [PERF_REG_X86_BX] = 6,
1579 [PERF_REG_X86_SP] = 7,
1580 [PERF_REG_X86_BP] = 8,
1581 [PERF_REG_X86_SI] = 9,
1582 [PERF_REG_X86_DI] = 10,
1583 [PERF_REG_X86_R8] = 11,
1584 [PERF_REG_X86_R9] = 12,
1585 [PERF_REG_X86_R10] = 13,
1586 [PERF_REG_X86_R11] = 14,
1587 [PERF_REG_X86_R12] = 15,
1588 [PERF_REG_X86_R13] = 16,
1589 [PERF_REG_X86_R14] = 17,
1590 [PERF_REG_X86_R15] = 18,
1591};
1592
1593static u64 *intel_pt_add_gp_regs(struct regs_dump *intr_regs, u64 *pos,
1594 const struct intel_pt_blk_items *items,
1595 u64 regs_mask)
1596{
1597 const u64 *gp_regs = items->val[INTEL_PT_GP_REGS_POS];
1598 u32 mask = items->mask[INTEL_PT_GP_REGS_POS];
1599 u32 bit;
1600 int i;
1601
1602 for (i = 0, bit = 1; i < PERF_REG_X86_64_MAX; i++, bit <<= 1) {
1603 /* Get the PEBS gp_regs array index */
1604 int n = pebs_gp_regs[i] - 1;
1605
1606 if (n < 0)
1607 continue;
1608 /*
1609 * Add only registers that were requested (i.e. 'regs_mask') and
1610 * that were provided (i.e. 'mask'), and update the resulting
1611 * mask (i.e. 'intr_regs->mask') accordingly.
1612 */
1613 if (mask & 1 << n && regs_mask & bit) {
1614 intr_regs->mask |= bit;
1615 *pos++ = gp_regs[n];
1616 }
1617 }
1618
1619 return pos;
1620}
1621
1622#ifndef PERF_REG_X86_XMM0
1623#define PERF_REG_X86_XMM0 32
1624#endif
1625
1626static void intel_pt_add_xmm(struct regs_dump *intr_regs, u64 *pos,
1627 const struct intel_pt_blk_items *items,
1628 u64 regs_mask)
1629{
1630 u32 mask = items->has_xmm & (regs_mask >> PERF_REG_X86_XMM0);
1631 const u64 *xmm = items->xmm;
1632
1633 /*
1634 * If there are any XMM registers, then there should be all of them.
1635 * Nevertheless, follow the logic to add only registers that were
1636 * requested (i.e. 'regs_mask') and that were provided (i.e. 'mask'),
1637 * and update the resulting mask (i.e. 'intr_regs->mask') accordingly.
1638 */
1639 intr_regs->mask |= (u64)mask << PERF_REG_X86_XMM0;
1640
1641 for (; mask; mask >>= 1, xmm++) {
1642 if (mask & 1)
1643 *pos++ = *xmm;
1644 }
1645}
1646
1647#define LBR_INFO_MISPRED (1ULL << 63)
1648#define LBR_INFO_IN_TX (1ULL << 62)
1649#define LBR_INFO_ABORT (1ULL << 61)
1650#define LBR_INFO_CYCLES 0xffff
1651
1652/* Refer kernel's intel_pmu_store_pebs_lbrs() */
1653static u64 intel_pt_lbr_flags(u64 info)
1654{
1655 union {
1656 struct branch_flags flags;
1657 u64 result;
1658 } u = {
1659 .flags = {
1660 .mispred = !!(info & LBR_INFO_MISPRED),
1661 .predicted = !(info & LBR_INFO_MISPRED),
1662 .in_tx = !!(info & LBR_INFO_IN_TX),
1663 .abort = !!(info & LBR_INFO_ABORT),
1664 .cycles = info & LBR_INFO_CYCLES,
1665 }
1666 };
1667
1668 return u.result;
1669}
1670
1671static void intel_pt_add_lbrs(struct branch_stack *br_stack,
1672 const struct intel_pt_blk_items *items)
1673{
1674 u64 *to;
1675 int i;
1676
1677 br_stack->nr = 0;
1678
1679 to = &br_stack->entries[0].from;
1680
1681 for (i = INTEL_PT_LBR_0_POS; i <= INTEL_PT_LBR_2_POS; i++) {
1682 u32 mask = items->mask[i];
1683 const u64 *from = items->val[i];
1684
1685 for (; mask; mask >>= 3, from += 3) {
1686 if ((mask & 7) == 7) {
1687 *to++ = from[0];
1688 *to++ = from[1];
1689 *to++ = intel_pt_lbr_flags(from[2]);
1690 br_stack->nr += 1;
1691 }
1692 }
1693 }
1694}
1695
1696/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
1697#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
1698
1699static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
1700{
1701 const struct intel_pt_blk_items *items = &ptq->state->items;
1702 struct perf_sample sample = { .ip = 0, };
1703 union perf_event *event = ptq->event_buf;
1704 struct intel_pt *pt = ptq->pt;
1705 struct perf_evsel *evsel = pt->pebs_evsel;
1706 u64 sample_type = evsel->attr.sample_type;
1707 u64 id = evsel->id[0];
1708 u8 cpumode;
1709
1710 if (intel_pt_skip_event(pt))
1711 return 0;
1712
1713 intel_pt_prep_a_sample(ptq, event, &sample);
1714
1715 sample.id = id;
1716 sample.stream_id = id;
1717
1718 if (!evsel->attr.freq)
1719 sample.period = evsel->attr.sample_period;
1720
1721 /* No support for non-zero CS base */
1722 if (items->has_ip)
1723 sample.ip = items->ip;
1724 else if (items->has_rip)
1725 sample.ip = items->rip;
1726 else
1727 sample.ip = ptq->state->from_ip;
1728
1729 /* No support for guest mode at this time */
1730 cpumode = sample.ip < ptq->pt->kernel_start ?
1731 PERF_RECORD_MISC_USER :
1732 PERF_RECORD_MISC_KERNEL;
1733
1734 event->sample.header.misc = cpumode | PERF_RECORD_MISC_EXACT_IP;
1735
1736 sample.cpumode = cpumode;
1737
1738 if (sample_type & PERF_SAMPLE_TIME) {
1739 u64 timestamp = 0;
1740
1741 if (items->has_timestamp)
1742 timestamp = items->timestamp;
1743 else if (!pt->timeless_decoding)
1744 timestamp = ptq->timestamp;
1745 if (timestamp)
1746 sample.time = tsc_to_perf_time(timestamp, &pt->tc);
1747 }
1748
1749 if (sample_type & PERF_SAMPLE_CALLCHAIN &&
1750 pt->synth_opts.callchain) {
1751 thread_stack__sample(ptq->thread, ptq->cpu, ptq->chain,
1752 pt->synth_opts.callchain_sz, sample.ip,
1753 pt->kernel_start);
1754 sample.callchain = ptq->chain;
1755 }
1756
1757 if (sample_type & PERF_SAMPLE_REGS_INTR &&
1758 items->mask[INTEL_PT_GP_REGS_POS]) {
1759 u64 regs[sizeof(sample.intr_regs.mask)];
1760 u64 regs_mask = evsel->attr.sample_regs_intr;
1761 u64 *pos;
1762
1763 sample.intr_regs.abi = items->is_32_bit ?
1764 PERF_SAMPLE_REGS_ABI_32 :
1765 PERF_SAMPLE_REGS_ABI_64;
1766 sample.intr_regs.regs = regs;
1767
1768 pos = intel_pt_add_gp_regs(&sample.intr_regs, regs, items, regs_mask);
1769
1770 intel_pt_add_xmm(&sample.intr_regs, pos, items, regs_mask);
1771 }
1772
1773 if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
1774 struct {
1775 struct branch_stack br_stack;
1776 struct branch_entry entries[LBRS_MAX];
1777 } br;
1778
1779 if (items->mask[INTEL_PT_LBR_0_POS] ||
1780 items->mask[INTEL_PT_LBR_1_POS] ||
1781 items->mask[INTEL_PT_LBR_2_POS]) {
1782 intel_pt_add_lbrs(&br.br_stack, items);
1783 sample.branch_stack = &br.br_stack;
1784 } else if (pt->synth_opts.last_branch) {
1785 intel_pt_copy_last_branch_rb(ptq);
1786 sample.branch_stack = ptq->last_branch;
1787 } else {
1788 br.br_stack.nr = 0;
1789 sample.branch_stack = &br.br_stack;
1790 }
1791 }
1792
1793 if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
1794 sample.addr = items->mem_access_address;
1795
1796 if (sample_type & PERF_SAMPLE_WEIGHT) {
1797 /*
1798 * Refer kernel's setup_pebs_adaptive_sample_data() and
1799 * intel_hsw_weight().
1800 */
1801 if (items->has_mem_access_latency)
1802 sample.weight = items->mem_access_latency;
1803 if (!sample.weight && items->has_tsx_aux_info) {
1804 /* Cycles last block */
1805 sample.weight = (u32)items->tsx_aux_info;
1806 }
1807 }
1808
1809 if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) {
1810 u64 ax = items->has_rax ? items->rax : 0;
1811 /* Refer kernel's intel_hsw_transaction() */
1812 u64 txn = (u8)(items->tsx_aux_info >> 32);
1813
1814 /* For RTM XABORTs also log the abort code from AX */
1815 if (txn & PERF_TXN_TRANSACTION && ax & 1)
1816 txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
1817 sample.transaction = txn;
1818 }
1819
1820 return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
1821}
1822
1404static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, 1823static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
1405 pid_t pid, pid_t tid, u64 ip, u64 timestamp) 1824 pid_t pid, pid_t tid, u64 ip, u64 timestamp)
1406{ 1825{
@@ -1465,8 +1884,7 @@ static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
1465} 1884}
1466 1885
1467#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \ 1886#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
1468 INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \ 1887 INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT)
1469 INTEL_PT_CBR_CHG)
1470 1888
1471static int intel_pt_sample(struct intel_pt_queue *ptq) 1889static int intel_pt_sample(struct intel_pt_queue *ptq)
1472{ 1890{
@@ -1479,31 +1897,52 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1479 1897
1480 ptq->have_sample = false; 1898 ptq->have_sample = false;
1481 1899
1482 if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) { 1900 if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) {
1483 if (state->type & INTEL_PT_CBR_CHG) { 1901 /*
1902 * Cycle count and instruction count only go together to create
1903 * a valid IPC ratio when the cycle count changes.
1904 */
1905 ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt;
1906 ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt;
1907 }
1908
1909 /*
1910 * Do PEBS first to allow for the possibility that the PEBS timestamp
1911 * precedes the current timestamp.
1912 */
1913 if (pt->sample_pebs && state->type & INTEL_PT_BLK_ITEMS) {
1914 err = intel_pt_synth_pebs_sample(ptq);
1915 if (err)
1916 return err;
1917 }
1918
1919 if (pt->sample_pwr_events) {
1920 if (ptq->state->cbr != ptq->cbr_seen) {
1484 err = intel_pt_synth_cbr_sample(ptq); 1921 err = intel_pt_synth_cbr_sample(ptq);
1485 if (err) 1922 if (err)
1486 return err; 1923 return err;
1487 } 1924 }
1488 if (state->type & INTEL_PT_MWAIT_OP) { 1925 if (state->type & INTEL_PT_PWR_EVT) {
1489 err = intel_pt_synth_mwait_sample(ptq); 1926 if (state->type & INTEL_PT_MWAIT_OP) {
1490 if (err) 1927 err = intel_pt_synth_mwait_sample(ptq);
1491 return err; 1928 if (err)
1492 } 1929 return err;
1493 if (state->type & INTEL_PT_PWR_ENTRY) { 1930 }
1494 err = intel_pt_synth_pwre_sample(ptq); 1931 if (state->type & INTEL_PT_PWR_ENTRY) {
1495 if (err) 1932 err = intel_pt_synth_pwre_sample(ptq);
1496 return err; 1933 if (err)
1497 } 1934 return err;
1498 if (state->type & INTEL_PT_EX_STOP) { 1935 }
1499 err = intel_pt_synth_exstop_sample(ptq); 1936 if (state->type & INTEL_PT_EX_STOP) {
1500 if (err) 1937 err = intel_pt_synth_exstop_sample(ptq);
1501 return err; 1938 if (err)
1502 } 1939 return err;
1503 if (state->type & INTEL_PT_PWR_EXIT) { 1940 }
1504 err = intel_pt_synth_pwrx_sample(ptq); 1941 if (state->type & INTEL_PT_PWR_EXIT) {
1505 if (err) 1942 err = intel_pt_synth_pwrx_sample(ptq);
1506 return err; 1943 if (err)
1944 return err;
1945 }
1507 } 1946 }
1508 } 1947 }
1509 1948
@@ -1641,10 +2080,83 @@ static void intel_pt_enable_sync_switch(struct intel_pt *pt)
1641 } 2080 }
1642} 2081}
1643 2082
2083/*
2084 * To filter against time ranges, it is only necessary to look at the next start
2085 * or end time.
2086 */
2087static bool intel_pt_next_time(struct intel_pt_queue *ptq)
2088{
2089 struct intel_pt *pt = ptq->pt;
2090
2091 if (ptq->sel_start) {
2092 /* Next time is an end time */
2093 ptq->sel_start = false;
2094 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].end;
2095 return true;
2096 } else if (ptq->sel_idx + 1 < pt->range_cnt) {
2097 /* Next time is a start time */
2098 ptq->sel_start = true;
2099 ptq->sel_idx += 1;
2100 ptq->sel_timestamp = pt->time_ranges[ptq->sel_idx].start;
2101 return true;
2102 }
2103
2104 /* No next time */
2105 return false;
2106}
2107
2108static int intel_pt_time_filter(struct intel_pt_queue *ptq, u64 *ff_timestamp)
2109{
2110 int err;
2111
2112 while (1) {
2113 if (ptq->sel_start) {
2114 if (ptq->timestamp >= ptq->sel_timestamp) {
2115 /* After start time, so consider next time */
2116 intel_pt_next_time(ptq);
2117 if (!ptq->sel_timestamp) {
2118 /* No end time */
2119 return 0;
2120 }
2121 /* Check against end time */
2122 continue;
2123 }
2124 /* Before start time, so fast forward */
2125 ptq->have_sample = false;
2126 if (ptq->sel_timestamp > *ff_timestamp) {
2127 if (ptq->sync_switch) {
2128 intel_pt_next_tid(ptq->pt, ptq);
2129 ptq->switch_state = INTEL_PT_SS_UNKNOWN;
2130 }
2131 *ff_timestamp = ptq->sel_timestamp;
2132 err = intel_pt_fast_forward(ptq->decoder,
2133 ptq->sel_timestamp);
2134 if (err)
2135 return err;
2136 }
2137 return 0;
2138 } else if (ptq->timestamp > ptq->sel_timestamp) {
2139 /* After end time, so consider next time */
2140 if (!intel_pt_next_time(ptq)) {
2141 /* No next time range, so stop decoding */
2142 ptq->have_sample = false;
2143 ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
2144 return 1;
2145 }
2146 /* Check against next start time */
2147 continue;
2148 } else {
2149 /* Before end time */
2150 return 0;
2151 }
2152 }
2153}
2154
1644static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) 2155static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1645{ 2156{
1646 const struct intel_pt_state *state = ptq->state; 2157 const struct intel_pt_state *state = ptq->state;
1647 struct intel_pt *pt = ptq->pt; 2158 struct intel_pt *pt = ptq->pt;
2159 u64 ff_timestamp = 0;
1648 int err; 2160 int err;
1649 2161
1650 if (!pt->kernel_start) { 2162 if (!pt->kernel_start) {
@@ -1709,6 +2221,12 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
1709 ptq->timestamp = state->timestamp; 2221 ptq->timestamp = state->timestamp;
1710 } 2222 }
1711 2223
2224 if (ptq->sel_timestamp) {
2225 err = intel_pt_time_filter(ptq, &ff_timestamp);
2226 if (err)
2227 return err;
2228 }
2229
1712 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { 2230 if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
1713 *timestamp = ptq->timestamp; 2231 *timestamp = ptq->timestamp;
1714 return 0; 2232 return 0;
@@ -1850,7 +2368,6 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1850 2368
1851 switch (ptq->switch_state) { 2369 switch (ptq->switch_state) {
1852 case INTEL_PT_SS_NOT_TRACING: 2370 case INTEL_PT_SS_NOT_TRACING:
1853 ptq->next_tid = -1;
1854 break; 2371 break;
1855 case INTEL_PT_SS_UNKNOWN: 2372 case INTEL_PT_SS_UNKNOWN:
1856 case INTEL_PT_SS_TRACING: 2373 case INTEL_PT_SS_TRACING:
@@ -1870,13 +2387,14 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
1870 ptq->switch_state = INTEL_PT_SS_TRACING; 2387 ptq->switch_state = INTEL_PT_SS_TRACING;
1871 break; 2388 break;
1872 case INTEL_PT_SS_EXPECTING_SWITCH_IP: 2389 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
1873 ptq->next_tid = tid;
1874 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); 2390 intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
1875 break; 2391 break;
1876 default: 2392 default:
1877 break; 2393 break;
1878 } 2394 }
1879 2395
2396 ptq->next_tid = -1;
2397
1880 return 1; 2398 return 1;
1881} 2399}
1882 2400
@@ -1905,6 +2423,44 @@ static int intel_pt_process_switch(struct intel_pt *pt,
1905 return machine__set_current_tid(pt->machine, cpu, -1, tid); 2423 return machine__set_current_tid(pt->machine, cpu, -1, tid);
1906} 2424}
1907 2425
2426static int intel_pt_context_switch_in(struct intel_pt *pt,
2427 struct perf_sample *sample)
2428{
2429 pid_t pid = sample->pid;
2430 pid_t tid = sample->tid;
2431 int cpu = sample->cpu;
2432
2433 if (pt->sync_switch) {
2434 struct intel_pt_queue *ptq;
2435
2436 ptq = intel_pt_cpu_to_ptq(pt, cpu);
2437 if (ptq && ptq->sync_switch) {
2438 ptq->next_tid = -1;
2439 switch (ptq->switch_state) {
2440 case INTEL_PT_SS_NOT_TRACING:
2441 case INTEL_PT_SS_UNKNOWN:
2442 case INTEL_PT_SS_TRACING:
2443 break;
2444 case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
2445 case INTEL_PT_SS_EXPECTING_SWITCH_IP:
2446 ptq->switch_state = INTEL_PT_SS_TRACING;
2447 break;
2448 default:
2449 break;
2450 }
2451 }
2452 }
2453
2454 /*
2455 * If the current tid has not been updated yet, ensure it is now that
2456 * a "switch in" event has occurred.
2457 */
2458 if (machine__get_current_tid(pt->machine, cpu) == tid)
2459 return 0;
2460
2461 return machine__set_current_tid(pt->machine, cpu, pid, tid);
2462}
2463
1908static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event, 2464static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1909 struct perf_sample *sample) 2465 struct perf_sample *sample)
1910{ 2466{
@@ -1916,7 +2472,7 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
1916 2472
1917 if (pt->have_sched_switch == 3) { 2473 if (pt->have_sched_switch == 3) {
1918 if (!out) 2474 if (!out)
1919 return 0; 2475 return intel_pt_context_switch_in(pt, sample);
1920 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) { 2476 if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
1921 pr_err("Expecting CPU-wide context switch event\n"); 2477 pr_err("Expecting CPU-wide context switch event\n");
1922 return -EINVAL; 2478 return -EINVAL;
@@ -2076,6 +2632,7 @@ static void intel_pt_free(struct perf_session *session)
2076 thread__put(pt->unknown_thread); 2632 thread__put(pt->unknown_thread);
2077 addr_filters__exit(&pt->filts); 2633 addr_filters__exit(&pt->filts);
2078 zfree(&pt->filter); 2634 zfree(&pt->filter);
2635 zfree(&pt->time_ranges);
2079 free(pt); 2636 free(pt);
2080} 2637}
2081 2638
@@ -2373,6 +2930,85 @@ static int intel_pt_perf_config(const char *var, const char *value, void *data)
2373 return 0; 2930 return 0;
2374} 2931}
2375 2932
2933/* Find least TSC which converts to ns or later */
2934static u64 intel_pt_tsc_start(u64 ns, struct intel_pt *pt)
2935{
2936 u64 tsc, tm;
2937
2938 tsc = perf_time_to_tsc(ns, &pt->tc);
2939
2940 while (1) {
2941 tm = tsc_to_perf_time(tsc, &pt->tc);
2942 if (tm < ns)
2943 break;
2944 tsc -= 1;
2945 }
2946
2947 while (tm < ns)
2948 tm = tsc_to_perf_time(++tsc, &pt->tc);
2949
2950 return tsc;
2951}
2952
2953/* Find greatest TSC which converts to ns or earlier */
2954static u64 intel_pt_tsc_end(u64 ns, struct intel_pt *pt)
2955{
2956 u64 tsc, tm;
2957
2958 tsc = perf_time_to_tsc(ns, &pt->tc);
2959
2960 while (1) {
2961 tm = tsc_to_perf_time(tsc, &pt->tc);
2962 if (tm > ns)
2963 break;
2964 tsc += 1;
2965 }
2966
2967 while (tm > ns)
2968 tm = tsc_to_perf_time(--tsc, &pt->tc);
2969
2970 return tsc;
2971}
2972
2973static int intel_pt_setup_time_ranges(struct intel_pt *pt,
2974 struct itrace_synth_opts *opts)
2975{
2976 struct perf_time_interval *p = opts->ptime_range;
2977 int n = opts->range_num;
2978 int i;
2979
2980 if (!n || !p || pt->timeless_decoding)
2981 return 0;
2982
2983 pt->time_ranges = calloc(n, sizeof(struct range));
2984 if (!pt->time_ranges)
2985 return -ENOMEM;
2986
2987 pt->range_cnt = n;
2988
2989 intel_pt_log("%s: %u range(s)\n", __func__, n);
2990
2991 for (i = 0; i < n; i++) {
2992 struct range *r = &pt->time_ranges[i];
2993 u64 ts = p[i].start;
2994 u64 te = p[i].end;
2995
2996 /*
2997 * Take care to ensure the TSC range matches the perf-time range
2998 * when converted back to perf-time.
2999 */
3000 r->start = ts ? intel_pt_tsc_start(ts, pt) : 0;
3001 r->end = te ? intel_pt_tsc_end(te, pt) : 0;
3002
3003 intel_pt_log("range %d: perf time interval: %"PRIu64" to %"PRIu64"\n",
3004 i, ts, te);
3005 intel_pt_log("range %d: TSC time interval: %#"PRIx64" to %#"PRIx64"\n",
3006 i, r->start, r->end);
3007 }
3008
3009 return 0;
3010}
3011
2376static const char * const intel_pt_info_fmts[] = { 3012static const char * const intel_pt_info_fmts[] = {
2377 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", 3013 [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
2378 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", 3014 [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -2579,7 +3215,8 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
2579 } else { 3215 } else {
2580 itrace_synth_opts__set_default(&pt->synth_opts, 3216 itrace_synth_opts__set_default(&pt->synth_opts,
2581 session->itrace_synth_opts->default_no_sample); 3217 session->itrace_synth_opts->default_no_sample);
2582 if (use_browser != -1) { 3218 if (!session->itrace_synth_opts->default_no_sample &&
3219 !session->itrace_synth_opts->inject) {
2583 pt->synth_opts.branches = false; 3220 pt->synth_opts.branches = false;
2584 pt->synth_opts.callchain = true; 3221 pt->synth_opts.callchain = true;
2585 } 3222 }
@@ -2604,6 +3241,12 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
2604 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000; 3241 pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
2605 } 3242 }
2606 3243
3244 if (session->itrace_synth_opts) {
3245 err = intel_pt_setup_time_ranges(pt, session->itrace_synth_opts);
3246 if (err)
3247 goto err_delete_thread;
3248 }
3249
2607 if (pt->synth_opts.calls) 3250 if (pt->synth_opts.calls)
2608 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | 3251 pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
2609 PERF_IP_FLAG_TRACE_END; 3252 PERF_IP_FLAG_TRACE_END;
@@ -2644,6 +3287,7 @@ err_free_queues:
2644err_free: 3287err_free:
2645 addr_filters__exit(&pt->filts); 3288 addr_filters__exit(&pt->filts);
2646 zfree(&pt->filter); 3289 zfree(&pt->filter);
3290 zfree(&pt->time_ranges);
2647 free(pt); 3291 free(pt);
2648 return err; 3292 return err;
2649} 3293}
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index eda28d3570bc..28908afedec4 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -28,7 +28,7 @@
28#include "genelf.h" 28#include "genelf.h"
29#include "../builtin.h" 29#include "../builtin.h"
30 30
31#include "sane_ctype.h" 31#include <linux/ctype.h>
32 32
33struct jit_buf_desc { 33struct jit_buf_desc {
34 struct perf_data *output; 34 struct perf_data *output;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index e00dc413652d..147ed85ea2bc 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -15,6 +15,7 @@
15#include "strlist.h" 15#include "strlist.h"
16#include "thread.h" 16#include "thread.h"
17#include "vdso.h" 17#include "vdso.h"
18#include "util.h"
18#include <stdbool.h> 19#include <stdbool.h>
19#include <sys/types.h> 20#include <sys/types.h>
20#include <sys/stat.h> 21#include <sys/stat.h>
@@ -24,7 +25,7 @@
24#include "asm/bug.h" 25#include "asm/bug.h"
25#include "bpf-event.h" 26#include "bpf-event.h"
26 27
27#include "sane_ctype.h" 28#include <linux/ctype.h>
28#include <symbol/kallsyms.h> 29#include <symbol/kallsyms.h>
29#include <linux/mman.h> 30#include <linux/mman.h>
30 31
@@ -716,12 +717,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
716 return -ENOMEM; 717 return -ENOMEM;
717 718
718 map->start = event->ksymbol_event.addr; 719 map->start = event->ksymbol_event.addr;
719 map->pgoff = map->start;
720 map->end = map->start + event->ksymbol_event.len; 720 map->end = map->start + event->ksymbol_event.len;
721 map_groups__insert(&machine->kmaps, map); 721 map_groups__insert(&machine->kmaps, map);
722 } 722 }
723 723
724 sym = symbol__new(event->ksymbol_event.addr, event->ksymbol_event.len, 724 sym = symbol__new(map->map_ip(map, map->start),
725 event->ksymbol_event.len,
725 0, 0, event->ksymbol_event.name); 726 0, 0, event->ksymbol_event.name);
726 if (!sym) 727 if (!sym)
727 return -ENOMEM; 728 return -ENOMEM;
@@ -1253,9 +1254,9 @@ static char *get_kernel_version(const char *root_dir)
1253 return NULL; 1254 return NULL;
1254 1255
1255 tmp = fgets(version, sizeof(version), file); 1256 tmp = fgets(version, sizeof(version), file);
1256 if (!tmp)
1257 *version = '\0';
1258 fclose(file); 1257 fclose(file);
1258 if (!tmp)
1259 return NULL;
1259 1260
1260 name = strstr(version, prefix); 1261 name = strstr(version, prefix);
1261 if (!name) 1262 if (!name)
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index ee71efb9db62..6fce983c6115 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -405,6 +405,7 @@ size_t map__fprintf(struct map *map, FILE *fp)
405 405
406size_t map__fprintf_dsoname(struct map *map, FILE *fp) 406size_t map__fprintf_dsoname(struct map *map, FILE *fp)
407{ 407{
408 char buf[symbol_conf.pad_output_len_dso + 1];
408 const char *dsoname = "[unknown]"; 409 const char *dsoname = "[unknown]";
409 410
410 if (map && map->dso) { 411 if (map && map->dso) {
@@ -414,6 +415,11 @@ size_t map__fprintf_dsoname(struct map *map, FILE *fp)
414 dsoname = map->dso->name; 415 dsoname = map->dso->name;
415 } 416 }
416 417
418 if (symbol_conf.pad_output_len_dso) {
419 scnprintf_pad(buf, symbol_conf.pad_output_len_dso, "%s", dsoname);
420 dsoname = buf;
421 }
422
417 return fprintf(fp, "%s", dsoname); 423 return fprintf(fp, "%s", dsoname);
418} 424}
419 425
diff --git a/tools/perf/util/map_groups.h b/tools/perf/util/map_groups.h
index 4dcda33e0fdf..5f25efa6d6bc 100644
--- a/tools/perf/util/map_groups.h
+++ b/tools/perf/util/map_groups.h
@@ -88,4 +88,6 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, FILE
88 88
89struct map *map_groups__find_by_name(struct map_groups *mg, const char *name); 89struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
90 90
91int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map);
92
91#endif // __PERF_MAP_GROUPS_H 93#endif // __PERF_MAP_GROUPS_H
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 699e020737d9..d8164574cb16 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -17,7 +17,7 @@
17#include "pmu-events/pmu-events.h" 17#include "pmu-events/pmu-events.h"
18#include "strlist.h" 18#include "strlist.h"
19#include <assert.h> 19#include <assert.h>
20#include <ctype.h> 20#include <linux/ctype.h>
21 21
22struct metric_event *metricgroup__lookup(struct rblist *metric_events, 22struct metric_event *metricgroup__lookup(struct rblist *metric_events,
23 struct perf_evsel *evsel, 23 struct perf_evsel *evsel,
@@ -85,26 +85,49 @@ struct egroup {
85 const char *metric_expr; 85 const char *metric_expr;
86}; 86};
87 87
88static struct perf_evsel *find_evsel(struct perf_evlist *perf_evlist, 88static bool record_evsel(int *ind, struct perf_evsel **start,
89 const char **ids, 89 int idnum,
90 int idnum, 90 struct perf_evsel **metric_events,
91 struct perf_evsel **metric_events) 91 struct perf_evsel *ev)
92{
93 metric_events[*ind] = ev;
94 if (*ind == 0)
95 *start = ev;
96 if (++*ind == idnum) {
97 metric_events[*ind] = NULL;
98 return true;
99 }
100 return false;
101}
102
103static struct perf_evsel *find_evsel_group(struct perf_evlist *perf_evlist,
104 const char **ids,
105 int idnum,
106 struct perf_evsel **metric_events)
92{ 107{
93 struct perf_evsel *ev, *start = NULL; 108 struct perf_evsel *ev, *start = NULL;
94 int ind = 0; 109 int ind = 0;
95 110
96 evlist__for_each_entry (perf_evlist, ev) { 111 evlist__for_each_entry (perf_evlist, ev) {
112 if (ev->collect_stat)
113 continue;
97 if (!strcmp(ev->name, ids[ind])) { 114 if (!strcmp(ev->name, ids[ind])) {
98 metric_events[ind] = ev; 115 if (record_evsel(&ind, &start, idnum,
99 if (ind == 0) 116 metric_events, ev))
100 start = ev;
101 if (++ind == idnum) {
102 metric_events[ind] = NULL;
103 return start; 117 return start;
104 }
105 } else { 118 } else {
119 /*
120 * We saw some other event that is not
121 * in our list of events. Discard
122 * the whole match and start again.
123 */
106 ind = 0; 124 ind = 0;
107 start = NULL; 125 start = NULL;
126 if (!strcmp(ev->name, ids[ind])) {
127 if (record_evsel(&ind, &start, idnum,
128 metric_events, ev))
129 return start;
130 }
108 } 131 }
109 } 132 }
110 /* 133 /*
@@ -134,8 +157,8 @@ static int metricgroup__setup_events(struct list_head *groups,
134 ret = -ENOMEM; 157 ret = -ENOMEM;
135 break; 158 break;
136 } 159 }
137 evsel = find_evsel(perf_evlist, eg->ids, eg->idnum, 160 evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
138 metric_events); 161 metric_events);
139 if (!evsel) { 162 if (!evsel) {
140 pr_debug("Cannot resolve %s: %s\n", 163 pr_debug("Cannot resolve %s: %s\n",
141 eg->metric_name, eg->metric_expr); 164 eg->metric_name, eg->metric_expr);
@@ -308,10 +331,9 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
308 struct mep *me; 331 struct mep *me;
309 char *s; 332 char *s;
310 333
334 g = skip_spaces(g);
311 if (*g == 0) 335 if (*g == 0)
312 g = "No_group"; 336 g = "No_group";
313 while (isspace(*g))
314 g++;
315 if (filter && !strstr(g, filter)) 337 if (filter && !strstr(g, filter))
316 continue; 338 continue;
317 if (raw) 339 if (raw)
@@ -353,7 +375,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
353 struct mep *me = container_of(node, struct mep, nd); 375 struct mep *me = container_of(node, struct mep, nd);
354 376
355 if (metricgroups) 377 if (metricgroups)
356 printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n"); 378 printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n");
357 if (metrics) 379 if (metrics)
358 metricgroup__print_strlist(me->metrics, raw); 380 metricgroup__print_strlist(me->metrics, raw);
359 next = rb_next(node); 381 next = rb_next(node);
@@ -387,6 +409,7 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
387 const char **ids; 409 const char **ids;
388 int idnum; 410 int idnum;
389 struct egroup *eg; 411 struct egroup *eg;
412 bool no_group = false;
390 413
391 pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name); 414 pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
392 415
@@ -397,11 +420,25 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
397 strbuf_addf(events, ","); 420 strbuf_addf(events, ",");
398 for (j = 0; j < idnum; j++) { 421 for (j = 0; j < idnum; j++) {
399 pr_debug("found event %s\n", ids[j]); 422 pr_debug("found event %s\n", ids[j]);
423 /*
424 * Duration time maps to a software event and can make
425 * groups not count. Always use it outside a
426 * group.
427 */
428 if (!strcmp(ids[j], "duration_time")) {
429 if (j > 0)
430 strbuf_addf(events, "}:W,");
431 strbuf_addf(events, "duration_time");
432 no_group = true;
433 continue;
434 }
400 strbuf_addf(events, "%s%s", 435 strbuf_addf(events, "%s%s",
401 j == 0 ? "{" : ",", 436 j == 0 || no_group ? "{" : ",",
402 ids[j]); 437 ids[j]);
438 no_group = false;
403 } 439 }
404 strbuf_addf(events, "}:W"); 440 if (!no_group)
441 strbuf_addf(events, "}:W");
405 442
406 eg = malloc(sizeof(struct egroup)); 443 eg = malloc(sizeof(struct egroup));
407 if (!eg) { 444 if (!eg) {
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index cb9c246c8962..47fe34e5f7d5 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -29,12 +29,16 @@ uint64_t arch__user_reg_mask(void);
29#ifdef HAVE_PERF_REGS_SUPPORT 29#ifdef HAVE_PERF_REGS_SUPPORT
30#include <perf_regs.h> 30#include <perf_regs.h>
31 31
32#define DWARF_MINIMAL_REGS ((1ULL << PERF_REG_IP) | (1ULL << PERF_REG_SP))
33
32int perf_reg_value(u64 *valp, struct regs_dump *regs, int id); 34int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
33 35
34#else 36#else
35#define PERF_REGS_MASK 0 37#define PERF_REGS_MASK 0
36#define PERF_REGS_MAX 0 38#define PERF_REGS_MAX 0
37 39
40#define DWARF_MINIMAL_REGS PERF_REGS_MASK
41
38static inline const char *perf_reg_name(int id __maybe_unused) 42static inline const char *perf_reg_name(int id __maybe_unused)
39{ 43{
40 return NULL; 44 return NULL;
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e0429f4ef335..55f4de6442e3 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1,6 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <linux/list.h> 2#include <linux/list.h>
3#include <linux/compiler.h> 3#include <linux/compiler.h>
4#include <linux/string.h>
4#include <sys/types.h> 5#include <sys/types.h>
5#include <errno.h> 6#include <errno.h>
6#include <fcntl.h> 7#include <fcntl.h>
@@ -394,7 +395,7 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI
394 buf[ret] = 0; 395 buf[ret] = 0;
395 396
396 /* Remove trailing newline from sysfs file */ 397 /* Remove trailing newline from sysfs file */
397 rtrim(buf); 398 strim(buf);
398 399
399 return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL, 400 return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL,
400 NULL, NULL, NULL); 401 NULL, NULL, NULL);
@@ -700,6 +701,46 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
700 return map; 701 return map;
701} 702}
702 703
/*
 * Check whether an alias's PMU name matches an actual PMU name.
 *
 * The alias PMU name may be a comma-separated list of tokens that must all
 * appear, in order, within the actual PMU name.  For example, for alias
 * "socket,pmuname" and PMU "socketX_pmunameY", "socket" is matched in
 * "socketX_pmunameY" and then "pmuname" in the remaining "pmunameY".
 *
 * Returns true on a match, false otherwise (including on allocation failure
 * or an empty alias name).
 */
static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
{
	char *tmp = NULL, *tok, *str;
	bool res;

	str = strdup(pmu_name);
	if (!str)
		return false;

	/*
	 * uncore alias may be from different PMU with common prefix
	 *
	 * Guard against a NULL first token (empty pmu_name), which would
	 * otherwise be dereferenced by strlen()/strncmp().
	 */
	tok = strtok_r(str, ",", &tmp);
	if (!tok || strncmp(pmu_name, tok, strlen(tok))) {
		res = false;
		goto out;
	}

	/*
	 * Match more complex aliases where the alias name is a comma-delimited
	 * list of tokens, orderly contained in the matching PMU name.
	 *
	 * Example: For alias "socket,pmuname" and PMU "socketX_pmunameY", we
	 * match "socket" in "socketX_pmunameY" and then "pmuname" in
	 * "pmunameY".
	 */
	for (; tok; name += strlen(tok), tok = strtok_r(NULL, ",", &tmp)) {
		name = strstr(name, tok);
		if (!name) {
			res = false;
			goto out;
		}
	}

	res = true;
out:
	free(str);
	return res;
}
743
703/* 744/*
704 * From the pmu_events_map, find the table of PMU events that corresponds 745 * From the pmu_events_map, find the table of PMU events that corresponds
705 * to the current running CPU. Then, add all PMU events from that table 746 * to the current running CPU. Then, add all PMU events from that table
@@ -709,9 +750,7 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
709{ 750{
710 int i; 751 int i;
711 struct pmu_events_map *map; 752 struct pmu_events_map *map;
712 struct pmu_event *pe;
713 const char *name = pmu->name; 753 const char *name = pmu->name;
714 const char *pname;
715 754
716 map = perf_pmu__find_map(pmu); 755 map = perf_pmu__find_map(pmu);
717 if (!map) 756 if (!map)
@@ -722,28 +761,22 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
722 */ 761 */
723 i = 0; 762 i = 0;
724 while (1) { 763 while (1) {
764 const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu";
765 struct pmu_event *pe = &map->table[i++];
766 const char *pname = pe->pmu ? pe->pmu : cpu_name;
725 767
726 pe = &map->table[i++];
727 if (!pe->name) { 768 if (!pe->name) {
728 if (pe->metric_group || pe->metric_name) 769 if (pe->metric_group || pe->metric_name)
729 continue; 770 continue;
730 break; 771 break;
731 } 772 }
732 773
733 if (!is_arm_pmu_core(name)) { 774 if (pmu_is_uncore(name) &&
734 pname = pe->pmu ? pe->pmu : "cpu"; 775 pmu_uncore_alias_match(pname, name))
735 776 goto new_alias;
736 /*
737 * uncore alias may be from different PMU
738 * with common prefix
739 */
740 if (pmu_is_uncore(name) &&
741 !strncmp(pname, name, strlen(pname)))
742 goto new_alias;
743 777
744 if (strcmp(pname, name)) 778 if (strcmp(pname, name))
745 continue; 779 continue;
746 }
747 780
748new_alias: 781new_alias:
749 /* need type casts to override 'const' */ 782 /* need type casts to override 'const' */
@@ -1343,7 +1376,7 @@ static void wordwrap(char *s, int start, int max, int corr)
1343 break; 1376 break;
1344 s += wlen; 1377 s += wlen;
1345 column += n; 1378 column += n;
1346 s = ltrim(s); 1379 s = skip_spaces(s);
1347 } 1380 }
1348} 1381}
1349 1382
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
index 23e367063446..599a1543871d 100644
--- a/tools/perf/util/print_binary.c
+++ b/tools/perf/util/print_binary.c
@@ -1,7 +1,7 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include "print_binary.h" 2#include "print_binary.h"
3#include <linux/log2.h> 3#include <linux/log2.h>
4#include "sane_ctype.h" 4#include <linux/ctype.h>
5 5
6int binary__fprintf(unsigned char *data, size_t len, 6int binary__fprintf(unsigned char *data, size_t len,
7 size_t bytes_per_line, binary__fprintf_t printer, 7 size_t bytes_per_line, binary__fprintf_t printer,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2ebf8673f8e9..6f24eaf6e504 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -39,7 +39,7 @@
39#include "session.h" 39#include "session.h"
40#include "string2.h" 40#include "string2.h"
41 41
42#include "sane_ctype.h" 42#include <linux/ctype.h>
43 43
44#define PERFPROBE_GROUP "probe" 44#define PERFPROBE_GROUP "probe"
45 45
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 16252980ff00..670c477bf8cf 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -5,7 +5,7 @@
5#include <stdbool.h> 5#include <stdbool.h>
6#include "intlist.h" 6#include "intlist.h"
7#include "probe-event.h" 7#include "probe-event.h"
8#include "sane_ctype.h" 8#include <linux/ctype.h>
9 9
10#define MAX_PROBE_BUFFER 1024 10#define MAX_PROBE_BUFFER 1024
11#define MAX_PROBES 128 11#define MAX_PROBES 128
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 7aa0ea64544e..2237bac9fadb 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -6,7 +6,7 @@
6# 6#
7 7
8util/python.c 8util/python.c
9util/ctype.c 9../lib/ctype.c
10util/evlist.c 10util/evlist.c
11util/evsel.c 11util/evsel.c
12util/cpumap.c 12util/cpumap.c
@@ -16,6 +16,7 @@ util/namespaces.c
16../lib/bitmap.c 16../lib/bitmap.c
17../lib/find_bit.c 17../lib/find_bit.c
18../lib/hweight.c 18../lib/hweight.c
19../lib/string.c
19../lib/vsprintf.c 20../lib/vsprintf.c
20util/thread_map.c 21util/thread_map.c
21util/util.c 22util/util.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 6aa7e2352e16..1e5b6718dcea 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -12,6 +12,7 @@
12#include "print_binary.h" 12#include "print_binary.h"
13#include "thread_map.h" 13#include "thread_map.h"
14#include "mmap.h" 14#include "mmap.h"
15#include "util.h"
15 16
16#if PY_MAJOR_VERSION < 3 17#if PY_MAJOR_VERSION < 3
17#define _PyUnicode_FromString(arg) \ 18#define _PyUnicode_FromString(arg) \
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index c215704931dc..10d36d9b7909 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -17,8 +17,8 @@
17 * see Documentation/perf.data-file-format.txt. 17 * see Documentation/perf.data-file-format.txt.
18 * PERF_RECORD_AUXTRACE_INFO: 18 * PERF_RECORD_AUXTRACE_INFO:
 19 * Defines a table of contents for PERF_RECORD_AUXTRACE records. This 19 * Defines a table of contents for PERF_RECORD_AUXTRACE records. This
20 * record is generated during 'perf record' command. Each record contains up 20 * record is generated during 'perf record' command. Each record contains
21 * to 256 entries describing offset and size of the AUXTRACE data in the 21 * up to 256 entries describing offset and size of the AUXTRACE data in the
22 * perf.data file. 22 * perf.data file.
23 * PERF_RECORD_AUXTRACE_ERROR: 23 * PERF_RECORD_AUXTRACE_ERROR:
24 * Indicates an error during AUXTRACE collection such as buffer overflow. 24 * Indicates an error during AUXTRACE collection such as buffer overflow.
@@ -237,10 +237,33 @@ static int s390_cpumcf_dumpctr(struct s390_cpumsf *sf,
237 return rc; 237 return rc;
238} 238}
239 239
240/* Display s390 CPU measurement facility basic-sampling data entry */ 240/* Display s390 CPU measurement facility basic-sampling data entry
241 * Data written on s390 in big endian byte order and contains bit
242 * fields across byte boundaries.
243 */
241static bool s390_cpumsf_basic_show(const char *color, size_t pos, 244static bool s390_cpumsf_basic_show(const char *color, size_t pos,
242 struct hws_basic_entry *basic) 245 struct hws_basic_entry *basicp)
243{ 246{
247 struct hws_basic_entry *basic = basicp;
248#if __BYTE_ORDER == __LITTLE_ENDIAN
249 struct hws_basic_entry local;
250 unsigned long long word = be64toh(*(unsigned long long *)basicp);
251
252 memset(&local, 0, sizeof(local));
253 local.def = be16toh(basicp->def);
254 local.prim_asn = word & 0xffff;
255 local.CL = word >> 30 & 0x3;
256 local.I = word >> 32 & 0x1;
257 local.AS = word >> 33 & 0x3;
258 local.P = word >> 35 & 0x1;
259 local.W = word >> 36 & 0x1;
260 local.T = word >> 37 & 0x1;
261 local.U = word >> 40 & 0xf;
262 local.ia = be64toh(basicp->ia);
263 local.gpp = be64toh(basicp->gpp);
264 local.hpp = be64toh(basicp->hpp);
265 basic = &local;
266#endif
244 if (basic->def != 1) { 267 if (basic->def != 1) {
245 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos); 268 pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
246 return false; 269 return false;
@@ -258,10 +281,22 @@ static bool s390_cpumsf_basic_show(const char *color, size_t pos,
258 return true; 281 return true;
259} 282}
260 283
261/* Display s390 CPU measurement facility diagnostic-sampling data entry */ 284/* Display s390 CPU measurement facility diagnostic-sampling data entry.
285 * Data written on s390 in big endian byte order and contains bit
286 * fields across byte boundaries.
287 */
262static bool s390_cpumsf_diag_show(const char *color, size_t pos, 288static bool s390_cpumsf_diag_show(const char *color, size_t pos,
263 struct hws_diag_entry *diag) 289 struct hws_diag_entry *diagp)
264{ 290{
291 struct hws_diag_entry *diag = diagp;
292#if __BYTE_ORDER == __LITTLE_ENDIAN
293 struct hws_diag_entry local;
294 unsigned long long word = be64toh(*(unsigned long long *)diagp);
295
296 local.def = be16toh(diagp->def);
297 local.I = word >> 32 & 0x1;
298 diag = &local;
299#endif
265 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) { 300 if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
266 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos); 301 pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
267 return false; 302 return false;
@@ -272,35 +307,52 @@ static bool s390_cpumsf_diag_show(const char *color, size_t pos,
272} 307}
273 308
274/* Return TOD timestamp contained in an trailer entry */ 309/* Return TOD timestamp contained in an trailer entry */
275static unsigned long long trailer_timestamp(struct hws_trailer_entry *te) 310static unsigned long long trailer_timestamp(struct hws_trailer_entry *te,
311 int idx)
276{ 312{
277 /* te->t set: TOD in STCKE format, bytes 8-15 313 /* te->t set: TOD in STCKE format, bytes 8-15
278 * to->t not set: TOD in STCK format, bytes 0-7 314 * to->t not set: TOD in STCK format, bytes 0-7
279 */ 315 */
280 unsigned long long ts; 316 unsigned long long ts;
281 317
282 memcpy(&ts, &te->timestamp[te->t], sizeof(ts)); 318 memcpy(&ts, &te->timestamp[idx], sizeof(ts));
283 return ts; 319 return be64toh(ts);
284} 320}
285 321
286/* Display s390 CPU measurement facility trailer entry */ 322/* Display s390 CPU measurement facility trailer entry */
287static bool s390_cpumsf_trailer_show(const char *color, size_t pos, 323static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
288 struct hws_trailer_entry *te) 324 struct hws_trailer_entry *te)
289{ 325{
326#if __BYTE_ORDER == __LITTLE_ENDIAN
327 struct hws_trailer_entry local;
328 const unsigned long long flags = be64toh(te->flags);
329
330 memset(&local, 0, sizeof(local));
331 local.f = flags >> 63 & 0x1;
332 local.a = flags >> 62 & 0x1;
333 local.t = flags >> 61 & 0x1;
334 local.bsdes = be16toh((flags >> 16 & 0xffff));
335 local.dsdes = be16toh((flags & 0xffff));
336 memcpy(&local.timestamp, te->timestamp, sizeof(te->timestamp));
337 local.overflow = be64toh(te->overflow);
338 local.clock_base = be64toh(te->progusage[0]) >> 63 & 1;
339 local.progusage2 = be64toh(te->progusage2);
340 te = &local;
341#endif
290 if (te->bsdes != sizeof(struct hws_basic_entry)) { 342 if (te->bsdes != sizeof(struct hws_basic_entry)) {
291 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos); 343 pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
292 return false; 344 return false;
293 } 345 }
294 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" 346 color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
295 " dsdes:%d Overflow:%lld Time:%#llx\n" 347 " dsdes:%d Overflow:%lld Time:%#llx\n"
296 "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n", 348 "\t\tC:%d TOD:%#lx\n",
297 pos, 349 pos,
298 te->f ? 'F' : ' ', 350 te->f ? 'F' : ' ',
299 te->a ? 'A' : ' ', 351 te->a ? 'A' : ' ',
300 te->t ? 'T' : ' ', 352 te->t ? 'T' : ' ',
301 te->bsdes, te->dsdes, te->overflow, 353 te->bsdes, te->dsdes, te->overflow,
302 trailer_timestamp(te), te->clock_base, te->progusage2, 354 trailer_timestamp(te, te->clock_base),
303 te->progusage[0], te->progusage[1]); 355 te->clock_base, te->progusage2);
304 return true; 356 return true;
305} 357}
306 358
@@ -327,13 +379,13 @@ static bool s390_cpumsf_validate(int machine_type,
327 *dsdes = *bsdes = 0; 379 *dsdes = *bsdes = 0;
328 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */ 380 if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
329 return false; 381 return false;
330 if (basic->def != 1) /* No basic set entry, must be first */ 382 if (be16toh(basic->def) != 1) /* No basic set entry, must be first */
331 return false; 383 return false;
332 /* Check for trailer entry at end of SDB */ 384 /* Check for trailer entry at end of SDB */
333 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 385 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
334 - sizeof(*te)); 386 - sizeof(*te));
335 *bsdes = te->bsdes; 387 *bsdes = be16toh(te->bsdes);
336 *dsdes = te->dsdes; 388 *dsdes = be16toh(te->dsdes);
337 if (!te->bsdes && !te->dsdes) { 389 if (!te->bsdes && !te->dsdes) {
338 /* Very old hardware, use CPUID */ 390 /* Very old hardware, use CPUID */
339 switch (machine_type) { 391 switch (machine_type) {
@@ -495,19 +547,27 @@ static bool s390_cpumsf_make_event(size_t pos,
495static unsigned long long get_trailer_time(const unsigned char *buf) 547static unsigned long long get_trailer_time(const unsigned char *buf)
496{ 548{
497 struct hws_trailer_entry *te; 549 struct hws_trailer_entry *te;
498 unsigned long long aux_time; 550 unsigned long long aux_time, progusage2;
551 bool clock_base;
499 552
500 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ 553 te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
501 - sizeof(*te)); 554 - sizeof(*te));
502 555
503 if (!te->clock_base) /* TOD_CLOCK_BASE value missing */ 556#if __BYTE_ORDER == __LITTLE_ENDIAN
557 clock_base = be64toh(te->progusage[0]) >> 63 & 0x1;
558 progusage2 = be64toh(te->progusage[1]);
559#else
560 clock_base = te->clock_base;
561 progusage2 = te->progusage2;
562#endif
563 if (!clock_base) /* TOD_CLOCK_BASE value missing */
504 return 0; 564 return 0;
505 565
506 /* Correct calculation to convert time stamp in trailer entry to 566 /* Correct calculation to convert time stamp in trailer entry to
507 * nano seconds (taken from arch/s390 function tod_to_ns()). 567 * nano seconds (taken from arch/s390 function tod_to_ns()).
508 * TOD_CLOCK_BASE is stored in trailer entry member progusage2. 568 * TOD_CLOCK_BASE is stored in trailer entry member progusage2.
509 */ 569 */
510 aux_time = trailer_timestamp(te) - te->progusage2; 570 aux_time = trailer_timestamp(te, clock_base) - progusage2;
511 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9); 571 aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
512 return aux_time; 572 return aux_time;
513} 573}
diff --git a/tools/perf/util/sane_ctype.h b/tools/perf/util/sane_ctype.h
deleted file mode 100644
index c2b42ff9ff32..000000000000
--- a/tools/perf/util/sane_ctype.h
+++ /dev/null
@@ -1,52 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _PERF_SANE_CTYPE_H
3#define _PERF_SANE_CTYPE_H
4
5extern const char *graph_line;
6extern const char *graph_dotted_line;
7extern const char *spaces;
8extern const char *dots;
9
10/* Sane ctype - no locale, and works with signed chars */
11#undef isascii
12#undef isspace
13#undef isdigit
14#undef isxdigit
15#undef isalpha
16#undef isprint
17#undef isalnum
18#undef islower
19#undef isupper
20#undef tolower
21#undef toupper
22
23extern unsigned char sane_ctype[256];
24#define GIT_SPACE 0x01
25#define GIT_DIGIT 0x02
26#define GIT_ALPHA 0x04
27#define GIT_GLOB_SPECIAL 0x08
28#define GIT_REGEX_SPECIAL 0x10
29#define GIT_PRINT_EXTRA 0x20
30#define GIT_PRINT 0x3E
31#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
32#define isascii(x) (((x) & ~0x7f) == 0)
33#define isspace(x) sane_istest(x,GIT_SPACE)
34#define isdigit(x) sane_istest(x,GIT_DIGIT)
35#define isxdigit(x) \
36 (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G')
37#define isalpha(x) sane_istest(x,GIT_ALPHA)
38#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
39#define isprint(x) sane_istest(x,GIT_PRINT)
40#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20))
41#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20))
42#define tolower(x) sane_case((unsigned char)(x), 0x20)
43#define toupper(x) sane_case((unsigned char)(x), 0)
44
45static inline int sane_case(int x, int high)
46{
47 if (sane_istest(x, GIT_ALPHA))
48 x = (x & ~0x20) | high;
49 return x;
50}
51
52#endif /* _PERF_SANE_CTYPE_H */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 22f52b669871..112bed65232f 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -112,6 +112,7 @@ struct tables {
112 PyObject *sample_handler; 112 PyObject *sample_handler;
113 PyObject *call_path_handler; 113 PyObject *call_path_handler;
114 PyObject *call_return_handler; 114 PyObject *call_return_handler;
115 PyObject *synth_handler;
115 bool db_export_mode; 116 bool db_export_mode;
116}; 117};
117 118
@@ -947,6 +948,12 @@ static int tuple_set_string(PyObject *t, unsigned int pos, const char *s)
947 return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s)); 948 return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s));
948} 949}
949 950
951static int tuple_set_bytes(PyObject *t, unsigned int pos, void *bytes,
952 unsigned int sz)
953{
954 return PyTuple_SetItem(t, pos, _PyBytes_FromStringAndSize(bytes, sz));
955}
956
950static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel) 957static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel)
951{ 958{
952 struct tables *tables = container_of(dbe, struct tables, dbe); 959 struct tables *tables = container_of(dbe, struct tables, dbe);
@@ -1105,13 +1112,13 @@ static int python_export_branch_type(struct db_export *dbe, u32 branch_type,
1105 return 0; 1112 return 0;
1106} 1113}
1107 1114
1108static int python_export_sample(struct db_export *dbe, 1115static void python_export_sample_table(struct db_export *dbe,
1109 struct export_sample *es) 1116 struct export_sample *es)
1110{ 1117{
1111 struct tables *tables = container_of(dbe, struct tables, dbe); 1118 struct tables *tables = container_of(dbe, struct tables, dbe);
1112 PyObject *t; 1119 PyObject *t;
1113 1120
1114 t = tuple_new(22); 1121 t = tuple_new(24);
1115 1122
1116 tuple_set_u64(t, 0, es->db_id); 1123 tuple_set_u64(t, 0, es->db_id);
1117 tuple_set_u64(t, 1, es->evsel->db_id); 1124 tuple_set_u64(t, 1, es->evsel->db_id);
@@ -1135,10 +1142,39 @@ static int python_export_sample(struct db_export *dbe,
1135 tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK); 1142 tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
1136 tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX)); 1143 tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
1137 tuple_set_u64(t, 21, es->call_path_id); 1144 tuple_set_u64(t, 21, es->call_path_id);
1145 tuple_set_u64(t, 22, es->sample->insn_cnt);
1146 tuple_set_u64(t, 23, es->sample->cyc_cnt);
1138 1147
1139 call_object(tables->sample_handler, t, "sample_table"); 1148 call_object(tables->sample_handler, t, "sample_table");
1140 1149
1141 Py_DECREF(t); 1150 Py_DECREF(t);
1151}
1152
1153static void python_export_synth(struct db_export *dbe, struct export_sample *es)
1154{
1155 struct tables *tables = container_of(dbe, struct tables, dbe);
1156 PyObject *t;
1157
1158 t = tuple_new(3);
1159
1160 tuple_set_u64(t, 0, es->db_id);
1161 tuple_set_u64(t, 1, es->evsel->attr.config);
1162 tuple_set_bytes(t, 2, es->sample->raw_data, es->sample->raw_size);
1163
1164 call_object(tables->synth_handler, t, "synth_data");
1165
1166 Py_DECREF(t);
1167}
1168
1169static int python_export_sample(struct db_export *dbe,
1170 struct export_sample *es)
1171{
1172 struct tables *tables = container_of(dbe, struct tables, dbe);
1173
1174 python_export_sample_table(dbe, es);
1175
1176 if (es->evsel->attr.type == PERF_TYPE_SYNTH && tables->synth_handler)
1177 python_export_synth(dbe, es);
1142 1178
1143 return 0; 1179 return 0;
1144} 1180}
@@ -1173,7 +1209,7 @@ static int python_export_call_return(struct db_export *dbe,
1173 u64 comm_db_id = cr->comm ? cr->comm->db_id : 0; 1209 u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
1174 PyObject *t; 1210 PyObject *t;
1175 1211
1176 t = tuple_new(12); 1212 t = tuple_new(14);
1177 1213
1178 tuple_set_u64(t, 0, cr->db_id); 1214 tuple_set_u64(t, 0, cr->db_id);
1179 tuple_set_u64(t, 1, cr->thread->db_id); 1215 tuple_set_u64(t, 1, cr->thread->db_id);
@@ -1187,6 +1223,8 @@ static int python_export_call_return(struct db_export *dbe,
1187 tuple_set_u64(t, 9, cr->cp->parent->db_id); 1223 tuple_set_u64(t, 9, cr->cp->parent->db_id);
1188 tuple_set_s32(t, 10, cr->flags); 1224 tuple_set_s32(t, 10, cr->flags);
1189 tuple_set_u64(t, 11, cr->parent_db_id); 1225 tuple_set_u64(t, 11, cr->parent_db_id);
1226 tuple_set_u64(t, 12, cr->insn_count);
1227 tuple_set_u64(t, 13, cr->cyc_count);
1190 1228
1191 call_object(tables->call_return_handler, t, "call_return_table"); 1229 call_object(tables->call_return_handler, t, "call_return_table");
1192 1230
@@ -1473,6 +1511,14 @@ static void set_table_handlers(struct tables *tables)
1473 SET_TABLE_HANDLER(sample); 1511 SET_TABLE_HANDLER(sample);
1474 SET_TABLE_HANDLER(call_path); 1512 SET_TABLE_HANDLER(call_path);
1475 SET_TABLE_HANDLER(call_return); 1513 SET_TABLE_HANDLER(call_return);
1514
1515 /*
1516 * Synthesized events are samples but with architecture-specific data
1517 * stored in sample->raw_data. They are exported via
1518 * python_export_sample() and consequently do not need a separate export
1519 * callback.
1520 */
1521 tables->synth_handler = get_handler("synth_data");
1476} 1522}
1477 1523
1478#if PY_MAJOR_VERSION < 3 1524#if PY_MAJOR_VERSION < 3
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 453f6f6f29f3..3b791ef2cd50 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -23,8 +23,12 @@ int smt_on(void)
23 char fn[256]; 23 char fn[256];
24 24
25 snprintf(fn, sizeof fn, 25 snprintf(fn, sizeof fn,
26 "devices/system/cpu/cpu%d/topology/thread_siblings", 26 "devices/system/cpu/cpu%d/topology/core_cpus", cpu);
27 cpu); 27 if (access(fn, F_OK) == -1) {
28 snprintf(fn, sizeof fn,
29 "devices/system/cpu/cpu%d/topology/thread_siblings",
30 cpu);
31 }
28 if (sysfs__read_str(fn, &str, &strlen) < 0) 32 if (sysfs__read_str(fn, &str, &strlen) < 0)
29 continue; 33 continue;
30 /* Entry is hex, but does not have 0x, so need custom parser */ 34 /* Entry is hex, but does not have 0x, so need custom parser */
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index ce376a73f964..a0f232151d6f 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -79,6 +79,9 @@ struct hist_entry_diff {
79 79
80 /* HISTC_WEIGHTED_DIFF */ 80 /* HISTC_WEIGHTED_DIFF */
81 s64 wdiff; 81 s64 wdiff;
82
83 /* PERF_HPP_DIFF__CYCLES */
84 s64 cycles;
82 }; 85 };
83}; 86};
84 87
@@ -144,6 +147,7 @@ struct hist_entry {
144 long time; 147 long time;
145 struct hists *hists; 148 struct hists *hists;
146 struct mem_info *mem_info; 149 struct mem_info *mem_info;
150 struct block_info *block_info;
147 void *raw_data; 151 void *raw_data;
148 u32 raw_size; 152 u32 raw_size;
149 int num_res; 153 int num_res;
@@ -285,6 +289,15 @@ struct sort_entry {
285 u8 se_width_idx; 289 u8 se_width_idx;
286}; 290};
287 291
292struct block_hist {
293 struct hists block_hists;
294 struct perf_hpp_list block_list;
295 struct perf_hpp_fmt block_fmt;
296 int block_idx;
297 bool valid;
298 struct hist_entry he;
299};
300
288extern struct sort_entry sort_thread; 301extern struct sort_entry sort_thread;
289extern struct list_head hist_entry__sort_list; 302extern struct list_head hist_entry__sort_list;
290 303
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index 10ca1533937e..dcad75daf5e4 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c
@@ -5,11 +5,13 @@
5#include <string.h> 5#include <string.h>
6 6
7#include <linux/kernel.h> 7#include <linux/kernel.h>
8#include <linux/string.h>
8 9
9#include "util/dso.h" 10#include "util/dso.h"
10#include "util/util.h" 11#include "util/util.h"
11#include "util/debug.h" 12#include "util/debug.h"
12#include "util/callchain.h" 13#include "util/callchain.h"
14#include "util/symbol_conf.h"
13#include "srcline.h" 15#include "srcline.h"
14#include "string2.h" 16#include "string2.h"
15#include "symbol.h" 17#include "symbol.h"
@@ -287,7 +289,8 @@ static int addr2line(const char *dso_name, u64 addr,
287 } 289 }
288 290
289 if (a2l == NULL) { 291 if (a2l == NULL) {
290 pr_warning("addr2line_init failed for %s\n", dso_name); 292 if (!symbol_conf.disable_add2line_warn)
293 pr_warning("addr2line_init failed for %s\n", dso_name);
291 return 0; 294 return 0;
292 } 295 }
293 296
@@ -464,7 +467,7 @@ static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
464 char *srcline; 467 char *srcline;
465 struct symbol *inline_sym; 468 struct symbol *inline_sym;
466 469
467 rtrim(funcname); 470 strim(funcname);
468 471
469 if (getline(&filename, &filelen, fp) == -1) 472 if (getline(&filename, &filelen, fp) == -1)
470 goto out; 473 goto out;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 4c53bae5644b..58df6a0dbb9f 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1,5 +1,6 @@
1#include <stdio.h> 1#include <stdio.h>
2#include <inttypes.h> 2#include <inttypes.h>
3#include <linux/string.h>
3#include <linux/time64.h> 4#include <linux/time64.h>
4#include <math.h> 5#include <math.h>
5#include "color.h" 6#include "color.h"
@@ -10,7 +11,7 @@
10#include "thread_map.h" 11#include "thread_map.h"
11#include "cpumap.h" 12#include "cpumap.h"
12#include "string2.h" 13#include "string2.h"
13#include "sane_ctype.h" 14#include <linux/ctype.h>
14#include "cgroup.h" 15#include "cgroup.h"
15#include <math.h> 16#include <math.h>
16#include <api/fs/fs.h> 17#include <api/fs/fs.h>
@@ -69,8 +70,9 @@ static void aggr_printout(struct perf_stat_config *config,
69{ 70{
70 switch (config->aggr_mode) { 71 switch (config->aggr_mode) {
71 case AGGR_CORE: 72 case AGGR_CORE:
72 fprintf(config->output, "S%d-C%*d%s%*d%s", 73 fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
73 cpu_map__id_to_socket(id), 74 cpu_map__id_to_socket(id),
75 cpu_map__id_to_die(id),
74 config->csv_output ? 0 : -8, 76 config->csv_output ? 0 : -8,
75 cpu_map__id_to_cpu(id), 77 cpu_map__id_to_cpu(id),
76 config->csv_sep, 78 config->csv_sep,
@@ -78,6 +80,16 @@ static void aggr_printout(struct perf_stat_config *config,
78 nr, 80 nr,
79 config->csv_sep); 81 config->csv_sep);
80 break; 82 break;
83 case AGGR_DIE:
84 fprintf(config->output, "S%d-D%*d%s%*d%s",
85 cpu_map__id_to_socket(id << 16),
86 config->csv_output ? 0 : -8,
87 cpu_map__id_to_die(id << 16),
88 config->csv_sep,
89 config->csv_output ? 0 : 4,
90 nr,
91 config->csv_sep);
92 break;
81 case AGGR_SOCKET: 93 case AGGR_SOCKET:
82 fprintf(config->output, "S%*d%s%*d%s", 94 fprintf(config->output, "S%*d%s%*d%s",
83 config->csv_output ? 0 : -5, 95 config->csv_output ? 0 : -5,
@@ -89,8 +101,9 @@ static void aggr_printout(struct perf_stat_config *config,
89 break; 101 break;
90 case AGGR_NONE: 102 case AGGR_NONE:
91 if (evsel->percore) { 103 if (evsel->percore) {
92 fprintf(config->output, "S%d-C%*d%s", 104 fprintf(config->output, "S%d-D%d-C%*d%s",
93 cpu_map__id_to_socket(id), 105 cpu_map__id_to_socket(id),
106 cpu_map__id_to_die(id),
94 config->csv_output ? 0 : -5, 107 config->csv_output ? 0 : -5,
95 cpu_map__id_to_cpu(id), config->csv_sep); 108 cpu_map__id_to_cpu(id), config->csv_sep);
96 } else { 109 } else {
@@ -199,13 +212,11 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused,
199 return; 212 return;
200 } 213 }
201 snprintf(buf, sizeof(buf), fmt, val); 214 snprintf(buf, sizeof(buf), fmt, val);
202 ends = vals = ltrim(buf); 215 ends = vals = skip_spaces(buf);
203 while (isdigit(*ends) || *ends == '.') 216 while (isdigit(*ends) || *ends == '.')
204 ends++; 217 ends++;
205 *ends = 0; 218 *ends = 0;
206 while (isspace(*unit)) 219 fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, skip_spaces(unit));
207 unit++;
208 fprintf(out, "%s%s%s%s", config->csv_sep, vals, config->csv_sep, unit);
209} 220}
210 221
211/* Filter out some columns that don't work well in metrics only mode */ 222/* Filter out some columns that don't work well in metrics only mode */
@@ -269,7 +280,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused
269 return; 280 return;
270 unit = fixunit(tbuf, os->evsel, unit); 281 unit = fixunit(tbuf, os->evsel, unit);
271 snprintf(buf, sizeof buf, fmt, val); 282 snprintf(buf, sizeof buf, fmt, val);
272 ends = vals = ltrim(buf); 283 ends = vals = skip_spaces(buf);
273 while (isdigit(*ends) || *ends == '.') 284 while (isdigit(*ends) || *ends == '.')
274 ends++; 285 ends++;
275 *ends = 0; 286 *ends = 0;
@@ -407,6 +418,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
407 [AGGR_THREAD] = 1, 418 [AGGR_THREAD] = 1,
408 [AGGR_NONE] = 1, 419 [AGGR_NONE] = 1,
409 [AGGR_SOCKET] = 2, 420 [AGGR_SOCKET] = 2,
421 [AGGR_DIE] = 2,
410 [AGGR_CORE] = 2, 422 [AGGR_CORE] = 2,
411 }; 423 };
412 424
@@ -542,7 +554,8 @@ static void collect_all_aliases(struct perf_stat_config *config, struct perf_evs
542 alias->scale != counter->scale || 554 alias->scale != counter->scale ||
543 alias->cgrp != counter->cgrp || 555 alias->cgrp != counter->cgrp ||
544 strcmp(alias->unit, counter->unit) || 556 strcmp(alias->unit, counter->unit) ||
545 perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter)) 557 perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) ||
558 !strcmp(alias->pmu_name, counter->pmu_name))
546 break; 559 break;
547 alias->merged_stat = true; 560 alias->merged_stat = true;
548 cb(config, alias, data, false); 561 cb(config, alias, data, false);
@@ -879,7 +892,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
879} 892}
880 893
881static int aggr_header_lens[] = { 894static int aggr_header_lens[] = {
882 [AGGR_CORE] = 18, 895 [AGGR_CORE] = 24,
896 [AGGR_DIE] = 18,
883 [AGGR_SOCKET] = 12, 897 [AGGR_SOCKET] = 12,
884 [AGGR_NONE] = 6, 898 [AGGR_NONE] = 6,
885 [AGGR_THREAD] = 24, 899 [AGGR_THREAD] = 24,
@@ -888,6 +902,7 @@ static int aggr_header_lens[] = {
888 902
889static const char *aggr_header_csv[] = { 903static const char *aggr_header_csv[] = {
890 [AGGR_CORE] = "core,cpus,", 904 [AGGR_CORE] = "core,cpus,",
905 [AGGR_DIE] = "die,cpus",
891 [AGGR_SOCKET] = "socket,cpus", 906 [AGGR_SOCKET] = "socket,cpus",
892 [AGGR_NONE] = "cpu,", 907 [AGGR_NONE] = "cpu,",
893 [AGGR_THREAD] = "comm-pid,", 908 [AGGR_THREAD] = "comm-pid,",
@@ -954,8 +969,13 @@ static void print_interval(struct perf_stat_config *config,
954 if (!metric_only) 969 if (!metric_only)
955 fprintf(output, " counts %*s events\n", unit_width, "unit"); 970 fprintf(output, " counts %*s events\n", unit_width, "unit");
956 break; 971 break;
972 case AGGR_DIE:
973 fprintf(output, "# time die cpus");
974 if (!metric_only)
975 fprintf(output, " counts %*s events\n", unit_width, "unit");
976 break;
957 case AGGR_CORE: 977 case AGGR_CORE:
958 fprintf(output, "# time core cpus"); 978 fprintf(output, "# time core cpus");
959 if (!metric_only) 979 if (!metric_only)
960 fprintf(output, " counts %*s events\n", unit_width, "unit"); 980 fprintf(output, " counts %*s events\n", unit_width, "unit");
961 break; 981 break;
@@ -1165,6 +1185,7 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
1165 1185
1166 switch (config->aggr_mode) { 1186 switch (config->aggr_mode) {
1167 case AGGR_CORE: 1187 case AGGR_CORE:
1188 case AGGR_DIE:
1168 case AGGR_SOCKET: 1189 case AGGR_SOCKET:
1169 print_aggr(config, evlist, prefix); 1190 print_aggr(config, evlist, prefix);
1170 break; 1191 break;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 83d8094be4fe..cb891e5c2969 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -12,6 +12,7 @@
12/* 12/*
13 * AGGR_GLOBAL: Use CPU 0 13 * AGGR_GLOBAL: Use CPU 0
14 * AGGR_SOCKET: Use first CPU of socket 14 * AGGR_SOCKET: Use first CPU of socket
15 * AGGR_DIE: Use first CPU of die
15 * AGGR_CORE: Use first CPU of core 16 * AGGR_CORE: Use first CPU of core
16 * AGGR_NONE: Use matching CPU 17 * AGGR_NONE: Use matching CPU
17 * AGGR_THREAD: Not supported? 18 * AGGR_THREAD: Not supported?
@@ -303,7 +304,7 @@ static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
303 struct perf_evsel *c2; 304 struct perf_evsel *c2;
304 305
305 evlist__for_each_entry (evsel_list, c2) { 306 evlist__for_each_entry (evsel_list, c2) {
306 if (!strcasecmp(c2->name, name)) 307 if (!strcasecmp(c2->name, name) && !c2->collect_stat)
307 return c2; 308 return c2;
308 } 309 }
309 return NULL; 310 return NULL;
@@ -342,7 +343,8 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
342 if (leader) { 343 if (leader) {
343 /* Search in group */ 344 /* Search in group */
344 for_each_group_member (oc, leader) { 345 for_each_group_member (oc, leader) {
345 if (!strcasecmp(oc->name, metric_names[i])) { 346 if (!strcasecmp(oc->name, metric_names[i]) &&
347 !oc->collect_stat) {
346 found = true; 348 found = true;
347 break; 349 break;
348 } 350 }
@@ -722,6 +724,7 @@ static void generic_metric(struct perf_stat_config *config,
722 double ratio; 724 double ratio;
723 int i; 725 int i;
724 void *ctxp = out->ctx; 726 void *ctxp = out->ctx;
727 char *n, *pn;
725 728
726 expr__ctx_init(&pctx); 729 expr__ctx_init(&pctx);
727 expr__add_id(&pctx, name, avg); 730 expr__add_id(&pctx, name, avg);
@@ -741,7 +744,19 @@ static void generic_metric(struct perf_stat_config *config,
741 stats = &v->stats; 744 stats = &v->stats;
742 scale = 1.0; 745 scale = 1.0;
743 } 746 }
744 expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale); 747
748 n = strdup(metric_events[i]->name);
749 if (!n)
750 return;
751 /*
752 * This display code with --no-merge adds [cpu] postfixes.
753 * These are not supported by the parser. Remove everything
754 * after the space.
755 */
756 pn = strchr(n, ' ');
757 if (pn)
758 *pn = 0;
759 expr__add_id(&pctx, n, avg_stats(stats)*scale);
745 } 760 }
746 if (!metric_events[i]) { 761 if (!metric_events[i]) {
747 const char *p = metric_expr; 762 const char *p = metric_expr;
@@ -758,6 +773,9 @@ static void generic_metric(struct perf_stat_config *config,
758 (metric_name ? metric_name : name) : "", 0); 773 (metric_name ? metric_name : name) : "", 0);
759 } else 774 } else
760 print_metric(config, ctxp, NULL, NULL, "", 0); 775 print_metric(config, ctxp, NULL, NULL, "", 0);
776
777 for (i = 1; i < pctx.num_ids; i++)
778 free((void *)pctx.ids[i].name);
761} 779}
762 780
763void perf_stat__print_shadow_stats(struct perf_stat_config *config, 781void perf_stat__print_shadow_stats(struct perf_stat_config *config,
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c3115d939b0b..d91fe754b6d2 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -272,6 +272,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
272 switch (config->aggr_mode) { 272 switch (config->aggr_mode) {
273 case AGGR_THREAD: 273 case AGGR_THREAD:
274 case AGGR_CORE: 274 case AGGR_CORE:
275 case AGGR_DIE:
275 case AGGR_SOCKET: 276 case AGGR_SOCKET:
276 case AGGR_NONE: 277 case AGGR_NONE:
277 if (!evsel->snapshot) 278 if (!evsel->snapshot)
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 2f9c9159a364..7032dd1eeac2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -44,6 +44,7 @@ enum aggr_mode {
44 AGGR_NONE, 44 AGGR_NONE,
45 AGGR_GLOBAL, 45 AGGR_GLOBAL,
46 AGGR_SOCKET, 46 AGGR_SOCKET,
47 AGGR_DIE,
47 AGGR_CORE, 48 AGGR_CORE,
48 AGGR_THREAD, 49 AGGR_THREAD,
49 AGGR_UNSET, 50 AGGR_UNSET,
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
index 7f3253d44afd..90ea2b209cbb 100644
--- a/tools/perf/util/strfilter.c
+++ b/tools/perf/util/strfilter.c
@@ -4,7 +4,8 @@
4#include "strfilter.h" 4#include "strfilter.h"
5 5
6#include <errno.h> 6#include <errno.h>
7#include "sane_ctype.h" 7#include <linux/ctype.h>
8#include <linux/string.h>
8 9
9/* Operators */ 10/* Operators */
10static const char *OP_and = "&"; /* Logical AND */ 11static const char *OP_and = "&"; /* Logical AND */
@@ -37,8 +38,7 @@ static const char *get_token(const char *s, const char **e)
37{ 38{
38 const char *p; 39 const char *p;
39 40
40 while (isspace(*s)) /* Skip spaces */ 41 s = skip_spaces(s);
41 s++;
42 42
43 if (*s == '\0') { 43 if (*s == '\0') {
44 p = s; 44 p = s;
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
index d8bfd0c4d2cb..52603876c548 100644
--- a/tools/perf/util/string.c
+++ b/tools/perf/util/string.c
@@ -4,7 +4,16 @@
4#include <linux/string.h> 4#include <linux/string.h>
5#include <stdlib.h> 5#include <stdlib.h>
6 6
7#include "sane_ctype.h" 7#include <linux/ctype.h>
8
9const char *graph_dotted_line =
10 "---------------------------------------------------------------------"
11 "---------------------------------------------------------------------"
12 "---------------------------------------------------------------------";
13const char *dots =
14 "....................................................................."
15 "....................................................................."
16 ".....................................................................";
8 17
9#define K 1024LL 18#define K 1024LL
10/* 19/*
@@ -60,109 +69,6 @@ out_err:
60 return -1; 69 return -1;
61} 70}
62 71
63/*
64 * Helper function for splitting a string into an argv-like array.
65 * originally copied from lib/argv_split.c
66 */
67static const char *skip_sep(const char *cp)
68{
69 while (*cp && isspace(*cp))
70 cp++;
71
72 return cp;
73}
74
75static const char *skip_arg(const char *cp)
76{
77 while (*cp && !isspace(*cp))
78 cp++;
79
80 return cp;
81}
82
83static int count_argc(const char *str)
84{
85 int count = 0;
86
87 while (*str) {
88 str = skip_sep(str);
89 if (*str) {
90 count++;
91 str = skip_arg(str);
92 }
93 }
94
95 return count;
96}
97
98/**
99 * argv_free - free an argv
100 * @argv - the argument vector to be freed
101 *
102 * Frees an argv and the strings it points to.
103 */
104void argv_free(char **argv)
105{
106 char **p;
107 for (p = argv; *p; p++) {
108 free(*p);
109 *p = NULL;
110 }
111
112 free(argv);
113}
114
115/**
116 * argv_split - split a string at whitespace, returning an argv
117 * @str: the string to be split
118 * @argcp: returned argument count
119 *
120 * Returns an array of pointers to strings which are split out from
121 * @str. This is performed by strictly splitting on white-space; no
122 * quote processing is performed. Multiple whitespace characters are
123 * considered to be a single argument separator. The returned array
124 * is always NULL-terminated. Returns NULL on memory allocation
125 * failure.
126 */
127char **argv_split(const char *str, int *argcp)
128{
129 int argc = count_argc(str);
130 char **argv = calloc(argc + 1, sizeof(*argv));
131 char **argvp;
132
133 if (argv == NULL)
134 goto out;
135
136 if (argcp)
137 *argcp = argc;
138
139 argvp = argv;
140
141 while (*str) {
142 str = skip_sep(str);
143
144 if (*str) {
145 const char *p = str;
146 char *t;
147
148 str = skip_arg(str);
149
150 t = strndup(p, str-p);
151 if (t == NULL)
152 goto fail;
153 *argvp++ = t;
154 }
155 }
156 *argvp = NULL;
157
158out:
159 return argv;
160
161fail:
162 argv_free(argv);
163 return NULL;
164}
165
166/* Character class matching */ 72/* Character class matching */
167static bool __match_charclass(const char *pat, char c, const char **npat) 73static bool __match_charclass(const char *pat, char c, const char **npat)
168{ 74{
@@ -303,61 +209,6 @@ int strtailcmp(const char *s1, const char *s2)
303 return 0; 209 return 0;
304} 210}
305 211
306/**
307 * strxfrchar - Locate and replace character in @s
308 * @s: The string to be searched/changed.
309 * @from: Source character to be replaced.
310 * @to: Destination character.
311 *
312 * Return pointer to the changed string.
313 */
314char *strxfrchar(char *s, char from, char to)
315{
316 char *p = s;
317
318 while ((p = strchr(p, from)) != NULL)
319 *p++ = to;
320
321 return s;
322}
323
324/**
325 * ltrim - Removes leading whitespace from @s.
326 * @s: The string to be stripped.
327 *
328 * Return pointer to the first non-whitespace character in @s.
329 */
330char *ltrim(char *s)
331{
332 while (isspace(*s))
333 s++;
334
335 return s;
336}
337
338/**
339 * rtrim - Removes trailing whitespace from @s.
340 * @s: The string to be stripped.
341 *
342 * Note that the first trailing whitespace is replaced with a %NUL-terminator
343 * in the given string @s. Returns @s.
344 */
345char *rtrim(char *s)
346{
347 size_t size = strlen(s);
348 char *end;
349
350 if (!size)
351 return s;
352
353 end = s + size - 1;
354 while (end >= s && isspace(*end))
355 end--;
356 *(end + 1) = '\0';
357
358 return s;
359}
360
361char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) 212char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
362{ 213{
363 /* 214 /*
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
index 4c68a09b97e8..708805f5573e 100644
--- a/tools/perf/util/string2.h
+++ b/tools/perf/util/string2.h
@@ -2,13 +2,15 @@
2#ifndef PERF_STRING_H 2#ifndef PERF_STRING_H
3#define PERF_STRING_H 3#define PERF_STRING_H
4 4
5#include <linux/string.h>
5#include <linux/types.h> 6#include <linux/types.h>
6#include <stddef.h> 7#include <stddef.h>
7#include <string.h> 8#include <string.h>
8 9
10extern const char *graph_dotted_line;
11extern const char *dots;
12
9s64 perf_atoll(const char *str); 13s64 perf_atoll(const char *str);
10char **argv_split(const char *str, int *argcp);
11void argv_free(char **argv);
12bool strglobmatch(const char *str, const char *pat); 14bool strglobmatch(const char *str, const char *pat);
13bool strglobmatch_nocase(const char *str, const char *pat); 15bool strglobmatch_nocase(const char *str, const char *pat);
14bool strlazymatch(const char *str, const char *pat); 16bool strlazymatch(const char *str, const char *pat);
@@ -17,15 +19,6 @@ static inline bool strisglob(const char *str)
17 return strpbrk(str, "*?[") != NULL; 19 return strpbrk(str, "*?[") != NULL;
18} 20}
19int strtailcmp(const char *s1, const char *s2); 21int strtailcmp(const char *s1, const char *s2);
20char *strxfrchar(char *s, char from, char to);
21
22char *ltrim(char *s);
23char *rtrim(char *s);
24
25static inline char *trim(char *s)
26{
27 return ltrim(rtrim(s));
28}
29 22
30char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); 23char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
31 24
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 4ad106a5f2c0..62008756d8cc 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -14,7 +14,8 @@
14#include "machine.h" 14#include "machine.h"
15#include "vdso.h" 15#include "vdso.h"
16#include "debug.h" 16#include "debug.h"
17#include "sane_ctype.h" 17#include "util.h"
18#include <linux/ctype.h>
18#include <symbol/kallsyms.h> 19#include <symbol/kallsyms.h>
19 20
20#ifndef EM_AARCH64 21#ifndef EM_AARCH64
@@ -699,7 +700,6 @@ bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
699int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, 700int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
700 enum dso_binary_type type) 701 enum dso_binary_type type)
701{ 702{
702 int err = -1;
703 GElf_Ehdr ehdr; 703 GElf_Ehdr ehdr;
704 Elf *elf; 704 Elf *elf;
705 int fd; 705 int fd;
@@ -793,7 +793,7 @@ out_elf_end:
793 elf_end(elf); 793 elf_end(elf);
794out_close: 794out_close:
795 close(fd); 795 close(fd);
796 return err; 796 return -1;
797} 797}
798 798
799/** 799/**
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5cbad55cd99d..ae2ce255e848 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -25,7 +25,7 @@
25#include "namespaces.h" 25#include "namespaces.h"
26#include "header.h" 26#include "header.h"
27#include "path.h" 27#include "path.h"
28#include "sane_ctype.h" 28#include <linux/ctype.h>
29 29
30#include <elf.h> 30#include <elf.h>
31#include <limits.h> 31#include <limits.h>
@@ -1166,6 +1166,85 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
1166 return 0; 1166 return 0;
1167} 1167}
1168 1168
1169/*
1170 * Merges map into map_groups by splitting the new map
1171 * within the existing map regions.
1172 */
1173int map_groups__merge_in(struct map_groups *kmaps, struct map *new_map)
1174{
1175 struct map *old_map;
1176 LIST_HEAD(merged);
1177
1178 for (old_map = map_groups__first(kmaps); old_map;
1179 old_map = map_groups__next(old_map)) {
1180
1181 /* no overload with this one */
1182 if (new_map->end < old_map->start ||
1183 new_map->start >= old_map->end)
1184 continue;
1185
1186 if (new_map->start < old_map->start) {
1187 /*
1188 * |new......
1189 * |old....
1190 */
1191 if (new_map->end < old_map->end) {
1192 /*
1193 * |new......| -> |new..|
1194 * |old....| -> |old....|
1195 */
1196 new_map->end = old_map->start;
1197 } else {
1198 /*
1199 * |new.............| -> |new..| |new..|
1200 * |old....| -> |old....|
1201 */
1202 struct map *m = map__clone(new_map);
1203
1204 if (!m)
1205 return -ENOMEM;
1206
1207 m->end = old_map->start;
1208 list_add_tail(&m->node, &merged);
1209 new_map->start = old_map->end;
1210 }
1211 } else {
1212 /*
1213 * |new......
1214 * |old....
1215 */
1216 if (new_map->end < old_map->end) {
1217 /*
1218 * |new..| -> x
1219 * |old.........| -> |old.........|
1220 */
1221 map__put(new_map);
1222 new_map = NULL;
1223 break;
1224 } else {
1225 /*
1226 * |new......| -> |new...|
1227 * |old....| -> |old....|
1228 */
1229 new_map->start = old_map->end;
1230 }
1231 }
1232 }
1233
1234 while (!list_empty(&merged)) {
1235 old_map = list_entry(merged.next, struct map, node);
1236 list_del_init(&old_map->node);
1237 map_groups__insert(kmaps, old_map);
1238 map__put(old_map);
1239 }
1240
1241 if (new_map) {
1242 map_groups__insert(kmaps, new_map);
1243 map__put(new_map);
1244 }
1245 return 0;
1246}
1247
1169static int dso__load_kcore(struct dso *dso, struct map *map, 1248static int dso__load_kcore(struct dso *dso, struct map *map,
1170 const char *kallsyms_filename) 1249 const char *kallsyms_filename)
1171{ 1250{
@@ -1222,7 +1301,12 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1222 while (old_map) { 1301 while (old_map) {
1223 struct map *next = map_groups__next(old_map); 1302 struct map *next = map_groups__next(old_map);
1224 1303
1225 if (old_map != map) 1304 /*
1305 * We need to preserve eBPF maps even if they are
1306 * covered by kcore, because we need to access
1307 * eBPF dso for source data.
1308 */
1309 if (old_map != map && !__map__is_bpf_prog(old_map))
1226 map_groups__remove(kmaps, old_map); 1310 map_groups__remove(kmaps, old_map);
1227 old_map = next; 1311 old_map = next;
1228 } 1312 }
@@ -1256,11 +1340,16 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
1256 map_groups__remove(kmaps, map); 1340 map_groups__remove(kmaps, map);
1257 map_groups__insert(kmaps, map); 1341 map_groups__insert(kmaps, map);
1258 map__put(map); 1342 map__put(map);
1343 map__put(new_map);
1259 } else { 1344 } else {
1260 map_groups__insert(kmaps, new_map); 1345 /*
1346 * Merge kcore map into existing maps,
1347 * and ensure that current maps (eBPF)
1348 * stay intact.
1349 */
1350 if (map_groups__merge_in(kmaps, new_map))
1351 goto out_err;
1261 } 1352 }
1262
1263 map__put(new_map);
1264 } 1353 }
1265 1354
1266 if (machine__is(machine, "x86_64")) { 1355 if (machine__is(machine, "x86_64")) {
@@ -2262,3 +2351,25 @@ struct mem_info *mem_info__new(void)
2262 refcount_set(&mi->refcnt, 1); 2351 refcount_set(&mi->refcnt, 1);
2263 return mi; 2352 return mi;
2264} 2353}
2354
2355struct block_info *block_info__get(struct block_info *bi)
2356{
2357 if (bi)
2358 refcount_inc(&bi->refcnt);
2359 return bi;
2360}
2361
2362void block_info__put(struct block_info *bi)
2363{
2364 if (bi && refcount_dec_and_test(&bi->refcnt))
2365 free(bi);
2366}
2367
2368struct block_info *block_info__new(void)
2369{
2370 struct block_info *bi = zalloc(sizeof(*bi));
2371
2372 if (bi)
2373 refcount_set(&bi->refcnt, 1);
2374 return bi;
2375}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 9a8fe012910a..12755b42ea93 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -131,6 +131,17 @@ struct mem_info {
131 refcount_t refcnt; 131 refcount_t refcnt;
132}; 132};
133 133
134struct block_info {
135 struct symbol *sym;
136 u64 start;
137 u64 end;
138 u64 cycles;
139 u64 cycles_aggr;
140 int num;
141 int num_aggr;
142 refcount_t refcnt;
143};
144
134struct addr_location { 145struct addr_location {
135 struct machine *machine; 146 struct machine *machine;
136 struct thread *thread; 147 struct thread *thread;
@@ -332,4 +343,16 @@ static inline void __mem_info__zput(struct mem_info **mi)
332 343
333#define mem_info__zput(mi) __mem_info__zput(&mi) 344#define mem_info__zput(mi) __mem_info__zput(&mi)
334 345
346struct block_info *block_info__new(void);
347struct block_info *block_info__get(struct block_info *bi);
348void block_info__put(struct block_info *bi);
349
350static inline void __block_info__zput(struct block_info **bi)
351{
352 block_info__put(*bi);
353 *bi = NULL;
354}
355
356#define block_info__zput(bi) __block_info__zput(&bi)
357
335#endif /* __PERF_SYMBOL */ 358#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 6c55fa6fccec..e6880789864c 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -39,7 +39,9 @@ struct symbol_conf {
39 hide_unresolved, 39 hide_unresolved,
40 raw_trace, 40 raw_trace,
41 report_hierarchy, 41 report_hierarchy,
42 inline_name; 42 report_block,
43 inline_name,
44 disable_add2line_warn;
43 const char *vmlinux_name, 45 const char *vmlinux_name,
44 *kallsyms_name, 46 *kallsyms_name,
45 *source_prefix, 47 *source_prefix,
@@ -69,6 +71,7 @@ struct symbol_conf {
69 *tid_list; 71 *tid_list;
70 const char *symfs; 72 const char *symfs;
71 int res_sample; 73 int res_sample;
74 int pad_output_len_dso;
72}; 75};
73 76
74extern struct symbol_conf symbol_conf; 77extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 4ba9e866b076..6ff1ff4d4ce7 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -40,6 +40,8 @@ enum retpoline_state_t {
40 * @timestamp: timestamp (if known) 40 * @timestamp: timestamp (if known)
41 * @ref: external reference (e.g. db_id of sample) 41 * @ref: external reference (e.g. db_id of sample)
42 * @branch_count: the branch count when the entry was created 42 * @branch_count: the branch count when the entry was created
43 * @insn_count: the instruction count when the entry was created
44 * @cyc_count the cycle count when the entry was created
43 * @db_id: id used for db-export 45 * @db_id: id used for db-export
44 * @cp: call path 46 * @cp: call path
45 * @no_call: a 'call' was not seen 47 * @no_call: a 'call' was not seen
@@ -51,6 +53,8 @@ struct thread_stack_entry {
51 u64 timestamp; 53 u64 timestamp;
52 u64 ref; 54 u64 ref;
53 u64 branch_count; 55 u64 branch_count;
56 u64 insn_count;
57 u64 cyc_count;
54 u64 db_id; 58 u64 db_id;
55 struct call_path *cp; 59 struct call_path *cp;
56 bool no_call; 60 bool no_call;
@@ -66,6 +70,8 @@ struct thread_stack_entry {
66 * @sz: current maximum stack size 70 * @sz: current maximum stack size
67 * @trace_nr: current trace number 71 * @trace_nr: current trace number
68 * @branch_count: running branch count 72 * @branch_count: running branch count
73 * @insn_count: running instruction count
74 * @cyc_count running cycle count
69 * @kernel_start: kernel start address 75 * @kernel_start: kernel start address
70 * @last_time: last timestamp 76 * @last_time: last timestamp
71 * @crp: call/return processor 77 * @crp: call/return processor
@@ -79,6 +85,8 @@ struct thread_stack {
79 size_t sz; 85 size_t sz;
80 u64 trace_nr; 86 u64 trace_nr;
81 u64 branch_count; 87 u64 branch_count;
88 u64 insn_count;
89 u64 cyc_count;
82 u64 kernel_start; 90 u64 kernel_start;
83 u64 last_time; 91 u64 last_time;
84 struct call_return_processor *crp; 92 struct call_return_processor *crp;
@@ -280,6 +288,8 @@ static int thread_stack__call_return(struct thread *thread,
280 cr.call_time = tse->timestamp; 288 cr.call_time = tse->timestamp;
281 cr.return_time = timestamp; 289 cr.return_time = timestamp;
282 cr.branch_count = ts->branch_count - tse->branch_count; 290 cr.branch_count = ts->branch_count - tse->branch_count;
291 cr.insn_count = ts->insn_count - tse->insn_count;
292 cr.cyc_count = ts->cyc_count - tse->cyc_count;
283 cr.db_id = tse->db_id; 293 cr.db_id = tse->db_id;
284 cr.call_ref = tse->ref; 294 cr.call_ref = tse->ref;
285 cr.return_ref = ref; 295 cr.return_ref = ref;
@@ -535,6 +545,8 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
535 tse->timestamp = timestamp; 545 tse->timestamp = timestamp;
536 tse->ref = ref; 546 tse->ref = ref;
537 tse->branch_count = ts->branch_count; 547 tse->branch_count = ts->branch_count;
548 tse->insn_count = ts->insn_count;
549 tse->cyc_count = ts->cyc_count;
538 tse->cp = cp; 550 tse->cp = cp;
539 tse->no_call = no_call; 551 tse->no_call = no_call;
540 tse->trace_end = trace_end; 552 tse->trace_end = trace_end;
@@ -616,6 +628,23 @@ static int thread_stack__bottom(struct thread_stack *ts,
616 true, false); 628 true, false);
617} 629}
618 630
631static int thread_stack__pop_ks(struct thread *thread, struct thread_stack *ts,
632 struct perf_sample *sample, u64 ref)
633{
634 u64 tm = sample->time;
635 int err;
636
637 /* Return to userspace, so pop all kernel addresses */
638 while (thread_stack__in_kernel(ts)) {
639 err = thread_stack__call_return(thread, ts, --ts->cnt,
640 tm, ref, true);
641 if (err)
642 return err;
643 }
644
645 return 0;
646}
647
619static int thread_stack__no_call_return(struct thread *thread, 648static int thread_stack__no_call_return(struct thread *thread,
620 struct thread_stack *ts, 649 struct thread_stack *ts,
621 struct perf_sample *sample, 650 struct perf_sample *sample,
@@ -635,12 +664,9 @@ static int thread_stack__no_call_return(struct thread *thread,
635 664
636 if (ip >= ks && addr < ks) { 665 if (ip >= ks && addr < ks) {
637 /* Return to userspace, so pop all kernel addresses */ 666 /* Return to userspace, so pop all kernel addresses */
638 while (thread_stack__in_kernel(ts)) { 667 err = thread_stack__pop_ks(thread, ts, sample, ref);
639 err = thread_stack__call_return(thread, ts, --ts->cnt, 668 if (err)
640 tm, ref, true); 669 return err;
641 if (err)
642 return err;
643 }
644 670
645 /* If the stack is empty, push the userspace address */ 671 /* If the stack is empty, push the userspace address */
646 if (!ts->cnt) { 672 if (!ts->cnt) {
@@ -650,12 +676,9 @@ static int thread_stack__no_call_return(struct thread *thread,
650 } 676 }
651 } else if (thread_stack__in_kernel(ts) && ip < ks) { 677 } else if (thread_stack__in_kernel(ts) && ip < ks) {
652 /* Return to userspace, so pop all kernel addresses */ 678 /* Return to userspace, so pop all kernel addresses */
653 while (thread_stack__in_kernel(ts)) { 679 err = thread_stack__pop_ks(thread, ts, sample, ref);
654 err = thread_stack__call_return(thread, ts, --ts->cnt, 680 if (err)
655 tm, ref, true); 681 return err;
656 if (err)
657 return err;
658 }
659 } 682 }
660 683
661 if (ts->cnt) 684 if (ts->cnt)
@@ -865,6 +888,8 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
865 } 888 }
866 889
867 ts->branch_count += 1; 890 ts->branch_count += 1;
891 ts->insn_count += sample->insn_cnt;
892 ts->cyc_count += sample->cyc_cnt;
868 ts->last_time = sample->time; 893 ts->last_time = sample->time;
869 894
870 if (sample->flags & PERF_IP_FLAG_CALL) { 895 if (sample->flags & PERF_IP_FLAG_CALL) {
@@ -896,7 +921,18 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
896 ts->rstate = X86_RETPOLINE_DETECTED; 921 ts->rstate = X86_RETPOLINE_DETECTED;
897 922
898 } else if (sample->flags & PERF_IP_FLAG_RETURN) { 923 } else if (sample->flags & PERF_IP_FLAG_RETURN) {
899 if (!sample->ip || !sample->addr) 924 if (!sample->addr) {
925 u32 return_from_kernel = PERF_IP_FLAG_SYSCALLRET |
926 PERF_IP_FLAG_INTERRUPT;
927
928 if (!(sample->flags & return_from_kernel))
929 return 0;
930
931 /* Pop kernel stack */
932 return thread_stack__pop_ks(thread, ts, sample, ref);
933 }
934
935 if (!sample->ip)
900 return 0; 936 return 0;
901 937
902 /* x86 retpoline 'return' doesn't match the stack */ 938 /* x86 retpoline 'return' doesn't match the stack */
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index 71e15d4ec533..e1ec5a58f1b2 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -43,6 +43,8 @@ enum {
43 * @call_time: timestamp of call (if known) 43 * @call_time: timestamp of call (if known)
44 * @return_time: timestamp of return (if known) 44 * @return_time: timestamp of return (if known)
45 * @branch_count: number of branches seen between call and return 45 * @branch_count: number of branches seen between call and return
46 * @insn_count: approx. number of instructions between call and return
47 * @cyc_count: approx. number of cycles between call and return
46 * @call_ref: external reference to 'call' sample (e.g. db_id) 48 * @call_ref: external reference to 'call' sample (e.g. db_id)
47 * @return_ref: external reference to 'return' sample (e.g. db_id) 49 * @return_ref: external reference to 'return' sample (e.g. db_id)
48 * @db_id: id used for db-export 50 * @db_id: id used for db-export
@@ -56,6 +58,8 @@ struct call_return {
56 u64 call_time; 58 u64 call_time;
57 u64 return_time; 59 u64 return_time;
58 u64 branch_count; 60 u64 branch_count;
61 u64 insn_count;
62 u64 cyc_count;
59 u64 call_ref; 63 u64 call_ref;
60 u64 return_ref; 64 u64 return_ref;
61 u64 db_id; 65 u64 db_id;
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 7bfb740d2ede..3e29a4e8b5e6 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -158,13 +158,13 @@ static struct namespaces *__thread__namespaces(const struct thread *thread)
158 return list_first_entry(&thread->namespaces_list, struct namespaces, list); 158 return list_first_entry(&thread->namespaces_list, struct namespaces, list);
159} 159}
160 160
161struct namespaces *thread__namespaces(const struct thread *thread) 161struct namespaces *thread__namespaces(struct thread *thread)
162{ 162{
163 struct namespaces *ns; 163 struct namespaces *ns;
164 164
165 down_read((struct rw_semaphore *)&thread->namespaces_lock); 165 down_read(&thread->namespaces_lock);
166 ns = __thread__namespaces(thread); 166 ns = __thread__namespaces(thread);
167 up_read((struct rw_semaphore *)&thread->namespaces_lock); 167 up_read(&thread->namespaces_lock);
168 168
169 return ns; 169 return ns;
170} 170}
@@ -288,13 +288,13 @@ static const char *__thread__comm_str(const struct thread *thread)
288 return comm__str(comm); 288 return comm__str(comm);
289} 289}
290 290
291const char *thread__comm_str(const struct thread *thread) 291const char *thread__comm_str(struct thread *thread)
292{ 292{
293 const char *str; 293 const char *str;
294 294
295 down_read((struct rw_semaphore *)&thread->comm_lock); 295 down_read(&thread->comm_lock);
296 str = __thread__comm_str(thread); 296 str = __thread__comm_str(thread);
297 up_read((struct rw_semaphore *)&thread->comm_lock); 297 up_read(&thread->comm_lock);
298 298
299 return str; 299 return str;
300} 300}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index cf8375c017a0..e97ef6977eb9 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -76,7 +76,7 @@ static inline void thread__exited(struct thread *thread)
76 thread->dead = true; 76 thread->dead = true;
77} 77}
78 78
79struct namespaces *thread__namespaces(const struct thread *thread); 79struct namespaces *thread__namespaces(struct thread *thread);
80int thread__set_namespaces(struct thread *thread, u64 timestamp, 80int thread__set_namespaces(struct thread *thread, u64 timestamp,
81 struct namespaces_event *event); 81 struct namespaces_event *event);
82 82
@@ -93,7 +93,7 @@ int thread__set_comm_from_proc(struct thread *thread);
93int thread__comm_len(struct thread *thread); 93int thread__comm_len(struct thread *thread);
94struct comm *thread__comm(const struct thread *thread); 94struct comm *thread__comm(const struct thread *thread);
95struct comm *thread__exec_comm(const struct thread *thread); 95struct comm *thread__exec_comm(const struct thread *thread);
96const char *thread__comm_str(const struct thread *thread); 96const char *thread__comm_str(struct thread *thread);
97int thread__insert_map(struct thread *thread, struct map *map); 97int thread__insert_map(struct thread *thread, struct map *map);
98int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone); 98int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bool do_maps_clone);
99size_t thread__fprintf(struct thread *thread, FILE *fp); 99size_t thread__fprintf(struct thread *thread, FILE *fp);
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 5d467d8ae9ab..281bf06f10f2 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -12,6 +12,7 @@
12#include "strlist.h" 12#include "strlist.h"
13#include <string.h> 13#include <string.h>
14#include <api/fs/fs.h> 14#include <api/fs/fs.h>
15#include <linux/string.h>
15#include "asm/bug.h" 16#include "asm/bug.h"
16#include "thread_map.h" 17#include "thread_map.h"
17#include "util.h" 18#include "util.h"
@@ -392,7 +393,7 @@ static int get_comm(char **comm, pid_t pid)
392 * mark the end of the string. 393 * mark the end of the string.
393 */ 394 */
394 (*comm)[size] = 0; 395 (*comm)[size] = 0;
395 rtrim(*comm); 396 strim(*comm);
396 } 397 }
397 398
398 free(path); 399 free(path);
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
index 20663a460df3..c2abc259b51d 100644
--- a/tools/perf/util/time-utils.c
+++ b/tools/perf/util/time-utils.c
@@ -1,12 +1,14 @@
1// SPDX-License-Identifier: GPL-2.0 1// SPDX-License-Identifier: GPL-2.0
2#include <stdlib.h> 2#include <stdlib.h>
3#include <string.h> 3#include <string.h>
4#include <linux/string.h>
4#include <sys/time.h> 5#include <sys/time.h>
5#include <linux/time64.h> 6#include <linux/time64.h>
6#include <time.h> 7#include <time.h>
7#include <errno.h> 8#include <errno.h>
8#include <inttypes.h> 9#include <inttypes.h>
9#include <math.h> 10#include <math.h>
11#include <linux/ctype.h>
10 12
11#include "perf.h" 13#include "perf.h"
12#include "debug.h" 14#include "debug.h"
@@ -116,6 +118,66 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
116 return rc; 118 return rc;
117} 119}
118 120
121static int perf_time__parse_strs(struct perf_time_interval *ptime,
122 const char *ostr, int size)
123{
124 const char *cp;
125 char *str, *arg, *p;
126 int i, num = 0, rc = 0;
127
128 /* Count the commas */
129 for (cp = ostr; *cp; cp++)
130 num += !!(*cp == ',');
131
132 if (!num)
133 return -EINVAL;
134
135 BUG_ON(num > size);
136
137 str = strdup(ostr);
138 if (!str)
139 return -ENOMEM;
140
141 /* Split the string and parse each piece, except the last */
142 for (i = 0, p = str; i < num - 1; i++) {
143 arg = p;
144 /* Find next comma, there must be one */
145 p = skip_spaces(strchr(p, ',') + 1);
146 /* Skip the value, must not contain space or comma */
147 while (*p && !isspace(*p)) {
148 if (*p++ == ',') {
149 rc = -EINVAL;
150 goto out;
151 }
152 }
153 /* Split and parse */
154 if (*p)
155 *p++ = 0;
156 rc = perf_time__parse_str(ptime + i, arg);
157 if (rc < 0)
158 goto out;
159 }
160
161 /* Parse the last piece */
162 rc = perf_time__parse_str(ptime + i, p);
163 if (rc < 0)
164 goto out;
165
166 /* Check there is no overlap */
167 for (i = 0; i < num - 1; i++) {
168 if (ptime[i].end >= ptime[i + 1].start) {
169 rc = -EINVAL;
170 goto out;
171 }
172 }
173
174 rc = num;
175out:
176 free(str);
177
178 return rc;
179}
180
119static int parse_percent(double *pcnt, char *str) 181static int parse_percent(double *pcnt, char *str)
120{ 182{
121 char *c, *endptr; 183 char *c, *endptr;
@@ -135,12 +197,30 @@ static int parse_percent(double *pcnt, char *str)
135 return 0; 197 return 0;
136} 198}
137 199
200static int set_percent_time(struct perf_time_interval *ptime, double start_pcnt,
201 double end_pcnt, u64 start, u64 end)
202{
203 u64 total = end - start;
204
205 if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
206 end_pcnt < 0.0 || end_pcnt > 1.0) {
207 return -1;
208 }
209
210 ptime->start = start + round(start_pcnt * total);
211 ptime->end = start + round(end_pcnt * total);
212
213 if (ptime->end > ptime->start && ptime->end != end)
214 ptime->end -= 1;
215
216 return 0;
217}
218
138static int percent_slash_split(char *str, struct perf_time_interval *ptime, 219static int percent_slash_split(char *str, struct perf_time_interval *ptime,
139 u64 start, u64 end) 220 u64 start, u64 end)
140{ 221{
141 char *p, *end_str; 222 char *p, *end_str;
142 double pcnt, start_pcnt, end_pcnt; 223 double pcnt, start_pcnt, end_pcnt;
143 u64 total = end - start;
144 int i; 224 int i;
145 225
146 /* 226 /*
@@ -168,15 +248,7 @@ static int percent_slash_split(char *str, struct perf_time_interval *ptime,
168 start_pcnt = pcnt * (i - 1); 248 start_pcnt = pcnt * (i - 1);
169 end_pcnt = pcnt * i; 249 end_pcnt = pcnt * i;
170 250
171 if (start_pcnt < 0.0 || start_pcnt > 1.0 || 251 return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
172 end_pcnt < 0.0 || end_pcnt > 1.0) {
173 return -1;
174 }
175
176 ptime->start = start + round(start_pcnt * total);
177 ptime->end = start + round(end_pcnt * total);
178
179 return 0;
180} 252}
181 253
182static int percent_dash_split(char *str, struct perf_time_interval *ptime, 254static int percent_dash_split(char *str, struct perf_time_interval *ptime,
@@ -184,7 +256,6 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime,
184{ 256{
185 char *start_str = NULL, *end_str; 257 char *start_str = NULL, *end_str;
186 double start_pcnt, end_pcnt; 258 double start_pcnt, end_pcnt;
187 u64 total = end - start;
188 int ret; 259 int ret;
189 260
190 /* 261 /*
@@ -203,16 +274,7 @@ static int percent_dash_split(char *str, struct perf_time_interval *ptime,
203 274
204 free(start_str); 275 free(start_str);
205 276
206 if (start_pcnt < 0.0 || start_pcnt > 1.0 || 277 return set_percent_time(ptime, start_pcnt, end_pcnt, start, end);
207 end_pcnt < 0.0 || end_pcnt > 1.0 ||
208 start_pcnt > end_pcnt) {
209 return -1;
210 }
211
212 ptime->start = start + round(start_pcnt * total);
213 ptime->end = start + round(end_pcnt * total);
214
215 return 0;
216} 278}
217 279
218typedef int (*time_pecent_split)(char *, struct perf_time_interval *, 280typedef int (*time_pecent_split)(char *, struct perf_time_interval *,
@@ -389,13 +451,12 @@ bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
389 ptime = &ptime_buf[i]; 451 ptime = &ptime_buf[i];
390 452
391 if (timestamp >= ptime->start && 453 if (timestamp >= ptime->start &&
392 ((timestamp < ptime->end && i < num - 1) || 454 (timestamp <= ptime->end || !ptime->end)) {
393 (timestamp <= ptime->end && i == num - 1))) { 455 return false;
394 break;
395 } 456 }
396 } 457 }
397 458
398 return (i == num) ? true : false; 459 return true;
399} 460}
400 461
401int perf_time__parse_for_ranges(const char *time_str, 462int perf_time__parse_for_ranges(const char *time_str,
@@ -403,20 +464,20 @@ int perf_time__parse_for_ranges(const char *time_str,
403 struct perf_time_interval **ranges, 464 struct perf_time_interval **ranges,
404 int *range_size, int *range_num) 465 int *range_size, int *range_num)
405{ 466{
467 bool has_percent = strchr(time_str, '%');
406 struct perf_time_interval *ptime_range; 468 struct perf_time_interval *ptime_range;
407 int size, num, ret; 469 int size, num, ret = -EINVAL;
408 470
409 ptime_range = perf_time__range_alloc(time_str, &size); 471 ptime_range = perf_time__range_alloc(time_str, &size);
410 if (!ptime_range) 472 if (!ptime_range)
411 return -ENOMEM; 473 return -ENOMEM;
412 474
413 if (perf_time__parse_str(ptime_range, time_str) != 0) { 475 if (has_percent) {
414 if (session->evlist->first_sample_time == 0 && 476 if (session->evlist->first_sample_time == 0 &&
415 session->evlist->last_sample_time == 0) { 477 session->evlist->last_sample_time == 0) {
416 pr_err("HINT: no first/last sample time found in perf data.\n" 478 pr_err("HINT: no first/last sample time found in perf data.\n"
417 "Please use latest perf binary to execute 'perf record'\n" 479 "Please use latest perf binary to execute 'perf record'\n"
418 "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); 480 "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
419 ret = -EINVAL;
420 goto error; 481 goto error;
421 } 482 }
422 483
@@ -425,21 +486,20 @@ int perf_time__parse_for_ranges(const char *time_str,
425 time_str, 486 time_str,
426 session->evlist->first_sample_time, 487 session->evlist->first_sample_time,
427 session->evlist->last_sample_time); 488 session->evlist->last_sample_time);
428
429 if (num < 0) {
430 pr_err("Invalid time string\n");
431 ret = -EINVAL;
432 goto error;
433 }
434 } else { 489 } else {
435 num = 1; 490 num = perf_time__parse_strs(ptime_range, time_str, size);
436 } 491 }
437 492
493 if (num < 0)
494 goto error_invalid;
495
438 *range_size = size; 496 *range_size = size;
439 *range_num = num; 497 *range_num = num;
440 *ranges = ptime_range; 498 *ranges = ptime_range;
441 return 0; 499 return 0;
442 500
501error_invalid:
502 pr_err("Invalid time string\n");
443error: 503error:
444 free(ptime_range); 504 free(ptime_range);
445 return ret; 505 return ret;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 62bc61155dd1..b3982e1bb4c5 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -11,7 +11,7 @@
11#include "debug.h" 11#include "debug.h"
12#include "trace-event.h" 12#include "trace-event.h"
13 13
14#include "sane_ctype.h" 14#include <linux/ctype.h>
15 15
16static int get_common_field(struct scripting_context *context, 16static int get_common_field(struct scripting_context *context,
17 int *offset, int *size, const char *type) 17 int *offset, int *size, const char *type)
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d388f80d8703..a61535cf1bca 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -434,19 +434,6 @@ size_t hex_width(u64 v)
434 return n; 434 return n;
435} 435}
436 436
437/*
438 * While we find nice hex chars, build a long_val.
439 * Return number of chars processed.
440 */
441int hex2u64(const char *ptr, u64 *long_val)
442{
443 char *p;
444
445 *long_val = strtoull(ptr, &p, 16);
446
447 return p - ptr;
448}
449
450int perf_event_paranoid(void) 437int perf_event_paranoid(void)
451{ 438{
452 int value; 439 int value;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 09c1b0f91f65..125e215dd3d8 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -43,7 +43,6 @@ ssize_t readn(int fd, void *buf, size_t n);
43ssize_t writen(int fd, const void *buf, size_t n); 43ssize_t writen(int fd, const void *buf, size_t n);
44 44
45size_t hex_width(u64 v); 45size_t hex_width(u64 v);
46int hex2u64(const char *ptr, u64 *val);
47 46
48extern unsigned int page_size; 47extern unsigned int page_size;
49int __pure cacheline_size(void); 48int __pure cacheline_size(void);