aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2017-06-21 14:11:53 -0400
committerIngo Molnar <mingo@kernel.org>2017-06-21 14:11:53 -0400
commit8e70e8409102a37ab066bd91007b75fd5d113931 (patch)
tree5be56d03d491fb266007fe123a00b74dabc1ef64 /tools
parent007b811b4041989ec2dc91b9614aa2c41332723e (diff)
parent701516ae3dec801084bc913d21e03fce15c61a0b (diff)
Merge tag 'perf-core-for-mingo-4.13-20170621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add support to measure SMI cost in 'perf stat' (Kan Liang) - Add support for unwinding callchains in powerpc with libdw (Paolo Bonzini) Fixes: - Fix message: cpu list option is -C not -c (Adrian Hunter) - Fix 'perf script' message: field list option is -F not -f (Adrian Hunter) - Intel PT fixes: (Adrian Hunter) o Fix missing stack clear o Ensure IP is zero when state is INTEL_PT_STATE_NO_IP o Fix last_ip usage o Ensure never to set 'last_ip' when packet 'count' is zero o Clear FUP flag on error o Fix transactions_sample_type Infrastructure changes: - Intel PT cleanups/refactorings (Adrian Hunter) o Use FUP always when scanning for an IP o Add missing __fallthrough o Remove redundant initial_skip checks o Allow decoding with branch tracing disabled o Add default config for pass-through branch enable o Add documentation for new config terms o Add decoder support for ptwrite and power event packets o Add reserved byte to CBR packet payload o Add decoder support for CBR events - Move find_process() to the only place that uses it, skimming some more fat from util.[ch] (Arnaldo Carvalho de Melo) - Do parameter validation earlier on fetch_kernel_version() (Arnaldo Carvalho de Melo) - Remove unused _ALL_SOURCE define (Arnaldo Carvalho de Melo) - Add sysfs__write_int function (Kan Liang) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/lib/api/fs/fs.c30
-rw-r--r--tools/lib/api/fs/fs.h4
-rw-r--r--tools/perf/Documentation/intel-pt.txt36
-rw-r--r--tools/perf/Documentation/perf-stat.txt14
-rw-r--r--tools/perf/Makefile.config2
-rw-r--r--tools/perf/arch/powerpc/util/Build2
-rw-r--r--tools/perf/arch/powerpc/util/unwind-libdw.c73
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c5
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/builtin-stat.c49
-rw-r--r--tools/perf/util/evsel.c39
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c290
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h13
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c110
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h7
-rw-r--r--tools/perf/util/intel-pt.c23
-rw-r--r--tools/perf/util/session.c2
-rw-r--r--tools/perf/util/stat-shadow.c33
-rw-r--r--tools/perf/util/stat.c2
-rw-r--r--tools/perf/util/stat.h2
-rw-r--r--tools/perf/util/util.c52
-rw-r--r--tools/perf/util/util.h3
22 files changed, 710 insertions, 83 deletions
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 809c7721cd24..a7ecf8f469f4 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -387,6 +387,22 @@ int filename__read_str(const char *filename, char **buf, size_t *sizep)
387 return err; 387 return err;
388} 388}
389 389
/*
 * Write the decimal text form of @value to the file named @filename.
 *
 * The file is opened write-only and is not created if it does not exist.
 * Only the formatted characters are written: no trailing NUL and none of
 * the unused remainder of the local buffer, so no uninitialised stack
 * bytes reach the file.
 *
 * Returns 0 when the whole number was written, -1 if the file could not
 * be opened or the write failed or was short.
 */
int filename__write_int(const char *filename, int value)
{
	int fd = open(filename, O_WRONLY), err = -1;
	char buf[64];
	int len;

	if (fd < 0)
		return err;

	/* A 64 byte buffer is more than enough for any int, sign included. */
	len = snprintf(buf, sizeof(buf), "%d", value);
	if (len > 0 && write(fd, buf, (size_t)len) == (ssize_t)len)
		err = 0;

	close(fd);
	return err;
}
405
390int procfs__read_str(const char *entry, char **buf, size_t *sizep) 406int procfs__read_str(const char *entry, char **buf, size_t *sizep)
391{ 407{
392 char path[PATH_MAX]; 408 char path[PATH_MAX];
@@ -480,3 +496,17 @@ int sysctl__read_int(const char *sysctl, int *value)
480 496
481 return filename__read_int(path, value); 497 return filename__read_int(path, value);
482} 498}
499
500int sysfs__write_int(const char *entry, int value)
501{
502 char path[PATH_MAX];
503 const char *sysfs = sysfs__mountpoint();
504
505 if (!sysfs)
506 return -1;
507
508 if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
509 return -1;
510
511 return filename__write_int(path, value);
512}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 956c21127d1e..45605348461e 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -31,6 +31,8 @@ int filename__read_int(const char *filename, int *value);
31int filename__read_ull(const char *filename, unsigned long long *value); 31int filename__read_ull(const char *filename, unsigned long long *value);
32int filename__read_str(const char *filename, char **buf, size_t *sizep); 32int filename__read_str(const char *filename, char **buf, size_t *sizep);
33 33
34int filename__write_int(const char *filename, int value);
35
34int procfs__read_str(const char *entry, char **buf, size_t *sizep); 36int procfs__read_str(const char *entry, char **buf, size_t *sizep);
35 37
36int sysctl__read_int(const char *sysctl, int *value); 38int sysctl__read_int(const char *sysctl, int *value);
@@ -38,4 +40,6 @@ int sysfs__read_int(const char *entry, int *value);
38int sysfs__read_ull(const char *entry, unsigned long long *value); 40int sysfs__read_ull(const char *entry, unsigned long long *value);
39int sysfs__read_str(const char *entry, char **buf, size_t *sizep); 41int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
40int sysfs__read_bool(const char *entry, bool *value); 42int sysfs__read_bool(const char *entry, bool *value);
43
44int sysfs__write_int(const char *entry, int value);
41#endif /* __API_FS__ */ 45#endif /* __API_FS__ */
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index b0b3007d3c9c..d157dee7a4ec 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -364,6 +364,42 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc
364 364
365 CYC packets are not requested by default. 365 CYC packets are not requested by default.
366 366
367pt Specifies pass-through which enables the 'branch' config term.
368
369 The default config selects 'pt' if it is available, so a user will
370 never need to specify this term.
371
372branch Enable branch tracing. Branch tracing is enabled by default so to
373 disable branch tracing use 'branch=0'.
374
375 The default config selects 'branch' if it is available.
376
377ptw Enable PTWRITE packets which are produced when a ptwrite instruction
378 is executed.
379
380 Support for this feature is indicated by:
381
382 /sys/bus/event_source/devices/intel_pt/caps/ptwrite
383
384 which contains "1" if the feature is supported and
385 "0" otherwise.
386
387fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
388 provides the address of the ptwrite instruction. In the absence of
389 fup_on_ptw, the decoder will use the address of the previous branch
390 if branch tracing is enabled, otherwise the address will be zero.
391 Note that fup_on_ptw will work even when branch tracing is disabled.
392
393pwr_evt Enable power events. The power events provide information about
394 changes to the CPU C-state.
395
396 Support for this feature is indicated by:
397
398 /sys/bus/event_source/devices/intel_pt/caps/power_event_trace
399
400 which contains "1" if the feature is supported and
401 "0" otherwise.
402
367 403
368new snapshot option 404new snapshot option
369------------------- 405-------------------
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index bd0e4417f2be..698076313606 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -239,6 +239,20 @@ taskset.
239--no-merge:: 239--no-merge::
240Do not merge results from same PMUs. 240Do not merge results from same PMUs.
241 241
242--smi-cost::
243Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
244
245During the measurement, the /sys/devices/cpu/freeze_on_smi will be set to
246freeze core counters on SMI.
247The aperf counter will not be affected by the setting.
248The cost of SMI can be measured by (aperf - unhalted core cycles).
249
250In practice, the percentage of SMI cycles is very useful for performance
251oriented analysis. --metric-only will be applied by default.
252The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf.
253
254Users who want to get the actual value can apply --no-metric-only.
255
242EXAMPLES 256EXAMPLES
243-------- 257--------
244 258
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 1f4fbc9a3292..bdf0e87f9b29 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -61,7 +61,7 @@ endif
61# Disable it on all other architectures in case libdw unwind 61# Disable it on all other architectures in case libdw unwind
62# support is detected in system. Add supported architectures 62# support is detected in system. Add supported architectures
63# to the check. 63# to the check.
64ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm)) 64ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm powerpc))
65 NO_LIBDW_DWARF_UNWIND := 1 65 NO_LIBDW_DWARF_UNWIND := 1
66endif 66endif
67 67
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 90ad64b231cd..2e6595310420 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -5,4 +5,6 @@ libperf-y += perf_regs.o
5 5
6libperf-$(CONFIG_DWARF) += dwarf-regs.o 6libperf-$(CONFIG_DWARF) += dwarf-regs.o
7libperf-$(CONFIG_DWARF) += skip-callchain-idx.o 7libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
8
8libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o 9libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
10libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 000000000000..3a24b3c43273
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,73 @@
1#include <elfutils/libdwfl.h>
2#include "../../util/unwind-libdw.h"
3#include "../../util/perf_regs.h"
4#include "../../util/event.h"
5
6/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
7static const int special_regs[3][2] = {
8 { 65, PERF_REG_POWERPC_LINK },
9 { 101, PERF_REG_POWERPC_XER },
10 { 109, PERF_REG_POWERPC_CTR },
11};
12
13bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
14{
15 struct unwind_info *ui = arg;
16 struct regs_dump *user_regs = &ui->sample->user_regs;
17 Dwarf_Word dwarf_regs[32], dwarf_nip;
18 size_t i;
19
20#define REG(r) ({ \
21 Dwarf_Word val = 0; \
22 perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r); \
23 val; \
24})
25
26 dwarf_regs[0] = REG(R0);
27 dwarf_regs[1] = REG(R1);
28 dwarf_regs[2] = REG(R2);
29 dwarf_regs[3] = REG(R3);
30 dwarf_regs[4] = REG(R4);
31 dwarf_regs[5] = REG(R5);
32 dwarf_regs[6] = REG(R6);
33 dwarf_regs[7] = REG(R7);
34 dwarf_regs[8] = REG(R8);
35 dwarf_regs[9] = REG(R9);
36 dwarf_regs[10] = REG(R10);
37 dwarf_regs[11] = REG(R11);
38 dwarf_regs[12] = REG(R12);
39 dwarf_regs[13] = REG(R13);
40 dwarf_regs[14] = REG(R14);
41 dwarf_regs[15] = REG(R15);
42 dwarf_regs[16] = REG(R16);
43 dwarf_regs[17] = REG(R17);
44 dwarf_regs[18] = REG(R18);
45 dwarf_regs[19] = REG(R19);
46 dwarf_regs[20] = REG(R20);
47 dwarf_regs[21] = REG(R21);
48 dwarf_regs[22] = REG(R22);
49 dwarf_regs[23] = REG(R23);
50 dwarf_regs[24] = REG(R24);
51 dwarf_regs[25] = REG(R25);
52 dwarf_regs[26] = REG(R26);
53 dwarf_regs[27] = REG(R27);
54 dwarf_regs[28] = REG(R28);
55 dwarf_regs[29] = REG(R29);
56 dwarf_regs[30] = REG(R30);
57 dwarf_regs[31] = REG(R31);
58 if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
59 return false;
60
61 dwarf_nip = REG(NIP);
62 dwfl_thread_state_register_pc(thread, dwarf_nip);
63 for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
64 Dwarf_Word val = 0;
65 perf_reg_value(&val, user_regs, special_regs[i][1]);
66 if (!dwfl_thread_state_registers(thread,
67 special_regs[i][0], 1,
68 &val))
69 return false;
70 }
71
72 return true;
73}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6fe667b3269e..9535be57033f 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -192,6 +192,7 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
192 int psb_cyc, psb_periods, psb_period; 192 int psb_cyc, psb_periods, psb_period;
193 int pos = 0; 193 int pos = 0;
194 u64 config; 194 u64 config;
195 char c;
195 196
196 pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); 197 pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
197 198
@@ -225,6 +226,10 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
225 } 226 }
226 } 227 }
227 228
229 if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
230 perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
231 pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
232
228 pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); 233 pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
229 234
230 intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); 235 intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index db5261c3f719..4bce7d8679cb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -385,7 +385,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
385 */ 385 */
386 if (!evsel && output[j].user_set && !output[j].wildcard_set) { 386 if (!evsel && output[j].user_set && !output[j].wildcard_set) {
387 pr_err("%s events do not exist. " 387 pr_err("%s events do not exist. "
388 "Remove corresponding -f option to proceed.\n", 388 "Remove corresponding -F option to proceed.\n",
389 event_type(j)); 389 event_type(j));
390 return -1; 390 return -1;
391 } 391 }
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ad9324d1daf9..324363054c3f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -86,6 +86,7 @@
86#define DEFAULT_SEPARATOR " " 86#define DEFAULT_SEPARATOR " "
87#define CNTR_NOT_SUPPORTED "<not supported>" 87#define CNTR_NOT_SUPPORTED "<not supported>"
88#define CNTR_NOT_COUNTED "<not counted>" 88#define CNTR_NOT_COUNTED "<not counted>"
89#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
89 90
90static void print_counters(struct timespec *ts, int argc, const char **argv); 91static void print_counters(struct timespec *ts, int argc, const char **argv);
91 92
@@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
122 NULL, 123 NULL,
123}; 124};
124 125
126static const char *smi_cost_attrs = {
127 "{"
128 "msr/aperf/,"
129 "msr/smi/,"
130 "cycles"
131 "}"
132};
133
125static struct perf_evlist *evsel_list; 134static struct perf_evlist *evsel_list;
126 135
127static struct target target = { 136static struct target target = {
@@ -137,6 +146,8 @@ static bool null_run = false;
137static int detailed_run = 0; 146static int detailed_run = 0;
138static bool transaction_run; 147static bool transaction_run;
139static bool topdown_run = false; 148static bool topdown_run = false;
149static bool smi_cost = false;
150static bool smi_reset = false;
140static bool big_num = true; 151static bool big_num = true;
141static int big_num_opt = -1; 152static int big_num_opt = -1;
142static const char *csv_sep = NULL; 153static const char *csv_sep = NULL;
@@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
1782 "Only print computed metrics. No raw values", enable_metric_only), 1793 "Only print computed metrics. No raw values", enable_metric_only),
1783 OPT_BOOLEAN(0, "topdown", &topdown_run, 1794 OPT_BOOLEAN(0, "topdown", &topdown_run,
1784 "measure topdown level 1 statistics"), 1795 "measure topdown level 1 statistics"),
1796 OPT_BOOLEAN(0, "smi-cost", &smi_cost,
1797 "measure SMI cost"),
1785 OPT_END() 1798 OPT_END()
1786}; 1799};
1787 1800
@@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
2160 return 0; 2173 return 0;
2161 } 2174 }
2162 2175
2176 if (smi_cost) {
2177 int smi;
2178
2179 if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
2180 fprintf(stderr, "freeze_on_smi is not supported.\n");
2181 return -1;
2182 }
2183
2184 if (!smi) {
2185 if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
2186 fprintf(stderr, "Failed to set freeze_on_smi.\n");
2187 return -1;
2188 }
2189 smi_reset = true;
2190 }
2191
2192 if (pmu_have_event("msr", "aperf") &&
2193 pmu_have_event("msr", "smi")) {
2194 if (!force_metric_only)
2195 metric_only = true;
2196 err = parse_events(evsel_list, smi_cost_attrs, NULL);
2197 } else {
2198 fprintf(stderr, "To measure SMI cost, it needs "
2199 "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
2200 return -1;
2201 }
2202 if (err) {
2203 fprintf(stderr, "Cannot set up SMI cost events\n");
2204 return -1;
2205 }
2206 return 0;
2207 }
2208
2163 if (topdown_run) { 2209 if (topdown_run) {
2164 char *str = NULL; 2210 char *str = NULL;
2165 bool warn = false; 2211 bool warn = false;
@@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
2742 perf_stat__exit_aggr_mode(); 2788 perf_stat__exit_aggr_mode();
2743 perf_evlist__free_stats(evsel_list); 2789 perf_evlist__free_stats(evsel_list);
2744out: 2790out:
2791 if (smi_cost && smi_reset)
2792 sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
2793
2745 perf_evlist__delete(evsel_list); 2794 perf_evlist__delete(evsel_list);
2746 return status; 2795 return status;
2747} 2796}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7f78f27f5382..6f4882f8d61f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -11,6 +11,7 @@
11#include <errno.h> 11#include <errno.h>
12#include <inttypes.h> 12#include <inttypes.h>
13#include <linux/bitops.h> 13#include <linux/bitops.h>
14#include <api/fs/fs.h>
14#include <api/fs/tracing_path.h> 15#include <api/fs/tracing_path.h>
15#include <traceevent/event-parse.h> 16#include <traceevent/event-parse.h>
16#include <linux/hw_breakpoint.h> 17#include <linux/hw_breakpoint.h>
@@ -19,6 +20,8 @@
19#include <linux/err.h> 20#include <linux/err.h>
20#include <sys/ioctl.h> 21#include <sys/ioctl.h>
21#include <sys/resource.h> 22#include <sys/resource.h>
23#include <sys/types.h>
24#include <dirent.h>
22#include "asm/bug.h" 25#include "asm/bug.h"
23#include "callchain.h" 26#include "callchain.h"
24#include "cgroup.h" 27#include "cgroup.h"
@@ -2472,6 +2475,42 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
2472 return false; 2475 return false;
2473} 2476}
2474 2477
2478static bool find_process(const char *name)
2479{
2480 size_t len = strlen(name);
2481 DIR *dir;
2482 struct dirent *d;
2483 int ret = -1;
2484
2485 dir = opendir(procfs__mountpoint());
2486 if (!dir)
2487 return false;
2488
2489 /* Walk through the directory. */
2490 while (ret && (d = readdir(dir)) != NULL) {
2491 char path[PATH_MAX];
2492 char *data;
2493 size_t size;
2494
2495 if ((d->d_type != DT_DIR) ||
2496 !strcmp(".", d->d_name) ||
2497 !strcmp("..", d->d_name))
2498 continue;
2499
2500 scnprintf(path, sizeof(path), "%s/%s/comm",
2501 procfs__mountpoint(), d->d_name);
2502
2503 if (filename__read_str(path, &data, &size))
2504 continue;
2505
2506 ret = strncmp(name, data, len);
2507 free(data);
2508 }
2509
2510 closedir(dir);
2511 return ret ? false : true;
2512}
2513
2475int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, 2514int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
2476 int err, char *msg, size_t size) 2515 int err, char *msg, size_t size)
2477{ 2516{
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 7cf7f7aca4d2..5dea06289db5 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -64,6 +64,25 @@ enum intel_pt_pkt_state {
64 INTEL_PT_STATE_FUP_NO_TIP, 64 INTEL_PT_STATE_FUP_NO_TIP,
65}; 65};
66 66
67static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
68{
69 switch (pkt_state) {
70 case INTEL_PT_STATE_NO_PSB:
71 case INTEL_PT_STATE_NO_IP:
72 case INTEL_PT_STATE_ERR_RESYNC:
73 case INTEL_PT_STATE_IN_SYNC:
74 case INTEL_PT_STATE_TNT:
75 return true;
76 case INTEL_PT_STATE_TIP:
77 case INTEL_PT_STATE_TIP_PGD:
78 case INTEL_PT_STATE_FUP:
79 case INTEL_PT_STATE_FUP_NO_TIP:
80 return false;
81 default:
82 return true;
83 };
84}
85
67#ifdef INTEL_PT_STRICT 86#ifdef INTEL_PT_STRICT
68#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB 87#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
69#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB 88#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
@@ -87,11 +106,13 @@ struct intel_pt_decoder {
87 const unsigned char *buf; 106 const unsigned char *buf;
88 size_t len; 107 size_t len;
89 bool return_compression; 108 bool return_compression;
109 bool branch_enable;
90 bool mtc_insn; 110 bool mtc_insn;
91 bool pge; 111 bool pge;
92 bool have_tma; 112 bool have_tma;
93 bool have_cyc; 113 bool have_cyc;
94 bool fixup_last_mtc; 114 bool fixup_last_mtc;
115 bool have_last_ip;
95 uint64_t pos; 116 uint64_t pos;
96 uint64_t last_ip; 117 uint64_t last_ip;
97 uint64_t ip; 118 uint64_t ip;
@@ -99,6 +120,7 @@ struct intel_pt_decoder {
99 uint64_t timestamp; 120 uint64_t timestamp;
100 uint64_t tsc_timestamp; 121 uint64_t tsc_timestamp;
101 uint64_t ref_timestamp; 122 uint64_t ref_timestamp;
123 uint64_t sample_timestamp;
102 uint64_t ret_addr; 124 uint64_t ret_addr;
103 uint64_t ctc_timestamp; 125 uint64_t ctc_timestamp;
104 uint64_t ctc_delta; 126 uint64_t ctc_delta;
@@ -119,6 +141,7 @@ struct intel_pt_decoder {
119 int pkt_len; 141 int pkt_len;
120 int last_packet_type; 142 int last_packet_type;
121 unsigned int cbr; 143 unsigned int cbr;
144 unsigned int cbr_seen;
122 unsigned int max_non_turbo_ratio; 145 unsigned int max_non_turbo_ratio;
123 double max_non_turbo_ratio_fp; 146 double max_non_turbo_ratio_fp;
124 double cbr_cyc_to_tsc; 147 double cbr_cyc_to_tsc;
@@ -136,9 +159,18 @@ struct intel_pt_decoder {
136 bool continuous_period; 159 bool continuous_period;
137 bool overflow; 160 bool overflow;
138 bool set_fup_tx_flags; 161 bool set_fup_tx_flags;
162 bool set_fup_ptw;
163 bool set_fup_mwait;
164 bool set_fup_pwre;
165 bool set_fup_exstop;
139 unsigned int fup_tx_flags; 166 unsigned int fup_tx_flags;
140 unsigned int tx_flags; 167 unsigned int tx_flags;
168 uint64_t fup_ptw_payload;
169 uint64_t fup_mwait_payload;
170 uint64_t fup_pwre_payload;
171 uint64_t cbr_payload;
141 uint64_t timestamp_insn_cnt; 172 uint64_t timestamp_insn_cnt;
173 uint64_t sample_insn_cnt;
142 uint64_t stuck_ip; 174 uint64_t stuck_ip;
143 int no_progress; 175 int no_progress;
144 int stuck_ip_prd; 176 int stuck_ip_prd;
@@ -192,6 +224,7 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
192 decoder->pgd_ip = params->pgd_ip; 224 decoder->pgd_ip = params->pgd_ip;
193 decoder->data = params->data; 225 decoder->data = params->data;
194 decoder->return_compression = params->return_compression; 226 decoder->return_compression = params->return_compression;
227 decoder->branch_enable = params->branch_enable;
195 228
196 decoder->period = params->period; 229 decoder->period = params->period;
197 decoder->period_type = params->period_type; 230 decoder->period_type = params->period_type;
@@ -398,6 +431,7 @@ static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
398static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) 431static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
399{ 432{
400 decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); 433 decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
434 decoder->have_last_ip = true;
401} 435}
402 436
403static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) 437static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
@@ -635,6 +669,8 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
635 case INTEL_PT_PAD: 669 case INTEL_PT_PAD:
636 case INTEL_PT_VMCS: 670 case INTEL_PT_VMCS:
637 case INTEL_PT_MNT: 671 case INTEL_PT_MNT:
672 case INTEL_PT_PTWRITE:
673 case INTEL_PT_PTWRITE_IP:
638 return 0; 674 return 0;
639 675
640 case INTEL_PT_MTC: 676 case INTEL_PT_MTC:
@@ -733,6 +769,11 @@ static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
733 769
734 case INTEL_PT_TIP_PGD: 770 case INTEL_PT_TIP_PGD:
735 case INTEL_PT_TRACESTOP: 771 case INTEL_PT_TRACESTOP:
772 case INTEL_PT_EXSTOP:
773 case INTEL_PT_EXSTOP_IP:
774 case INTEL_PT_MWAIT:
775 case INTEL_PT_PWRE:
776 case INTEL_PT_PWRX:
736 case INTEL_PT_OVF: 777 case INTEL_PT_OVF:
737 case INTEL_PT_BAD: /* Does not happen */ 778 case INTEL_PT_BAD: /* Does not happen */
738 default: 779 default:
@@ -898,6 +939,7 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
898 939
899 decoder->tot_insn_cnt += insn_cnt; 940 decoder->tot_insn_cnt += insn_cnt;
900 decoder->timestamp_insn_cnt += insn_cnt; 941 decoder->timestamp_insn_cnt += insn_cnt;
942 decoder->sample_insn_cnt += insn_cnt;
901 decoder->period_insn_cnt += insn_cnt; 943 decoder->period_insn_cnt += insn_cnt;
902 944
903 if (err) { 945 if (err) {
@@ -990,6 +1032,57 @@ out_no_progress:
990 return err; 1032 return err;
991} 1033}
992 1034
1035static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
1036{
1037 bool ret = false;
1038
1039 if (decoder->set_fup_tx_flags) {
1040 decoder->set_fup_tx_flags = false;
1041 decoder->tx_flags = decoder->fup_tx_flags;
1042 decoder->state.type = INTEL_PT_TRANSACTION;
1043 decoder->state.from_ip = decoder->ip;
1044 decoder->state.to_ip = 0;
1045 decoder->state.flags = decoder->fup_tx_flags;
1046 return true;
1047 }
1048 if (decoder->set_fup_ptw) {
1049 decoder->set_fup_ptw = false;
1050 decoder->state.type = INTEL_PT_PTW;
1051 decoder->state.flags |= INTEL_PT_FUP_IP;
1052 decoder->state.from_ip = decoder->ip;
1053 decoder->state.to_ip = 0;
1054 decoder->state.ptw_payload = decoder->fup_ptw_payload;
1055 return true;
1056 }
1057 if (decoder->set_fup_mwait) {
1058 decoder->set_fup_mwait = false;
1059 decoder->state.type = INTEL_PT_MWAIT_OP;
1060 decoder->state.from_ip = decoder->ip;
1061 decoder->state.to_ip = 0;
1062 decoder->state.mwait_payload = decoder->fup_mwait_payload;
1063 ret = true;
1064 }
1065 if (decoder->set_fup_pwre) {
1066 decoder->set_fup_pwre = false;
1067 decoder->state.type |= INTEL_PT_PWR_ENTRY;
1068 decoder->state.type &= ~INTEL_PT_BRANCH;
1069 decoder->state.from_ip = decoder->ip;
1070 decoder->state.to_ip = 0;
1071 decoder->state.pwre_payload = decoder->fup_pwre_payload;
1072 ret = true;
1073 }
1074 if (decoder->set_fup_exstop) {
1075 decoder->set_fup_exstop = false;
1076 decoder->state.type |= INTEL_PT_EX_STOP;
1077 decoder->state.type &= ~INTEL_PT_BRANCH;
1078 decoder->state.flags |= INTEL_PT_FUP_IP;
1079 decoder->state.from_ip = decoder->ip;
1080 decoder->state.to_ip = 0;
1081 ret = true;
1082 }
1083 return ret;
1084}
1085
993static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) 1086static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
994{ 1087{
995 struct intel_pt_insn intel_pt_insn; 1088 struct intel_pt_insn intel_pt_insn;
@@ -1003,15 +1096,8 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
1003 if (err == INTEL_PT_RETURN) 1096 if (err == INTEL_PT_RETURN)
1004 return 0; 1097 return 0;
1005 if (err == -EAGAIN) { 1098 if (err == -EAGAIN) {
1006 if (decoder->set_fup_tx_flags) { 1099 if (intel_pt_fup_event(decoder))
1007 decoder->set_fup_tx_flags = false;
1008 decoder->tx_flags = decoder->fup_tx_flags;
1009 decoder->state.type = INTEL_PT_TRANSACTION;
1010 decoder->state.from_ip = decoder->ip;
1011 decoder->state.to_ip = 0;
1012 decoder->state.flags = decoder->fup_tx_flags;
1013 return 0; 1100 return 0;
1014 }
1015 return err; 1101 return err;
1016 } 1102 }
1017 decoder->set_fup_tx_flags = false; 1103 decoder->set_fup_tx_flags = false;
@@ -1360,7 +1446,9 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
1360 1446
1361static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) 1447static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
1362{ 1448{
1363 unsigned int cbr = decoder->packet.payload; 1449 unsigned int cbr = decoder->packet.payload & 0xff;
1450
1451 decoder->cbr_payload = decoder->packet.payload;
1364 1452
1365 if (decoder->cbr == cbr) 1453 if (decoder->cbr == cbr)
1366 return; 1454 return;
@@ -1417,6 +1505,13 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1417 case INTEL_PT_TRACESTOP: 1505 case INTEL_PT_TRACESTOP:
1418 case INTEL_PT_BAD: 1506 case INTEL_PT_BAD:
1419 case INTEL_PT_PSB: 1507 case INTEL_PT_PSB:
1508 case INTEL_PT_PTWRITE:
1509 case INTEL_PT_PTWRITE_IP:
1510 case INTEL_PT_EXSTOP:
1511 case INTEL_PT_EXSTOP_IP:
1512 case INTEL_PT_MWAIT:
1513 case INTEL_PT_PWRE:
1514 case INTEL_PT_PWRX:
1420 decoder->have_tma = false; 1515 decoder->have_tma = false;
1421 intel_pt_log("ERROR: Unexpected packet\n"); 1516 intel_pt_log("ERROR: Unexpected packet\n");
1422 return -EAGAIN; 1517 return -EAGAIN;
@@ -1446,7 +1541,8 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
1446 1541
1447 case INTEL_PT_FUP: 1542 case INTEL_PT_FUP:
1448 decoder->pge = true; 1543 decoder->pge = true;
1449 intel_pt_set_last_ip(decoder); 1544 if (decoder->packet.count)
1545 intel_pt_set_last_ip(decoder);
1450 break; 1546 break;
1451 1547
1452 case INTEL_PT_MODE_TSX: 1548 case INTEL_PT_MODE_TSX:
@@ -1497,6 +1593,13 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
1497 case INTEL_PT_MODE_TSX: 1593 case INTEL_PT_MODE_TSX:
1498 case INTEL_PT_BAD: 1594 case INTEL_PT_BAD:
1499 case INTEL_PT_PSBEND: 1595 case INTEL_PT_PSBEND:
1596 case INTEL_PT_PTWRITE:
1597 case INTEL_PT_PTWRITE_IP:
1598 case INTEL_PT_EXSTOP:
1599 case INTEL_PT_EXSTOP_IP:
1600 case INTEL_PT_MWAIT:
1601 case INTEL_PT_PWRE:
1602 case INTEL_PT_PWRX:
1500 intel_pt_log("ERROR: Missing TIP after FUP\n"); 1603 intel_pt_log("ERROR: Missing TIP after FUP\n");
1501 decoder->pkt_state = INTEL_PT_STATE_ERR3; 1604 decoder->pkt_state = INTEL_PT_STATE_ERR3;
1502 return -ENOENT; 1605 return -ENOENT;
@@ -1625,6 +1728,15 @@ next:
1625 break; 1728 break;
1626 } 1729 }
1627 intel_pt_set_last_ip(decoder); 1730 intel_pt_set_last_ip(decoder);
1731 if (!decoder->branch_enable) {
1732 decoder->ip = decoder->last_ip;
1733 if (intel_pt_fup_event(decoder))
1734 return 0;
1735 no_tip = false;
1736 break;
1737 }
1738 if (decoder->set_fup_mwait)
1739 no_tip = true;
1628 err = intel_pt_walk_fup(decoder); 1740 err = intel_pt_walk_fup(decoder);
1629 if (err != -EAGAIN) { 1741 if (err != -EAGAIN) {
1630 if (err) 1742 if (err)
@@ -1650,6 +1762,8 @@ next:
1650 break; 1762 break;
1651 1763
1652 case INTEL_PT_PSB: 1764 case INTEL_PT_PSB:
1765 decoder->last_ip = 0;
1766 decoder->have_last_ip = true;
1653 intel_pt_clear_stack(&decoder->stack); 1767 intel_pt_clear_stack(&decoder->stack);
1654 err = intel_pt_walk_psbend(decoder); 1768 err = intel_pt_walk_psbend(decoder);
1655 if (err == -EAGAIN) 1769 if (err == -EAGAIN)
@@ -1696,6 +1810,16 @@ next:
1696 1810
1697 case INTEL_PT_CBR: 1811 case INTEL_PT_CBR:
1698 intel_pt_calc_cbr(decoder); 1812 intel_pt_calc_cbr(decoder);
1813 if (!decoder->branch_enable &&
1814 decoder->cbr != decoder->cbr_seen) {
1815 decoder->cbr_seen = decoder->cbr;
1816 decoder->state.type = INTEL_PT_CBR_CHG;
1817 decoder->state.from_ip = decoder->ip;
1818 decoder->state.to_ip = 0;
1819 decoder->state.cbr_payload =
1820 decoder->packet.payload;
1821 return 0;
1822 }
1699 break; 1823 break;
1700 1824
1701 case INTEL_PT_MODE_EXEC: 1825 case INTEL_PT_MODE_EXEC:
@@ -1722,6 +1846,71 @@ next:
1722 case INTEL_PT_PAD: 1846 case INTEL_PT_PAD:
1723 break; 1847 break;
1724 1848
1849 case INTEL_PT_PTWRITE_IP:
1850 decoder->fup_ptw_payload = decoder->packet.payload;
1851 err = intel_pt_get_next_packet(decoder);
1852 if (err)
1853 return err;
1854 if (decoder->packet.type == INTEL_PT_FUP) {
1855 decoder->set_fup_ptw = true;
1856 no_tip = true;
1857 } else {
1858 intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
1859 decoder->pos);
1860 }
1861 goto next;
1862
1863 case INTEL_PT_PTWRITE:
1864 decoder->state.type = INTEL_PT_PTW;
1865 decoder->state.from_ip = decoder->ip;
1866 decoder->state.to_ip = 0;
1867 decoder->state.ptw_payload = decoder->packet.payload;
1868 return 0;
1869
1870 case INTEL_PT_MWAIT:
1871 decoder->fup_mwait_payload = decoder->packet.payload;
1872 decoder->set_fup_mwait = true;
1873 break;
1874
1875 case INTEL_PT_PWRE:
1876 if (decoder->set_fup_mwait) {
1877 decoder->fup_pwre_payload =
1878 decoder->packet.payload;
1879 decoder->set_fup_pwre = true;
1880 break;
1881 }
1882 decoder->state.type = INTEL_PT_PWR_ENTRY;
1883 decoder->state.from_ip = decoder->ip;
1884 decoder->state.to_ip = 0;
1885 decoder->state.pwrx_payload = decoder->packet.payload;
1886 return 0;
1887
1888 case INTEL_PT_EXSTOP_IP:
1889 err = intel_pt_get_next_packet(decoder);
1890 if (err)
1891 return err;
1892 if (decoder->packet.type == INTEL_PT_FUP) {
1893 decoder->set_fup_exstop = true;
1894 no_tip = true;
1895 } else {
1896 intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
1897 decoder->pos);
1898 }
1899 goto next;
1900
1901 case INTEL_PT_EXSTOP:
1902 decoder->state.type = INTEL_PT_EX_STOP;
1903 decoder->state.from_ip = decoder->ip;
1904 decoder->state.to_ip = 0;
1905 return 0;
1906
1907 case INTEL_PT_PWRX:
1908 decoder->state.type = INTEL_PT_PWR_EXIT;
1909 decoder->state.from_ip = decoder->ip;
1910 decoder->state.to_ip = 0;
1911 decoder->state.pwrx_payload = decoder->packet.payload;
1912 return 0;
1913
1725 default: 1914 default:
1726 return intel_pt_bug(decoder); 1915 return intel_pt_bug(decoder);
1727 } 1916 }
@@ -1730,8 +1919,9 @@ next:
1730 1919
1731static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) 1920static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
1732{ 1921{
1733 return decoder->last_ip || decoder->packet.count == 0 || 1922 return decoder->packet.count &&
1734 decoder->packet.count == 3 || decoder->packet.count == 6; 1923 (decoder->have_last_ip || decoder->packet.count == 3 ||
1924 decoder->packet.count == 6);
1735} 1925}
1736 1926
1737/* Walk PSB+ packets to get in sync. */ 1927/* Walk PSB+ packets to get in sync. */
@@ -1750,6 +1940,13 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
1750 __fallthrough; 1940 __fallthrough;
1751 case INTEL_PT_TIP_PGE: 1941 case INTEL_PT_TIP_PGE:
1752 case INTEL_PT_TIP: 1942 case INTEL_PT_TIP:
1943 case INTEL_PT_PTWRITE:
1944 case INTEL_PT_PTWRITE_IP:
1945 case INTEL_PT_EXSTOP:
1946 case INTEL_PT_EXSTOP_IP:
1947 case INTEL_PT_MWAIT:
1948 case INTEL_PT_PWRE:
1949 case INTEL_PT_PWRX:
1753 intel_pt_log("ERROR: Unexpected packet\n"); 1950 intel_pt_log("ERROR: Unexpected packet\n");
1754 return -ENOENT; 1951 return -ENOENT;
1755 1952
@@ -1854,14 +2051,10 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
1854 break; 2051 break;
1855 2052
1856 case INTEL_PT_FUP: 2053 case INTEL_PT_FUP:
1857 if (decoder->overflow) { 2054 if (intel_pt_have_ip(decoder))
1858 if (intel_pt_have_ip(decoder)) 2055 intel_pt_set_ip(decoder);
1859 intel_pt_set_ip(decoder); 2056 if (decoder->ip)
1860 if (decoder->ip) 2057 return 0;
1861 return 0;
1862 }
1863 if (decoder->packet.count)
1864 intel_pt_set_last_ip(decoder);
1865 break; 2058 break;
1866 2059
1867 case INTEL_PT_MTC: 2060 case INTEL_PT_MTC:
@@ -1910,6 +2103,9 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
1910 break; 2103 break;
1911 2104
1912 case INTEL_PT_PSB: 2105 case INTEL_PT_PSB:
2106 decoder->last_ip = 0;
2107 decoder->have_last_ip = true;
2108 intel_pt_clear_stack(&decoder->stack);
1913 err = intel_pt_walk_psb(decoder); 2109 err = intel_pt_walk_psb(decoder);
1914 if (err) 2110 if (err)
1915 return err; 2111 return err;
@@ -1925,6 +2121,13 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
1925 case INTEL_PT_VMCS: 2121 case INTEL_PT_VMCS:
1926 case INTEL_PT_MNT: 2122 case INTEL_PT_MNT:
1927 case INTEL_PT_PAD: 2123 case INTEL_PT_PAD:
2124 case INTEL_PT_PTWRITE:
2125 case INTEL_PT_PTWRITE_IP:
2126 case INTEL_PT_EXSTOP:
2127 case INTEL_PT_EXSTOP_IP:
2128 case INTEL_PT_MWAIT:
2129 case INTEL_PT_PWRE:
2130 case INTEL_PT_PWRX:
1928 default: 2131 default:
1929 break; 2132 break;
1930 } 2133 }
@@ -1935,6 +2138,19 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
1935{ 2138{
1936 int err; 2139 int err;
1937 2140
2141 decoder->set_fup_tx_flags = false;
2142 decoder->set_fup_ptw = false;
2143 decoder->set_fup_mwait = false;
2144 decoder->set_fup_pwre = false;
2145 decoder->set_fup_exstop = false;
2146
2147 if (!decoder->branch_enable) {
2148 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
2149 decoder->overflow = false;
2150 decoder->state.type = 0; /* Do not have a sample */
2151 return 0;
2152 }
2153
1938 intel_pt_log("Scanning for full IP\n"); 2154 intel_pt_log("Scanning for full IP\n");
1939 err = intel_pt_walk_to_ip(decoder); 2155 err = intel_pt_walk_to_ip(decoder);
1940 if (err) 2156 if (err)
@@ -2043,6 +2259,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
2043 2259
2044 decoder->pge = false; 2260 decoder->pge = false;
2045 decoder->continuous_period = false; 2261 decoder->continuous_period = false;
2262 decoder->have_last_ip = false;
2046 decoder->last_ip = 0; 2263 decoder->last_ip = 0;
2047 decoder->ip = 0; 2264 decoder->ip = 0;
2048 intel_pt_clear_stack(&decoder->stack); 2265 intel_pt_clear_stack(&decoder->stack);
@@ -2051,6 +2268,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
2051 if (err) 2268 if (err)
2052 return err; 2269 return err;
2053 2270
2271 decoder->have_last_ip = true;
2054 decoder->pkt_state = INTEL_PT_STATE_NO_IP; 2272 decoder->pkt_state = INTEL_PT_STATE_NO_IP;
2055 2273
2056 err = intel_pt_walk_psb(decoder); 2274 err = intel_pt_walk_psb(decoder);
@@ -2069,7 +2287,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
2069 2287
2070static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) 2288static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
2071{ 2289{
2072 uint64_t est = decoder->timestamp_insn_cnt << 1; 2290 uint64_t est = decoder->sample_insn_cnt << 1;
2073 2291
2074 if (!decoder->cbr || !decoder->max_non_turbo_ratio) 2292 if (!decoder->cbr || !decoder->max_non_turbo_ratio)
2075 goto out; 2293 goto out;
@@ -2077,7 +2295,7 @@ static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
2077 est *= decoder->max_non_turbo_ratio; 2295 est *= decoder->max_non_turbo_ratio;
2078 est /= decoder->cbr; 2296 est /= decoder->cbr;
2079out: 2297out:
2080 return decoder->timestamp + est; 2298 return decoder->sample_timestamp + est;
2081} 2299}
2082 2300
2083const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) 2301const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
@@ -2093,8 +2311,10 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2093 err = intel_pt_sync(decoder); 2311 err = intel_pt_sync(decoder);
2094 break; 2312 break;
2095 case INTEL_PT_STATE_NO_IP: 2313 case INTEL_PT_STATE_NO_IP:
2314 decoder->have_last_ip = false;
2096 decoder->last_ip = 0; 2315 decoder->last_ip = 0;
2097 /* Fall through */ 2316 decoder->ip = 0;
2317 __fallthrough;
2098 case INTEL_PT_STATE_ERR_RESYNC: 2318 case INTEL_PT_STATE_ERR_RESYNC:
2099 err = intel_pt_sync_ip(decoder); 2319 err = intel_pt_sync_ip(decoder);
2100 break; 2320 break;
@@ -2130,15 +2350,29 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2130 } 2350 }
2131 } while (err == -ENOLINK); 2351 } while (err == -ENOLINK);
2132 2352
2133 decoder->state.err = err ? intel_pt_ext_err(err) : 0; 2353 if (err) {
2134 decoder->state.timestamp = decoder->timestamp; 2354 decoder->state.err = intel_pt_ext_err(err);
2355 decoder->state.from_ip = decoder->ip;
2356 decoder->sample_timestamp = decoder->timestamp;
2357 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
2358 } else {
2359 decoder->state.err = 0;
2360 if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
2361 decoder->cbr_seen = decoder->cbr;
2362 decoder->state.type |= INTEL_PT_CBR_CHG;
2363 decoder->state.cbr_payload = decoder->cbr_payload;
2364 }
2365 if (intel_pt_sample_time(decoder->pkt_state)) {
2366 decoder->sample_timestamp = decoder->timestamp;
2367 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
2368 }
2369 }
2370
2371 decoder->state.timestamp = decoder->sample_timestamp;
2135 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); 2372 decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
2136 decoder->state.cr3 = decoder->cr3; 2373 decoder->state.cr3 = decoder->cr3;
2137 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; 2374 decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
2138 2375
2139 if (err)
2140 decoder->state.from_ip = decoder->ip;
2141
2142 return &decoder->state; 2376 return &decoder->state;
2143} 2377}
2144 2378
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e90619a43c0c..921b22e8ca0e 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -25,11 +25,18 @@
25#define INTEL_PT_IN_TX (1 << 0) 25#define INTEL_PT_IN_TX (1 << 0)
26#define INTEL_PT_ABORT_TX (1 << 1) 26#define INTEL_PT_ABORT_TX (1 << 1)
27#define INTEL_PT_ASYNC (1 << 2) 27#define INTEL_PT_ASYNC (1 << 2)
28#define INTEL_PT_FUP_IP (1 << 3)
28 29
29enum intel_pt_sample_type { 30enum intel_pt_sample_type {
30 INTEL_PT_BRANCH = 1 << 0, 31 INTEL_PT_BRANCH = 1 << 0,
31 INTEL_PT_INSTRUCTION = 1 << 1, 32 INTEL_PT_INSTRUCTION = 1 << 1,
32 INTEL_PT_TRANSACTION = 1 << 2, 33 INTEL_PT_TRANSACTION = 1 << 2,
34 INTEL_PT_PTW = 1 << 3,
35 INTEL_PT_MWAIT_OP = 1 << 4,
36 INTEL_PT_PWR_ENTRY = 1 << 5,
37 INTEL_PT_EX_STOP = 1 << 6,
38 INTEL_PT_PWR_EXIT = 1 << 7,
39 INTEL_PT_CBR_CHG = 1 << 8,
33}; 40};
34 41
35enum intel_pt_period_type { 42enum intel_pt_period_type {
@@ -63,6 +70,11 @@ struct intel_pt_state {
63 uint64_t timestamp; 70 uint64_t timestamp;
64 uint64_t est_timestamp; 71 uint64_t est_timestamp;
65 uint64_t trace_nr; 72 uint64_t trace_nr;
73 uint64_t ptw_payload;
74 uint64_t mwait_payload;
75 uint64_t pwre_payload;
76 uint64_t pwrx_payload;
77 uint64_t cbr_payload;
66 uint32_t flags; 78 uint32_t flags;
67 enum intel_pt_insn_op insn_op; 79 enum intel_pt_insn_op insn_op;
68 int insn_len; 80 int insn_len;
@@ -87,6 +99,7 @@ struct intel_pt_params {
87 bool (*pgd_ip)(uint64_t ip, void *data); 99 bool (*pgd_ip)(uint64_t ip, void *data);
88 void *data; 100 void *data;
89 bool return_compression; 101 bool return_compression;
102 bool branch_enable;
90 uint64_t period; 103 uint64_t period;
91 enum intel_pt_period_type period_type; 104 enum intel_pt_period_type period_type;
92 unsigned max_non_turbo_ratio; 105 unsigned max_non_turbo_ratio;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 7528ae4f7e28..ba4c9dd18643 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -64,6 +64,13 @@ static const char * const packet_name[] = {
64 [INTEL_PT_PIP] = "PIP", 64 [INTEL_PT_PIP] = "PIP",
65 [INTEL_PT_OVF] = "OVF", 65 [INTEL_PT_OVF] = "OVF",
66 [INTEL_PT_MNT] = "MNT", 66 [INTEL_PT_MNT] = "MNT",
67 [INTEL_PT_PTWRITE] = "PTWRITE",
68 [INTEL_PT_PTWRITE_IP] = "PTWRITE",
69 [INTEL_PT_EXSTOP] = "EXSTOP",
70 [INTEL_PT_EXSTOP_IP] = "EXSTOP",
71 [INTEL_PT_MWAIT] = "MWAIT",
72 [INTEL_PT_PWRE] = "PWRE",
73 [INTEL_PT_PWRX] = "PWRX",
67}; 74};
68 75
69const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) 76const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
@@ -123,7 +130,7 @@ static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
123 if (len < 4) 130 if (len < 4)
124 return INTEL_PT_NEED_MORE_BYTES; 131 return INTEL_PT_NEED_MORE_BYTES;
125 packet->type = INTEL_PT_CBR; 132 packet->type = INTEL_PT_CBR;
126 packet->payload = buf[2]; 133 packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
127 return 4; 134 return 4;
128} 135}
129 136
@@ -217,12 +224,80 @@ static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
217 } 224 }
218} 225}
219 226
227static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
228 struct intel_pt_pkt *packet)
229{
230 packet->count = (buf[1] >> 5) & 0x3;
231 packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
232 INTEL_PT_PTWRITE;
233
234 switch (packet->count) {
235 case 0:
236 if (len < 6)
237 return INTEL_PT_NEED_MORE_BYTES;
238 packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
239 return 6;
240 case 1:
241 if (len < 10)
242 return INTEL_PT_NEED_MORE_BYTES;
243 packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
244 return 10;
245 default:
246 return INTEL_PT_BAD_PACKET;
247 }
248}
249
250static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
251{
252 packet->type = INTEL_PT_EXSTOP;
253 return 2;
254}
255
256static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
257{
258 packet->type = INTEL_PT_EXSTOP_IP;
259 return 2;
260}
261
262static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
263 struct intel_pt_pkt *packet)
264{
265 if (len < 10)
266 return INTEL_PT_NEED_MORE_BYTES;
267 packet->type = INTEL_PT_MWAIT;
268 packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
269 return 10;
270}
271
272static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
273 struct intel_pt_pkt *packet)
274{
275 if (len < 4)
276 return INTEL_PT_NEED_MORE_BYTES;
277 packet->type = INTEL_PT_PWRE;
278 memcpy_le64(&packet->payload, buf + 2, 2);
279 return 4;
280}
281
282static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
283 struct intel_pt_pkt *packet)
284{
285 if (len < 7)
286 return INTEL_PT_NEED_MORE_BYTES;
287 packet->type = INTEL_PT_PWRX;
288 memcpy_le64(&packet->payload, buf + 2, 5);
289 return 7;
290}
291
220static int intel_pt_get_ext(const unsigned char *buf, size_t len, 292static int intel_pt_get_ext(const unsigned char *buf, size_t len,
221 struct intel_pt_pkt *packet) 293 struct intel_pt_pkt *packet)
222{ 294{
223 if (len < 2) 295 if (len < 2)
224 return INTEL_PT_NEED_MORE_BYTES; 296 return INTEL_PT_NEED_MORE_BYTES;
225 297
298 if ((buf[1] & 0x1f) == 0x12)
299 return intel_pt_get_ptwrite(buf, len, packet);
300
226 switch (buf[1]) { 301 switch (buf[1]) {
227 case 0xa3: /* Long TNT */ 302 case 0xa3: /* Long TNT */
228 return intel_pt_get_long_tnt(buf, len, packet); 303 return intel_pt_get_long_tnt(buf, len, packet);
@@ -244,6 +319,16 @@ static int intel_pt_get_ext(const unsigned char *buf, size_t len,
244 return intel_pt_get_tma(buf, len, packet); 319 return intel_pt_get_tma(buf, len, packet);
245 case 0xC3: /* 3-byte header */ 320 case 0xC3: /* 3-byte header */
246 return intel_pt_get_3byte(buf, len, packet); 321 return intel_pt_get_3byte(buf, len, packet);
322 case 0x62: /* EXSTOP no IP */
323 return intel_pt_get_exstop(packet);
324 case 0xE2: /* EXSTOP with IP */
325 return intel_pt_get_exstop_ip(packet);
326 case 0xC2: /* MWAIT */
327 return intel_pt_get_mwait(buf, len, packet);
328 case 0x22: /* PWRE */
329 return intel_pt_get_pwre(buf, len, packet);
330 case 0xA2: /* PWRX */
331 return intel_pt_get_pwrx(buf, len, packet);
247 default: 332 default:
248 return INTEL_PT_BAD_PACKET; 333 return INTEL_PT_BAD_PACKET;
249 } 334 }
@@ -522,6 +607,29 @@ int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
522 ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", 607 ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
523 name, payload, nr); 608 name, payload, nr);
524 return ret; 609 return ret;
610 case INTEL_PT_PTWRITE:
611 return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
612 case INTEL_PT_PTWRITE_IP:
613 return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
614 case INTEL_PT_EXSTOP:
615 return snprintf(buf, buf_len, "%s IP:0", name);
616 case INTEL_PT_EXSTOP_IP:
617 return snprintf(buf, buf_len, "%s IP:1", name);
618 case INTEL_PT_MWAIT:
619 return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
620 name, payload, (unsigned int)(payload & 0xff),
621 (unsigned int)((payload >> 32) & 0x3));
622 case INTEL_PT_PWRE:
623 return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
624 name, payload, !!(payload & 0x80),
625 (unsigned int)((payload >> 12) & 0xf),
626 (unsigned int)((payload >> 8) & 0xf));
627 case INTEL_PT_PWRX:
628 return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
629 name, payload,
630 (unsigned int)((payload >> 4) & 0xf),
631 (unsigned int)(payload & 0xf),
632 (unsigned int)((payload >> 8) & 0xf));
525 default: 633 default:
526 break; 634 break;
527 } 635 }
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
index 781bb79883bd..73ddc3a88d07 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -52,6 +52,13 @@ enum intel_pt_pkt_type {
52 INTEL_PT_PIP, 52 INTEL_PT_PIP,
53 INTEL_PT_OVF, 53 INTEL_PT_OVF,
54 INTEL_PT_MNT, 54 INTEL_PT_MNT,
55 INTEL_PT_PTWRITE,
56 INTEL_PT_PTWRITE_IP,
57 INTEL_PT_EXSTOP,
58 INTEL_PT_EXSTOP_IP,
59 INTEL_PT_MWAIT,
60 INTEL_PT_PWRE,
61 INTEL_PT_PWRX,
55}; 62};
56 63
57struct intel_pt_pkt { 64struct intel_pt_pkt {
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 4c7718f87a08..6df836469f2b 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -668,6 +668,19 @@ static bool intel_pt_return_compression(struct intel_pt *pt)
668 return true; 668 return true;
669} 669}
670 670
671static bool intel_pt_branch_enable(struct intel_pt *pt)
672{
673 struct perf_evsel *evsel;
674 u64 config;
675
676 evlist__for_each_entry(pt->session->evlist, evsel) {
677 if (intel_pt_get_config(pt, &evsel->attr, &config) &&
678 (config & 1) && !(config & 0x2000))
679 return false;
680 }
681 return true;
682}
683
671static unsigned int intel_pt_mtc_period(struct intel_pt *pt) 684static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
672{ 685{
673 struct perf_evsel *evsel; 686 struct perf_evsel *evsel;
@@ -799,6 +812,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
799 params.walk_insn = intel_pt_walk_next_insn; 812 params.walk_insn = intel_pt_walk_next_insn;
800 params.data = ptq; 813 params.data = ptq;
801 params.return_compression = intel_pt_return_compression(pt); 814 params.return_compression = intel_pt_return_compression(pt);
815 params.branch_enable = intel_pt_branch_enable(pt);
802 params.max_non_turbo_ratio = pt->max_non_turbo_ratio; 816 params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
803 params.mtc_period = intel_pt_mtc_period(pt); 817 params.mtc_period = intel_pt_mtc_period(pt);
804 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; 818 params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
@@ -1308,18 +1322,14 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
1308 ptq->have_sample = false; 1322 ptq->have_sample = false;
1309 1323
1310 if (pt->sample_instructions && 1324 if (pt->sample_instructions &&
1311 (state->type & INTEL_PT_INSTRUCTION) && 1325 (state->type & INTEL_PT_INSTRUCTION)) {
1312 (!pt->synth_opts.initial_skip ||
1313 pt->num_events++ >= pt->synth_opts.initial_skip)) {
1314 err = intel_pt_synth_instruction_sample(ptq); 1326 err = intel_pt_synth_instruction_sample(ptq);
1315 if (err) 1327 if (err)
1316 return err; 1328 return err;
1317 } 1329 }
1318 1330
1319 if (pt->sample_transactions && 1331 if (pt->sample_transactions &&
1320 (state->type & INTEL_PT_TRANSACTION) && 1332 (state->type & INTEL_PT_TRANSACTION)) {
1321 (!pt->synth_opts.initial_skip ||
1322 pt->num_events++ >= pt->synth_opts.initial_skip)) {
1323 err = intel_pt_synth_transaction_sample(ptq); 1333 err = intel_pt_synth_transaction_sample(ptq);
1324 if (err) 1334 if (err)
1325 return err; 1335 return err;
@@ -2025,6 +2035,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
2025 return err; 2035 return err;
2026 } 2036 }
2027 pt->sample_transactions = true; 2037 pt->sample_transactions = true;
2038 pt->transactions_sample_type = attr.sample_type;
2028 pt->transactions_id = id; 2039 pt->transactions_id = id;
2029 id += 1; 2040 id += 1;
2030 evlist__for_each_entry(evlist, evsel) { 2041 evlist__for_each_entry(evlist, evsel) {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7dc1096264c5..d19c40a81040 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2035,7 +2035,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
2035 2035
2036 if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) { 2036 if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
2037 pr_err("File does not contain CPU events. " 2037 pr_err("File does not contain CPU events. "
2038 "Remove -c option to proceed.\n"); 2038 "Remove -C option to proceed.\n");
2039 return -1; 2039 return -1;
2040 } 2040 }
2041 } 2041 }
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index ac10cc675d39..719d6cb86952 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
44static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; 44static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
45static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; 45static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
46static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; 46static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
47static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
48static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
47static struct rblist runtime_saved_values; 49static struct rblist runtime_saved_values;
48static bool have_frontend_stalled; 50static bool have_frontend_stalled;
49 51
@@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
157 memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); 159 memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
158 memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); 160 memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
159 memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); 161 memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
162 memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
163 memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
160 164
161 next = rb_first(&runtime_saved_values.entries); 165 next = rb_first(&runtime_saved_values.entries);
162 while (next) { 166 while (next) {
@@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
217 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); 221 update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
218 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) 222 else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
219 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); 223 update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
224 else if (perf_stat_evsel__is(counter, SMI_NUM))
225 update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
226 else if (perf_stat_evsel__is(counter, APERF))
227 update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
220 228
221 if (counter->collect_stat) { 229 if (counter->collect_stat) {
222 struct saved_value *v = saved_value_lookup(counter, cpu, ctx, 230 struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
@@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
592 return sanitize_val(1.0 - sum); 600 return sanitize_val(1.0 - sum);
593} 601}
594 602
603static void print_smi_cost(int cpu, struct perf_evsel *evsel,
604 struct perf_stat_output_ctx *out)
605{
606 double smi_num, aperf, cycles, cost = 0.0;
607 int ctx = evsel_context(evsel);
608 const char *color = NULL;
609
610 smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
611 aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
612 cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
613
614 if ((cycles == 0) || (aperf == 0))
615 return;
616
617 if (smi_num)
618 cost = (aperf - cycles) / aperf * 100.00;
619
620 if (cost > 10)
621 color = PERF_COLOR_RED;
622 out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
623 out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
624}
625
595void perf_stat__print_shadow_stats(struct perf_evsel *evsel, 626void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
596 double avg, int cpu, 627 double avg, int cpu,
597 struct perf_stat_output_ctx *out) 628 struct perf_stat_output_ctx *out)
@@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
825 } 856 }
826 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); 857 snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
827 print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); 858 print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
859 } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
860 print_smi_cost(cpu, evsel, out);
828 } else { 861 } else {
829 print_metric(ctxp, NULL, NULL, NULL, 0); 862 print_metric(ctxp, NULL, NULL, NULL, 0);
830 } 863 }
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c58174443dc1..53b9a994a3dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
86 ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired), 86 ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
87 ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles), 87 ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
88 ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles), 88 ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
89 ID(SMI_NUM, msr/smi/),
90 ID(APERF, msr/aperf/),
89}; 91};
90#undef ID 92#undef ID
91 93
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0a65ae23f495..7522bf10b03e 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
22 PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED, 22 PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
23 PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES, 23 PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
24 PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES, 24 PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
25 PERF_STAT_EVSEL_ID__SMI_NUM,
26 PERF_STAT_EVSEL_ID__APERF,
25 PERF_STAT_EVSEL_ID__MAX, 27 PERF_STAT_EVSEL_ID__MAX,
26}; 28};
27 29
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 28c9f335006c..988111e0bab5 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -343,43 +343,6 @@ int perf_event_paranoid(void)
343 343
344 return value; 344 return value;
345} 345}
346
347bool find_process(const char *name)
348{
349 size_t len = strlen(name);
350 DIR *dir;
351 struct dirent *d;
352 int ret = -1;
353
354 dir = opendir(procfs__mountpoint());
355 if (!dir)
356 return false;
357
358 /* Walk through the directory. */
359 while (ret && (d = readdir(dir)) != NULL) {
360 char path[PATH_MAX];
361 char *data;
362 size_t size;
363
364 if ((d->d_type != DT_DIR) ||
365 !strcmp(".", d->d_name) ||
366 !strcmp("..", d->d_name))
367 continue;
368
369 scnprintf(path, sizeof(path), "%s/%s/comm",
370 procfs__mountpoint(), d->d_name);
371
372 if (filename__read_str(path, &data, &size))
373 continue;
374
375 ret = strncmp(name, data, len);
376 free(data);
377 }
378
379 closedir(dir);
380 return ret ? false : true;
381}
382
383static int 346static int
384fetch_ubuntu_kernel_version(unsigned int *puint) 347fetch_ubuntu_kernel_version(unsigned int *puint)
385{ 348{
@@ -387,8 +350,12 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
387 size_t line_len = 0; 350 size_t line_len = 0;
388 char *ptr, *line = NULL; 351 char *ptr, *line = NULL;
389 int version, patchlevel, sublevel, err; 352 int version, patchlevel, sublevel, err;
390 FILE *vsig = fopen("/proc/version_signature", "r"); 353 FILE *vsig;
354
355 if (!puint)
356 return 0;
391 357
358 vsig = fopen("/proc/version_signature", "r");
392 if (!vsig) { 359 if (!vsig) {
393 pr_debug("Open /proc/version_signature failed: %s\n", 360 pr_debug("Open /proc/version_signature failed: %s\n",
394 strerror(errno)); 361 strerror(errno));
@@ -418,8 +385,7 @@ fetch_ubuntu_kernel_version(unsigned int *puint)
418 goto errout; 385 goto errout;
419 } 386 }
420 387
421 if (puint) 388 *puint = (version << 16) + (patchlevel << 8) + sublevel;
422 *puint = (version << 16) + (patchlevel << 8) + sublevel;
423 err = 0; 389 err = 0;
424errout: 390errout:
425 free(line); 391 free(line);
@@ -446,6 +412,9 @@ fetch_kernel_version(unsigned int *puint, char *str,
446 str[str_size - 1] = '\0'; 412 str[str_size - 1] = '\0';
447 } 413 }
448 414
415 if (!puint || int_ver_ready)
416 return 0;
417
449 err = sscanf(utsname.release, "%d.%d.%d", 418 err = sscanf(utsname.release, "%d.%d.%d",
450 &version, &patchlevel, &sublevel); 419 &version, &patchlevel, &sublevel);
451 420
@@ -455,8 +424,7 @@ fetch_kernel_version(unsigned int *puint, char *str,
455 return -1; 424 return -1;
456 } 425 }
457 426
458 if (puint && !int_ver_ready) 427 *puint = (version << 16) + (patchlevel << 8) + sublevel;
459 *puint = (version << 16) + (patchlevel << 8) + sublevel;
460 return 0; 428 return 0;
461} 429}
462 430
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 21c6db173bcc..978572dfeb14 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -1,7 +1,6 @@
1#ifndef GIT_COMPAT_UTIL_H 1#ifndef GIT_COMPAT_UTIL_H
2#define GIT_COMPAT_UTIL_H 2#define GIT_COMPAT_UTIL_H
3 3
4#define _ALL_SOURCE 1
5#define _BSD_SOURCE 1 4#define _BSD_SOURCE 1
6/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */ 5/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
7#define _DEFAULT_SOURCE 1 6#define _DEFAULT_SOURCE 1
@@ -49,8 +48,6 @@ int hex2u64(const char *ptr, u64 *val);
49extern unsigned int page_size; 48extern unsigned int page_size;
50extern int cacheline_size; 49extern int cacheline_size;
51 50
52bool find_process(const char *name);
53
54int fetch_kernel_version(unsigned int *puint, 51int fetch_kernel_version(unsigned int *puint,
55 char *str, size_t str_sz); 52 char *str, size_t str_sz);
56#define KVER_VERSION(x) (((x) >> 16) & 0xff) 53#define KVER_VERSION(x) (((x) >> 16) & 0xff)