author    Ingo Molnar <mingo@kernel.org>  2019-05-18 04:24:43 -0400
committer Ingo Molnar <mingo@kernel.org>  2019-05-18 04:24:43 -0400
commit    62e1c09418fc16d27720b128275cac61367e2c1b
tree      4759aa6662b1398e2b93696ace58f6f309722b06 /tools/perf/util
parent    01be377c62210a8d8fef35be906f9349591bb7cd
parent    4fc4d8dfa056dfd48afe73b9ea3b7570ceb80b9c
Merge tag 'perf-core-for-mingo-5.2-20190517' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

perf.data:

  Alexey Budankov:

  - Streaming compression of the perf ring buffer into PERF_RECORD_COMPRESSED
    user space records, resulting in ~3-5x perf.data file size reduction on a
    variety of tested workloads, which saves storage space on larger server
    systems where perf.data can easily reach several tens or even hundreds of
    GiB, especially when profiling with DWARF-based stacks and tracing of
    context switches.

perf record:

  Arnaldo Carvalho de Melo:

  - Improve the --user-regs/--intr-regs suggestions to overcome errors.

perf annotate:

  Jin Yao:

  - Remove hist__account_cycles() from the callback, speeding up branch
    processing (perf record -b).

perf stat:

  - Add a 'percore' event qualifier, e.g.: -e cpu/event=0,umask=0x3,percore=1/,
    that sums up the event counts for both hardware threads in a core.

    We can already do this with --per-core, but it is often useful to do it
    together with other metrics that are collected per hardware thread. I.e.
    it is now possible to do this per event, mixed with other events not
    aggregated by core.

core libraries:

  Donald Yandt:

  - Check for errors when doing fgets(/proc/version).

  Jiri Olsa:

  - Speed up report for perf compiled with libunwind.

tools headers:

  Arnaldo Carvalho de Melo:

  - Update memcpy_64.S, x86's kvm.h and pt_regs.h.

arm64:

  Florian Fainelli:

  - Map Brahma-B53 CPUID to cortex-a53 events.

  - Add Cortex-A57 and Cortex-A72 events.

csky:

  Mao Han:

  - Add DWARF register mappings for libdw, allowing --call-graph=dwarf to work
    on the C-SKY arch.

x86:

  Andi Kleen/Kan Liang:

  - Add support for recording and printing XMM registers, available, for
    instance, on Icelake.

  Kan Liang:

  - Add uncore_upi (Intel's "Ultra Path Interconnect" events) JSON support.
    UPI replaced the Intel QuickPath Interconnect (QPI) in Xeon Skylake-SP.

Intel PT:

  Adrian Hunter:

  - Fix the instructions sampling rate.

  - Timestamp fixes.

  - Improve the exported-sql-viewer GUI, allowing, for instance, to
    copy'n'paste the trees, useful for e-mailing.

Documentation:

  Thomas Richter:

  - Add a description for 'perf --debug stderr=1', which redirects stderr to
    stdout.

libtraceevent:

  Tzvetomir Stoyanov:

  - Add man pages for the various APIs.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
 tools/perf/util/Build                               |   2
 tools/perf/util/annotate.c                          |   2
 tools/perf/util/compress.h                          |  53
 tools/perf/util/env.h                               |  11
 tools/perf/util/event.c                             |   1
 tools/perf/util/event.h                             |   7
 tools/perf/util/evlist.c                            |   8
 tools/perf/util/evlist.h                            |   2
 tools/perf/util/evsel.c                             |   2
 tools/perf/util/evsel.h                             |   3
 tools/perf/util/header.c                            |  53
 tools/perf/util/header.h                            |   1
 tools/perf/util/intel-pt-decoder/intel-pt-decoder.c |  31
 tools/perf/util/machine.c                           |   3
 tools/perf/util/mmap.c                              | 102
 tools/perf/util/mmap.h                              |  16
 tools/perf/util/parse-events.c                      |  27
 tools/perf/util/parse-events.h                      |   1
 tools/perf/util/parse-events.l                      |   1
 tools/perf/util/parse-regs-options.c                |  33
 tools/perf/util/parse-regs-options.h                |   3
 tools/perf/util/perf_regs.c                         |  10
 tools/perf/util/perf_regs.h                         |   3
 tools/perf/util/session.c                           | 133
 tools/perf/util/session.h                           |  14
 tools/perf/util/stat-display.c                      | 107
 tools/perf/util/stat.c                              |   8
 tools/perf/util/thread.c                            |   3
 tools/perf/util/tool.h                              |   2
 tools/perf/util/unwind-libunwind-local.c            |   6
 tools/perf/util/unwind-libunwind.c                  |  10
 tools/perf/util/zstd.c                              | 111
 32 files changed, 620 insertions(+), 149 deletions(-)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8dd3102301ea..6d5bbc8b589b 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,6 +145,8 @@ perf-y += scripting-engines/
 
 perf-$(CONFIG_ZLIB) += zlib.o
 perf-$(CONFIG_LZMA) += lzma.o
+perf-$(CONFIG_ZSTD) += zstd.o
+
 perf-y += demangle-java.o
 perf-y += demangle-rust.o
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 09762985c713..0b8573fd9b05 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1021,7 +1021,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
 		float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
 
 		/* Hide data when there are too many overlaps. */
-		if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
+		if (ch->reset >= 0x7fff)
 			return;
 
 		for (offset = start; offset <= end; offset++) {
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index 892e92e7e7fc..0cd3369af2a4 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -2,6 +2,11 @@
 #ifndef PERF_COMPRESS_H
 #define PERF_COMPRESS_H
 
+#include <stdbool.h>
+#ifdef HAVE_ZSTD_SUPPORT
+#include <zstd.h>
+#endif
+
 #ifdef HAVE_ZLIB_SUPPORT
 int gzip_decompress_to_file(const char *input, int output_fd);
 bool gzip_is_compressed(const char *input);
@@ -12,4 +17,52 @@ int lzma_decompress_to_file(const char *input, int output_fd)
 bool lzma_is_compressed(const char *input);
 #endif
 
+struct zstd_data {
+#ifdef HAVE_ZSTD_SUPPORT
+	ZSTD_CStream	*cstream;
+	ZSTD_DStream	*dstream;
+#endif
+};
+
+#ifdef HAVE_ZSTD_SUPPORT
+
+int zstd_init(struct zstd_data *data, int level);
+int zstd_fini(struct zstd_data *data);
+
+size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+				       void *src, size_t src_size, size_t max_record_size,
+				       size_t process_header(void *record, size_t increment));
+
+size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
+			      void *dst, size_t dst_size);
+#else /* !HAVE_ZSTD_SUPPORT */
+
+static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
+{
+	return 0;
+}
+
+static inline int zstd_fini(struct zstd_data *data __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
+				       void *dst __maybe_unused, size_t dst_size __maybe_unused,
+				       void *src __maybe_unused, size_t src_size __maybe_unused,
+				       size_t max_record_size __maybe_unused,
+				       size_t process_header(void *record, size_t increment) __maybe_unused)
+{
+	return 0;
+}
+
+static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused,
+					    size_t src_size __maybe_unused, void *dst __maybe_unused,
+					    size_t dst_size __maybe_unused)
+{
+	return 0;
+}
+#endif
+
 #endif /* PERF_COMPRESS_H */
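
The block above is the complete streaming-compression API introduced by this series. As a rough, hedged sketch of the intended call sequence (not code from this patch: the chunk parameters and the header callback are simplified placeholders):

	#include <sys/types.h>
	#include "util/compress.h"

	/* Hypothetical header callback: a real one would stamp a perf record
	 * header on each emitted piece and return that header's size. */
	static size_t example_header_cb(void *record, size_t increment)
	{
		(void)record;
		(void)increment;
		return 0;
	}

	static struct zstd_data zd;	/* zeroed; streams are set up by zstd_init() */

	/* Compress one chunk of ring-buffer data into record-sized pieces. */
	static ssize_t example_compress_chunk(void *dst, size_t dst_size,
					      void *src, size_t src_size,
					      size_t max_record_size)
	{
		size_t packed;

		if (zstd_init(&zd, 1))	/* compression level 1, illustrative */
			return -1;

		packed = zstd_compress_stream_to_records(&zd, dst, dst_size,
							 src, src_size, max_record_size,
							 example_header_cb);
		zstd_fini(&zd);
		return packed;
	}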
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 4f8e2b485c01..271a90b326c4 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -62,6 +62,11 @@ struct perf_env {
 	struct cpu_topology_map	*cpu;
 	struct cpu_cache_level	*caches;
 	int			 caches_cnt;
+	u32			comp_ratio;
+	u32			comp_ver;
+	u32			comp_type;
+	u32			comp_level;
+	u32			comp_mmap_len;
 	struct numa_node	*numa_nodes;
 	struct memory_node	*memory_nodes;
 	unsigned long long	 memory_bsize;
@@ -80,6 +85,12 @@ struct perf_env {
 	} bpf_progs;
 };
 
+enum perf_compress_type {
+	PERF_COMP_NONE = 0,
+	PERF_COMP_ZSTD,
+	PERF_COMP_MAX
+};
+
 struct bpf_prog_info_node;
 struct btf_node;
 
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ba7be74fad6e..d1ad6c419724 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -68,6 +68,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_EVENT_UPDATE]		= "EVENT_UPDATE",
 	[PERF_RECORD_TIME_CONV]			= "TIME_CONV",
 	[PERF_RECORD_HEADER_FEATURE]		= "FEATURE",
+	[PERF_RECORD_COMPRESSED]		= "COMPRESSED",
 };
 
 static const char *perf_ns__names[] = {
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 4e908ec1ef64..9e999550f247 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -255,6 +255,7 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_EVENT_UPDATE		= 78,
 	PERF_RECORD_TIME_CONV			= 79,
 	PERF_RECORD_HEADER_FEATURE		= 80,
+	PERF_RECORD_COMPRESSED			= 81,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -627,6 +628,11 @@ struct feature_event {
 	char data[];
 };
 
+struct compressed_event {
+	struct perf_event_header	header;
+	char				data[];
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -660,6 +666,7 @@ union perf_event {
 	struct feature_event		feat;
 	struct ksymbol_event		ksymbol_event;
 	struct bpf_event		bpf_event;
+	struct compressed_event		pack;
 };
 
 void perf_event__print_totals(void);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4b6783ff5813..69d0fa8ab16f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1009,7 +1009,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
  */
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
-			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush)
+			 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
+			 int comp_level)
 {
 	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
@@ -1019,7 +1020,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 	 * Its value is decided by evsel's write_backward.
 	 * So &mp should not be passed through const pointer.
 	 */
-	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush };
+	struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
+				  .comp_level = comp_level };
 
 	if (!evlist->mmap)
 		evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1051,7 +1053,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
 
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
 {
-	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1);
+	return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
 }
 
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
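
The new comp_level parameter simply rides along with the existing AIO/affinity/flush knobs down into struct mmap_params. A hedged caller sketch (the wrapper is illustrative; the default values mirror what perf_evlist__mmap() itself passes):

	#include "util/evlist.h"

	/* Illustrative wrapper only: map the evlist ring buffers and request
	 * compression; comp_level == 0 keeps the old uncompressed behaviour. */
	static int example_mmap_with_compression(struct perf_evlist *evlist,
						 unsigned int pages, int comp_level)
	{
		return perf_evlist__mmap_ex(evlist, pages,
					    0,			/* auxtrace_pages */
					    false,		/* auxtrace_overwrite */
					    0,			/* nr_cblocks: AIO off */
					    PERF_AFFINITY_SYS,	/* affinity */
					    1,			/* flush threshold */
					    comp_level);
	}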
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c9a0f72677fd..49354fe24d5f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -178,7 +178,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
 int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
			 unsigned int auxtrace_pages,
			 bool auxtrace_overwrite, int nr_cblocks,
-			 int affinity, int flush);
+			 int affinity, int flush, int comp_level);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
 void perf_evlist__munmap(struct perf_evlist *evlist);
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a10cf4cde920..a6f572a40deb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -813,6 +813,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
 			break;
 		case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
 			break;
+		case PERF_EVSEL__CONFIG_TERM_PERCORE:
+			break;
 		default:
 			break;
 		}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6d190cbf1070..cad54e8ba522 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -50,6 +50,7 @@ enum term_type {
 	PERF_EVSEL__CONFIG_TERM_OVERWRITE,
 	PERF_EVSEL__CONFIG_TERM_DRV_CFG,
 	PERF_EVSEL__CONFIG_TERM_BRANCH,
+	PERF_EVSEL__CONFIG_TERM_PERCORE,
 };
 
 struct perf_evsel_config_term {
@@ -67,6 +68,7 @@ struct perf_evsel_config_term {
 		bool	overwrite;
 		char	*branch;
 		unsigned long max_events;
+		bool	percore;
 	} val;
 	bool weak;
 };
@@ -158,6 +160,7 @@ struct perf_evsel {
 	struct perf_evsel	**metric_events;
 	bool			collect_stat;
 	bool			weak_group;
+	bool			percore;
 	const char		*pmu_name;
 	struct {
 		perf_evsel__sb_cb_t	*cb;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 2d2af2ac2b1e..847ae51a524b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1344,6 +1344,30 @@ out:
 	return ret;
 }
 
+static int write_compressed(struct feat_fd *ff __maybe_unused,
+			    struct perf_evlist *evlist __maybe_unused)
+{
+	int ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver));
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type));
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level));
+	if (ret)
+		return ret;
+
+	ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio));
+	if (ret)
+		return ret;
+
+	return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
+}
+
 static void print_hostname(struct feat_fd *ff, FILE *fp)
 {
 	fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1688,6 +1712,13 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
 	}
 }
 
+static void print_compressed(struct feat_fd *ff, FILE *fp)
+{
+	fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
+		ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown",
+		ff->ph->env.comp_level, ff->ph->env.comp_ratio);
+}
+
 static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
 {
 	const char *delimiter = "# pmu mappings: ";
@@ -2667,6 +2698,27 @@ out:
 	return err;
 }
 
+static int process_compressed(struct feat_fd *ff,
+			      void *data __maybe_unused)
+{
+	if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_type)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_level)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
+		return -1;
+
+	if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
+		return -1;
+
+	return 0;
+}
+
 struct feature_ops {
 	int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
 	void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2730,6 +2782,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPN(DIR_FORMAT,	dir_format,	false),
 	FEAT_OPR(BPF_PROG_INFO, bpf_prog_info,  false),
 	FEAT_OPR(BPF_BTF,       bpf_btf,        false),
+	FEAT_OPR(COMPRESSED,	compressed,	false),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 386da49e1bfa..5b3abe4172e2 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -42,6 +42,7 @@ enum {
 	HEADER_DIR_FORMAT,
 	HEADER_BPF_PROG_INFO,
 	HEADER_BPF_BTF,
+	HEADER_COMPRESSED,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 872fab163585..f4c3c84b090f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -58,6 +58,7 @@ enum intel_pt_pkt_state {
 	INTEL_PT_STATE_NO_IP,
 	INTEL_PT_STATE_ERR_RESYNC,
 	INTEL_PT_STATE_IN_SYNC,
+	INTEL_PT_STATE_TNT_CONT,
 	INTEL_PT_STATE_TNT,
 	INTEL_PT_STATE_TIP,
 	INTEL_PT_STATE_TIP_PGD,
@@ -72,8 +73,9 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
 	case INTEL_PT_STATE_NO_IP:
 	case INTEL_PT_STATE_ERR_RESYNC:
 	case INTEL_PT_STATE_IN_SYNC:
-	case INTEL_PT_STATE_TNT:
+	case INTEL_PT_STATE_TNT_CONT:
 		return true;
+	case INTEL_PT_STATE_TNT:
 	case INTEL_PT_STATE_TIP:
 	case INTEL_PT_STATE_TIP_PGD:
 	case INTEL_PT_STATE_FUP:
@@ -888,16 +890,20 @@ static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
 	timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
 	masked_timestamp = timestamp & decoder->period_mask;
 	if (decoder->continuous_period) {
-		if (masked_timestamp != decoder->last_masked_timestamp)
+		if (masked_timestamp > decoder->last_masked_timestamp)
 			return 1;
 	} else {
 		timestamp += 1;
 		masked_timestamp = timestamp & decoder->period_mask;
-		if (masked_timestamp != decoder->last_masked_timestamp) {
+		if (masked_timestamp > decoder->last_masked_timestamp) {
 			decoder->last_masked_timestamp = masked_timestamp;
 			decoder->continuous_period = true;
 		}
 	}
+
+	if (masked_timestamp < decoder->last_masked_timestamp)
+		return decoder->period_ticks;
+
 	return decoder->period_ticks - (timestamp - masked_timestamp);
 }
 
@@ -926,7 +932,10 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
 	case INTEL_PT_PERIOD_TICKS:
 		timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
 		masked_timestamp = timestamp & decoder->period_mask;
-		decoder->last_masked_timestamp = masked_timestamp;
+		if (masked_timestamp > decoder->last_masked_timestamp)
+			decoder->last_masked_timestamp = masked_timestamp;
+		else
+			decoder->last_masked_timestamp += decoder->period_ticks;
 		break;
 	case INTEL_PT_PERIOD_NONE:
 	case INTEL_PT_PERIOD_MTC:
@@ -1254,7 +1263,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
 				return -ENOENT;
 			}
 			decoder->tnt.count -= 1;
-			if (!decoder->tnt.count)
+			if (decoder->tnt.count)
+				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+			else
 				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			decoder->tnt.payload <<= 1;
 			decoder->state.from_ip = decoder->ip;
@@ -1285,7 +1296,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
 
 		if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
 			decoder->tnt.count -= 1;
-			if (!decoder->tnt.count)
+			if (decoder->tnt.count)
+				decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+			else
 				decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
 			if (decoder->tnt.payload & BIT63) {
 				decoder->tnt.payload <<= 1;
@@ -1305,8 +1318,11 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
 				return 0;
 			}
 			decoder->ip += intel_pt_insn.length;
-			if (!decoder->tnt.count)
+			if (!decoder->tnt.count) {
+				decoder->sample_timestamp = decoder->timestamp;
+				decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
 				return -EAGAIN;
+			}
 			decoder->tnt.payload <<= 1;
 			continue;
 		}
@@ -2365,6 +2381,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
 			err = intel_pt_walk_trace(decoder);
 			break;
 		case INTEL_PT_STATE_TNT:
+		case INTEL_PT_STATE_TNT_CONT:
 			err = intel_pt_walk_tnt(decoder);
 			if (err == -EAGAIN)
 				err = intel_pt_walk_trace(decoder);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3c520baa198c..28a9541c4835 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1234,8 +1234,9 @@ static char *get_kernel_version(const char *root_dir)
 	if (!file)
 		return NULL;
 
-	version[0] = '\0';
 	tmp = fgets(version, sizeof(version), file);
+	if (!tmp)
+		*version = '\0';
 	fclose(file);
 
 	name = strstr(version, prefix);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ef3d79b2c90b..868c0b0e909c 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
 }
 
 #ifdef HAVE_AIO_SUPPORT
+static int perf_mmap__aio_enabled(struct perf_mmap *map)
+{
+	return map->aio.nr_cblocks > 0;
+}
 
 #ifdef HAVE_LIBNUMA_SUPPORT
 static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
@@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
 
 	return 0;
 }
-#else
+#else /* !HAVE_LIBNUMA_SUPPORT */
 static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
 {
 	map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
@@ -285,81 +289,12 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
 	zfree(&map->aio.cblocks);
 	zfree(&map->aio.aiocb);
 }
-
-int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
-			int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
-			off_t *off)
-{
-	u64 head = perf_mmap__read_head(md);
-	unsigned char *data = md->base + page_size;
-	unsigned long size, size0 = 0;
-	void *buf;
-	int rc = 0;
-
-	rc = perf_mmap__read_init(md);
-	if (rc < 0)
-		return (rc == -EAGAIN) ? 0 : -1;
-
-	/*
-	 * md->base data is copied into md->data[idx] buffer to
-	 * release space in the kernel buffer as fast as possible,
-	 * thru perf_mmap__consume() below.
-	 *
-	 * That lets the kernel to proceed with storing more
-	 * profiling data into the kernel buffer earlier than other
-	 * per-cpu kernel buffers are handled.
-	 *
-	 * Coping can be done in two steps in case the chunk of
-	 * profiling data crosses the upper bound of the kernel buffer.
-	 * In this case we first move part of data from md->start
-	 * till the upper bound and then the reminder from the
-	 * beginning of the kernel buffer till the end of
-	 * the data chunk.
-	 */
-
-	size = md->end - md->start;
-
-	if ((md->start & md->mask) + size != (md->end & md->mask)) {
-		buf = &data[md->start & md->mask];
-		size = md->mask + 1 - (md->start & md->mask);
-		md->start += size;
-		memcpy(md->aio.data[idx], buf, size);
-		size0 = size;
-	}
-
-	buf = &data[md->start & md->mask];
-	size = md->end - md->start;
-	md->start += size;
-	memcpy(md->aio.data[idx] + size0, buf, size);
-
-	/*
-	 * Increment md->refcount to guard md->data[idx] buffer
-	 * from premature deallocation because md object can be
-	 * released earlier than aio write request started
-	 * on mmap->data[idx] is complete.
-	 *
-	 * perf_mmap__put() is done at record__aio_complete()
-	 * after started request completion.
-	 */
-	perf_mmap__get(md);
-
-	md->prev = head;
-	perf_mmap__consume(md);
-
-	rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
-	if (!rc) {
-		*off += size0 + size;
-	} else {
-		/*
-		 * Decrement md->refcount back if aio write
-		 * operation failed to start.
-		 */
-		perf_mmap__put(md);
-	}
-
-	return rc;
-}
-#else
+#else /* !HAVE_AIO_SUPPORT */
+static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
+{
+	return 0;
+}
+
 static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
			       struct mmap_params *mp __maybe_unused)
 {
@@ -374,6 +309,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
 void perf_mmap__munmap(struct perf_mmap *map)
 {
 	perf_mmap__aio_munmap(map);
+	if (map->data != NULL) {
+		munmap(map->data, perf_mmap__mmap_len(map));
+		map->data = NULL;
+	}
 	if (map->base != NULL) {
 		munmap(map->base, perf_mmap__mmap_len(map));
 		map->base = NULL;
@@ -442,6 +381,19 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
 
 	map->flush = mp->flush;
 
+	map->comp_level = mp->comp_level;
+
+	if (map->comp_level && !perf_mmap__aio_enabled(map)) {
+		map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
+				 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+		if (map->data == MAP_FAILED) {
+			pr_debug2("failed to mmap data buffer, error %d\n",
+					errno);
+			map->data = NULL;
+			return -1;
+		}
+	}
+
 	if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
				&mp->auxtrace_mp, map->base, fd))
 		return -1;
@@ -540,7 +492,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
 
 	rc = perf_mmap__read_init(md);
 	if (rc < 0)
-		return (rc == -EAGAIN) ? 0 : -1;
+		return (rc == -EAGAIN) ? 1 : -1;
 
 	size = md->end - md->start;
 
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index b82f8c2d55c4..274ce389cd84 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -40,6 +40,8 @@ struct perf_mmap {
 #endif
 	cpu_set_t	affinity_mask;
 	u64		flush;
+	void		*data;
+	int		comp_level;
 };
 
 /*
@@ -71,7 +73,7 @@ enum bkw_mmap_state {
 };
 
 struct mmap_params {
-	int prot, mask, nr_cblocks, affinity, flush;
+	int prot, mask, nr_cblocks, affinity, flush, comp_level;
 	struct auxtrace_mmap_params auxtrace_mp;
 };
 
@@ -99,18 +101,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map);
 
 int perf_mmap__push(struct perf_mmap *md, void *to,
		    int push(struct perf_mmap *map, void *to, void *buf, size_t size));
-#ifdef HAVE_AIO_SUPPORT
-int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
-			int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
-			off_t *off);
-#else
-static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
-	int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
-	off_t *off __maybe_unused)
-{
-	return 0;
-}
-#endif
 
 size_t perf_mmap__mmap_len(struct perf_mmap *map);
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4432bfe039fd..cf0b9b81c5aa 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -950,6 +950,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
 	[PARSE_EVENTS__TERM_TYPE_OVERWRITE]		= "overwrite",
 	[PARSE_EVENTS__TERM_TYPE_NOOVERWRITE]		= "no-overwrite",
 	[PARSE_EVENTS__TERM_TYPE_DRV_CFG]		= "driver-config",
+	[PARSE_EVENTS__TERM_TYPE_PERCORE]		= "percore",
 };
 
 static bool config_term_shrinked;
@@ -970,6 +971,7 @@ config_term_avail(int term_type, struct parse_events_error *err)
 	case PARSE_EVENTS__TERM_TYPE_CONFIG2:
 	case PARSE_EVENTS__TERM_TYPE_NAME:
 	case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+	case PARSE_EVENTS__TERM_TYPE_PERCORE:
 		return true;
 	default:
 		if (!err)
@@ -1061,6 +1063,14 @@ do { \
 	case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
 		CHECK_TYPE_VAL(NUM);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_PERCORE:
+		CHECK_TYPE_VAL(NUM);
+		if ((unsigned int)term->val.num > 1) {
+			err->str = strdup("expected 0 or 1");
+			err->idx = term->err_val;
+			return -EINVAL;
+		}
+		break;
 	default:
 		err->str = strdup("unknown term");
 		err->idx = term->err_term;
@@ -1199,6 +1209,10 @@ do { \
 	case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
 		ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
 		break;
+	case PARSE_EVENTS__TERM_TYPE_PERCORE:
+		ADD_CONFIG_TERM(PERCORE, percore,
+				term->val.num ? true : false);
+		break;
 	default:
 		break;
 	}
@@ -1260,6 +1274,18 @@ int parse_events_add_tool(struct parse_events_state *parse_state,
 	return add_event_tool(list, &parse_state->idx, tool_event);
 }
 
+static bool config_term_percore(struct list_head *config_terms)
+{
+	struct perf_evsel_config_term *term;
+
+	list_for_each_entry(term, config_terms, list) {
+		if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE)
+			return term->val.percore;
+	}
+
+	return false;
+}
+
 int parse_events_add_pmu(struct parse_events_state *parse_state,
			 struct list_head *list, char *name,
			 struct list_head *head_config,
@@ -1333,6 +1359,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 		evsel->metric_name = info.metric_name;
 		evsel->pmu_name = name;
 		evsel->use_uncore_alias = use_uncore_alias;
+		evsel->percore = config_term_percore(&evsel->config_terms);
 	}
 
 	return evsel ? 0 : -ENOMEM;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index a052cd6ac63e..f7139e1a2fd3 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -75,6 +75,7 @@ enum {
 	PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_OVERWRITE,
 	PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+	PARSE_EVENTS__TERM_TYPE_PERCORE,
 	__PARSE_EVENTS__TERM_TYPE_NR,
 };
 
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index c54bfe88626c..ca6098874fe2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -283,6 +283,7 @@ inherit			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
 no-inherit		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
 overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
 no-overwrite		{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
+percore			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
 ,			{ return ','; }
 "/"			{ BEGIN(INITIAL); return '/'; }
 {name_minus}		{ return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index e6599e290f46..08581e276225 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -5,13 +5,14 @@
 #include <subcmd/parse-options.h>
 #include "util/parse-regs-options.h"
 
-int
-parse_regs(const struct option *opt, const char *str, int unset)
+static int
+__parse_regs(const struct option *opt, const char *str, int unset, bool intr)
 {
 	uint64_t *mode = (uint64_t *)opt->value;
 	const struct sample_reg *r;
 	char *s, *os = NULL, *p;
 	int ret = -1;
+	uint64_t mask;
 
 	if (unset)
 		return 0;
@@ -22,6 +23,11 @@ parse_regs(const struct option *opt, const char *str, int unset)
 	if (*mode)
 		return -1;
 
+	if (intr)
+		mask = arch__intr_reg_mask();
+	else
+		mask = arch__user_reg_mask();
+
 	/* str may be NULL in case no arg is passed to -I */
 	if (str) {
 		/* because str is read-only */
@@ -37,19 +43,20 @@ parse_regs(const struct option *opt, const char *str, int unset)
 			if (!strcmp(s, "?")) {
 				fprintf(stderr, "available registers: ");
 				for (r = sample_reg_masks; r->name; r++) {
-					fprintf(stderr, "%s ", r->name);
+					if (r->mask & mask)
+						fprintf(stderr, "%s ", r->name);
 				}
 				fputc('\n', stderr);
 				/* just printing available regs */
 				return -1;
 			}
 			for (r = sample_reg_masks; r->name; r++) {
-				if (!strcasecmp(s, r->name))
+				if ((r->mask & mask) && !strcasecmp(s, r->name))
 					break;
 			}
 			if (!r->name) {
-				ui__warning("unknown register %s,"
-					    " check man page\n", s);
+				ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
+					    s, intr ? "-I" : "--user-regs=");
 				goto error;
 			}
 
@@ -65,8 +72,20 @@ parse_regs(const struct option *opt, const char *str, int unset)
 
 	/* default to all possible regs */
 	if (*mode == 0)
-		*mode = PERF_REGS_MASK;
+		*mode = mask;
 error:
 	free(os);
 	return ret;
 }
+
+int
+parse_user_regs(const struct option *opt, const char *str, int unset)
+{
+	return __parse_regs(opt, str, unset, false);
+}
+
+int
+parse_intr_regs(const struct option *opt, const char *str, int unset)
+{
+	return __parse_regs(opt, str, unset, true);
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
index cdefb1acf6be..2b23d25c6394 100644
--- a/tools/perf/util/parse-regs-options.h
+++ b/tools/perf/util/parse-regs-options.h
@@ -2,5 +2,6 @@
 #ifndef _PERF_PARSE_REGS_OPTIONS_H
 #define _PERF_PARSE_REGS_OPTIONS_H 1
 struct option;
-int parse_regs(const struct option *opt, const char *str, int unset);
+int parse_user_regs(const struct option *opt, const char *str, int unset);
+int parse_intr_regs(const struct option *opt, const char *str, int unset);
 #endif /* _PERF_PARSE_REGS_OPTIONS_H */
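
Splitting parse_regs() into parse_user_regs()/parse_intr_regs() lets each command bind the right register mask to its option. A minimal wiring sketch, assuming the generic OPT_CALLBACK helper from <subcmd/parse-options.h>; the option table below is illustrative and not taken from this patch:

	#include <stdint.h>
	#include <subcmd/parse-options.h>
	#include "util/parse-regs-options.h"

	static uint64_t sample_intr_regs;	/* illustrative storage for the masks */
	static uint64_t sample_user_regs;

	static const struct option example_options[] = {
		OPT_CALLBACK('I', "intr-regs", &sample_intr_regs, "register list",
			     "sample selected machine registers on interrupt",
			     parse_intr_regs),
		OPT_CALLBACK(0, "user-regs", &sample_user_regs, "register list",
			     "sample selected machine registers on user-space samples",
			     parse_user_regs),
		OPT_END()
	};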
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 2acfcc527cac..2774cec1f15f 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -13,6 +13,16 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
 	return SDT_ARG_SKIP;
 }
 
+uint64_t __weak arch__intr_reg_mask(void)
+{
+	return PERF_REGS_MASK;
+}
+
+uint64_t __weak arch__user_reg_mask(void)
+{
+	return PERF_REGS_MASK;
+}
+
 #ifdef HAVE_PERF_REGS_SUPPORT
 int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
 {
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index c9319f8d17a6..cb9c246c8962 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -12,6 +12,7 @@ struct sample_reg {
 	uint64_t mask;
 };
 #define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
+#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
 #define SMPL_REG_END { .name = NULL }
 
 extern const struct sample_reg sample_reg_masks[];
@@ -22,6 +23,8 @@ enum {
 };
 
 int arch_sdt_arg_parse_op(char *old_op, char **new_op);
+uint64_t arch__intr_reg_mask(void);
+uint64_t arch__user_reg_mask(void);
 
 #ifdef HAVE_PERF_REGS_SUPPORT
 #include <perf_regs.h>
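
arch__intr_reg_mask() and arch__user_reg_mask() get __weak defaults in perf_regs.c (both returning PERF_REGS_MASK), so an architecture only supplies strong definitions when it can advertise something more precise. A hedged sketch of such an override; the file location and the PERF_REG_EXAMPLE_EXTRA bit are made up for illustration:

	/* Hypothetical arch/<arch>/util/perf_regs.c fragment: strong definitions
	 * replace the __weak defaults from util/perf_regs.c at link time. */
	#include "util/perf_regs.h"

	#define PERF_REG_EXAMPLE_EXTRA (1ULL << 63)	/* made-up extra register bit */

	uint64_t arch__user_reg_mask(void)
	{
		/* user-space samples: just the baseline register set */
		return PERF_REGS_MASK;
	}

	uint64_t arch__intr_reg_mask(void)
	{
		/* interrupt samples: baseline plus an arch-specific extra block */
		return PERF_REGS_MASK | PERF_REG_EXAMPLE_EXTRA;
	}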
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index bad5f87ae001..2310a1752983 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -29,6 +29,61 @@
 #include "stat.h"
 #include "arch/common.h"
 
+#ifdef HAVE_ZSTD_SUPPORT
+static int perf_session__process_compressed_event(struct perf_session *session,
+						  union perf_event *event, u64 file_offset)
+{
+	void *src;
+	size_t decomp_size, src_size;
+	u64 decomp_last_rem = 0;
+	size_t decomp_len = session->header.env.comp_mmap_len;
+	struct decomp *decomp, *decomp_last = session->decomp_last;
+
+	decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
+		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	if (decomp == MAP_FAILED) {
+		pr_err("Couldn't allocate memory for decompression\n");
+		return -1;
+	}
+
+	decomp->file_pos = file_offset;
+	decomp->head = 0;
+
+	if (decomp_last) {
+		decomp_last_rem = decomp_last->size - decomp_last->head;
+		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
+		decomp->size = decomp_last_rem;
+	}
+
+	src = (void *)event + sizeof(struct compressed_event);
+	src_size = event->pack.header.size - sizeof(struct compressed_event);
+
+	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
+				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+	if (!decomp_size) {
+		munmap(decomp, sizeof(struct decomp) + decomp_len);
+		pr_err("Couldn't decompress data\n");
+		return -1;
+	}
+
+	decomp->size += decomp_size;
+
+	if (session->decomp == NULL) {
+		session->decomp = decomp;
+		session->decomp_last = decomp;
+	} else {
+		session->decomp_last->next = decomp;
+		session->decomp_last = decomp;
+	}
+
+	pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+
+	return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
+#endif
+
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
 				       struct perf_tool *tool,
@@ -197,6 +252,21 @@ static void perf_session__delete_threads(struct perf_session *session)
 	machine__delete_threads(&session->machines.host);
 }
 
+static void perf_session__release_decomp_events(struct perf_session *session)
+{
+	struct decomp *next, *decomp;
+	size_t decomp_len;
+	next = session->decomp;
+	decomp_len = session->header.env.comp_mmap_len;
+	do {
+		decomp = next;
+		if (decomp == NULL)
+			break;
+		next = decomp->next;
+		munmap(decomp, decomp_len + sizeof(struct decomp));
+	} while (1);
+}
+
 void perf_session__delete(struct perf_session *session)
 {
 	if (session == NULL)
@@ -205,6 +275,7 @@ void perf_session__delete(struct perf_session *session)
 	auxtrace_index__free(&session->auxtrace_index);
 	perf_session__destroy_kernel_maps(session);
 	perf_session__delete_threads(session);
+	perf_session__release_decomp_events(session);
 	perf_env__exit(&session->header.env);
 	machines__exit(&session->machines);
 	if (session->data)
@@ -358,6 +429,14 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu
 	return 0;
 }
 
+static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+				       union perf_event *event __maybe_unused,
+				       u64 file_offset __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
@@ -430,6 +509,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->time_conv = process_event_op2_stub;
 	if (tool->feature == NULL)
 		tool->feature = process_event_op2_stub;
+	if (tool->compressed == NULL)
+		tool->compressed = perf_session__process_compressed_event;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -1373,7 +1454,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 	int fd = perf_data__fd(session->data);
 	int err;
 
-	dump_event(session->evlist, event, file_offset, &sample);
+	if (event->header.type != PERF_RECORD_COMPRESSED ||
+	    tool->compressed == perf_session__process_compressed_event_stub)
+		dump_event(session->evlist, event, file_offset, &sample);
 
 	/* These events are processed right away */
 	switch (event->header.type) {
@@ -1426,6 +1509,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 		return tool->time_conv(session, event);
 	case PERF_RECORD_HEADER_FEATURE:
 		return tool->feature(session, event);
+	case PERF_RECORD_COMPRESSED:
+		err = tool->compressed(session, event, file_offset);
+		if (err)
+			dump_event(session->evlist, event, file_offset, &sample);
+		return err;
 	default:
 		return -EINVAL;
 	}
@@ -1708,6 +1796,8 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
 
 volatile int session_done;
 
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
 static int __perf_session__process_pipe_events(struct perf_session *session)
 {
 	struct ordered_events *oe = &session->ordered_events;
@@ -1788,6 +1878,10 @@ more:
 	if (skip > 0)
 		head += skip;
 
+	err = __perf_session__process_decomp_events(session);
+	if (err)
+		goto out_err;
+
 	if (!session_done())
 		goto more;
 done:
@@ -1836,6 +1930,39 @@ fetch_mmaped_event(struct perf_session *session,
 	return event;
 }
 
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+	s64 skip;
+	u64 size, file_pos = 0;
+	struct decomp *decomp = session->decomp_last;
+
+	if (!decomp)
+		return 0;
+
+	while (decomp->head < decomp->size && !session_done()) {
+		union perf_event *event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);
+
+		if (!event)
+			break;
+
+		size = event->header.size;
+
+		if (size < sizeof(struct perf_event_header) ||
+		    (skip = perf_session__process_event(session, event, file_pos)) < 0) {
+			pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+				decomp->file_pos + decomp->head, event->header.size, event->header.type);
+			return -EINVAL;
+		}
+
+		if (skip)
+			size += skip;
+
+		decomp->head += size;
+	}
+
+	return 0;
+}
+
 /*
  * On 64bit we can mmap the data file in one go. No need for tiny mmap
  * slices. On 32bit we use 32MB.
@@ -1945,6 +2072,10 @@ more:
 	head += size;
 	file_pos += size;
 
+	err = __perf_session__process_decomp_events(session);
+	if (err)
+		goto out;
+
 	ui_progress__update(prog, size);
 
 	if (session_done())
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d96eccd7d27f..dd8920b745bc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -8,6 +8,7 @@
 #include "machine.h"
 #include "data.h"
 #include "ordered-events.h"
+#include "util/compress.h"
 #include <linux/kernel.h>
 #include <linux/rbtree.h>
 #include <linux/perf_event.h>
@@ -35,6 +36,19 @@ struct perf_session {
 	struct ordered_events	ordered_events;
 	struct perf_data	*data;
 	struct perf_tool	*tool;
+	u64			bytes_transferred;
+	u64			bytes_compressed;
+	struct zstd_data	zstd_data;
+	struct decomp		*decomp;
+	struct decomp		*decomp_last;
+};
+
+struct decomp {
+	struct decomp *next;
+	u64 file_pos;
+	u64 head;
+	size_t size;
+	char data[];
 };
 
 struct perf_tool;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 3324f23c7efc..4c53bae5644b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -88,9 +88,17 @@ static void aggr_printout(struct perf_stat_config *config,
88 config->csv_sep); 88 config->csv_sep);
89 break; 89 break;
90 case AGGR_NONE: 90 case AGGR_NONE:
91 fprintf(config->output, "CPU%*d%s", 91 if (evsel->percore) {
92 config->csv_output ? 0 : -4, 92 fprintf(config->output, "S%d-C%*d%s",
93 perf_evsel__cpus(evsel)->map[id], config->csv_sep); 93 cpu_map__id_to_socket(id),
94 config->csv_output ? 0 : -5,
95 cpu_map__id_to_cpu(id), config->csv_sep);
96 } else {
97 fprintf(config->output, "CPU%*d%s ",
98 config->csv_output ? 0 : -5,
99 perf_evsel__cpus(evsel)->map[id],
100 config->csv_sep);
101 }
94 break; 102 break;
95 case AGGR_THREAD: 103 case AGGR_THREAD:
96 fprintf(config->output, "%*s-%*d%s", 104 fprintf(config->output, "%*s-%*d%s",
@@ -594,6 +602,41 @@ static void aggr_cb(struct perf_stat_config *config,
594 } 602 }
595} 603}
596 604
605static void print_counter_aggrdata(struct perf_stat_config *config,
606 struct perf_evsel *counter, int s,
607 char *prefix, bool metric_only,
608 bool *first)
609{
610 struct aggr_data ad;
611 FILE *output = config->output;
612 u64 ena, run, val;
613 int id, nr;
614 double uval;
615
616 ad.id = id = config->aggr_map->map[s];
617 ad.val = ad.ena = ad.run = 0;
618 ad.nr = 0;
619 if (!collect_data(config, counter, aggr_cb, &ad))
620 return;
621
622 nr = ad.nr;
623 ena = ad.ena;
624 run = ad.run;
625 val = ad.val;
626 if (*first && metric_only) {
627 *first = false;
628 aggr_printout(config, counter, id, nr);
629 }
630 if (prefix && !metric_only)
631 fprintf(output, "%s", prefix);
632
633 uval = val * counter->scale;
634 printout(config, id, nr, counter, uval, prefix,
635 run, ena, 1.0, &rt_stat);
636 if (!metric_only)
637 fputc('\n', output);
638}
639
597static void print_aggr(struct perf_stat_config *config, 640static void print_aggr(struct perf_stat_config *config,
598 struct perf_evlist *evlist, 641 struct perf_evlist *evlist,
599 char *prefix) 642 char *prefix)
@@ -601,9 +644,7 @@ static void print_aggr(struct perf_stat_config *config,
601 bool metric_only = config->metric_only; 644 bool metric_only = config->metric_only;
602 FILE *output = config->output; 645 FILE *output = config->output;
603 struct perf_evsel *counter; 646 struct perf_evsel *counter;
604 int s, id, nr; 647 int s;
605 double uval;
606 u64 ena, run, val;
607 bool first; 648 bool first;
608 649
609 if (!(config->aggr_map || config->aggr_get_id)) 650 if (!(config->aggr_map || config->aggr_get_id))
@@ -616,33 +657,14 @@ static void print_aggr(struct perf_stat_config *config,
616 * Without each counter has its own line. 657 * Without each counter has its own line.
617 */ 658 */
618 for (s = 0; s < config->aggr_map->nr; s++) { 659 for (s = 0; s < config->aggr_map->nr; s++) {
619 struct aggr_data ad;
620 if (prefix && metric_only) 660 if (prefix && metric_only)
621 fprintf(output, "%s", prefix); 661 fprintf(output, "%s", prefix);
622 662
623 ad.id = id = config->aggr_map->map[s];
624 first = true; 663 first = true;
625 evlist__for_each_entry(evlist, counter) { 664 evlist__for_each_entry(evlist, counter) {
626 ad.val = ad.ena = ad.run = 0; 665 print_counter_aggrdata(config, counter, s,
627 ad.nr = 0; 666 prefix, metric_only,
628 if (!collect_data(config, counter, aggr_cb, &ad)) 667 &first);
629 continue;
630 nr = ad.nr;
631 ena = ad.ena;
632 run = ad.run;
633 val = ad.val;
634 if (first && metric_only) {
635 first = false;
636 aggr_printout(config, counter, id, nr);
637 }
638 if (prefix && !metric_only)
639 fprintf(output, "%s", prefix);
640
641 uval = val * counter->scale;
642 printout(config, id, nr, counter, uval, prefix,
643 run, ena, 1.0, &rt_stat);
644 if (!metric_only)
645 fputc('\n', output);
646 } 668 }
647 if (metric_only) 669 if (metric_only)
648 fputc('\n', output); 670 fputc('\n', output);
@@ -1089,6 +1111,30 @@ static void print_footer(struct perf_stat_config *config)
1089 "the same PMU. Try reorganizing the group.\n"); 1111 "the same PMU. Try reorganizing the group.\n");
1090} 1112}
1091 1113
1114static void print_percore(struct perf_stat_config *config,
1115 struct perf_evsel *counter, char *prefix)
1116{
1117 bool metric_only = config->metric_only;
1118 FILE *output = config->output;
1119 int s;
1120 bool first = true;
1121
1122 if (!(config->aggr_map || config->aggr_get_id))
1123 return;
1124
1125 for (s = 0; s < config->aggr_map->nr; s++) {
1126 if (prefix && metric_only)
1127 fprintf(output, "%s", prefix);
1128
1129 print_counter_aggrdata(config, counter, s,
1130 prefix, metric_only,
1131 &first);
1132 }
1133
1134 if (metric_only)
1135 fputc('\n', output);
1136}
1137
1092void 1138void
1093perf_evlist__print_counters(struct perf_evlist *evlist, 1139perf_evlist__print_counters(struct perf_evlist *evlist,
1094 struct perf_stat_config *config, 1140 struct perf_stat_config *config,
@@ -1139,7 +1185,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
1139 print_no_aggr_metric(config, evlist, prefix); 1185 print_no_aggr_metric(config, evlist, prefix);
1140 else { 1186 else {
1141 evlist__for_each_entry(evlist, counter) { 1187 evlist__for_each_entry(evlist, counter) {
1142 print_counter(config, counter, prefix); 1188 if (counter->percore)
1189 print_percore(config, counter, prefix);
1190 else
1191 print_counter(config, counter, prefix);
1143 } 1192 }
1144 } 1193 }
1145 break; 1194 break;
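
The stat-display.c hunks above factor the body of print_aggr()'s inner loop out into print_counter_aggrdata(), which the new print_percore() then reuses to print one summed line per core for events carrying the percore qualifier. A rough standalone sketch (not perf code; the topology and output format are made up for illustration) of the per-core summing this printing path represents:

#include <stdio.h>

struct cpu_count { int cpu; int core_id; unsigned long long val; };

int main(void)
{
        /* hypothetical topology: 4 CPUs, two SMT siblings per core */
        struct cpu_count counts[] = {
                { 0, 0, 1000 }, { 1, 1, 2000 },
                { 2, 0, 1500 }, { 3, 1, 2500 },
        };
        unsigned long long per_core[2] = { 0, 0 };
        size_t i;
        int c;

        /* sum sibling-thread counts into their core's bucket, as percore=1 does */
        for (i = 0; i < sizeof(counts) / sizeof(counts[0]); i++)
                per_core[counts[i].core_id] += counts[i].val;

        for (c = 0; c < 2; c++)
                printf("S0-C%d  %llu\n", c, per_core[c]);
        return 0;
}
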
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2856cc9d5a31..c3115d939b0b 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -277,9 +277,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
277 if (!evsel->snapshot) 277 if (!evsel->snapshot)
278 perf_evsel__compute_deltas(evsel, cpu, thread, count); 278 perf_evsel__compute_deltas(evsel, cpu, thread, count);
279 perf_counts_values__scale(count, config->scale, NULL); 279 perf_counts_values__scale(count, config->scale, NULL);
280 if (config->aggr_mode == AGGR_NONE) 280 if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
281 perf_stat__update_shadow_stats(evsel, count->val, cpu, 281 perf_stat__update_shadow_stats(evsel, count->val,
282 &rt_stat); 282 cpu, &rt_stat);
283 }
284
283 if (config->aggr_mode == AGGR_THREAD) { 285 if (config->aggr_mode == AGGR_THREAD) {
284 if (config->stats) 286 if (config->stats)
285 perf_stat__update_shadow_stats(evsel, 287 perf_stat__update_shadow_stats(evsel,
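
In the stat.c hunk just above, the per-CPU shadow-stat update (the input for derived metrics) is skipped when an event is marked percore, since its per-CPU values are only partial sums of the core total. A minimal standalone sketch of that guard, using stand-in types rather than the real perf structs:

#include <stdbool.h>
#include <stdio.h>

enum aggr_mode_like { AGGR_NONE_LIKE, AGGR_CORE_LIKE };

struct evsel_like { bool percore; };

static void update_shadow(int cpu, unsigned long long val)
{
        printf("shadow update on cpu%d: %llu\n", cpu, val);
}

static void process_value(enum aggr_mode_like mode, struct evsel_like *ev,
                          int cpu, unsigned long long val)
{
        /* same shape as the new check: per-CPU shadow stats are skipped
         * for events whose counts will be summed per core */
        if (mode == AGGR_NONE_LIKE && !ev->percore)
                update_shadow(cpu, val);
}

int main(void)
{
        struct evsel_like plain = { .percore = false };
        struct evsel_like percore = { .percore = true };

        process_value(AGGR_NONE_LIKE, &plain, 0, 100);   /* updates */
        process_value(AGGR_NONE_LIKE, &percore, 0, 100); /* skipped */
        return 0;
}
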
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 50678d318185..403045a2bbea 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -15,6 +15,7 @@
15#include "map.h" 15#include "map.h"
16#include "symbol.h" 16#include "symbol.h"
17#include "unwind.h" 17#include "unwind.h"
18#include "callchain.h"
18 19
19#include <api/fs/fs.h> 20#include <api/fs/fs.h>
20 21
@@ -327,7 +328,7 @@ static int thread__prepare_access(struct thread *thread)
327{ 328{
328 int err = 0; 329 int err = 0;
329 330
330 if (symbol_conf.use_callchain) 331 if (dwarf_callchain_users)
331 err = __thread__prepare_access(thread); 332 err = __thread__prepare_access(thread);
332 333
333 return err; 334 return err;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 250391672f9f..9096a6e3de59 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
28 28
29typedef int (*event_op2)(struct perf_session *session, union perf_event *event); 29typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
30typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); 30typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
31typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data);
31 32
32typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, 33typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
33 struct ordered_events *oe); 34 struct ordered_events *oe);
@@ -72,6 +73,7 @@ struct perf_tool {
72 stat, 73 stat,
73 stat_round, 74 stat_round,
74 feature; 75 feature;
76 event_op4 compressed;
75 event_op3 auxtrace; 77 event_op3 auxtrace;
76 bool ordered_events; 78 bool ordered_events;
77 bool ordering_requires_timestamps; 79 bool ordering_requires_timestamps;
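
The tool.h hunk adds a new callback shape, event_op4, which passes an extra u64 alongside the session and event, plus a .compressed member of that type for handling compressed records. A standalone sketch of the pattern with stand-in types (the meaning of the extra argument is an assumption here; this diff does not show the caller):

#include <stdint.h>
#include <stdio.h>

/* stand-ins for the perf types; the real ones live in the perf tree */
struct session_like { const char *name; };
union event_like { struct { uint32_t type; uint32_t size; } header; };

/* mirrors the new event_op4 shape: session, event, plus one extra u64 */
typedef int (*op4_like)(struct session_like *s, union event_like *ev,
                        uint64_t data);

struct tool_like {
        op4_like compressed;    /* handler for compressed records */
};

static int handle_compressed(struct session_like *s, union event_like *ev,
                             uint64_t data)
{
        printf("%s: record type=%u size=%u, extra=%llu\n",
               s->name, ev->header.type, ev->header.size,
               (unsigned long long)data);
        return 0;
}

int main(void)
{
        struct session_like s = { "session" };
        union event_like ev = { .header = { .type = 1, .size = 64 } };
        struct tool_like tool = { .compressed = handle_compressed };

        return tool.compressed(&s, &ev, 4096);
}
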
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index f3c666a84e4d..25e1406b1f8b 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -617,8 +617,6 @@ static unw_accessors_t accessors = {
617 617
618static int _unwind__prepare_access(struct thread *thread) 618static int _unwind__prepare_access(struct thread *thread)
619{ 619{
620 if (!dwarf_callchain_users)
621 return 0;
622 thread->addr_space = unw_create_addr_space(&accessors, 0); 620 thread->addr_space = unw_create_addr_space(&accessors, 0);
623 if (!thread->addr_space) { 621 if (!thread->addr_space) {
624 pr_err("unwind: Can't create unwind address space.\n"); 622 pr_err("unwind: Can't create unwind address space.\n");
@@ -631,15 +629,11 @@ static int _unwind__prepare_access(struct thread *thread)
631 629
632static void _unwind__flush_access(struct thread *thread) 630static void _unwind__flush_access(struct thread *thread)
633{ 631{
634 if (!dwarf_callchain_users)
635 return;
636 unw_flush_cache(thread->addr_space, 0, 0); 632 unw_flush_cache(thread->addr_space, 0, 0);
637} 633}
638 634
639static void _unwind__finish_access(struct thread *thread) 635static void _unwind__finish_access(struct thread *thread)
640{ 636{
641 if (!dwarf_callchain_users)
642 return;
643 unw_destroy_addr_space(thread->addr_space); 637 unw_destroy_addr_space(thread->addr_space);
644} 638}
645 639
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 9778b3133b77..c0811977d7d5 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -5,6 +5,7 @@
5#include "session.h" 5#include "session.h"
6#include "debug.h" 6#include "debug.h"
7#include "env.h" 7#include "env.h"
8#include "callchain.h"
8 9
9struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; 10struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
10struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; 11struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
@@ -24,6 +25,9 @@ int unwind__prepare_access(struct thread *thread, struct map *map,
24 struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops; 25 struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
25 int err; 26 int err;
26 27
28 if (!dwarf_callchain_users)
29 return 0;
30
27 if (thread->addr_space) { 31 if (thread->addr_space) {
28 pr_debug("unwind: thread map already set, dso=%s\n", 32 pr_debug("unwind: thread map already set, dso=%s\n",
29 map->dso->name); 33 map->dso->name);
@@ -65,12 +69,18 @@ out_register:
65 69
66void unwind__flush_access(struct thread *thread) 70void unwind__flush_access(struct thread *thread)
67{ 71{
72 if (!dwarf_callchain_users)
73 return;
74
68 if (thread->unwind_libunwind_ops) 75 if (thread->unwind_libunwind_ops)
69 thread->unwind_libunwind_ops->flush_access(thread); 76 thread->unwind_libunwind_ops->flush_access(thread);
70} 77}
71 78
72void unwind__finish_access(struct thread *thread) 79void unwind__finish_access(struct thread *thread)
73{ 80{
81 if (!dwarf_callchain_users)
82 return;
83
74 if (thread->unwind_libunwind_ops) 84 if (thread->unwind_libunwind_ops)
75 thread->unwind_libunwind_ops->finish_access(thread); 85 thread->unwind_libunwind_ops->finish_access(thread);
76} 86}
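
Taken together, the thread.c and unwind-libunwind changes hoist the dwarf_callchain_users check out of the libunwind-local callbacks and into the generic unwind__prepare_access()/flush_access()/finish_access() entry points, and thread__prepare_access() now keys off the same flag instead of symbol_conf.use_callchain. As a result, a run that does not use DWARF callchains never touches the per-thread libunwind address-space setup at all. A standalone sketch of that guard-hoisting pattern (stand-in types, not the real ops table):

#include <stdbool.h>
#include <stdio.h>

static bool dwarf_callchain_users;      /* would be set for --call-graph=dwarf */

struct unwind_ops_like {
        int (*prepare)(void);
        void (*finish)(void);
};

static int local_prepare(void)
{
        puts("creating libunwind address space (expensive)");
        return 0;
}

static void local_finish(void)
{
        puts("destroying libunwind address space");
}

static const struct unwind_ops_like local_ops = { local_prepare, local_finish };

static int prepare_access(const struct unwind_ops_like *ops)
{
        if (!dwarf_callchain_users)     /* guard at the single generic entry point */
                return 0;
        return ops->prepare();
}

int main(void)
{
        prepare_access(&local_ops);     /* FP/LBR callchains: nothing happens */
        dwarf_callchain_users = true;
        prepare_access(&local_ops);     /* DWARF callchains: real setup runs */
        return 0;
}
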
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000000..23bdb9884576
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,111 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <string.h>
4
5#include "util/compress.h"
6#include "util/debug.h"
7
8int zstd_init(struct zstd_data *data, int level)
9{
10 size_t ret;
11
12 data->dstream = ZSTD_createDStream();
13 if (data->dstream == NULL) {
14 pr_err("Couldn't create decompression stream.\n");
15 return -1;
16 }
17
18 ret = ZSTD_initDStream(data->dstream);
19 if (ZSTD_isError(ret)) {
20 pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
21 return -1;
22 }
23
24 if (!level)
25 return 0;
26
27 data->cstream = ZSTD_createCStream();
28 if (data->cstream == NULL) {
29 pr_err("Couldn't create compression stream.\n");
30 return -1;
31 }
32
33 ret = ZSTD_initCStream(data->cstream, level);
34 if (ZSTD_isError(ret)) {
35 pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
36 return -1;
37 }
38
39 return 0;
40}
41
42int zstd_fini(struct zstd_data *data)
43{
44 if (data->dstream) {
45 ZSTD_freeDStream(data->dstream);
46 data->dstream = NULL;
47 }
48
49 if (data->cstream) {
50 ZSTD_freeCStream(data->cstream);
51 data->cstream = NULL;
52 }
53
54 return 0;
55}
56
57size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
58 void *src, size_t src_size, size_t max_record_size,
59 size_t process_header(void *record, size_t increment))
60{
61 size_t ret, size, compressed = 0;
62 ZSTD_inBuffer input = { src, src_size, 0 };
63 ZSTD_outBuffer output;
64 void *record;
65
66 while (input.pos < input.size) {
67 record = dst;
68 size = process_header(record, 0);
69 compressed += size;
70 dst += size;
71 dst_size -= size;
72 output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
73 max_record_size : dst_size, 0 };
74 ret = ZSTD_compressStream(data->cstream, &output, &input);
75 ZSTD_flushStream(data->cstream, &output);
76 if (ZSTD_isError(ret)) {
77 pr_err("failed to compress %ld bytes: %s\n",
78 (long)src_size, ZSTD_getErrorName(ret));
79 memcpy(dst, src, src_size);
80 return src_size;
81 }
82 size = output.pos;
83 size = process_header(record, size);
84 compressed += size;
85 dst += size;
86 dst_size -= size;
87 }
88
89 return compressed;
90}
91
92size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
93 void *dst, size_t dst_size)
94{
95 size_t ret;
96 ZSTD_inBuffer input = { src, src_size, 0 };
97 ZSTD_outBuffer output = { dst, dst_size, 0 };
98
99 while (input.pos < input.size) {
100 ret = ZSTD_decompressStream(data->dstream, &output, &input);
101 if (ZSTD_isError(ret)) {
102 pr_err("failed to decompress (B): %ld -> %ld : %s\n",
103 src_size, output.size, ZSTD_getErrorName(ret));
104 break;
105 }
106 output.dst = dst + output.pos;
107 output.size = dst_size - output.pos;
108 }
109
110 return output.pos;
111}
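
The new zstd.c keeps one long-lived compression stream and slices its output into bounded records, with the caller-supplied process_header() first reserving and then patching a header around each chunk. Below is a standalone sketch of that record-chunking idea against the plain libzstd streaming API (build with -lzstd); the size_t length prefix is a made-up stand-in for the real record header, and the compression level is an arbitrary choice for the sketch:

#include <stdio.h>
#include <string.h>
#include <zstd.h>

/*
 * Toy header: a size_t payload length at the start of each record.
 * The contract matches process_header() above: called with 0 it
 * returns the header size, called with the payload size it patches
 * the header and returns the total record size.
 */
static size_t toy_header(void *record, size_t payload)
{
        memcpy(record, &payload, sizeof(payload));
        return sizeof(payload) + payload;
}

int main(void)
{
        static char src[64 * 1024], dst[128 * 1024];
        size_t max_record = 16 * 1024, compressed;
        ZSTD_CStream *cs = ZSTD_createCStream();
        ZSTD_inBuffer in = { src, sizeof(src), 0 };
        char *out = dst;

        memset(src, 'A', sizeof(src));          /* highly compressible input */
        ZSTD_initCStream(cs, 1);                /* arbitrary low level */

        while (in.pos < in.size) {              /* error handling elided */
                char *record = out;
                size_t hdr = toy_header(record, 0);     /* reserve the header */
                ZSTD_outBuffer ob = { record + hdr, max_record - hdr, 0 };

                ZSTD_compressStream(cs, &ob, &in);
                ZSTD_flushStream(cs, &ob);      /* record holds whole blocks */

                out += toy_header(record, ob.pos);      /* patch size, advance */
        }

        compressed = (size_t)(out - dst);
        printf("%zu bytes in -> %zu bytes of records (<= %zu each)\n",
               sizeof(src), compressed, max_record);
        ZSTD_freeCStream(cs);
        return 0;
}

On the read side, zstd_decompress_stream() above feeds each record's payload through the single persistent ZSTD_DStream created in zstd_init(), which is why flushed blocks per record suffice and independent frames are not needed.
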