summaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2019-05-18 04:24:43 -0400
committerIngo Molnar <mingo@kernel.org>2019-05-18 04:24:43 -0400
commit62e1c09418fc16d27720b128275cac61367e2c1b (patch)
tree4759aa6662b1398e2b93696ace58f6f309722b06 /tools/perf
parent01be377c62210a8d8fef35be906f9349591bb7cd (diff)
parent4fc4d8dfa056dfd48afe73b9ea3b7570ceb80b9c (diff)
Merge tag 'perf-core-for-mingo-5.2-20190517' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: perf.data: Alexey Budankov: - Streaming compression of perf ring buffer into PERF_RECORD_COMPRESSED user space records, resulting in ~3-5x perf.data file size reduction on variety of tested workloads, which saves storage space on larger server systems where perf.data size can easily reach several tens or even hundreds of GiBs, especially when profiling with DWARF-based stacks and tracing of context switches. perf record: Arnaldo Carvalho de Melo - Improve --user-regs/--intr-regs suggestions to overcome errors. perf annotate: Jin Yao: - Remove hist__account_cycles() from callback, speeding up branch processing (perf record -b). perf stat: - Add a 'percore' event qualifier, e.g.: -e cpu/event=0,umask=0x3,percore=1/, that sums up the event counts for both hardware threads in a core. We can already do this with --per-core, but it's often useful to do this together with other metrics that are collected per hardware thread. I.e. now it's possible to do this per-event, and have it mixed with other events not aggregated by core. core libraries: Donald Yandt: - Check for errors when doing fgets(/proc/version). Jiri Olsa: - Speed up report for perf compiled with libunwind. tools headers: Arnaldo Carvalho de Melo - Update memcpy_64.S, x86's kvm.h and pt_regs.h. arm64: Florian Fainelli: - Map Brahma-B53 CPUID to cortex-a53 events. - Add Cortex-A57 and Cortex-A72 events. csky: Mao Han: - Add DWARF register mappings for libdw, allowing --call-graph=dwarf to work on the C-SKY arch. x86: Andi Kleen/Kan Liang: - Add support for recording and printing XMM registers, available, for instance, on Icelake. Kan Liang: - Add uncore_upi (Intel's "Ultra Path Interconnect" events) JSON support. UPI replaced the Intel QuickPath Interconnect (QPI) in Xeon Skylake-SP. Intel PT: Adrian Hunter: - Fix instructions sampling rate. - Timestamp fixes. - 
Improve exported-sql-viewer GUI, allowing, for instance, to copy'n'paste the trees, useful for e-mailing. Documentation: Thomas Richter: - Add description for 'perf --debug stderr=1', which redirects stderr to stdout. libtraceevent: Tzvetomir Stoyanov: - Add man pages for the various APIs. Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/perf-list.txt12
-rw-r--r--tools/perf/Documentation/perf-record.txt8
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt24
-rw-r--r--tools/perf/Documentation/perf.txt2
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h26
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c44
-rw-r--r--tools/perf/builtin-annotate.c4
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-record.c229
-rw-r--r--tools/perf/builtin-report.c16
-rw-r--r--tools/perf/builtin-stat.c21
-rw-r--r--tools/perf/perf.h1
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json179
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv5
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rwxr-xr-xtools/perf/scripts/python/exported-sql-viewer.py340
-rw-r--r--tools/perf/tests/dso-data.c4
-rw-r--r--tools/perf/tests/make2
-rwxr-xr-xtools/perf/tests/shell/record+zstd_comp_decomp.sh34
-rw-r--r--tools/perf/util/Build2
-rw-r--r--tools/perf/util/annotate.c2
-rw-r--r--tools/perf/util/compress.h53
-rw-r--r--tools/perf/util/env.h11
-rw-r--r--tools/perf/util/event.c1
-rw-r--r--tools/perf/util/event.h7
-rw-r--r--tools/perf/util/evlist.c8
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c2
-rw-r--r--tools/perf/util/evsel.h3
-rw-r--r--tools/perf/util/header.c53
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c31
-rw-r--r--tools/perf/util/machine.c3
-rw-r--r--tools/perf/util/mmap.c102
-rw-r--r--tools/perf/util/mmap.h16
-rw-r--r--tools/perf/util/parse-events.c27
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/parse-regs-options.c33
-rw-r--r--tools/perf/util/parse-regs-options.h3
-rw-r--r--tools/perf/util/perf_regs.c10
-rw-r--r--tools/perf/util/perf_regs.h3
-rw-r--r--tools/perf/util/session.c133
-rw-r--r--tools/perf/util/session.h14
-rw-r--r--tools/perf/util/stat-display.c107
-rw-r--r--tools/perf/util/stat.c8
-rw-r--r--tools/perf/util/thread.c3
-rw-r--r--tools/perf/util/tool.h2
-rw-r--r--tools/perf/util/unwind-libunwind-local.c6
-rw-r--r--tools/perf/util/unwind-libunwind.c10
-rw-r--r--tools/perf/util/zstd.c111
52 files changed, 1525 insertions, 205 deletions
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 138fb6e94b3c..18ed1b0fceb3 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -199,6 +199,18 @@ also be supplied. For example:
199 199
200 perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ... 200 perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
201 201
202EVENT QUALIFIERS:
203
204It is also possible to add extra qualifiers to an event:
205
206percore:
207
208Sums up the event counts for all hardware threads in a core, e.g.:
209
210
211 perf stat -e cpu/event=0,umask=0x3,percore=1/
212
213
202EVENT GROUPS 214EVENT GROUPS
203------------ 215------------
204 216
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 58986f4cc190..de269430720a 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -406,7 +406,8 @@ symbolic names, e.g. on x86, ax, si. To list the available registers use
406--intr-regs=ax,bx. The list of register is architecture dependent. 406--intr-regs=ax,bx. The list of register is architecture dependent.
407 407
408--user-regs:: 408--user-regs::
409Capture user registers at sample time. Same arguments as -I. 409Similar to -I, but capture user registers at sample time. To list the available
410user registers use --user-regs=\?.
410 411
411--running-time:: 412--running-time::
412Record running and enabled time for read events (:S) 413Record running and enabled time for read events (:S)
@@ -478,6 +479,11 @@ Also at some cases executing less output write syscalls with bigger data size
478can take less time than executing more output write syscalls with smaller data 479can take less time than executing more output write syscalls with smaller data
479size thus lowering runtime profiling overhead. 480size thus lowering runtime profiling overhead.
480 481
482-z::
483--compression-level[=n]::
484Produce compressed trace using specified level n (default: 1 - fastest compression,
48522 - smallest trace)
486
481--all-kernel:: 487--all-kernel::
482Configure all used events to run in kernel space. 488Configure all used events to run in kernel space.
483 489
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 39c05f89104e..1e312c2672e4 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -43,6 +43,10 @@ report::
43 param1 and param2 are defined as formats for the PMU in 43 param1 and param2 are defined as formats for the PMU in
44 /sys/bus/event_source/devices/<pmu>/format/* 44 /sys/bus/event_source/devices/<pmu>/format/*
45 45
46 'percore' is a event qualifier that sums up the event counts for both
47 hardware threads in a core. For example:
48 perf stat -A -a -e cpu/event,percore=1/,otherevent ...
49
46 - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/' 50 - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
47 where M, N, K are numbers (in decimal, hex, octal format). 51 where M, N, K are numbers (in decimal, hex, octal format).
48 Acceptable values for each of 'config', 'config1' and 'config2' 52 Acceptable values for each of 'config', 'config1' and 'config2'
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index 593ef49b273c..6967e9b02be5 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -272,6 +272,19 @@ struct {
272 272
273Two uint64_t for the time of first sample and the time of last sample. 273Two uint64_t for the time of first sample and the time of last sample.
274 274
275 HEADER_COMPRESSED = 27,
276
277struct {
278 u32 version;
279 u32 type;
280 u32 level;
281 u32 ratio;
282 u32 mmap_len;
283};
284
285Indicates that trace contains records of PERF_RECORD_COMPRESSED type
286that have perf_events records in compressed form.
287
275 other bits are reserved and should ignored for now 288 other bits are reserved and should ignored for now
276 HEADER_FEAT_BITS = 256, 289 HEADER_FEAT_BITS = 256,
277 290
@@ -437,6 +450,17 @@ struct auxtrace_error_event {
437Describes a header feature. These are records used in pipe-mode that 450Describes a header feature. These are records used in pipe-mode that
438contain information that otherwise would be in perf.data file's header. 451contain information that otherwise would be in perf.data file's header.
439 452
453 PERF_RECORD_COMPRESSED = 81,
454
455struct compressed_event {
456 struct perf_event_header header;
457 char data[];
458};
459
460The header is followed by compressed data frame that can be decompressed
461into array of perf trace records. The size of the entire compressed event
462record including the header is limited by the max value of header.size.
463
440Event types 464Event types
441 465
442Define the event attributes with their IDs. 466Define the event attributes with their IDs.
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index 864e37597252..401f0ed67439 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt
@@ -22,6 +22,8 @@ OPTIONS
22 verbose - general debug messages 22 verbose - general debug messages
23 ordered-events - ordered events object debug messages 23 ordered-events - ordered events object debug messages
24 data-convert - data convert command debug messages 24 data-convert - data convert command debug messages
25 stderr - write debug output (option -v) to stderr
26 in browser mode
25 27
26--buildid-dir:: 28--buildid-dir::
27 Setup buildid cache directory. It has higher priority than 29 Setup buildid cache directory. It has higher priority than
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index 7f6d538f8a89..b7cd91a9014f 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -8,9 +8,10 @@
8 8
9void perf_regs_load(u64 *regs); 9void perf_regs_load(u64 *regs);
10 10
11#define PERF_REGS_MAX PERF_REG_X86_XMM_MAX
12#define PERF_XMM_REGS_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
11#ifndef HAVE_ARCH_X86_64_SUPPORT 13#ifndef HAVE_ARCH_X86_64_SUPPORT
12#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) 14#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
13#define PERF_REGS_MAX PERF_REG_X86_32_MAX
14#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32 15#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
15#else 16#else
16#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \ 17#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
@@ -18,7 +19,6 @@ void perf_regs_load(u64 *regs);
18 (1ULL << PERF_REG_X86_FS) | \ 19 (1ULL << PERF_REG_X86_FS) | \
19 (1ULL << PERF_REG_X86_GS)) 20 (1ULL << PERF_REG_X86_GS))
20#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT) 21#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
21#define PERF_REGS_MAX PERF_REG_X86_64_MAX
22#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 22#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
23#endif 23#endif
24#define PERF_REG_IP PERF_REG_X86_IP 24#define PERF_REG_IP PERF_REG_X86_IP
@@ -77,6 +77,28 @@ static inline const char *perf_reg_name(int id)
77 case PERF_REG_X86_R15: 77 case PERF_REG_X86_R15:
78 return "R15"; 78 return "R15";
79#endif /* HAVE_ARCH_X86_64_SUPPORT */ 79#endif /* HAVE_ARCH_X86_64_SUPPORT */
80
81#define XMM(x) \
82 case PERF_REG_X86_XMM ## x: \
83 case PERF_REG_X86_XMM ## x + 1: \
84 return "XMM" #x;
85 XMM(0)
86 XMM(1)
87 XMM(2)
88 XMM(3)
89 XMM(4)
90 XMM(5)
91 XMM(6)
92 XMM(7)
93 XMM(8)
94 XMM(9)
95 XMM(10)
96 XMM(11)
97 XMM(12)
98 XMM(13)
99 XMM(14)
100 XMM(15)
101#undef XMM
80 default: 102 default:
81 return NULL; 103 return NULL;
82 } 104 }
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index fead6b3b4206..7886ca5263e3 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -31,6 +31,22 @@ const struct sample_reg sample_reg_masks[] = {
31 SMPL_REG(R14, PERF_REG_X86_R14), 31 SMPL_REG(R14, PERF_REG_X86_R14),
32 SMPL_REG(R15, PERF_REG_X86_R15), 32 SMPL_REG(R15, PERF_REG_X86_R15),
33#endif 33#endif
34 SMPL_REG2(XMM0, PERF_REG_X86_XMM0),
35 SMPL_REG2(XMM1, PERF_REG_X86_XMM1),
36 SMPL_REG2(XMM2, PERF_REG_X86_XMM2),
37 SMPL_REG2(XMM3, PERF_REG_X86_XMM3),
38 SMPL_REG2(XMM4, PERF_REG_X86_XMM4),
39 SMPL_REG2(XMM5, PERF_REG_X86_XMM5),
40 SMPL_REG2(XMM6, PERF_REG_X86_XMM6),
41 SMPL_REG2(XMM7, PERF_REG_X86_XMM7),
42 SMPL_REG2(XMM8, PERF_REG_X86_XMM8),
43 SMPL_REG2(XMM9, PERF_REG_X86_XMM9),
44 SMPL_REG2(XMM10, PERF_REG_X86_XMM10),
45 SMPL_REG2(XMM11, PERF_REG_X86_XMM11),
46 SMPL_REG2(XMM12, PERF_REG_X86_XMM12),
47 SMPL_REG2(XMM13, PERF_REG_X86_XMM13),
48 SMPL_REG2(XMM14, PERF_REG_X86_XMM14),
49 SMPL_REG2(XMM15, PERF_REG_X86_XMM15),
34 SMPL_REG_END 50 SMPL_REG_END
35}; 51};
36 52
@@ -254,3 +270,31 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
254 270
255 return SDT_ARG_VALID; 271 return SDT_ARG_VALID;
256} 272}
273
274uint64_t arch__intr_reg_mask(void)
275{
276 struct perf_event_attr attr = {
277 .type = PERF_TYPE_HARDWARE,
278 .config = PERF_COUNT_HW_CPU_CYCLES,
279 .sample_type = PERF_SAMPLE_REGS_INTR,
280 .sample_regs_intr = PERF_XMM_REGS_MASK,
281 .precise_ip = 1,
282 .disabled = 1,
283 .exclude_kernel = 1,
284 };
285 int fd;
286 /*
287 * In an unnamed union, init it here to build on older gcc versions
288 */
289 attr.sample_period = 1;
290
291 event_attr_init(&attr);
292
293 fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
294 if (fd != -1) {
295 close(fd);
296 return (PERF_XMM_REGS_MASK | PERF_REGS_MASK);
297 }
298
299 return PERF_REGS_MASK;
300}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 67f9d9ffacfb..77deb3a40596 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -159,8 +159,6 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
159 struct perf_evsel *evsel = iter->evsel; 159 struct perf_evsel *evsel = iter->evsel;
160 int err; 160 int err;
161 161
162 hist__account_cycles(sample->branch_stack, al, sample, false);
163
164 bi = he->branch_info; 162 bi = he->branch_info;
165 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); 163 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
166 164
@@ -199,6 +197,8 @@ static int process_branch_callback(struct perf_evsel *evsel,
199 if (a.map != NULL) 197 if (a.map != NULL)
200 a.map->dso->hit = 1; 198 a.map->dso->hit = 1;
201 199
200 hist__account_cycles(sample->branch_stack, al, sample, false);
201
202 ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann); 202 ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
203 return ret; 203 return ret;
204} 204}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 24086b7f1b14..8e0e06d3edfc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -837,6 +837,9 @@ int cmd_inject(int argc, const char **argv)
837 if (inject.session == NULL) 837 if (inject.session == NULL)
838 return -1; 838 return -1;
839 839
840 if (zstd_init(&(inject.session->zstd_data), 0) < 0)
841 pr_warning("Decompression initialization failed.\n");
842
840 if (inject.build_ids) { 843 if (inject.build_ids) {
841 /* 844 /*
842 * to make sure the mmap records are ordered correctly 845 * to make sure the mmap records are ordered correctly
@@ -867,6 +870,7 @@ int cmd_inject(int argc, const char **argv)
867 ret = __cmd_inject(&inject); 870 ret = __cmd_inject(&inject);
868 871
869out_delete: 872out_delete:
873 zstd_fini(&(inject.session->zstd_data));
870 perf_session__delete(inject.session); 874 perf_session__delete(inject.session);
871 return ret; 875 return ret;
872} 876}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c5e10552776a..e2c3a585a61e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -133,6 +133,11 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
133 return 0; 133 return 0;
134} 134}
135 135
136static int record__aio_enabled(struct record *rec);
137static int record__comp_enabled(struct record *rec);
138static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
139 void *src, size_t src_size);
140
136#ifdef HAVE_AIO_SUPPORT 141#ifdef HAVE_AIO_SUPPORT
137static int record__aio_write(struct aiocb *cblock, int trace_fd, 142static int record__aio_write(struct aiocb *cblock, int trace_fd,
138 void *buf, size_t size, off_t off) 143 void *buf, size_t size, off_t off)
@@ -183,9 +188,9 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
183 if (rem_size == 0) { 188 if (rem_size == 0) {
184 cblock->aio_fildes = -1; 189 cblock->aio_fildes = -1;
185 /* 190 /*
186 * md->refcount is incremented in perf_mmap__push() for 191 * md->refcount is incremented in record__aio_pushfn() for
187 * every enqueued aio write request so decrement it because 192 * every aio write request started in record__aio_push() so
188 * the request is now complete. 193 * decrement it because the request is now complete.
189 */ 194 */
190 perf_mmap__put(md); 195 perf_mmap__put(md);
191 rc = 1; 196 rc = 1;
@@ -240,18 +245,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all)
240 } while (1); 245 } while (1);
241} 246}
242 247
243static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off) 248struct record_aio {
249 struct record *rec;
250 void *data;
251 size_t size;
252};
253
254static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
244{ 255{
245 struct record *rec = to; 256 struct record_aio *aio = to;
246 int ret, trace_fd = rec->session->data->file.fd;
247 257
248 rec->samples++; 258 /*
259 * map->base data pointed by buf is copied into free map->aio.data[] buffer
260 * to release space in the kernel buffer as fast as possible, calling
261 * perf_mmap__consume() from perf_mmap__push() function.
262 *
263 * That lets the kernel to proceed with storing more profiling data into
264 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
265 *
266 * Coping can be done in two steps in case the chunk of profiling data
267 * crosses the upper bound of the kernel buffer. In this case we first move
268 * part of data from map->start till the upper bound and then the reminder
269 * from the beginning of the kernel buffer till the end of the data chunk.
270 */
271
272 if (record__comp_enabled(aio->rec)) {
273 size = zstd_compress(aio->rec->session, aio->data + aio->size,
274 perf_mmap__mmap_len(map) - aio->size,
275 buf, size);
276 } else {
277 memcpy(aio->data + aio->size, buf, size);
278 }
279
280 if (!aio->size) {
281 /*
282 * Increment map->refcount to guard map->aio.data[] buffer
283 * from premature deallocation because map object can be
284 * released earlier than aio write request started on
285 * map->aio.data[] buffer is complete.
286 *
287 * perf_mmap__put() is done at record__aio_complete()
288 * after started aio request completion or at record__aio_push()
289 * if the request failed to start.
290 */
291 perf_mmap__get(map);
292 }
293
294 aio->size += size;
295
296 return size;
297}
249 298
250 ret = record__aio_write(cblock, trace_fd, bf, size, off); 299static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
300{
301 int ret, idx;
302 int trace_fd = rec->session->data->file.fd;
303 struct record_aio aio = { .rec = rec, .size = 0 };
304
305 /*
306 * Call record__aio_sync() to wait till map->aio.data[] buffer
307 * becomes available after previous aio write operation.
308 */
309
310 idx = record__aio_sync(map, false);
311 aio.data = map->aio.data[idx];
312 ret = perf_mmap__push(map, &aio, record__aio_pushfn);
313 if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
314 return ret;
315
316 rec->samples++;
317 ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
251 if (!ret) { 318 if (!ret) {
252 rec->bytes_written += size; 319 *off += aio.size;
320 rec->bytes_written += aio.size;
253 if (switch_output_size(rec)) 321 if (switch_output_size(rec))
254 trigger_hit(&switch_output_trigger); 322 trigger_hit(&switch_output_trigger);
323 } else {
324 /*
325 * Decrement map->refcount incremented in record__aio_pushfn()
326 * back if record__aio_write() operation failed to start, otherwise
327 * map->refcount is decremented in record__aio_complete() after
328 * aio write operation finishes successfully.
329 */
330 perf_mmap__put(map);
255 } 331 }
256 332
257 return ret; 333 return ret;
@@ -273,7 +349,7 @@ static void record__aio_mmap_read_sync(struct record *rec)
273 struct perf_evlist *evlist = rec->evlist; 349 struct perf_evlist *evlist = rec->evlist;
274 struct perf_mmap *maps = evlist->mmap; 350 struct perf_mmap *maps = evlist->mmap;
275 351
276 if (!rec->opts.nr_cblocks) 352 if (!record__aio_enabled(rec))
277 return; 353 return;
278 354
279 for (i = 0; i < evlist->nr_mmaps; i++) { 355 for (i = 0; i < evlist->nr_mmaps; i++) {
@@ -307,13 +383,8 @@ static int record__aio_parse(const struct option *opt,
307#else /* HAVE_AIO_SUPPORT */ 383#else /* HAVE_AIO_SUPPORT */
308static int nr_cblocks_max = 0; 384static int nr_cblocks_max = 0;
309 385
310static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused) 386static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
311{ 387 off_t *off __maybe_unused)
312 return -1;
313}
314
315static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
317{ 388{
318 return -1; 389 return -1;
319} 390}
@@ -372,6 +443,32 @@ static int record__mmap_flush_parse(const struct option *opt,
372 return 0; 443 return 0;
373} 444}
374 445
446#ifdef HAVE_ZSTD_SUPPORT
447static unsigned int comp_level_default = 1;
448
449static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
450{
451 struct record_opts *opts = opt->value;
452
453 if (unset) {
454 opts->comp_level = 0;
455 } else {
456 if (str)
457 opts->comp_level = strtol(str, NULL, 0);
458 if (!opts->comp_level)
459 opts->comp_level = comp_level_default;
460 }
461
462 return 0;
463}
464#endif
465static unsigned int comp_level_max = 22;
466
467static int record__comp_enabled(struct record *rec)
468{
469 return rec->opts.comp_level > 0;
470}
471
375static int process_synthesized_event(struct perf_tool *tool, 472static int process_synthesized_event(struct perf_tool *tool,
376 union perf_event *event, 473 union perf_event *event,
377 struct perf_sample *sample __maybe_unused, 474 struct perf_sample *sample __maybe_unused,
@@ -385,6 +482,11 @@ static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size
385{ 482{
386 struct record *rec = to; 483 struct record *rec = to;
387 484
485 if (record__comp_enabled(rec)) {
486 size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
487 bf = map->data;
488 }
489
388 rec->samples++; 490 rec->samples++;
389 return record__write(rec, map, bf, size); 491 return record__write(rec, map, bf, size);
390} 492}
@@ -582,7 +684,7 @@ static int record__mmap_evlist(struct record *rec,
582 opts->auxtrace_mmap_pages, 684 opts->auxtrace_mmap_pages,
583 opts->auxtrace_snapshot_mode, 685 opts->auxtrace_snapshot_mode,
584 opts->nr_cblocks, opts->affinity, 686 opts->nr_cblocks, opts->affinity,
585 opts->mmap_flush) < 0) { 687 opts->mmap_flush, opts->comp_level) < 0) {
586 if (errno == EPERM) { 688 if (errno == EPERM) {
587 pr_err("Permission error mapping pages.\n" 689 pr_err("Permission error mapping pages.\n"
588 "Consider increasing " 690 "Consider increasing "
@@ -771,6 +873,37 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
771 } 873 }
772} 874}
773 875
876static size_t process_comp_header(void *record, size_t increment)
877{
878 struct compressed_event *event = record;
879 size_t size = sizeof(*event);
880
881 if (increment) {
882 event->header.size += increment;
883 return increment;
884 }
885
886 event->header.type = PERF_RECORD_COMPRESSED;
887 event->header.size = size;
888
889 return size;
890}
891
892static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
893 void *src, size_t src_size)
894{
895 size_t compressed;
896 size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1;
897
898 compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
899 max_record_size, process_comp_header);
900
901 session->bytes_transferred += src_size;
902 session->bytes_compressed += compressed;
903
904 return compressed;
905}
906
774static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, 907static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
775 bool overwrite, bool synch) 908 bool overwrite, bool synch)
776{ 909{
@@ -779,7 +912,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
779 int rc = 0; 912 int rc = 0;
780 struct perf_mmap *maps; 913 struct perf_mmap *maps;
781 int trace_fd = rec->data.file.fd; 914 int trace_fd = rec->data.file.fd;
782 off_t off; 915 off_t off = 0;
783 916
784 if (!evlist) 917 if (!evlist)
785 return 0; 918 return 0;
@@ -805,20 +938,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli
805 map->flush = 1; 938 map->flush = 1;
806 } 939 }
807 if (!record__aio_enabled(rec)) { 940 if (!record__aio_enabled(rec)) {
808 if (perf_mmap__push(map, rec, record__pushfn) != 0) { 941 if (perf_mmap__push(map, rec, record__pushfn) < 0) {
809 if (synch) 942 if (synch)
810 map->flush = flush; 943 map->flush = flush;
811 rc = -1; 944 rc = -1;
812 goto out; 945 goto out;
813 } 946 }
814 } else { 947 } else {
815 int idx; 948 if (record__aio_push(rec, map, &off) < 0) {
816 /*
817 * Call record__aio_sync() to wait till map->data buffer
818 * becomes available after previous aio write request.
819 */
820 idx = record__aio_sync(map, false);
821 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
822 record__aio_set_pos(trace_fd, off); 949 record__aio_set_pos(trace_fd, off);
823 if (synch) 950 if (synch)
824 map->flush = flush; 951 map->flush = flush;
@@ -888,6 +1015,8 @@ static void record__init_features(struct record *rec)
888 perf_header__clear_feat(&session->header, HEADER_CLOCKID); 1015 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
889 1016
890 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); 1017 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
1018 if (!record__comp_enabled(rec))
1019 perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
891 1020
892 perf_header__clear_feat(&session->header, HEADER_STAT); 1021 perf_header__clear_feat(&session->header, HEADER_STAT);
893} 1022}
@@ -1186,6 +1315,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
1186 bool disabled = false, draining = false; 1315 bool disabled = false, draining = false;
1187 struct perf_evlist *sb_evlist = NULL; 1316 struct perf_evlist *sb_evlist = NULL;
1188 int fd; 1317 int fd;
1318 float ratio = 0;
1189 1319
1190 atexit(record__sig_exit); 1320 atexit(record__sig_exit);
1191 signal(SIGCHLD, sig_handler); 1321 signal(SIGCHLD, sig_handler);
@@ -1215,6 +1345,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
1215 fd = perf_data__fd(data); 1345 fd = perf_data__fd(data);
1216 rec->session = session; 1346 rec->session = session;
1217 1347
1348 if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
1349 pr_err("Compression initialization failed.\n");
1350 return -1;
1351 }
1352
1353 session->header.env.comp_type = PERF_COMP_ZSTD;
1354 session->header.env.comp_level = rec->opts.comp_level;
1355
1218 record__init_features(rec); 1356 record__init_features(rec);
1219 1357
1220 if (rec->opts.use_clockid && rec->opts.clockid_res_ns) 1358 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@@ -1244,6 +1382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
1244 err = -1; 1382 err = -1;
1245 goto out_child; 1383 goto out_child;
1246 } 1384 }
1385 session->header.env.comp_mmap_len = session->evlist->mmap_len;
1247 1386
1248 err = bpf__apply_obj_config(); 1387 err = bpf__apply_obj_config();
1249 if (err) { 1388 if (err) {
@@ -1491,6 +1630,11 @@ out_child:
1491 record__mmap_read_all(rec, true); 1630 record__mmap_read_all(rec, true);
1492 record__aio_mmap_read_sync(rec); 1631 record__aio_mmap_read_sync(rec);
1493 1632
1633 if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
1634 ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
1635 session->header.env.comp_ratio = ratio + 0.5;
1636 }
1637
1494 if (forks) { 1638 if (forks) {
1495 int exit_status; 1639 int exit_status;
1496 1640
@@ -1537,12 +1681,19 @@ out_child:
1537 else 1681 else
1538 samples[0] = '\0'; 1682 samples[0] = '\0';
1539 1683
1540 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n", 1684 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s",
1541 perf_data__size(data) / 1024.0 / 1024.0, 1685 perf_data__size(data) / 1024.0 / 1024.0,
1542 data->path, postfix, samples); 1686 data->path, postfix, samples);
1687 if (ratio) {
1688 fprintf(stderr, ", compressed (original %.3f MB, ratio is %.3f)",
1689 rec->session->bytes_transferred / 1024.0 / 1024.0,
1690 ratio);
1691 }
1692 fprintf(stderr, " ]\n");
1543 } 1693 }
1544 1694
1545out_delete_session: 1695out_delete_session:
1696 zstd_fini(&session->zstd_data);
1546 perf_session__delete(session); 1697 perf_session__delete(session);
1547 1698
1548 if (!opts->no_bpf_event) 1699 if (!opts->no_bpf_event)
@@ -2017,10 +2168,10 @@ static struct option __record_options[] = {
2017 "use per-thread mmaps"), 2168 "use per-thread mmaps"),
2018 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", 2169 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
2019 "sample selected machine registers on interrupt," 2170 "sample selected machine registers on interrupt,"
2020 " use -I ? to list register names", parse_regs), 2171 " use '-I?' to list register names", parse_intr_regs),
2021 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register", 2172 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
2022 "sample selected machine registers on interrupt," 2173 "sample selected machine registers on interrupt,"
2023 " use -I ? to list register names", parse_regs), 2174 " use '--user-regs=?' to list register names", parse_user_regs),
2024 OPT_BOOLEAN(0, "running-time", &record.opts.running_time, 2175 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
2025 "Record running/enabled time of read (:S) events"), 2176 "Record running/enabled time of read (:S) events"),
2026 OPT_CALLBACK('k', "clockid", &record.opts, 2177 OPT_CALLBACK('k', "clockid", &record.opts,
@@ -2068,6 +2219,11 @@ static struct option __record_options[] = {
2068 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu", 2219 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2069 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer", 2220 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2070 record__parse_affinity), 2221 record__parse_affinity),
2222#ifdef HAVE_ZSTD_SUPPORT
2223 OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
2224 "n", "Compressed records using specified level (default: 1 - fastest compression, 22 - greatest compression)",
2225 record__parse_comp_level),
2226#endif
2071 OPT_END() 2227 OPT_END()
2072}; 2228};
2073 2229
@@ -2127,6 +2283,12 @@ int cmd_record(int argc, const char **argv)
2127 "cgroup monitoring only available in system-wide mode"); 2283 "cgroup monitoring only available in system-wide mode");
2128 2284
2129 } 2285 }
2286
2287 if (rec->opts.comp_level != 0) {
2288 pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
2289 rec->no_buildid = true;
2290 }
2291
2130 if (rec->opts.record_switch_events && 2292 if (rec->opts.record_switch_events &&
2131 !perf_can_record_switch_events()) { 2293 !perf_can_record_switch_events()) {
2132 ui__error("kernel does not support recording context switch events\n"); 2294 ui__error("kernel does not support recording context switch events\n");
@@ -2272,12 +2434,15 @@ int cmd_record(int argc, const char **argv)
2272 2434
2273 if (rec->opts.nr_cblocks > nr_cblocks_max) 2435 if (rec->opts.nr_cblocks > nr_cblocks_max)
2274 rec->opts.nr_cblocks = nr_cblocks_max; 2436 rec->opts.nr_cblocks = nr_cblocks_max;
2275 if (verbose > 0) 2437 pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2276 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2277 2438
2278 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]); 2439 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2279 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush); 2440 pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
2280 2441
2442 if (rec->opts.comp_level > comp_level_max)
2443 rec->opts.comp_level = comp_level_max;
2444 pr_debug("comp level: %d\n", rec->opts.comp_level);
2445
2281 err = __cmd_record(&record, argc, argv); 2446 err = __cmd_record(&record, argc, argv);
2282out: 2447out:
2283 perf_evlist__delete(rec->evlist); 2448 perf_evlist__delete(rec->evlist);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4054eb1f98ac..1ca533f06a4c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -136,9 +136,6 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter,
136 if (!ui__has_annotation() && !rep->symbol_ipc) 136 if (!ui__has_annotation() && !rep->symbol_ipc)
137 return 0; 137 return 0;
138 138
139 hist__account_cycles(sample->branch_stack, al, sample,
140 rep->nonany_branch_mode);
141
142 if (sort__mode == SORT_MODE__BRANCH) { 139 if (sort__mode == SORT_MODE__BRANCH) {
143 bi = he->branch_info; 140 bi = he->branch_info;
144 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); 141 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
@@ -181,9 +178,6 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
181 if (!ui__has_annotation() && !rep->symbol_ipc) 178 if (!ui__has_annotation() && !rep->symbol_ipc)
182 return 0; 179 return 0;
183 180
184 hist__account_cycles(sample->branch_stack, al, sample,
185 rep->nonany_branch_mode);
186
187 bi = he->branch_info; 181 bi = he->branch_info;
188 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel); 182 err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
189 if (err) 183 if (err)
@@ -282,6 +276,11 @@ static int process_sample_event(struct perf_tool *tool,
282 if (al.map != NULL) 276 if (al.map != NULL)
283 al.map->dso->hit = 1; 277 al.map->dso->hit = 1;
284 278
279 if (ui__has_annotation() || rep->symbol_ipc) {
280 hist__account_cycles(sample->branch_stack, &al, sample,
281 rep->nonany_branch_mode);
282 }
283
285 ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep); 284 ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
286 if (ret < 0) 285 if (ret < 0)
287 pr_debug("problem adding hist entry, skipping event\n"); 286 pr_debug("problem adding hist entry, skipping event\n");
@@ -1259,6 +1258,9 @@ repeat:
1259 if (session == NULL) 1258 if (session == NULL)
1260 return -1; 1259 return -1;
1261 1260
1261 if (zstd_init(&(session->zstd_data), 0) < 0)
1262 pr_warning("Decompression initialization failed. Reported data may be incomplete.\n");
1263
1262 if (report.queue_size) { 1264 if (report.queue_size) {
1263 ordered_events__set_alloc_size(&session->ordered_events, 1265 ordered_events__set_alloc_size(&session->ordered_events,
1264 report.queue_size); 1266 report.queue_size);
@@ -1449,7 +1451,7 @@ repeat:
1449error: 1451error:
1450 if (report.ptime_range) 1452 if (report.ptime_range)
1451 zfree(&report.ptime_range); 1453 zfree(&report.ptime_range);
1452 1454 zstd_fini(&(session->zstd_data));
1453 perf_session__delete(session); 1455 perf_session__delete(session);
1454 return ret; 1456 return ret;
1455} 1457}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a3c060878faa..24b8e690fb69 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -847,6 +847,18 @@ static int perf_stat__get_core_cached(struct perf_stat_config *config,
847 return perf_stat__get_aggr(config, perf_stat__get_core, map, idx); 847 return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
848} 848}
849 849
850static bool term_percore_set(void)
851{
852 struct perf_evsel *counter;
853
854 evlist__for_each_entry(evsel_list, counter) {
855 if (counter->percore)
856 return true;
857 }
858
859 return false;
860}
861
850static int perf_stat_init_aggr_mode(void) 862static int perf_stat_init_aggr_mode(void)
851{ 863{
852 int nr; 864 int nr;
@@ -867,6 +879,15 @@ static int perf_stat_init_aggr_mode(void)
867 stat_config.aggr_get_id = perf_stat__get_core_cached; 879 stat_config.aggr_get_id = perf_stat__get_core_cached;
868 break; 880 break;
869 case AGGR_NONE: 881 case AGGR_NONE:
882 if (term_percore_set()) {
883 if (cpu_map__build_core_map(evsel_list->cpus,
884 &stat_config.aggr_map)) {
885 perror("cannot build core map");
886 return -1;
887 }
888 stat_config.aggr_get_id = perf_stat__get_core_cached;
889 }
890 break;
870 case AGGR_GLOBAL: 891 case AGGR_GLOBAL:
871 case AGGR_THREAD: 892 case AGGR_THREAD:
872 case AGGR_UNSET: 893 case AGGR_UNSET:
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 369eae61068d..d59dee61b64d 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -86,6 +86,7 @@ struct record_opts {
86 int nr_cblocks; 86 int nr_cblocks;
87 int affinity; 87 int affinity;
88 int mmap_flush; 88 int mmap_flush;
89 unsigned int comp_level;
89}; 90};
90 91
91enum perf_affinity { 92enum perf_affinity {
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json
new file mode 100644
index 000000000000..0ac9b7927450
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a57-a72/core-imp-def.json
@@ -0,0 +1,179 @@
1[
2 {
3 "ArchStdEvent": "L1D_CACHE_RD",
4 },
5 {
6 "ArchStdEvent": "L1D_CACHE_WR",
7 },
8 {
9 "ArchStdEvent": "L1D_CACHE_REFILL_RD",
10 },
11 {
12 "ArchStdEvent": "L1D_CACHE_REFILL_WR",
13 },
14 {
15 "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
16 },
17 {
18 "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
19 },
20 {
21 "ArchStdEvent": "L1D_CACHE_INVAL",
22 },
23 {
24 "ArchStdEvent": "L1D_TLB_REFILL_RD",
25 },
26 {
27 "ArchStdEvent": "L1D_TLB_REFILL_WR",
28 },
29 {
30 "ArchStdEvent": "L2D_CACHE_RD",
31 },
32 {
33 "ArchStdEvent": "L2D_CACHE_WR",
34 },
35 {
36 "ArchStdEvent": "L2D_CACHE_REFILL_RD",
37 },
38 {
39 "ArchStdEvent": "L2D_CACHE_REFILL_WR",
40 },
41 {
42 "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
43 },
44 {
45 "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
46 },
47 {
48 "ArchStdEvent": "L2D_CACHE_INVAL",
49 },
50 {
51 "ArchStdEvent": "BUS_ACCESS_RD",
52 },
53 {
54 "ArchStdEvent": "BUS_ACCESS_WR",
55 },
56 {
57 "ArchStdEvent": "BUS_ACCESS_SHARED",
58 },
59 {
60 "ArchStdEvent": "BUS_ACCESS_NOT_SHARED",
61 },
62 {
63 "ArchStdEvent": "BUS_ACCESS_NORMAL",
64 },
65 {
66 "ArchStdEvent": "BUS_ACCESS_PERIPH",
67 },
68 {
69 "ArchStdEvent": "MEM_ACCESS_RD",
70 },
71 {
72 "ArchStdEvent": "MEM_ACCESS_WR",
73 },
74 {
75 "ArchStdEvent": "UNALIGNED_LD_SPEC",
76 },
77 {
78 "ArchStdEvent": "UNALIGNED_ST_SPEC",
79 },
80 {
81 "ArchStdEvent": "UNALIGNED_LDST_SPEC",
82 },
83 {
84 "ArchStdEvent": "LDREX_SPEC",
85 },
86 {
87 "ArchStdEvent": "STREX_PASS_SPEC",
88 },
89 {
90 "ArchStdEvent": "STREX_FAIL_SPEC",
91 },
92 {
93 "ArchStdEvent": "LD_SPEC",
94 },
95 {
96 "ArchStdEvent": "ST_SPEC",
97 },
98 {
99 "ArchStdEvent": "LDST_SPEC",
100 },
101 {
102 "ArchStdEvent": "DP_SPEC",
103 },
104 {
105 "ArchStdEvent": "ASE_SPEC",
106 },
107 {
108 "ArchStdEvent": "VFP_SPEC",
109 },
110 {
111 "ArchStdEvent": "PC_WRITE_SPEC",
112 },
113 {
114 "ArchStdEvent": "CRYPTO_SPEC",
115 },
116 {
117 "ArchStdEvent": "BR_IMMED_SPEC",
118 },
119 {
120 "ArchStdEvent": "BR_RETURN_SPEC",
121 },
122 {
123 "ArchStdEvent": "BR_INDIRECT_SPEC",
124 },
125 {
126 "ArchStdEvent": "ISB_SPEC",
127 },
128 {
129 "ArchStdEvent": "DSB_SPEC",
130 },
131 {
132 "ArchStdEvent": "DMB_SPEC",
133 },
134 {
135 "ArchStdEvent": "EXC_UNDEF",
136 },
137 {
138 "ArchStdEvent": "EXC_SVC",
139 },
140 {
141 "ArchStdEvent": "EXC_PABORT",
142 },
143 {
144 "ArchStdEvent": "EXC_DABORT",
145 },
146 {
147 "ArchStdEvent": "EXC_IRQ",
148 },
149 {
150 "ArchStdEvent": "EXC_FIQ",
151 },
152 {
153 "ArchStdEvent": "EXC_SMC",
154 },
155 {
156 "ArchStdEvent": "EXC_HVC",
157 },
158 {
159 "ArchStdEvent": "EXC_TRAP_PABORT",
160 },
161 {
162 "ArchStdEvent": "EXC_TRAP_DABORT",
163 },
164 {
165 "ArchStdEvent": "EXC_TRAP_OTHER",
166 },
167 {
168 "ArchStdEvent": "EXC_TRAP_IRQ",
169 },
170 {
171 "ArchStdEvent": "EXC_TRAP_FIQ",
172 },
173 {
174 "ArchStdEvent": "RC_LD_SPEC",
175 },
176 {
177 "ArchStdEvent": "RC_ST_SPEC",
178 },
179]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index 59cd8604b0bd..927fcddcb4aa 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -12,7 +12,10 @@
12# 12#
13# 13#
14#Family-model,Version,Filename,EventType 14#Family-model,Version,Filename,EventType
150x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core 150x00000000410fd030,v1,arm/cortex-a53,core
160x00000000420f1000,v1,arm/cortex-a53,core
170x00000000410fd070,v1,arm/cortex-a57-a72,core
180x00000000410fd080,v1,arm/cortex-a57-a72,core
160x00000000420f5160,v1,cavium/thunderx2,core 190x00000000420f5160,v1,cavium/thunderx2,core
170x00000000430f0af0,v1,cavium/thunderx2,core 200x00000000430f0af0,v1,cavium/thunderx2,core
180x00000000480fd010,v1,hisilicon/hip08,core 210x00000000480fd010,v1,hisilicon/hip08,core
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 68c92bb599ee..58f77fd0f59f 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -235,6 +235,7 @@ static struct map {
235 { "iMPH-U", "uncore_arb" }, 235 { "iMPH-U", "uncore_arb" },
236 { "CPU-M-CF", "cpum_cf" }, 236 { "CPU-M-CF", "cpum_cf" },
237 { "CPU-M-SF", "cpum_sf" }, 237 { "CPU-M-SF", "cpum_sf" },
238 { "UPI LL", "uncore_upi" },
238 {} 239 {}
239}; 240};
240 241
@@ -414,7 +415,6 @@ static int save_arch_std_events(void *data, char *name, char *event,
414 char *metric_name, char *metric_group) 415 char *metric_name, char *metric_group)
415{ 416{
416 struct event_struct *es; 417 struct event_struct *es;
417 struct stat *sb = data;
418 418
419 es = malloc(sizeof(*es)); 419 es = malloc(sizeof(*es));
420 if (!es) 420 if (!es)
diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py
index 74ef92f1d19a..affed7d149be 100755
--- a/tools/perf/scripts/python/exported-sql-viewer.py
+++ b/tools/perf/scripts/python/exported-sql-viewer.py
@@ -456,6 +456,10 @@ class CallGraphLevelItemBase(object):
456 self.query_done = False; 456 self.query_done = False;
457 self.child_count = 0 457 self.child_count = 0
458 self.child_items = [] 458 self.child_items = []
459 if parent_item:
460 self.level = parent_item.level + 1
461 else:
462 self.level = 0
459 463
460 def getChildItem(self, row): 464 def getChildItem(self, row):
461 return self.child_items[row] 465 return self.child_items[row]
@@ -877,9 +881,14 @@ class TreeWindowBase(QMdiSubWindow):
877 super(TreeWindowBase, self).__init__(parent) 881 super(TreeWindowBase, self).__init__(parent)
878 882
879 self.model = None 883 self.model = None
880 self.view = None
881 self.find_bar = None 884 self.find_bar = None
882 885
886 self.view = QTreeView()
887 self.view.setSelectionMode(QAbstractItemView.ContiguousSelection)
888 self.view.CopyCellsToClipboard = CopyTreeCellsToClipboard
889
890 self.context_menu = TreeContextMenu(self.view)
891
883 def DisplayFound(self, ids): 892 def DisplayFound(self, ids):
884 if not len(ids): 893 if not len(ids):
885 return False 894 return False
@@ -921,7 +930,6 @@ class CallGraphWindow(TreeWindowBase):
921 930
922 self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x)) 931 self.model = LookupCreateModel("Context-Sensitive Call Graph", lambda x=glb: CallGraphModel(x))
923 932
924 self.view = QTreeView()
925 self.view.setModel(self.model) 933 self.view.setModel(self.model)
926 934
927 for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)): 935 for c, w in ((0, 250), (1, 100), (2, 60), (3, 70), (4, 70), (5, 100)):
@@ -944,7 +952,6 @@ class CallTreeWindow(TreeWindowBase):
944 952
945 self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x)) 953 self.model = LookupCreateModel("Call Tree", lambda x=glb: CallTreeModel(x))
946 954
947 self.view = QTreeView()
948 self.view.setModel(self.model) 955 self.view.setModel(self.model)
949 956
950 for c, w in ((0, 230), (1, 100), (2, 100), (3, 70), (4, 70), (5, 100)): 957 for c, w in ((0, 230), (1, 100), (2, 100), (3, 70), (4, 70), (5, 100)):
@@ -1649,10 +1656,14 @@ class BranchWindow(QMdiSubWindow):
1649 1656
1650 self.view = QTreeView() 1657 self.view = QTreeView()
1651 self.view.setUniformRowHeights(True) 1658 self.view.setUniformRowHeights(True)
1659 self.view.setSelectionMode(QAbstractItemView.ContiguousSelection)
1660 self.view.CopyCellsToClipboard = CopyTreeCellsToClipboard
1652 self.view.setModel(self.model) 1661 self.view.setModel(self.model)
1653 1662
1654 self.ResizeColumnsToContents() 1663 self.ResizeColumnsToContents()
1655 1664
1665 self.context_menu = TreeContextMenu(self.view)
1666
1656 self.find_bar = FindBar(self, self, True) 1667 self.find_bar = FindBar(self, self, True)
1657 1668
1658 self.finder = ChildDataItemFinder(self.model.root) 1669 self.finder = ChildDataItemFinder(self.model.root)
@@ -2261,6 +2272,240 @@ class ResizeColumnsToContentsBase(QObject):
2261 self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths) 2272 self.data_model.rowsInserted.disconnect(self.UpdateColumnWidths)
2262 self.ResizeColumnsToContents() 2273 self.ResizeColumnsToContents()
2263 2274
2275# Convert value to CSV
2276
2277def ToCSValue(val):
2278 if '"' in val:
2279 val = val.replace('"', '""')
2280 if "," in val or '"' in val:
2281 val = '"' + val + '"'
2282 return val
2283
2284# Key to sort table model indexes by row / column, assuming fewer than 1000 columns
2285
2286glb_max_cols = 1000
2287
2288def RowColumnKey(a):
2289 return a.row() * glb_max_cols + a.column()
2290
2291# Copy selected table cells to clipboard
2292
2293def CopyTableCellsToClipboard(view, as_csv=False, with_hdr=False):
2294 indexes = sorted(view.selectedIndexes(), key=RowColumnKey)
2295 idx_cnt = len(indexes)
2296 if not idx_cnt:
2297 return
2298 if idx_cnt == 1:
2299 with_hdr=False
2300 min_row = indexes[0].row()
2301 max_row = indexes[0].row()
2302 min_col = indexes[0].column()
2303 max_col = indexes[0].column()
2304 for i in indexes:
2305 min_row = min(min_row, i.row())
2306 max_row = max(max_row, i.row())
2307 min_col = min(min_col, i.column())
2308 max_col = max(max_col, i.column())
2309 if max_col > glb_max_cols:
2310 raise RuntimeError("glb_max_cols is too low")
2311 max_width = [0] * (1 + max_col - min_col)
2312 for i in indexes:
2313 c = i.column() - min_col
2314 max_width[c] = max(max_width[c], len(str(i.data())))
2315 text = ""
2316 pad = ""
2317 sep = ""
2318 if with_hdr:
2319 model = indexes[0].model()
2320 for col in range(min_col, max_col + 1):
2321 val = model.headerData(col, Qt.Horizontal)
2322 if as_csv:
2323 text += sep + ToCSValue(val)
2324 sep = ","
2325 else:
2326 c = col - min_col
2327 max_width[c] = max(max_width[c], len(val))
2328 width = max_width[c]
2329 align = model.headerData(col, Qt.Horizontal, Qt.TextAlignmentRole)
2330 if align & Qt.AlignRight:
2331 val = val.rjust(width)
2332 text += pad + sep + val
2333 pad = " " * (width - len(val))
2334 sep = " "
2335 text += "\n"
2336 pad = ""
2337 sep = ""
2338 last_row = min_row
2339 for i in indexes:
2340 if i.row() > last_row:
2341 last_row = i.row()
2342 text += "\n"
2343 pad = ""
2344 sep = ""
2345 if as_csv:
2346 text += sep + ToCSValue(str(i.data()))
2347 sep = ","
2348 else:
2349 width = max_width[i.column() - min_col]
2350 if i.data(Qt.TextAlignmentRole) & Qt.AlignRight:
2351 val = str(i.data()).rjust(width)
2352 else:
2353 val = str(i.data())
2354 text += pad + sep + val
2355 pad = " " * (width - len(val))
2356 sep = " "
2357 QApplication.clipboard().setText(text)
2358
2359def CopyTreeCellsToClipboard(view, as_csv=False, with_hdr=False):
2360 indexes = view.selectedIndexes()
2361 if not len(indexes):
2362 return
2363
2364 selection = view.selectionModel()
2365
2366 first = None
2367 for i in indexes:
2368 above = view.indexAbove(i)
2369 if not selection.isSelected(above):
2370 first = i
2371 break
2372
2373 if first is None:
2374 raise RuntimeError("CopyTreeCellsToClipboard internal error")
2375
2376 model = first.model()
2377 row_cnt = 0
2378 col_cnt = model.columnCount(first)
2379 max_width = [0] * col_cnt
2380
2381 indent_sz = 2
2382 indent_str = " " * indent_sz
2383
2384 expanded_mark_sz = 2
2385 if sys.version_info[0] == 3:
2386 expanded_mark = "\u25BC "
2387 not_expanded_mark = "\u25B6 "
2388 else:
2389 expanded_mark = unicode(chr(0xE2) + chr(0x96) + chr(0xBC) + " ", "utf-8")
2390 not_expanded_mark = unicode(chr(0xE2) + chr(0x96) + chr(0xB6) + " ", "utf-8")
2391 leaf_mark = " "
2392
2393 if not as_csv:
2394 pos = first
2395 while True:
2396 row_cnt += 1
2397 row = pos.row()
2398 for c in range(col_cnt):
2399 i = pos.sibling(row, c)
2400 if c:
2401 n = len(str(i.data()))
2402 else:
2403 n = len(str(i.data()).strip())
2404 n += (i.internalPointer().level - 1) * indent_sz
2405 n += expanded_mark_sz
2406 max_width[c] = max(max_width[c], n)
2407 pos = view.indexBelow(pos)
2408 if not selection.isSelected(pos):
2409 break
2410
2411 text = ""
2412 pad = ""
2413 sep = ""
2414 if with_hdr:
2415 for c in range(col_cnt):
2416 val = model.headerData(c, Qt.Horizontal, Qt.DisplayRole).strip()
2417 if as_csv:
2418 text += sep + ToCSValue(val)
2419 sep = ","
2420 else:
2421 max_width[c] = max(max_width[c], len(val))
2422 width = max_width[c]
2423 align = model.headerData(c, Qt.Horizontal, Qt.TextAlignmentRole)
2424 if align & Qt.AlignRight:
2425 val = val.rjust(width)
2426 text += pad + sep + val
2427 pad = " " * (width - len(val))
2428 sep = " "
2429 text += "\n"
2430 pad = ""
2431 sep = ""
2432
2433 pos = first
2434 while True:
2435 row = pos.row()
2436 for c in range(col_cnt):
2437 i = pos.sibling(row, c)
2438 val = str(i.data())
2439 if not c:
2440 if model.hasChildren(i):
2441 if view.isExpanded(i):
2442 mark = expanded_mark
2443 else:
2444 mark = not_expanded_mark
2445 else:
2446 mark = leaf_mark
2447 val = indent_str * (i.internalPointer().level - 1) + mark + val.strip()
2448 if as_csv:
2449 text += sep + ToCSValue(val)
2450 sep = ","
2451 else:
2452 width = max_width[c]
2453 if c and i.data(Qt.TextAlignmentRole) & Qt.AlignRight:
2454 val = val.rjust(width)
2455 text += pad + sep + val
2456 pad = " " * (width - len(val))
2457 sep = " "
2458 pos = view.indexBelow(pos)
2459 if not selection.isSelected(pos):
2460 break
2461 text = text.rstrip() + "\n"
2462 pad = ""
2463 sep = ""
2464
2465 QApplication.clipboard().setText(text)
2466
2467def CopyCellsToClipboard(view, as_csv=False, with_hdr=False):
2468 view.CopyCellsToClipboard(view, as_csv, with_hdr)
2469
2470def CopyCellsToClipboardHdr(view):
2471 CopyCellsToClipboard(view, False, True)
2472
2473def CopyCellsToClipboardCSV(view):
2474 CopyCellsToClipboard(view, True, True)
2475
2476# Context menu
2477
2478class ContextMenu(object):
2479
2480 def __init__(self, view):
2481 self.view = view
2482 self.view.setContextMenuPolicy(Qt.CustomContextMenu)
2483 self.view.customContextMenuRequested.connect(self.ShowContextMenu)
2484
2485 def ShowContextMenu(self, pos):
2486 menu = QMenu(self.view)
2487 self.AddActions(menu)
2488 menu.exec_(self.view.mapToGlobal(pos))
2489
2490 def AddCopy(self, menu):
2491 menu.addAction(CreateAction("&Copy selection", "Copy to clipboard", lambda: CopyCellsToClipboardHdr(self.view), self.view))
2492 menu.addAction(CreateAction("Copy selection as CS&V", "Copy to clipboard as CSV", lambda: CopyCellsToClipboardCSV(self.view), self.view))
2493
2494 def AddActions(self, menu):
2495 self.AddCopy(menu)
2496
2497class TreeContextMenu(ContextMenu):
2498
2499 def __init__(self, view):
2500 super(TreeContextMenu, self).__init__(view)
2501
2502 def AddActions(self, menu):
2503 i = self.view.currentIndex()
2504 text = str(i.data()).strip()
2505 if len(text):
2506 menu.addAction(CreateAction('Copy "' + text + '"', "Copy to clipboard", lambda: QApplication.clipboard().setText(text), self.view))
2507 self.AddCopy(menu)
2508
2264# Table window 2509# Table window
2265 2510
2266class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase): 2511class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
@@ -2279,9 +2524,13 @@ class TableWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
2279 self.view.verticalHeader().setVisible(False) 2524 self.view.verticalHeader().setVisible(False)
2280 self.view.sortByColumn(-1, Qt.AscendingOrder) 2525 self.view.sortByColumn(-1, Qt.AscendingOrder)
2281 self.view.setSortingEnabled(True) 2526 self.view.setSortingEnabled(True)
2527 self.view.setSelectionMode(QAbstractItemView.ContiguousSelection)
2528 self.view.CopyCellsToClipboard = CopyTableCellsToClipboard
2282 2529
2283 self.ResizeColumnsToContents() 2530 self.ResizeColumnsToContents()
2284 2531
2532 self.context_menu = ContextMenu(self.view)
2533
2285 self.find_bar = FindBar(self, self, True) 2534 self.find_bar = FindBar(self, self, True)
2286 2535
2287 self.finder = ChildDataItemFinder(self.data_model) 2536 self.finder = ChildDataItemFinder(self.data_model)
@@ -2395,6 +2644,10 @@ class TopCallsWindow(QMdiSubWindow, ResizeColumnsToContentsBase):
2395 self.view.setModel(self.model) 2644 self.view.setModel(self.model)
2396 self.view.setEditTriggers(QAbstractItemView.NoEditTriggers) 2645 self.view.setEditTriggers(QAbstractItemView.NoEditTriggers)
2397 self.view.verticalHeader().setVisible(False) 2646 self.view.verticalHeader().setVisible(False)
2647 self.view.setSelectionMode(QAbstractItemView.ContiguousSelection)
2648 self.view.CopyCellsToClipboard = CopyTableCellsToClipboard
2649
2650 self.context_menu = ContextMenu(self.view)
2398 2651
2399 self.ResizeColumnsToContents() 2652 self.ResizeColumnsToContents()
2400 2653
@@ -2660,6 +2913,60 @@ class HelpOnlyWindow(QMainWindow):
2660 2913
2661 self.setCentralWidget(self.text) 2914 self.setCentralWidget(self.text)
2662 2915
2916# PostqreSQL server version
2917
2918def PostqreSQLServerVersion(db):
2919 query = QSqlQuery(db)
2920 QueryExec(query, "SELECT VERSION()")
2921 if query.next():
2922 v_str = query.value(0)
2923 v_list = v_str.strip().split(" ")
2924 if v_list[0] == "PostgreSQL" and v_list[2] == "on":
2925 return v_list[1]
2926 return v_str
2927 return "Unknown"
2928
2929# SQLite version
2930
2931def SQLiteVersion(db):
2932 query = QSqlQuery(db)
2933 QueryExec(query, "SELECT sqlite_version()")
2934 if query.next():
2935 return query.value(0)
2936 return "Unknown"
2937
2938# About dialog
2939
2940class AboutDialog(QDialog):
2941
2942 def __init__(self, glb, parent=None):
2943 super(AboutDialog, self).__init__(parent)
2944
2945 self.setWindowTitle("About Exported SQL Viewer")
2946 self.setMinimumWidth(300)
2947
2948 pyside_version = "1" if pyside_version_1 else "2"
2949
2950 text = "<pre>"
2951 text += "Python version: " + sys.version.split(" ")[0] + "\n"
2952 text += "PySide version: " + pyside_version + "\n"
2953 text += "Qt version: " + qVersion() + "\n"
2954 if glb.dbref.is_sqlite3:
2955 text += "SQLite version: " + SQLiteVersion(glb.db) + "\n"
2956 else:
2957 text += "PostqreSQL version: " + PostqreSQLServerVersion(glb.db) + "\n"
2958 text += "</pre>"
2959
2960 self.text = QTextBrowser()
2961 self.text.setHtml(text)
2962 self.text.setReadOnly(True)
2963 self.text.setOpenExternalLinks(True)
2964
2965 self.vbox = QVBoxLayout()
2966 self.vbox.addWidget(self.text)
2967
2968 self.setLayout(self.vbox);
2969
2663# Font resize 2970# Font resize
2664 2971
2665def ResizeFont(widget, diff): 2972def ResizeFont(widget, diff):
@@ -2732,6 +3039,8 @@ class MainWindow(QMainWindow):
2732 file_menu.addAction(CreateExitAction(glb.app, self)) 3039 file_menu.addAction(CreateExitAction(glb.app, self))
2733 3040
2734 edit_menu = menu.addMenu("&Edit") 3041 edit_menu = menu.addMenu("&Edit")
3042 edit_menu.addAction(CreateAction("&Copy", "Copy to clipboard", self.CopyToClipboard, self, QKeySequence.Copy))
3043 edit_menu.addAction(CreateAction("Copy as CS&V", "Copy to clipboard as CSV", self.CopyToClipboardCSV, self))
2735 edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find)) 3044 edit_menu.addAction(CreateAction("&Find...", "Find items", self.Find, self, QKeySequence.Find))
2736 edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)])) 3045 edit_menu.addAction(CreateAction("Fetch &more records...", "Fetch more records", self.FetchMoreRecords, self, [QKeySequence(Qt.Key_F8)]))
2737 edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")])) 3046 edit_menu.addAction(CreateAction("&Shrink Font", "Make text smaller", self.ShrinkFont, self, [QKeySequence("Ctrl+-")]))
@@ -2755,6 +3064,21 @@ class MainWindow(QMainWindow):
2755 3064
2756 help_menu = menu.addMenu("&Help") 3065 help_menu = menu.addMenu("&Help")
2757 help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents)) 3066 help_menu.addAction(CreateAction("&Exported SQL Viewer Help", "Helpful information", self.Help, self, QKeySequence.HelpContents))
3067 help_menu.addAction(CreateAction("&About Exported SQL Viewer", "About this application", self.About, self))
3068
3069 def Try(self, fn):
3070 win = self.mdi_area.activeSubWindow()
3071 if win:
3072 try:
3073 fn(win.view)
3074 except:
3075 pass
3076
3077 def CopyToClipboard(self):
3078 self.Try(CopyCellsToClipboardHdr)
3079
3080 def CopyToClipboardCSV(self):
3081 self.Try(CopyCellsToClipboardCSV)
2758 3082
2759 def Find(self): 3083 def Find(self):
2760 win = self.mdi_area.activeSubWindow() 3084 win = self.mdi_area.activeSubWindow()
@@ -2773,12 +3097,10 @@ class MainWindow(QMainWindow):
2773 pass 3097 pass
2774 3098
2775 def ShrinkFont(self): 3099 def ShrinkFont(self):
2776 win = self.mdi_area.activeSubWindow() 3100 self.Try(ShrinkFont)
2777 ShrinkFont(win.view)
2778 3101
2779 def EnlargeFont(self): 3102 def EnlargeFont(self):
2780 win = self.mdi_area.activeSubWindow() 3103 self.Try(EnlargeFont)
2781 EnlargeFont(win.view)
2782 3104
2783 def EventMenu(self, events, reports_menu): 3105 def EventMenu(self, events, reports_menu):
2784 branches_events = 0 3106 branches_events = 0
@@ -2828,6 +3150,10 @@ class MainWindow(QMainWindow):
2828 def Help(self): 3150 def Help(self):
2829 HelpWindow(self.glb, self) 3151 HelpWindow(self.glb, self)
2830 3152
3153 def About(self):
3154 dialog = AboutDialog(self.glb, self)
3155 dialog.exec_()
3156
2831# XED Disassembler 3157# XED Disassembler
2832 3158
2833class xed_state_t(Structure): 3159class xed_state_t(Structure):
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 7f6c52021e41..946ab4b63acd 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -304,7 +304,7 @@ int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_u
304 /* Make sure we did not leak any file descriptor. */ 304 /* Make sure we did not leak any file descriptor. */
305 nr_end = open_files_cnt(); 305 nr_end = open_files_cnt();
306 pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); 306 pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
307 TEST_ASSERT_VAL("failed leadking files", nr == nr_end); 307 TEST_ASSERT_VAL("failed leaking files", nr == nr_end);
308 return 0; 308 return 0;
309} 309}
310 310
@@ -380,6 +380,6 @@ int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_
380 /* Make sure we did not leak any file descriptor. */ 380 /* Make sure we did not leak any file descriptor. */
381 nr_end = open_files_cnt(); 381 nr_end = open_files_cnt();
382 pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end); 382 pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
383 TEST_ASSERT_VAL("failed leadking files", nr == nr_end); 383 TEST_ASSERT_VAL("failed leaking files", nr == nr_end);
384 return 0; 384 return 0;
385} 385}
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index e46723568516..5363a12a8b9b 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -107,7 +107,7 @@ make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
107make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 107make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
108make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 108make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
109make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 109make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
110make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 110make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
111 111
112# $(run) contains all available tests 112# $(run) contains all available tests
113run := make_pure 113run := make_pure
diff --git a/tools/perf/tests/shell/record+zstd_comp_decomp.sh b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
new file mode 100755
index 000000000000..5dcba800109f
--- /dev/null
+++ b/tools/perf/tests/shell/record+zstd_comp_decomp.sh
@@ -0,0 +1,34 @@
1#!/bin/sh
2# Zstd perf.data compression/decompression
3
4trace_file=$(mktemp /tmp/perf.data.XXX)
5perf_tool=perf
6
7skip_if_no_z_record() {
8 $perf_tool record -h 2>&1 | grep -q '\-z, \-\-compression\-level'
9}
10
11collect_z_record() {
12 echo "Collecting compressed record file:"
13 $perf_tool record -o $trace_file -g -z -F 5000 -- \
14 dd count=500 if=/dev/random of=/dev/null
15}
16
17check_compressed_stats() {
18 echo "Checking compressed events stats:"
19 $perf_tool report -i $trace_file --header --stats | \
20 grep -E "(# compressed : Zstd,)|(COMPRESSED events:)"
21}
22
23check_compressed_output() {
24 $perf_tool inject -i $trace_file -o $trace_file.decomp &&
25 $perf_tool report -i $trace_file --stdio | head -n -3 > $trace_file.comp.output &&
26 $perf_tool report -i $trace_file.decomp --stdio | head -n -3 > $trace_file.decomp.output &&
27 diff $trace_file.comp.output $trace_file.decomp.output
28}
29
30skip_if_no_z_record || exit 2
31collect_z_record && check_compressed_stats && check_compressed_output
32err=$?
33rm -f $trace_file*
34exit $err
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8dd3102301ea..6d5bbc8b589b 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,6 +145,8 @@ perf-y += scripting-engines/
145 145
146perf-$(CONFIG_ZLIB) += zlib.o 146perf-$(CONFIG_ZLIB) += zlib.o
147perf-$(CONFIG_LZMA) += lzma.o 147perf-$(CONFIG_LZMA) += lzma.o
148perf-$(CONFIG_ZSTD) += zstd.o
149
148perf-y += demangle-java.o 150perf-y += demangle-java.o
149perf-y += demangle-rust.o 151perf-y += demangle-rust.o
150 152
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 09762985c713..0b8573fd9b05 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1021,7 +1021,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64
1021 float ipc = n_insn / ((double)ch->cycles / (double)ch->num); 1021 float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
1022 1022
1023 /* Hide data when there are too many overlaps. */ 1023 /* Hide data when there are too many overlaps. */
1024 if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) 1024 if (ch->reset >= 0x7fff)
1025 return; 1025 return;
1026 1026
1027 for (offset = start; offset <= end; offset++) { 1027 for (offset = start; offset <= end; offset++) {
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
index 892e92e7e7fc..0cd3369af2a4 100644
--- a/tools/perf/util/compress.h
+++ b/tools/perf/util/compress.h
@@ -2,6 +2,11 @@
2#ifndef PERF_COMPRESS_H 2#ifndef PERF_COMPRESS_H
3#define PERF_COMPRESS_H 3#define PERF_COMPRESS_H
4 4
5#include <stdbool.h>
6#ifdef HAVE_ZSTD_SUPPORT
7#include <zstd.h>
8#endif
9
5#ifdef HAVE_ZLIB_SUPPORT 10#ifdef HAVE_ZLIB_SUPPORT
6int gzip_decompress_to_file(const char *input, int output_fd); 11int gzip_decompress_to_file(const char *input, int output_fd);
7bool gzip_is_compressed(const char *input); 12bool gzip_is_compressed(const char *input);
@@ -12,4 +17,52 @@ int lzma_decompress_to_file(const char *input, int output_fd);
12bool lzma_is_compressed(const char *input); 17bool lzma_is_compressed(const char *input);
13#endif 18#endif
14 19
20struct zstd_data {
21#ifdef HAVE_ZSTD_SUPPORT
22 ZSTD_CStream *cstream;
23 ZSTD_DStream *dstream;
24#endif
25};
26
27#ifdef HAVE_ZSTD_SUPPORT
28
29int zstd_init(struct zstd_data *data, int level);
30int zstd_fini(struct zstd_data *data);
31
32size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
33 void *src, size_t src_size, size_t max_record_size,
34 size_t process_header(void *record, size_t increment));
35
36size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
37 void *dst, size_t dst_size);
38#else /* !HAVE_ZSTD_SUPPORT */
39
40static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused)
41{
42 return 0;
43}
44
45static inline int zstd_fini(struct zstd_data *data __maybe_unused)
46{
47 return 0;
48}
49
50static inline
51size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused,
52 void *dst __maybe_unused, size_t dst_size __maybe_unused,
53 void *src __maybe_unused, size_t src_size __maybe_unused,
54 size_t max_record_size __maybe_unused,
55 size_t process_header(void *record, size_t increment) __maybe_unused)
56{
57 return 0;
58}
59
60static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused,
61 size_t src_size __maybe_unused, void *dst __maybe_unused,
62 size_t dst_size __maybe_unused)
63{
64 return 0;
65}
66#endif
67
15#endif /* PERF_COMPRESS_H */ 68#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 4f8e2b485c01..271a90b326c4 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -62,6 +62,11 @@ struct perf_env {
62 struct cpu_topology_map *cpu; 62 struct cpu_topology_map *cpu;
63 struct cpu_cache_level *caches; 63 struct cpu_cache_level *caches;
64 int caches_cnt; 64 int caches_cnt;
65 u32 comp_ratio;
66 u32 comp_ver;
67 u32 comp_type;
68 u32 comp_level;
69 u32 comp_mmap_len;
65 struct numa_node *numa_nodes; 70 struct numa_node *numa_nodes;
66 struct memory_node *memory_nodes; 71 struct memory_node *memory_nodes;
67 unsigned long long memory_bsize; 72 unsigned long long memory_bsize;
@@ -80,6 +85,12 @@ struct perf_env {
80 } bpf_progs; 85 } bpf_progs;
81}; 86};
82 87
88enum perf_compress_type {
89 PERF_COMP_NONE = 0,
90 PERF_COMP_ZSTD,
91 PERF_COMP_MAX
92};
93
83struct bpf_prog_info_node; 94struct bpf_prog_info_node;
84struct btf_node; 95struct btf_node;
85 96
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ba7be74fad6e..d1ad6c419724 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -68,6 +68,7 @@ static const char *perf_event__names[] = {
68 [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", 68 [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
69 [PERF_RECORD_TIME_CONV] = "TIME_CONV", 69 [PERF_RECORD_TIME_CONV] = "TIME_CONV",
70 [PERF_RECORD_HEADER_FEATURE] = "FEATURE", 70 [PERF_RECORD_HEADER_FEATURE] = "FEATURE",
71 [PERF_RECORD_COMPRESSED] = "COMPRESSED",
71}; 72};
72 73
73static const char *perf_ns__names[] = { 74static const char *perf_ns__names[] = {
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 4e908ec1ef64..9e999550f247 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -255,6 +255,7 @@ enum perf_user_event_type { /* above any possible kernel type */
255 PERF_RECORD_EVENT_UPDATE = 78, 255 PERF_RECORD_EVENT_UPDATE = 78,
256 PERF_RECORD_TIME_CONV = 79, 256 PERF_RECORD_TIME_CONV = 79,
257 PERF_RECORD_HEADER_FEATURE = 80, 257 PERF_RECORD_HEADER_FEATURE = 80,
258 PERF_RECORD_COMPRESSED = 81,
258 PERF_RECORD_HEADER_MAX 259 PERF_RECORD_HEADER_MAX
259}; 260};
260 261
@@ -627,6 +628,11 @@ struct feature_event {
627 char data[]; 628 char data[];
628}; 629};
629 630
631struct compressed_event {
632 struct perf_event_header header;
633 char data[];
634};
635
630union perf_event { 636union perf_event {
631 struct perf_event_header header; 637 struct perf_event_header header;
632 struct mmap_event mmap; 638 struct mmap_event mmap;
@@ -660,6 +666,7 @@ union perf_event {
660 struct feature_event feat; 666 struct feature_event feat;
661 struct ksymbol_event ksymbol_event; 667 struct ksymbol_event ksymbol_event;
662 struct bpf_event bpf_event; 668 struct bpf_event bpf_event;
669 struct compressed_event pack;
663}; 670};
664 671
665void perf_event__print_totals(void); 672void perf_event__print_totals(void);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 4b6783ff5813..69d0fa8ab16f 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1009,7 +1009,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
1009 */ 1009 */
1010int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 1010int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1011 unsigned int auxtrace_pages, 1011 unsigned int auxtrace_pages,
1012 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush) 1012 bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush,
1013 int comp_level)
1013{ 1014{
1014 struct perf_evsel *evsel; 1015 struct perf_evsel *evsel;
1015 const struct cpu_map *cpus = evlist->cpus; 1016 const struct cpu_map *cpus = evlist->cpus;
@@ -1019,7 +1020,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1019 * Its value is decided by evsel's write_backward. 1020 * Its value is decided by evsel's write_backward.
1020 * So &mp should not be passed through const pointer. 1021 * So &mp should not be passed through const pointer.
1021 */ 1022 */
1022 struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush }; 1023 struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush,
1024 .comp_level = comp_level };
1023 1025
1024 if (!evlist->mmap) 1026 if (!evlist->mmap)
1025 evlist->mmap = perf_evlist__alloc_mmap(evlist, false); 1027 evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1051,7 +1053,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
1051 1053
1052int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) 1054int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
1053{ 1055{
1054 return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1); 1056 return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0);
1055} 1057}
1056 1058
1057int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1059int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c9a0f72677fd..49354fe24d5f 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -178,7 +178,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
178int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, 178int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
179 unsigned int auxtrace_pages, 179 unsigned int auxtrace_pages,
180 bool auxtrace_overwrite, int nr_cblocks, 180 bool auxtrace_overwrite, int nr_cblocks,
181 int affinity, int flush); 181 int affinity, int flush, int comp_level);
182int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); 182int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
183void perf_evlist__munmap(struct perf_evlist *evlist); 183void perf_evlist__munmap(struct perf_evlist *evlist);
184 184
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a10cf4cde920..a6f572a40deb 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -813,6 +813,8 @@ static void apply_config_terms(struct perf_evsel *evsel,
813 break; 813 break;
814 case PERF_EVSEL__CONFIG_TERM_DRV_CFG: 814 case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
815 break; 815 break;
816 case PERF_EVSEL__CONFIG_TERM_PERCORE:
817 break;
816 default: 818 default:
817 break; 819 break;
818 } 820 }
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6d190cbf1070..cad54e8ba522 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -50,6 +50,7 @@ enum term_type {
50 PERF_EVSEL__CONFIG_TERM_OVERWRITE, 50 PERF_EVSEL__CONFIG_TERM_OVERWRITE,
51 PERF_EVSEL__CONFIG_TERM_DRV_CFG, 51 PERF_EVSEL__CONFIG_TERM_DRV_CFG,
52 PERF_EVSEL__CONFIG_TERM_BRANCH, 52 PERF_EVSEL__CONFIG_TERM_BRANCH,
53 PERF_EVSEL__CONFIG_TERM_PERCORE,
53}; 54};
54 55
55struct perf_evsel_config_term { 56struct perf_evsel_config_term {
@@ -67,6 +68,7 @@ struct perf_evsel_config_term {
67 bool overwrite; 68 bool overwrite;
68 char *branch; 69 char *branch;
69 unsigned long max_events; 70 unsigned long max_events;
71 bool percore;
70 } val; 72 } val;
71 bool weak; 73 bool weak;
72}; 74};
@@ -158,6 +160,7 @@ struct perf_evsel {
158 struct perf_evsel **metric_events; 160 struct perf_evsel **metric_events;
159 bool collect_stat; 161 bool collect_stat;
160 bool weak_group; 162 bool weak_group;
163 bool percore;
161 const char *pmu_name; 164 const char *pmu_name;
162 struct { 165 struct {
163 perf_evsel__sb_cb_t *cb; 166 perf_evsel__sb_cb_t *cb;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 2d2af2ac2b1e..847ae51a524b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1344,6 +1344,30 @@ out:
1344 return ret; 1344 return ret;
1345} 1345}
1346 1346
1347static int write_compressed(struct feat_fd *ff __maybe_unused,
1348 struct perf_evlist *evlist __maybe_unused)
1349{
1350 int ret;
1351
1352 ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver));
1353 if (ret)
1354 return ret;
1355
1356 ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type));
1357 if (ret)
1358 return ret;
1359
1360 ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level));
1361 if (ret)
1362 return ret;
1363
1364 ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio));
1365 if (ret)
1366 return ret;
1367
1368 return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
1369}
1370
1347static void print_hostname(struct feat_fd *ff, FILE *fp) 1371static void print_hostname(struct feat_fd *ff, FILE *fp)
1348{ 1372{
1349 fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); 1373 fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1688,6 +1712,13 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
1688 } 1712 }
1689} 1713}
1690 1714
1715static void print_compressed(struct feat_fd *ff, FILE *fp)
1716{
1717 fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n",
1718 ff->ph->env.comp_type == PERF_COMP_ZSTD ? "Zstd" : "Unknown",
1719 ff->ph->env.comp_level, ff->ph->env.comp_ratio);
1720}
1721
1691static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) 1722static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
1692{ 1723{
1693 const char *delimiter = "# pmu mappings: "; 1724 const char *delimiter = "# pmu mappings: ";
@@ -2667,6 +2698,27 @@ out:
2667 return err; 2698 return err;
2668} 2699}
2669 2700
2701static int process_compressed(struct feat_fd *ff,
2702 void *data __maybe_unused)
2703{
2704 if (do_read_u32(ff, &(ff->ph->env.comp_ver)))
2705 return -1;
2706
2707 if (do_read_u32(ff, &(ff->ph->env.comp_type)))
2708 return -1;
2709
2710 if (do_read_u32(ff, &(ff->ph->env.comp_level)))
2711 return -1;
2712
2713 if (do_read_u32(ff, &(ff->ph->env.comp_ratio)))
2714 return -1;
2715
2716 if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len)))
2717 return -1;
2718
2719 return 0;
2720}
2721
2670struct feature_ops { 2722struct feature_ops {
2671 int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); 2723 int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
2672 void (*print)(struct feat_fd *ff, FILE *fp); 2724 void (*print)(struct feat_fd *ff, FILE *fp);
@@ -2730,6 +2782,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
2730 FEAT_OPN(DIR_FORMAT, dir_format, false), 2782 FEAT_OPN(DIR_FORMAT, dir_format, false),
2731 FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), 2783 FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
2732 FEAT_OPR(BPF_BTF, bpf_btf, false), 2784 FEAT_OPR(BPF_BTF, bpf_btf, false),
2785 FEAT_OPR(COMPRESSED, compressed, false),
2733}; 2786};
2734 2787
2735struct header_print_data { 2788struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 386da49e1bfa..5b3abe4172e2 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -42,6 +42,7 @@ enum {
42 HEADER_DIR_FORMAT, 42 HEADER_DIR_FORMAT,
43 HEADER_BPF_PROG_INFO, 43 HEADER_BPF_PROG_INFO,
44 HEADER_BPF_BTF, 44 HEADER_BPF_BTF,
45 HEADER_COMPRESSED,
45 HEADER_LAST_FEATURE, 46 HEADER_LAST_FEATURE,
46 HEADER_FEAT_BITS = 256, 47 HEADER_FEAT_BITS = 256,
47}; 48};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 872fab163585..f4c3c84b090f 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -58,6 +58,7 @@ enum intel_pt_pkt_state {
58 INTEL_PT_STATE_NO_IP, 58 INTEL_PT_STATE_NO_IP,
59 INTEL_PT_STATE_ERR_RESYNC, 59 INTEL_PT_STATE_ERR_RESYNC,
60 INTEL_PT_STATE_IN_SYNC, 60 INTEL_PT_STATE_IN_SYNC,
61 INTEL_PT_STATE_TNT_CONT,
61 INTEL_PT_STATE_TNT, 62 INTEL_PT_STATE_TNT,
62 INTEL_PT_STATE_TIP, 63 INTEL_PT_STATE_TIP,
63 INTEL_PT_STATE_TIP_PGD, 64 INTEL_PT_STATE_TIP_PGD,
@@ -72,8 +73,9 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
72 case INTEL_PT_STATE_NO_IP: 73 case INTEL_PT_STATE_NO_IP:
73 case INTEL_PT_STATE_ERR_RESYNC: 74 case INTEL_PT_STATE_ERR_RESYNC:
74 case INTEL_PT_STATE_IN_SYNC: 75 case INTEL_PT_STATE_IN_SYNC:
75 case INTEL_PT_STATE_TNT: 76 case INTEL_PT_STATE_TNT_CONT:
76 return true; 77 return true;
78 case INTEL_PT_STATE_TNT:
77 case INTEL_PT_STATE_TIP: 79 case INTEL_PT_STATE_TIP:
78 case INTEL_PT_STATE_TIP_PGD: 80 case INTEL_PT_STATE_TIP_PGD:
79 case INTEL_PT_STATE_FUP: 81 case INTEL_PT_STATE_FUP:
@@ -888,16 +890,20 @@ static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
888 timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; 890 timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
889 masked_timestamp = timestamp & decoder->period_mask; 891 masked_timestamp = timestamp & decoder->period_mask;
890 if (decoder->continuous_period) { 892 if (decoder->continuous_period) {
891 if (masked_timestamp != decoder->last_masked_timestamp) 893 if (masked_timestamp > decoder->last_masked_timestamp)
892 return 1; 894 return 1;
893 } else { 895 } else {
894 timestamp += 1; 896 timestamp += 1;
895 masked_timestamp = timestamp & decoder->period_mask; 897 masked_timestamp = timestamp & decoder->period_mask;
896 if (masked_timestamp != decoder->last_masked_timestamp) { 898 if (masked_timestamp > decoder->last_masked_timestamp) {
897 decoder->last_masked_timestamp = masked_timestamp; 899 decoder->last_masked_timestamp = masked_timestamp;
898 decoder->continuous_period = true; 900 decoder->continuous_period = true;
899 } 901 }
900 } 902 }
903
904 if (masked_timestamp < decoder->last_masked_timestamp)
905 return decoder->period_ticks;
906
901 return decoder->period_ticks - (timestamp - masked_timestamp); 907 return decoder->period_ticks - (timestamp - masked_timestamp);
902} 908}
903 909
@@ -926,7 +932,10 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
926 case INTEL_PT_PERIOD_TICKS: 932 case INTEL_PT_PERIOD_TICKS:
927 timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; 933 timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
928 masked_timestamp = timestamp & decoder->period_mask; 934 masked_timestamp = timestamp & decoder->period_mask;
929 decoder->last_masked_timestamp = masked_timestamp; 935 if (masked_timestamp > decoder->last_masked_timestamp)
936 decoder->last_masked_timestamp = masked_timestamp;
937 else
938 decoder->last_masked_timestamp += decoder->period_ticks;
930 break; 939 break;
931 case INTEL_PT_PERIOD_NONE: 940 case INTEL_PT_PERIOD_NONE:
932 case INTEL_PT_PERIOD_MTC: 941 case INTEL_PT_PERIOD_MTC:
@@ -1254,7 +1263,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1254 return -ENOENT; 1263 return -ENOENT;
1255 } 1264 }
1256 decoder->tnt.count -= 1; 1265 decoder->tnt.count -= 1;
1257 if (!decoder->tnt.count) 1266 if (decoder->tnt.count)
1267 decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
1268 else
1258 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 1269 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1259 decoder->tnt.payload <<= 1; 1270 decoder->tnt.payload <<= 1;
1260 decoder->state.from_ip = decoder->ip; 1271 decoder->state.from_ip = decoder->ip;
@@ -1285,7 +1296,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1285 1296
1286 if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { 1297 if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
1287 decoder->tnt.count -= 1; 1298 decoder->tnt.count -= 1;
1288 if (!decoder->tnt.count) 1299 if (decoder->tnt.count)
1300 decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
1301 else
1289 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; 1302 decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
1290 if (decoder->tnt.payload & BIT63) { 1303 if (decoder->tnt.payload & BIT63) {
1291 decoder->tnt.payload <<= 1; 1304 decoder->tnt.payload <<= 1;
@@ -1305,8 +1318,11 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
1305 return 0; 1318 return 0;
1306 } 1319 }
1307 decoder->ip += intel_pt_insn.length; 1320 decoder->ip += intel_pt_insn.length;
1308 if (!decoder->tnt.count) 1321 if (!decoder->tnt.count) {
1322 decoder->sample_timestamp = decoder->timestamp;
1323 decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
1309 return -EAGAIN; 1324 return -EAGAIN;
1325 }
1310 decoder->tnt.payload <<= 1; 1326 decoder->tnt.payload <<= 1;
1311 continue; 1327 continue;
1312 } 1328 }
@@ -2365,6 +2381,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
2365 err = intel_pt_walk_trace(decoder); 2381 err = intel_pt_walk_trace(decoder);
2366 break; 2382 break;
2367 case INTEL_PT_STATE_TNT: 2383 case INTEL_PT_STATE_TNT:
2384 case INTEL_PT_STATE_TNT_CONT:
2368 err = intel_pt_walk_tnt(decoder); 2385 err = intel_pt_walk_tnt(decoder);
2369 if (err == -EAGAIN) 2386 if (err == -EAGAIN)
2370 err = intel_pt_walk_trace(decoder); 2387 err = intel_pt_walk_trace(decoder);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 3c520baa198c..28a9541c4835 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1234,8 +1234,9 @@ static char *get_kernel_version(const char *root_dir)
1234 if (!file) 1234 if (!file)
1235 return NULL; 1235 return NULL;
1236 1236
1237 version[0] = '\0';
1238 tmp = fgets(version, sizeof(version), file); 1237 tmp = fgets(version, sizeof(version), file);
1238 if (!tmp)
1239 *version = '\0';
1239 fclose(file); 1240 fclose(file);
1240 1241
1241 name = strstr(version, prefix); 1242 name = strstr(version, prefix);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index ef3d79b2c90b..868c0b0e909c 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb
157} 157}
158 158
159#ifdef HAVE_AIO_SUPPORT 159#ifdef HAVE_AIO_SUPPORT
160static int perf_mmap__aio_enabled(struct perf_mmap *map)
161{
162 return map->aio.nr_cblocks > 0;
163}
160 164
161#ifdef HAVE_LIBNUMA_SUPPORT 165#ifdef HAVE_LIBNUMA_SUPPORT
162static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) 166static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
@@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi
198 202
199 return 0; 203 return 0;
200} 204}
201#else 205#else /* !HAVE_LIBNUMA_SUPPORT */
202static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) 206static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx)
203{ 207{
204 map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); 208 map->aio.data[idx] = malloc(perf_mmap__mmap_len(map));
@@ -285,81 +289,12 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
285 zfree(&map->aio.cblocks); 289 zfree(&map->aio.cblocks);
286 zfree(&map->aio.aiocb); 290 zfree(&map->aio.aiocb);
287} 291}
288 292#else /* !HAVE_AIO_SUPPORT */
289int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, 293static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused)
290 int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
291 off_t *off)
292{ 294{
293 u64 head = perf_mmap__read_head(md); 295 return 0;
294 unsigned char *data = md->base + page_size;
295 unsigned long size, size0 = 0;
296 void *buf;
297 int rc = 0;
298
299 rc = perf_mmap__read_init(md);
300 if (rc < 0)
301 return (rc == -EAGAIN) ? 0 : -1;
302
303 /*
304 * md->base data is copied into md->data[idx] buffer to
305 * release space in the kernel buffer as fast as possible,
306 * thru perf_mmap__consume() below.
307 *
308 * That lets the kernel to proceed with storing more
309 * profiling data into the kernel buffer earlier than other
310 * per-cpu kernel buffers are handled.
311 *
312 * Coping can be done in two steps in case the chunk of
313 * profiling data crosses the upper bound of the kernel buffer.
314 * In this case we first move part of data from md->start
315 * till the upper bound and then the reminder from the
316 * beginning of the kernel buffer till the end of
317 * the data chunk.
318 */
319
320 size = md->end - md->start;
321
322 if ((md->start & md->mask) + size != (md->end & md->mask)) {
323 buf = &data[md->start & md->mask];
324 size = md->mask + 1 - (md->start & md->mask);
325 md->start += size;
326 memcpy(md->aio.data[idx], buf, size);
327 size0 = size;
328 }
329
330 buf = &data[md->start & md->mask];
331 size = md->end - md->start;
332 md->start += size;
333 memcpy(md->aio.data[idx] + size0, buf, size);
334
335 /*
336 * Increment md->refcount to guard md->data[idx] buffer
337 * from premature deallocation because md object can be
338 * released earlier than aio write request started
339 * on mmap->data[idx] is complete.
340 *
341 * perf_mmap__put() is done at record__aio_complete()
342 * after started request completion.
343 */
344 perf_mmap__get(md);
345
346 md->prev = head;
347 perf_mmap__consume(md);
348
349 rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off);
350 if (!rc) {
351 *off += size0 + size;
352 } else {
353 /*
354 * Decrement md->refcount back if aio write
355 * operation failed to start.
356 */
357 perf_mmap__put(md);
358 }
359
360 return rc;
361} 296}
362#else 297
363static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, 298static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
364 struct mmap_params *mp __maybe_unused) 299 struct mmap_params *mp __maybe_unused)
365{ 300{
@@ -374,6 +309,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
374void perf_mmap__munmap(struct perf_mmap *map) 309void perf_mmap__munmap(struct perf_mmap *map)
375{ 310{
376 perf_mmap__aio_munmap(map); 311 perf_mmap__aio_munmap(map);
312 if (map->data != NULL) {
313 munmap(map->data, perf_mmap__mmap_len(map));
314 map->data = NULL;
315 }
377 if (map->base != NULL) { 316 if (map->base != NULL) {
378 munmap(map->base, perf_mmap__mmap_len(map)); 317 munmap(map->base, perf_mmap__mmap_len(map));
379 map->base = NULL; 318 map->base = NULL;
@@ -442,6 +381,19 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c
442 381
443 map->flush = mp->flush; 382 map->flush = mp->flush;
444 383
384 map->comp_level = mp->comp_level;
385
386 if (map->comp_level && !perf_mmap__aio_enabled(map)) {
387 map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE,
388 MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
389 if (map->data == MAP_FAILED) {
390 pr_debug2("failed to mmap data buffer, error %d\n",
391 errno);
392 map->data = NULL;
393 return -1;
394 }
395 }
396
445 if (auxtrace_mmap__mmap(&map->auxtrace_mmap, 397 if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
446 &mp->auxtrace_mp, map->base, fd)) 398 &mp->auxtrace_mp, map->base, fd))
447 return -1; 399 return -1;
@@ -540,7 +492,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to,
540 492
541 rc = perf_mmap__read_init(md); 493 rc = perf_mmap__read_init(md);
542 if (rc < 0) 494 if (rc < 0)
543 return (rc == -EAGAIN) ? 0 : -1; 495 return (rc == -EAGAIN) ? 1 : -1;
544 496
545 size = md->end - md->start; 497 size = md->end - md->start;
546 498
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index b82f8c2d55c4..274ce389cd84 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -40,6 +40,8 @@ struct perf_mmap {
40#endif 40#endif
41 cpu_set_t affinity_mask; 41 cpu_set_t affinity_mask;
42 u64 flush; 42 u64 flush;
43 void *data;
44 int comp_level;
43}; 45};
44 46
45/* 47/*
@@ -71,7 +73,7 @@ enum bkw_mmap_state {
71}; 73};
72 74
73struct mmap_params { 75struct mmap_params {
74 int prot, mask, nr_cblocks, affinity, flush; 76 int prot, mask, nr_cblocks, affinity, flush, comp_level;
75 struct auxtrace_mmap_params auxtrace_mp; 77 struct auxtrace_mmap_params auxtrace_mp;
76}; 78};
77 79
@@ -99,18 +101,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map);
99 101
100int perf_mmap__push(struct perf_mmap *md, void *to, 102int perf_mmap__push(struct perf_mmap *md, void *to,
101 int push(struct perf_mmap *map, void *to, void *buf, size_t size)); 103 int push(struct perf_mmap *map, void *to, void *buf, size_t size));
102#ifdef HAVE_AIO_SUPPORT
103int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx,
104 int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
105 off_t *off);
106#else
107static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused,
108 int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
109 off_t *off __maybe_unused)
110{
111 return 0;
112}
113#endif
114 104
115size_t perf_mmap__mmap_len(struct perf_mmap *map); 105size_t perf_mmap__mmap_len(struct perf_mmap *map);
116 106
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4432bfe039fd..cf0b9b81c5aa 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -950,6 +950,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
950 [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", 950 [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
951 [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", 951 [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
952 [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", 952 [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
953 [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore",
953}; 954};
954 955
955static bool config_term_shrinked; 956static bool config_term_shrinked;
@@ -970,6 +971,7 @@ config_term_avail(int term_type, struct parse_events_error *err)
970 case PARSE_EVENTS__TERM_TYPE_CONFIG2: 971 case PARSE_EVENTS__TERM_TYPE_CONFIG2:
971 case PARSE_EVENTS__TERM_TYPE_NAME: 972 case PARSE_EVENTS__TERM_TYPE_NAME:
972 case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: 973 case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
974 case PARSE_EVENTS__TERM_TYPE_PERCORE:
973 return true; 975 return true;
974 default: 976 default:
975 if (!err) 977 if (!err)
@@ -1061,6 +1063,14 @@ do { \
1061 case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: 1063 case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS:
1062 CHECK_TYPE_VAL(NUM); 1064 CHECK_TYPE_VAL(NUM);
1063 break; 1065 break;
1066 case PARSE_EVENTS__TERM_TYPE_PERCORE:
1067 CHECK_TYPE_VAL(NUM);
1068 if ((unsigned int)term->val.num > 1) {
1069 err->str = strdup("expected 0 or 1");
1070 err->idx = term->err_val;
1071 return -EINVAL;
1072 }
1073 break;
1064 default: 1074 default:
1065 err->str = strdup("unknown term"); 1075 err->str = strdup("unknown term");
1066 err->idx = term->err_term; 1076 err->idx = term->err_term;
@@ -1199,6 +1209,10 @@ do { \
1199 case PARSE_EVENTS__TERM_TYPE_DRV_CFG: 1209 case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
1200 ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); 1210 ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
1201 break; 1211 break;
1212 case PARSE_EVENTS__TERM_TYPE_PERCORE:
1213 ADD_CONFIG_TERM(PERCORE, percore,
1214 term->val.num ? true : false);
1215 break;
1202 default: 1216 default:
1203 break; 1217 break;
1204 } 1218 }
@@ -1260,6 +1274,18 @@ int parse_events_add_tool(struct parse_events_state *parse_state,
1260 return add_event_tool(list, &parse_state->idx, tool_event); 1274 return add_event_tool(list, &parse_state->idx, tool_event);
1261} 1275}
1262 1276
1277static bool config_term_percore(struct list_head *config_terms)
1278{
1279 struct perf_evsel_config_term *term;
1280
1281 list_for_each_entry(term, config_terms, list) {
1282 if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE)
1283 return term->val.percore;
1284 }
1285
1286 return false;
1287}
1288
1263int parse_events_add_pmu(struct parse_events_state *parse_state, 1289int parse_events_add_pmu(struct parse_events_state *parse_state,
1264 struct list_head *list, char *name, 1290 struct list_head *list, char *name,
1265 struct list_head *head_config, 1291 struct list_head *head_config,
@@ -1333,6 +1359,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
1333 evsel->metric_name = info.metric_name; 1359 evsel->metric_name = info.metric_name;
1334 evsel->pmu_name = name; 1360 evsel->pmu_name = name;
1335 evsel->use_uncore_alias = use_uncore_alias; 1361 evsel->use_uncore_alias = use_uncore_alias;
1362 evsel->percore = config_term_percore(&evsel->config_terms);
1336 } 1363 }
1337 1364
1338 return evsel ? 0 : -ENOMEM; 1365 return evsel ? 0 : -ENOMEM;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index a052cd6ac63e..f7139e1a2fd3 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -75,6 +75,7 @@ enum {
75 PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, 75 PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
76 PARSE_EVENTS__TERM_TYPE_OVERWRITE, 76 PARSE_EVENTS__TERM_TYPE_OVERWRITE,
77 PARSE_EVENTS__TERM_TYPE_DRV_CFG, 77 PARSE_EVENTS__TERM_TYPE_DRV_CFG,
78 PARSE_EVENTS__TERM_TYPE_PERCORE,
78 __PARSE_EVENTS__TERM_TYPE_NR, 79 __PARSE_EVENTS__TERM_TYPE_NR,
79}; 80};
80 81
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index c54bfe88626c..ca6098874fe2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -283,6 +283,7 @@ inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
283no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } 283no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
284overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } 284overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
285no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } 285no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
286percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
286, { return ','; } 287, { return ','; }
287"/" { BEGIN(INITIAL); return '/'; } 288"/" { BEGIN(INITIAL); return '/'; }
288{name_minus} { return str(yyscanner, PE_NAME); } 289{name_minus} { return str(yyscanner, PE_NAME); }
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index e6599e290f46..08581e276225 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -5,13 +5,14 @@
5#include <subcmd/parse-options.h> 5#include <subcmd/parse-options.h>
6#include "util/parse-regs-options.h" 6#include "util/parse-regs-options.h"
7 7
8int 8static int
9parse_regs(const struct option *opt, const char *str, int unset) 9__parse_regs(const struct option *opt, const char *str, int unset, bool intr)
10{ 10{
11 uint64_t *mode = (uint64_t *)opt->value; 11 uint64_t *mode = (uint64_t *)opt->value;
12 const struct sample_reg *r; 12 const struct sample_reg *r;
13 char *s, *os = NULL, *p; 13 char *s, *os = NULL, *p;
14 int ret = -1; 14 int ret = -1;
15 uint64_t mask;
15 16
16 if (unset) 17 if (unset)
17 return 0; 18 return 0;
@@ -22,6 +23,11 @@ parse_regs(const struct option *opt, const char *str, int unset)
22 if (*mode) 23 if (*mode)
23 return -1; 24 return -1;
24 25
26 if (intr)
27 mask = arch__intr_reg_mask();
28 else
29 mask = arch__user_reg_mask();
30
25 /* str may be NULL in case no arg is passed to -I */ 31 /* str may be NULL in case no arg is passed to -I */
26 if (str) { 32 if (str) {
27 /* because str is read-only */ 33 /* because str is read-only */
@@ -37,19 +43,20 @@ parse_regs(const struct option *opt, const char *str, int unset)
37 if (!strcmp(s, "?")) { 43 if (!strcmp(s, "?")) {
38 fprintf(stderr, "available registers: "); 44 fprintf(stderr, "available registers: ");
39 for (r = sample_reg_masks; r->name; r++) { 45 for (r = sample_reg_masks; r->name; r++) {
40 fprintf(stderr, "%s ", r->name); 46 if (r->mask & mask)
47 fprintf(stderr, "%s ", r->name);
41 } 48 }
42 fputc('\n', stderr); 49 fputc('\n', stderr);
43 /* just printing available regs */ 50 /* just printing available regs */
44 return -1; 51 return -1;
45 } 52 }
46 for (r = sample_reg_masks; r->name; r++) { 53 for (r = sample_reg_masks; r->name; r++) {
47 if (!strcasecmp(s, r->name)) 54 if ((r->mask & mask) && !strcasecmp(s, r->name))
48 break; 55 break;
49 } 56 }
50 if (!r->name) { 57 if (!r->name) {
51 ui__warning("unknown register %s," 58 ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n",
52 " check man page\n", s); 59 s, intr ? "-I" : "--user-regs=");
53 goto error; 60 goto error;
54 } 61 }
55 62
@@ -65,8 +72,20 @@ parse_regs(const struct option *opt, const char *str, int unset)
65 72
66 /* default to all possible regs */ 73 /* default to all possible regs */
67 if (*mode == 0) 74 if (*mode == 0)
68 *mode = PERF_REGS_MASK; 75 *mode = mask;
69error: 76error:
70 free(os); 77 free(os);
71 return ret; 78 return ret;
72} 79}
80
81int
82parse_user_regs(const struct option *opt, const char *str, int unset)
83{
84 return __parse_regs(opt, str, unset, false);
85}
86
87int
88parse_intr_regs(const struct option *opt, const char *str, int unset)
89{
90 return __parse_regs(opt, str, unset, true);
91}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
index cdefb1acf6be..2b23d25c6394 100644
--- a/tools/perf/util/parse-regs-options.h
+++ b/tools/perf/util/parse-regs-options.h
@@ -2,5 +2,6 @@
2#ifndef _PERF_PARSE_REGS_OPTIONS_H 2#ifndef _PERF_PARSE_REGS_OPTIONS_H
3#define _PERF_PARSE_REGS_OPTIONS_H 1 3#define _PERF_PARSE_REGS_OPTIONS_H 1
4struct option; 4struct option;
5int parse_regs(const struct option *opt, const char *str, int unset); 5int parse_user_regs(const struct option *opt, const char *str, int unset);
6int parse_intr_regs(const struct option *opt, const char *str, int unset);
6#endif /* _PERF_PARSE_REGS_OPTIONS_H */ 7#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 2acfcc527cac..2774cec1f15f 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -13,6 +13,16 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
13 return SDT_ARG_SKIP; 13 return SDT_ARG_SKIP;
14} 14}
15 15
16uint64_t __weak arch__intr_reg_mask(void)
17{
18 return PERF_REGS_MASK;
19}
20
21uint64_t __weak arch__user_reg_mask(void)
22{
23 return PERF_REGS_MASK;
24}
25
16#ifdef HAVE_PERF_REGS_SUPPORT 26#ifdef HAVE_PERF_REGS_SUPPORT
17int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) 27int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
18{ 28{
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index c9319f8d17a6..cb9c246c8962 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -12,6 +12,7 @@ struct sample_reg {
12 uint64_t mask; 12 uint64_t mask;
13}; 13};
14#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } 14#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
15#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) }
15#define SMPL_REG_END { .name = NULL } 16#define SMPL_REG_END { .name = NULL }
16 17
17extern const struct sample_reg sample_reg_masks[]; 18extern const struct sample_reg sample_reg_masks[];
@@ -22,6 +23,8 @@ enum {
22}; 23};
23 24
24int arch_sdt_arg_parse_op(char *old_op, char **new_op); 25int arch_sdt_arg_parse_op(char *old_op, char **new_op);
26uint64_t arch__intr_reg_mask(void);
27uint64_t arch__user_reg_mask(void);
25 28
26#ifdef HAVE_PERF_REGS_SUPPORT 29#ifdef HAVE_PERF_REGS_SUPPORT
27#include <perf_regs.h> 30#include <perf_regs.h>
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index bad5f87ae001..2310a1752983 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -29,6 +29,61 @@
29#include "stat.h" 29#include "stat.h"
30#include "arch/common.h" 30#include "arch/common.h"
31 31
#ifdef HAVE_ZSTD_SUPPORT
/*
 * Decompress a PERF_RECORD_COMPRESSED record into a freshly mmap'ed decomp
 * buffer and append it to the session's decomp list for later processing.
 *
 * A partial event left at the tail of the previous decomp buffer is copied
 * to the head of the new one, so events split across compressed records are
 * reassembled.
 *
 * Returns 0 on success, -1 on allocation or decompression failure.
 */
static int perf_session__process_compressed_event(struct perf_session *session,
						  union perf_event *event, u64 file_offset)
{
	void *src;
	size_t decomp_size, src_size;
	u64 decomp_last_rem = 0;
	size_t decomp_len = session->header.env.comp_mmap_len;
	struct decomp *decomp, *decomp_last = session->decomp_last;

	/* Anonymous mmap zero-fills, so decomp->size starts at 0 below. */
	decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
		      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
	if (decomp == MAP_FAILED) {
		pr_err("Couldn't allocate memory for decompression\n");
		return -1;
	}

	decomp->file_pos = file_offset;
	decomp->head = 0;

	/* Carry over the trailing partial event from the previous buffer. */
	if (decomp_last) {
		decomp_last_rem = decomp_last->size - decomp_last->head;
		memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
		decomp->size = decomp_last_rem;
	}

	/* Compressed payload follows the compressed_event header. */
	src = (void *)event + sizeof(struct compressed_event);
	src_size = event->pack.header.size - sizeof(struct compressed_event);

	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
	if (!decomp_size) {
		munmap(decomp, sizeof(struct decomp) + decomp_len);
		pr_err("Couldn't decompress data\n");
		return -1;
	}

	decomp->size += decomp_size;

	/* Append to the session's singly linked decomp list. */
	if (session->decomp == NULL) {
		session->decomp = decomp;
		session->decomp_last = decomp;
	} else {
		session->decomp_last->next = decomp;
		session->decomp_last = decomp;
	}

	/* Fix: src_size/decomp_size are size_t — %ld is UB where size_t != long. */
	pr_debug("decomp (B): %zu to %zu\n", src_size, decomp_size);

	return 0;
}
#else /* !HAVE_ZSTD_SUPPORT */
#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
#endif
86
32static int perf_session__deliver_event(struct perf_session *session, 87static int perf_session__deliver_event(struct perf_session *session,
33 union perf_event *event, 88 union perf_event *event,
34 struct perf_tool *tool, 89 struct perf_tool *tool,
@@ -197,6 +252,21 @@ static void perf_session__delete_threads(struct perf_session *session)
197 machine__delete_threads(&session->machines.host); 252 machine__delete_threads(&session->machines.host);
198} 253}
199 254
255static void perf_session__release_decomp_events(struct perf_session *session)
256{
257 struct decomp *next, *decomp;
258 size_t decomp_len;
259 next = session->decomp;
260 decomp_len = session->header.env.comp_mmap_len;
261 do {
262 decomp = next;
263 if (decomp == NULL)
264 break;
265 next = decomp->next;
266 munmap(decomp, decomp_len + sizeof(struct decomp));
267 } while (1);
268}
269
200void perf_session__delete(struct perf_session *session) 270void perf_session__delete(struct perf_session *session)
201{ 271{
202 if (session == NULL) 272 if (session == NULL)
@@ -205,6 +275,7 @@ void perf_session__delete(struct perf_session *session)
205 auxtrace_index__free(&session->auxtrace_index); 275 auxtrace_index__free(&session->auxtrace_index);
206 perf_session__destroy_kernel_maps(session); 276 perf_session__destroy_kernel_maps(session);
207 perf_session__delete_threads(session); 277 perf_session__delete_threads(session);
278 perf_session__release_decomp_events(session);
208 perf_env__exit(&session->header.env); 279 perf_env__exit(&session->header.env);
209 machines__exit(&session->machines); 280 machines__exit(&session->machines);
210 if (session->data) 281 if (session->data)
@@ -358,6 +429,14 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu
358 return 0; 429 return 0;
359} 430}
360 431
432static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
433 union perf_event *event __maybe_unused,
434 u64 file_offset __maybe_unused)
435{
436 dump_printf(": unhandled!\n");
437 return 0;
438}
439
361void perf_tool__fill_defaults(struct perf_tool *tool) 440void perf_tool__fill_defaults(struct perf_tool *tool)
362{ 441{
363 if (tool->sample == NULL) 442 if (tool->sample == NULL)
@@ -430,6 +509,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
430 tool->time_conv = process_event_op2_stub; 509 tool->time_conv = process_event_op2_stub;
431 if (tool->feature == NULL) 510 if (tool->feature == NULL)
432 tool->feature = process_event_op2_stub; 511 tool->feature = process_event_op2_stub;
512 if (tool->compressed == NULL)
513 tool->compressed = perf_session__process_compressed_event;
433} 514}
434 515
435static void swap_sample_id_all(union perf_event *event, void *data) 516static void swap_sample_id_all(union perf_event *event, void *data)
@@ -1373,7 +1454,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
1373 int fd = perf_data__fd(session->data); 1454 int fd = perf_data__fd(session->data);
1374 int err; 1455 int err;
1375 1456
1376 dump_event(session->evlist, event, file_offset, &sample); 1457 if (event->header.type != PERF_RECORD_COMPRESSED ||
1458 tool->compressed == perf_session__process_compressed_event_stub)
1459 dump_event(session->evlist, event, file_offset, &sample);
1377 1460
1378 /* These events are processed right away */ 1461 /* These events are processed right away */
1379 switch (event->header.type) { 1462 switch (event->header.type) {
@@ -1426,6 +1509,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
1426 return tool->time_conv(session, event); 1509 return tool->time_conv(session, event);
1427 case PERF_RECORD_HEADER_FEATURE: 1510 case PERF_RECORD_HEADER_FEATURE:
1428 return tool->feature(session, event); 1511 return tool->feature(session, event);
1512 case PERF_RECORD_COMPRESSED:
1513 err = tool->compressed(session, event, file_offset);
1514 if (err)
1515 dump_event(session->evlist, event, file_offset, &sample);
1516 return err;
1429 default: 1517 default:
1430 return -EINVAL; 1518 return -EINVAL;
1431 } 1519 }
@@ -1708,6 +1796,8 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
1708 1796
1709volatile int session_done; 1797volatile int session_done;
1710 1798
1799static int __perf_session__process_decomp_events(struct perf_session *session);
1800
1711static int __perf_session__process_pipe_events(struct perf_session *session) 1801static int __perf_session__process_pipe_events(struct perf_session *session)
1712{ 1802{
1713 struct ordered_events *oe = &session->ordered_events; 1803 struct ordered_events *oe = &session->ordered_events;
@@ -1788,6 +1878,10 @@ more:
1788 if (skip > 0) 1878 if (skip > 0)
1789 head += skip; 1879 head += skip;
1790 1880
1881 err = __perf_session__process_decomp_events(session);
1882 if (err)
1883 goto out_err;
1884
1791 if (!session_done()) 1885 if (!session_done())
1792 goto more; 1886 goto more;
1793done: 1887done:
@@ -1836,6 +1930,39 @@ fetch_mmaped_event(struct perf_session *session,
1836 return event; 1930 return event;
1837} 1931}
1838 1932
1933static int __perf_session__process_decomp_events(struct perf_session *session)
1934{
1935 s64 skip;
1936 u64 size, file_pos = 0;
1937 struct decomp *decomp = session->decomp_last;
1938
1939 if (!decomp)
1940 return 0;
1941
1942 while (decomp->head < decomp->size && !session_done()) {
1943 union perf_event *event = fetch_mmaped_event(session, decomp->head, decomp->size, decomp->data);
1944
1945 if (!event)
1946 break;
1947
1948 size = event->header.size;
1949
1950 if (size < sizeof(struct perf_event_header) ||
1951 (skip = perf_session__process_event(session, event, file_pos)) < 0) {
1952 pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
1953 decomp->file_pos + decomp->head, event->header.size, event->header.type);
1954 return -EINVAL;
1955 }
1956
1957 if (skip)
1958 size += skip;
1959
1960 decomp->head += size;
1961 }
1962
1963 return 0;
1964}
1965
1839/* 1966/*
1840 * On 64bit we can mmap the data file in one go. No need for tiny mmap 1967 * On 64bit we can mmap the data file in one go. No need for tiny mmap
1841 * slices. On 32bit we use 32MB. 1968 * slices. On 32bit we use 32MB.
@@ -1945,6 +2072,10 @@ more:
1945 head += size; 2072 head += size;
1946 file_pos += size; 2073 file_pos += size;
1947 2074
2075 err = __perf_session__process_decomp_events(session);
2076 if (err)
2077 goto out;
2078
1948 ui_progress__update(prog, size); 2079 ui_progress__update(prog, size);
1949 2080
1950 if (session_done()) 2081 if (session_done())
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d96eccd7d27f..dd8920b745bc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -8,6 +8,7 @@
8#include "machine.h" 8#include "machine.h"
9#include "data.h" 9#include "data.h"
10#include "ordered-events.h" 10#include "ordered-events.h"
11#include "util/compress.h"
11#include <linux/kernel.h> 12#include <linux/kernel.h>
12#include <linux/rbtree.h> 13#include <linux/rbtree.h>
13#include <linux/perf_event.h> 14#include <linux/perf_event.h>
@@ -35,6 +36,19 @@ struct perf_session {
35 struct ordered_events ordered_events; 36 struct ordered_events ordered_events;
36 struct perf_data *data; 37 struct perf_data *data;
37 struct perf_tool *tool; 38 struct perf_tool *tool;
39 u64 bytes_transferred;
40 u64 bytes_compressed;
41 struct zstd_data zstd_data;
42 struct decomp *decomp;
43 struct decomp *decomp_last;
44};
45
46struct decomp {
47 struct decomp *next;
48 u64 file_pos;
49 u64 head;
50 size_t size;
51 char data[];
38}; 52};
39 53
40struct perf_tool; 54struct perf_tool;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 3324f23c7efc..4c53bae5644b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -88,9 +88,17 @@ static void aggr_printout(struct perf_stat_config *config,
88 config->csv_sep); 88 config->csv_sep);
89 break; 89 break;
90 case AGGR_NONE: 90 case AGGR_NONE:
91 fprintf(config->output, "CPU%*d%s", 91 if (evsel->percore) {
92 config->csv_output ? 0 : -4, 92 fprintf(config->output, "S%d-C%*d%s",
93 perf_evsel__cpus(evsel)->map[id], config->csv_sep); 93 cpu_map__id_to_socket(id),
94 config->csv_output ? 0 : -5,
95 cpu_map__id_to_cpu(id), config->csv_sep);
96 } else {
97 fprintf(config->output, "CPU%*d%s ",
98 config->csv_output ? 0 : -5,
99 perf_evsel__cpus(evsel)->map[id],
100 config->csv_sep);
101 }
94 break; 102 break;
95 case AGGR_THREAD: 103 case AGGR_THREAD:
96 fprintf(config->output, "%*s-%*d%s", 104 fprintf(config->output, "%*s-%*d%s",
@@ -594,6 +602,41 @@ static void aggr_cb(struct perf_stat_config *config,
594 } 602 }
595} 603}
596 604
605static void print_counter_aggrdata(struct perf_stat_config *config,
606 struct perf_evsel *counter, int s,
607 char *prefix, bool metric_only,
608 bool *first)
609{
610 struct aggr_data ad;
611 FILE *output = config->output;
612 u64 ena, run, val;
613 int id, nr;
614 double uval;
615
616 ad.id = id = config->aggr_map->map[s];
617 ad.val = ad.ena = ad.run = 0;
618 ad.nr = 0;
619 if (!collect_data(config, counter, aggr_cb, &ad))
620 return;
621
622 nr = ad.nr;
623 ena = ad.ena;
624 run = ad.run;
625 val = ad.val;
626 if (*first && metric_only) {
627 *first = false;
628 aggr_printout(config, counter, id, nr);
629 }
630 if (prefix && !metric_only)
631 fprintf(output, "%s", prefix);
632
633 uval = val * counter->scale;
634 printout(config, id, nr, counter, uval, prefix,
635 run, ena, 1.0, &rt_stat);
636 if (!metric_only)
637 fputc('\n', output);
638}
639
597static void print_aggr(struct perf_stat_config *config, 640static void print_aggr(struct perf_stat_config *config,
598 struct perf_evlist *evlist, 641 struct perf_evlist *evlist,
599 char *prefix) 642 char *prefix)
@@ -601,9 +644,7 @@ static void print_aggr(struct perf_stat_config *config,
601 bool metric_only = config->metric_only; 644 bool metric_only = config->metric_only;
602 FILE *output = config->output; 645 FILE *output = config->output;
603 struct perf_evsel *counter; 646 struct perf_evsel *counter;
604 int s, id, nr; 647 int s;
605 double uval;
606 u64 ena, run, val;
607 bool first; 648 bool first;
608 649
609 if (!(config->aggr_map || config->aggr_get_id)) 650 if (!(config->aggr_map || config->aggr_get_id))
@@ -616,33 +657,14 @@ static void print_aggr(struct perf_stat_config *config,
616 * Without each counter has its own line. 657 * Without each counter has its own line.
617 */ 658 */
618 for (s = 0; s < config->aggr_map->nr; s++) { 659 for (s = 0; s < config->aggr_map->nr; s++) {
619 struct aggr_data ad;
620 if (prefix && metric_only) 660 if (prefix && metric_only)
621 fprintf(output, "%s", prefix); 661 fprintf(output, "%s", prefix);
622 662
623 ad.id = id = config->aggr_map->map[s];
624 first = true; 663 first = true;
625 evlist__for_each_entry(evlist, counter) { 664 evlist__for_each_entry(evlist, counter) {
626 ad.val = ad.ena = ad.run = 0; 665 print_counter_aggrdata(config, counter, s,
627 ad.nr = 0; 666 prefix, metric_only,
628 if (!collect_data(config, counter, aggr_cb, &ad)) 667 &first);
629 continue;
630 nr = ad.nr;
631 ena = ad.ena;
632 run = ad.run;
633 val = ad.val;
634 if (first && metric_only) {
635 first = false;
636 aggr_printout(config, counter, id, nr);
637 }
638 if (prefix && !metric_only)
639 fprintf(output, "%s", prefix);
640
641 uval = val * counter->scale;
642 printout(config, id, nr, counter, uval, prefix,
643 run, ena, 1.0, &rt_stat);
644 if (!metric_only)
645 fputc('\n', output);
646 } 668 }
647 if (metric_only) 669 if (metric_only)
648 fputc('\n', output); 670 fputc('\n', output);
@@ -1089,6 +1111,30 @@ static void print_footer(struct perf_stat_config *config)
1089 "the same PMU. Try reorganizing the group.\n"); 1111 "the same PMU. Try reorganizing the group.\n");
1090} 1112}
1091 1113
1114static void print_percore(struct perf_stat_config *config,
1115 struct perf_evsel *counter, char *prefix)
1116{
1117 bool metric_only = config->metric_only;
1118 FILE *output = config->output;
1119 int s;
1120 bool first = true;
1121
1122 if (!(config->aggr_map || config->aggr_get_id))
1123 return;
1124
1125 for (s = 0; s < config->aggr_map->nr; s++) {
1126 if (prefix && metric_only)
1127 fprintf(output, "%s", prefix);
1128
1129 print_counter_aggrdata(config, counter, s,
1130 prefix, metric_only,
1131 &first);
1132 }
1133
1134 if (metric_only)
1135 fputc('\n', output);
1136}
1137
1092void 1138void
1093perf_evlist__print_counters(struct perf_evlist *evlist, 1139perf_evlist__print_counters(struct perf_evlist *evlist,
1094 struct perf_stat_config *config, 1140 struct perf_stat_config *config,
@@ -1139,7 +1185,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
1139 print_no_aggr_metric(config, evlist, prefix); 1185 print_no_aggr_metric(config, evlist, prefix);
1140 else { 1186 else {
1141 evlist__for_each_entry(evlist, counter) { 1187 evlist__for_each_entry(evlist, counter) {
1142 print_counter(config, counter, prefix); 1188 if (counter->percore)
1189 print_percore(config, counter, prefix);
1190 else
1191 print_counter(config, counter, prefix);
1143 } 1192 }
1144 } 1193 }
1145 break; 1194 break;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 2856cc9d5a31..c3115d939b0b 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -277,9 +277,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
277 if (!evsel->snapshot) 277 if (!evsel->snapshot)
278 perf_evsel__compute_deltas(evsel, cpu, thread, count); 278 perf_evsel__compute_deltas(evsel, cpu, thread, count);
279 perf_counts_values__scale(count, config->scale, NULL); 279 perf_counts_values__scale(count, config->scale, NULL);
280 if (config->aggr_mode == AGGR_NONE) 280 if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
281 perf_stat__update_shadow_stats(evsel, count->val, cpu, 281 perf_stat__update_shadow_stats(evsel, count->val,
282 &rt_stat); 282 cpu, &rt_stat);
283 }
284
283 if (config->aggr_mode == AGGR_THREAD) { 285 if (config->aggr_mode == AGGR_THREAD) {
284 if (config->stats) 286 if (config->stats)
285 perf_stat__update_shadow_stats(evsel, 287 perf_stat__update_shadow_stats(evsel,
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 50678d318185..403045a2bbea 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -15,6 +15,7 @@
15#include "map.h" 15#include "map.h"
16#include "symbol.h" 16#include "symbol.h"
17#include "unwind.h" 17#include "unwind.h"
18#include "callchain.h"
18 19
19#include <api/fs/fs.h> 20#include <api/fs/fs.h>
20 21
@@ -327,7 +328,7 @@ static int thread__prepare_access(struct thread *thread)
327{ 328{
328 int err = 0; 329 int err = 0;
329 330
330 if (symbol_conf.use_callchain) 331 if (dwarf_callchain_users)
331 err = __thread__prepare_access(thread); 332 err = __thread__prepare_access(thread);
332 333
333 return err; 334 return err;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 250391672f9f..9096a6e3de59 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool,
28 28
29typedef int (*event_op2)(struct perf_session *session, union perf_event *event); 29typedef int (*event_op2)(struct perf_session *session, union perf_event *event);
30typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); 30typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event);
31typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data);
31 32
32typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, 33typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
33 struct ordered_events *oe); 34 struct ordered_events *oe);
@@ -72,6 +73,7 @@ struct perf_tool {
72 stat, 73 stat,
73 stat_round, 74 stat_round,
74 feature; 75 feature;
76 event_op4 compressed;
75 event_op3 auxtrace; 77 event_op3 auxtrace;
76 bool ordered_events; 78 bool ordered_events;
77 bool ordering_requires_timestamps; 79 bool ordering_requires_timestamps;
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index f3c666a84e4d..25e1406b1f8b 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -617,8 +617,6 @@ static unw_accessors_t accessors = {
617 617
618static int _unwind__prepare_access(struct thread *thread) 618static int _unwind__prepare_access(struct thread *thread)
619{ 619{
620 if (!dwarf_callchain_users)
621 return 0;
622 thread->addr_space = unw_create_addr_space(&accessors, 0); 620 thread->addr_space = unw_create_addr_space(&accessors, 0);
623 if (!thread->addr_space) { 621 if (!thread->addr_space) {
624 pr_err("unwind: Can't create unwind address space.\n"); 622 pr_err("unwind: Can't create unwind address space.\n");
@@ -631,15 +629,11 @@ static int _unwind__prepare_access(struct thread *thread)
631 629
632static void _unwind__flush_access(struct thread *thread) 630static void _unwind__flush_access(struct thread *thread)
633{ 631{
634 if (!dwarf_callchain_users)
635 return;
636 unw_flush_cache(thread->addr_space, 0, 0); 632 unw_flush_cache(thread->addr_space, 0, 0);
637} 633}
638 634
639static void _unwind__finish_access(struct thread *thread) 635static void _unwind__finish_access(struct thread *thread)
640{ 636{
641 if (!dwarf_callchain_users)
642 return;
643 unw_destroy_addr_space(thread->addr_space); 637 unw_destroy_addr_space(thread->addr_space);
644} 638}
645 639
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 9778b3133b77..c0811977d7d5 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -5,6 +5,7 @@
5#include "session.h" 5#include "session.h"
6#include "debug.h" 6#include "debug.h"
7#include "env.h" 7#include "env.h"
8#include "callchain.h"
8 9
9struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; 10struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
10struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; 11struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
@@ -24,6 +25,9 @@ int unwind__prepare_access(struct thread *thread, struct map *map,
24 struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops; 25 struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
25 int err; 26 int err;
26 27
28 if (!dwarf_callchain_users)
29 return 0;
30
27 if (thread->addr_space) { 31 if (thread->addr_space) {
28 pr_debug("unwind: thread map already set, dso=%s\n", 32 pr_debug("unwind: thread map already set, dso=%s\n",
29 map->dso->name); 33 map->dso->name);
@@ -65,12 +69,18 @@ out_register:
65 69
66void unwind__flush_access(struct thread *thread) 70void unwind__flush_access(struct thread *thread)
67{ 71{
72 if (!dwarf_callchain_users)
73 return;
74
68 if (thread->unwind_libunwind_ops) 75 if (thread->unwind_libunwind_ops)
69 thread->unwind_libunwind_ops->flush_access(thread); 76 thread->unwind_libunwind_ops->flush_access(thread);
70} 77}
71 78
72void unwind__finish_access(struct thread *thread) 79void unwind__finish_access(struct thread *thread)
73{ 80{
81 if (!dwarf_callchain_users)
82 return;
83
74 if (thread->unwind_libunwind_ops) 84 if (thread->unwind_libunwind_ops)
75 thread->unwind_libunwind_ops->finish_access(thread); 85 thread->unwind_libunwind_ops->finish_access(thread);
76} 86}
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000000..23bdb9884576
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,111 @@
1// SPDX-License-Identifier: GPL-2.0
2
3#include <string.h>
4
5#include "util/compress.h"
6#include "util/debug.h"
7
8int zstd_init(struct zstd_data *data, int level)
9{
10 size_t ret;
11
12 data->dstream = ZSTD_createDStream();
13 if (data->dstream == NULL) {
14 pr_err("Couldn't create decompression stream.\n");
15 return -1;
16 }
17
18 ret = ZSTD_initDStream(data->dstream);
19 if (ZSTD_isError(ret)) {
20 pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
21 return -1;
22 }
23
24 if (!level)
25 return 0;
26
27 data->cstream = ZSTD_createCStream();
28 if (data->cstream == NULL) {
29 pr_err("Couldn't create compression stream.\n");
30 return -1;
31 }
32
33 ret = ZSTD_initCStream(data->cstream, level);
34 if (ZSTD_isError(ret)) {
35 pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
36 return -1;
37 }
38
39 return 0;
40}
41
42int zstd_fini(struct zstd_data *data)
43{
44 if (data->dstream) {
45 ZSTD_freeDStream(data->dstream);
46 data->dstream = NULL;
47 }
48
49 if (data->cstream) {
50 ZSTD_freeCStream(data->cstream);
51 data->cstream = NULL;
52 }
53
54 return 0;
55}
56
57size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
58 void *src, size_t src_size, size_t max_record_size,
59 size_t process_header(void *record, size_t increment))
60{
61 size_t ret, size, compressed = 0;
62 ZSTD_inBuffer input = { src, src_size, 0 };
63 ZSTD_outBuffer output;
64 void *record;
65
66 while (input.pos < input.size) {
67 record = dst;
68 size = process_header(record, 0);
69 compressed += size;
70 dst += size;
71 dst_size -= size;
72 output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
73 max_record_size : dst_size, 0 };
74 ret = ZSTD_compressStream(data->cstream, &output, &input);
75 ZSTD_flushStream(data->cstream, &output);
76 if (ZSTD_isError(ret)) {
77 pr_err("failed to compress %ld bytes: %s\n",
78 (long)src_size, ZSTD_getErrorName(ret));
79 memcpy(dst, src, src_size);
80 return src_size;
81 }
82 size = output.pos;
83 size = process_header(record, size);
84 compressed += size;
85 dst += size;
86 dst_size -= size;
87 }
88
89 return compressed;
90}
91
92size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
93 void *dst, size_t dst_size)
94{
95 size_t ret;
96 ZSTD_inBuffer input = { src, src_size, 0 };
97 ZSTD_outBuffer output = { dst, dst_size, 0 };
98
99 while (input.pos < input.size) {
100 ret = ZSTD_decompressStream(data->dstream, &output, &input);
101 if (ZSTD_isError(ret)) {
102 pr_err("failed to decompress (B): %ld -> %ld : %s\n",
103 src_size, output.size, ZSTD_getErrorName(ret));
104 break;
105 }
106 output.dst = dst + output.pos;
107 output.size = dst_size - output.pos;
108 }
109
110 return output.pos;
111}