aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-02-17 05:39:47 -0500
committerIngo Molnar <mingo@kernel.org>2018-02-17 05:39:47 -0500
commit11737ca9e3b9d84448fa405a80980aa9957bcee8 (patch)
tree33b79a0c5a5c96344fe6f72e3a37104385a90ea7
parent7057bb975dab827997e0ca9dd92cafef0856b0cc (diff)
parent21316ac6803d4a1aadd74b896db8d60a92cd1140 (diff)
Merge tag 'perf-core-for-mingo-4.17-20180216' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: - Fix wrong jump arrow in systems with branch records with cycles, i.e. Intel's >= Skylake (Jin Yao) - Fix 'perf record --per-thread' problem introduced when implementing 'perf stat --per-thread' (Jin Yao) - Use arch__compare_symbol_names() to fix 'perf test vmlinux', that was using strcmp(symbol names) while the dso routines doing symbol lookups used the arch overridable one, making this test fail in architectures that overrode that function with something other than strcmp() (Jiri Olsa) - Add 'perf script --show-round-event' to display PERF_RECORD_FINISHED_ROUND entries (Jiri Olsa) - Fix dwarf unwind for stripped binaries in 'perf test' (Jiri Olsa) - Use ordered_events for 'perf report --tasks', otherwise we may get artifacts when PERF_RECORD_FORK gets processed before PERF_RECORD_COMM (when they got recorded in different CPUs) (Jiri Olsa) - Add support to display group output for non group events, i.e. now when one uses 'perf report --group' on a perf.data file recorded without explicitly grouping events with {} (e.g. "perf record -e '{cycles,instructions}'"), get the same output that would produce, i.e. see all those non-grouped events in multiple columns, at the same time (Jiri Olsa) - Skip non-address kallsyms entries, e.g. 
'(null)' for !root (Jiri Olsa) - Kernel maps fixes wrt perf.data(report) versus live system (top) (Jiri Olsa) - Fix memory corruption when using 'perf record -j call -g -a <application>' followed by 'perf report --branch-history' (Jiri Olsa) - ARM CoreSight fixes (Mathieu Poirier) - Add inject capability for CoreSight Traces (Robert Walker) - Update documentation for use of 'perf' + ARM CoreSight (Robert Walker) - Man pages fixes (Sangwon Hong, Jaecheol Shin) - Fix some 'perf test' cases on s/390 and x86_64 (some backtraces changed with a glibc update) (Thomas Richter) - Add detailed CPUID info in the 'perf.data' headers for s/390 to then use it in 'perf annotate' (Thomas Richter) - Add '--interval-count N' to 'perf stat', to use with -I, i.e. 'perf stat -I 1000 --interval-count 2' will show stats every 1000ms, two times (yuzhoujian) - Add 'perf stat --timeout Nms', that will run for that many milliseconds and then stop, printing the counters (yuzhoujian) - Fix description for 'perf report --mem-mode' (Andi Kleen) - Use a wildcard to remove the vfs_getname probe in the 'perf test' shell based test cases (Arnaldo Carvalho de Melo) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--Documentation/trace/coresight.txt51
-rw-r--r--tools/arch/powerpc/include/uapi/asm/unistd.h402
-rw-r--r--tools/lib/api/fs/fs.c44
-rw-r--r--tools/lib/api/fs/fs.h2
-rw-r--r--tools/lib/symbol/kallsyms.c4
-rw-r--r--tools/perf/Documentation/perf-annotate.txt6
-rw-r--r--tools/perf/Documentation/perf-kmem.txt6
-rw-r--r--tools/perf/Documentation/perf-mem.txt4
-rw-r--r--tools/perf/Documentation/perf-report.txt5
-rw-r--r--tools/perf/Documentation/perf-script.txt3
-rw-r--r--tools/perf/Documentation/perf-stat.txt10
-rw-r--r--tools/perf/Makefile.config2
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c2
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c51
-rw-r--r--tools/perf/arch/powerpc/Makefile25
-rwxr-xr-xtools/perf/arch/powerpc/entry/syscalls/mksyscalltbl37
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c27
-rw-r--r--tools/perf/arch/s390/util/header.c148
-rw-r--r--tools/perf/builtin-record.c2
-rw-r--r--tools/perf/builtin-report.c7
-rw-r--r--tools/perf/builtin-script.c17
-rw-r--r--tools/perf/builtin-stat.c53
-rwxr-xr-xtools/perf/check-headers.sh1
-rw-r--r--tools/perf/tests/code-reading.c33
-rw-r--r--tools/perf/tests/dwarf-unwind.c46
-rw-r--r--tools/perf/tests/shell/lib/probe_vfs_getname.sh2
-rwxr-xr-xtools/perf/tests/shell/trace+probe_libc_inet_pton.sh6
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c4
-rw-r--r--tools/perf/ui/browsers/annotate.c9
-rw-r--r--tools/perf/util/build-id.c10
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c74
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.h2
-rw-r--r--tools/perf/util/cs-etm.c478
-rw-r--r--tools/perf/util/event.c16
-rw-r--r--tools/perf/util/evlist.c21
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.c4
-rw-r--r--tools/perf/util/hist.h1
-rw-r--r--tools/perf/util/machine.c145
-rw-r--r--tools/perf/util/machine.h6
-rw-r--r--tools/perf/util/pmu.c47
-rw-r--r--tools/perf/util/sort.c7
-rw-r--r--tools/perf/util/stat.h2
-rw-r--r--tools/perf/util/symbol.c13
-rw-r--r--tools/perf/util/syscalltbl.c8
-rw-r--r--tools/perf/util/thread_map.c4
-rw-r--r--tools/perf/util/thread_map.h2
47 files changed, 1577 insertions, 273 deletions
diff --git a/Documentation/trace/coresight.txt b/Documentation/trace/coresight.txt
index a33c88cd5d1d..6f0120c3a4f1 100644
--- a/Documentation/trace/coresight.txt
+++ b/Documentation/trace/coresight.txt
@@ -330,3 +330,54 @@ Details on how to use the generic STM API can be found here [2].
330 330
331[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm 331[1]. Documentation/ABI/testing/sysfs-bus-coresight-devices-stm
332[2]. Documentation/trace/stm.txt 332[2]. Documentation/trace/stm.txt
333
334
335Using perf tools
336----------------
337
338perf can be used to record and analyze trace of programs.
339
340Execution can be recorded using 'perf record' with the cs_etm event,
341specifying the name of the sink to record to, e.g:
342
343 perf record -e cs_etm/@20070000.etr/u --per-thread
344
345The 'perf report' and 'perf script' commands can be used to analyze execution,
346synthesizing instruction and branch events from the instruction trace.
347'perf inject' can be used to replace the trace data with the synthesized events.
348The --itrace option controls the type and frequency of synthesized events
349(see perf documentation).
350
351Note that only 64-bit programs are currently supported - further work is
352required to support instruction decode of 32-bit Arm programs.
353
354
355Generating coverage files for Feedback Directed Optimization: AutoFDO
356---------------------------------------------------------------------
357
358'perf inject' accepts the --itrace option in which case tracing data is
359removed and replaced with the synthesized events. e.g.
360
361 perf inject --itrace --strip -i perf.data -o perf.data.new
362
363Below is an example of using ARM ETM for autoFDO. It requires autofdo
364(https://github.com/google/autofdo) and gcc version 5. The bubble
365sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial).
366
367 $ gcc-5 -O3 sort.c -o sort
368 $ taskset -c 2 ./sort
369 Bubble sorting array of 30000 elements
370 5910 ms
371
372 $ perf record -e cs_etm/@20070000.etr/u --per-thread taskset -c 2 ./sort
373 Bubble sorting array of 30000 elements
374 12543 ms
375 [ perf record: Woken up 35 times to write data ]
376 [ perf record: Captured and wrote 69.640 MB perf.data ]
377
378 $ perf inject -i perf.data -o inj.data --itrace=il64 --strip
379 $ create_gcov --binary=./sort --profile=inj.data --gcov=sort.gcov -gcov_version=1
380 $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
381 $ taskset -c 2 ./sort_autofdo
382 Bubble sorting array of 30000 elements
383 5806 ms
diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
new file mode 100644
index 000000000000..389c36fd8299
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/unistd.h
@@ -0,0 +1,402 @@
1/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
2/*
3 * This file contains the system call numbers.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version
8 * 2 of the License, or (at your option) any later version.
9 */
10#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
11#define _UAPI_ASM_POWERPC_UNISTD_H_
12
13
14#define __NR_restart_syscall 0
15#define __NR_exit 1
16#define __NR_fork 2
17#define __NR_read 3
18#define __NR_write 4
19#define __NR_open 5
20#define __NR_close 6
21#define __NR_waitpid 7
22#define __NR_creat 8
23#define __NR_link 9
24#define __NR_unlink 10
25#define __NR_execve 11
26#define __NR_chdir 12
27#define __NR_time 13
28#define __NR_mknod 14
29#define __NR_chmod 15
30#define __NR_lchown 16
31#define __NR_break 17
32#define __NR_oldstat 18
33#define __NR_lseek 19
34#define __NR_getpid 20
35#define __NR_mount 21
36#define __NR_umount 22
37#define __NR_setuid 23
38#define __NR_getuid 24
39#define __NR_stime 25
40#define __NR_ptrace 26
41#define __NR_alarm 27
42#define __NR_oldfstat 28
43#define __NR_pause 29
44#define __NR_utime 30
45#define __NR_stty 31
46#define __NR_gtty 32
47#define __NR_access 33
48#define __NR_nice 34
49#define __NR_ftime 35
50#define __NR_sync 36
51#define __NR_kill 37
52#define __NR_rename 38
53#define __NR_mkdir 39
54#define __NR_rmdir 40
55#define __NR_dup 41
56#define __NR_pipe 42
57#define __NR_times 43
58#define __NR_prof 44
59#define __NR_brk 45
60#define __NR_setgid 46
61#define __NR_getgid 47
62#define __NR_signal 48
63#define __NR_geteuid 49
64#define __NR_getegid 50
65#define __NR_acct 51
66#define __NR_umount2 52
67#define __NR_lock 53
68#define __NR_ioctl 54
69#define __NR_fcntl 55
70#define __NR_mpx 56
71#define __NR_setpgid 57
72#define __NR_ulimit 58
73#define __NR_oldolduname 59
74#define __NR_umask 60
75#define __NR_chroot 61
76#define __NR_ustat 62
77#define __NR_dup2 63
78#define __NR_getppid 64
79#define __NR_getpgrp 65
80#define __NR_setsid 66
81#define __NR_sigaction 67
82#define __NR_sgetmask 68
83#define __NR_ssetmask 69
84#define __NR_setreuid 70
85#define __NR_setregid 71
86#define __NR_sigsuspend 72
87#define __NR_sigpending 73
88#define __NR_sethostname 74
89#define __NR_setrlimit 75
90#define __NR_getrlimit 76
91#define __NR_getrusage 77
92#define __NR_gettimeofday 78
93#define __NR_settimeofday 79
94#define __NR_getgroups 80
95#define __NR_setgroups 81
96#define __NR_select 82
97#define __NR_symlink 83
98#define __NR_oldlstat 84
99#define __NR_readlink 85
100#define __NR_uselib 86
101#define __NR_swapon 87
102#define __NR_reboot 88
103#define __NR_readdir 89
104#define __NR_mmap 90
105#define __NR_munmap 91
106#define __NR_truncate 92
107#define __NR_ftruncate 93
108#define __NR_fchmod 94
109#define __NR_fchown 95
110#define __NR_getpriority 96
111#define __NR_setpriority 97
112#define __NR_profil 98
113#define __NR_statfs 99
114#define __NR_fstatfs 100
115#define __NR_ioperm 101
116#define __NR_socketcall 102
117#define __NR_syslog 103
118#define __NR_setitimer 104
119#define __NR_getitimer 105
120#define __NR_stat 106
121#define __NR_lstat 107
122#define __NR_fstat 108
123#define __NR_olduname 109
124#define __NR_iopl 110
125#define __NR_vhangup 111
126#define __NR_idle 112
127#define __NR_vm86 113
128#define __NR_wait4 114
129#define __NR_swapoff 115
130#define __NR_sysinfo 116
131#define __NR_ipc 117
132#define __NR_fsync 118
133#define __NR_sigreturn 119
134#define __NR_clone 120
135#define __NR_setdomainname 121
136#define __NR_uname 122
137#define __NR_modify_ldt 123
138#define __NR_adjtimex 124
139#define __NR_mprotect 125
140#define __NR_sigprocmask 126
141#define __NR_create_module 127
142#define __NR_init_module 128
143#define __NR_delete_module 129
144#define __NR_get_kernel_syms 130
145#define __NR_quotactl 131
146#define __NR_getpgid 132
147#define __NR_fchdir 133
148#define __NR_bdflush 134
149#define __NR_sysfs 135
150#define __NR_personality 136
151#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
152#define __NR_setfsuid 138
153#define __NR_setfsgid 139
154#define __NR__llseek 140
155#define __NR_getdents 141
156#define __NR__newselect 142
157#define __NR_flock 143
158#define __NR_msync 144
159#define __NR_readv 145
160#define __NR_writev 146
161#define __NR_getsid 147
162#define __NR_fdatasync 148
163#define __NR__sysctl 149
164#define __NR_mlock 150
165#define __NR_munlock 151
166#define __NR_mlockall 152
167#define __NR_munlockall 153
168#define __NR_sched_setparam 154
169#define __NR_sched_getparam 155
170#define __NR_sched_setscheduler 156
171#define __NR_sched_getscheduler 157
172#define __NR_sched_yield 158
173#define __NR_sched_get_priority_max 159
174#define __NR_sched_get_priority_min 160
175#define __NR_sched_rr_get_interval 161
176#define __NR_nanosleep 162
177#define __NR_mremap 163
178#define __NR_setresuid 164
179#define __NR_getresuid 165
180#define __NR_query_module 166
181#define __NR_poll 167
182#define __NR_nfsservctl 168
183#define __NR_setresgid 169
184#define __NR_getresgid 170
185#define __NR_prctl 171
186#define __NR_rt_sigreturn 172
187#define __NR_rt_sigaction 173
188#define __NR_rt_sigprocmask 174
189#define __NR_rt_sigpending 175
190#define __NR_rt_sigtimedwait 176
191#define __NR_rt_sigqueueinfo 177
192#define __NR_rt_sigsuspend 178
193#define __NR_pread64 179
194#define __NR_pwrite64 180
195#define __NR_chown 181
196#define __NR_getcwd 182
197#define __NR_capget 183
198#define __NR_capset 184
199#define __NR_sigaltstack 185
200#define __NR_sendfile 186
201#define __NR_getpmsg 187 /* some people actually want streams */
202#define __NR_putpmsg 188 /* some people actually want streams */
203#define __NR_vfork 189
204#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */
205#define __NR_readahead 191
206#ifndef __powerpc64__ /* these are 32-bit only */
207#define __NR_mmap2 192
208#define __NR_truncate64 193
209#define __NR_ftruncate64 194
210#define __NR_stat64 195
211#define __NR_lstat64 196
212#define __NR_fstat64 197
213#endif
214#define __NR_pciconfig_read 198
215#define __NR_pciconfig_write 199
216#define __NR_pciconfig_iobase 200
217#define __NR_multiplexer 201
218#define __NR_getdents64 202
219#define __NR_pivot_root 203
220#ifndef __powerpc64__
221#define __NR_fcntl64 204
222#endif
223#define __NR_madvise 205
224#define __NR_mincore 206
225#define __NR_gettid 207
226#define __NR_tkill 208
227#define __NR_setxattr 209
228#define __NR_lsetxattr 210
229#define __NR_fsetxattr 211
230#define __NR_getxattr 212
231#define __NR_lgetxattr 213
232#define __NR_fgetxattr 214
233#define __NR_listxattr 215
234#define __NR_llistxattr 216
235#define __NR_flistxattr 217
236#define __NR_removexattr 218
237#define __NR_lremovexattr 219
238#define __NR_fremovexattr 220
239#define __NR_futex 221
240#define __NR_sched_setaffinity 222
241#define __NR_sched_getaffinity 223
242/* 224 currently unused */
243#define __NR_tuxcall 225
244#ifndef __powerpc64__
245#define __NR_sendfile64 226
246#endif
247#define __NR_io_setup 227
248#define __NR_io_destroy 228
249#define __NR_io_getevents 229
250#define __NR_io_submit 230
251#define __NR_io_cancel 231
252#define __NR_set_tid_address 232
253#define __NR_fadvise64 233
254#define __NR_exit_group 234
255#define __NR_lookup_dcookie 235
256#define __NR_epoll_create 236
257#define __NR_epoll_ctl 237
258#define __NR_epoll_wait 238
259#define __NR_remap_file_pages 239
260#define __NR_timer_create 240
261#define __NR_timer_settime 241
262#define __NR_timer_gettime 242
263#define __NR_timer_getoverrun 243
264#define __NR_timer_delete 244
265#define __NR_clock_settime 245
266#define __NR_clock_gettime 246
267#define __NR_clock_getres 247
268#define __NR_clock_nanosleep 248
269#define __NR_swapcontext 249
270#define __NR_tgkill 250
271#define __NR_utimes 251
272#define __NR_statfs64 252
273#define __NR_fstatfs64 253
274#ifndef __powerpc64__
275#define __NR_fadvise64_64 254
276#endif
277#define __NR_rtas 255
278#define __NR_sys_debug_setcontext 256
279/* Number 257 is reserved for vserver */
280#define __NR_migrate_pages 258
281#define __NR_mbind 259
282#define __NR_get_mempolicy 260
283#define __NR_set_mempolicy 261
284#define __NR_mq_open 262
285#define __NR_mq_unlink 263
286#define __NR_mq_timedsend 264
287#define __NR_mq_timedreceive 265
288#define __NR_mq_notify 266
289#define __NR_mq_getsetattr 267
290#define __NR_kexec_load 268
291#define __NR_add_key 269
292#define __NR_request_key 270
293#define __NR_keyctl 271
294#define __NR_waitid 272
295#define __NR_ioprio_set 273
296#define __NR_ioprio_get 274
297#define __NR_inotify_init 275
298#define __NR_inotify_add_watch 276
299#define __NR_inotify_rm_watch 277
300#define __NR_spu_run 278
301#define __NR_spu_create 279
302#define __NR_pselect6 280
303#define __NR_ppoll 281
304#define __NR_unshare 282
305#define __NR_splice 283
306#define __NR_tee 284
307#define __NR_vmsplice 285
308#define __NR_openat 286
309#define __NR_mkdirat 287
310#define __NR_mknodat 288
311#define __NR_fchownat 289
312#define __NR_futimesat 290
313#ifdef __powerpc64__
314#define __NR_newfstatat 291
315#else
316#define __NR_fstatat64 291
317#endif
318#define __NR_unlinkat 292
319#define __NR_renameat 293
320#define __NR_linkat 294
321#define __NR_symlinkat 295
322#define __NR_readlinkat 296
323#define __NR_fchmodat 297
324#define __NR_faccessat 298
325#define __NR_get_robust_list 299
326#define __NR_set_robust_list 300
327#define __NR_move_pages 301
328#define __NR_getcpu 302
329#define __NR_epoll_pwait 303
330#define __NR_utimensat 304
331#define __NR_signalfd 305
332#define __NR_timerfd_create 306
333#define __NR_eventfd 307
334#define __NR_sync_file_range2 308
335#define __NR_fallocate 309
336#define __NR_subpage_prot 310
337#define __NR_timerfd_settime 311
338#define __NR_timerfd_gettime 312
339#define __NR_signalfd4 313
340#define __NR_eventfd2 314
341#define __NR_epoll_create1 315
342#define __NR_dup3 316
343#define __NR_pipe2 317
344#define __NR_inotify_init1 318
345#define __NR_perf_event_open 319
346#define __NR_preadv 320
347#define __NR_pwritev 321
348#define __NR_rt_tgsigqueueinfo 322
349#define __NR_fanotify_init 323
350#define __NR_fanotify_mark 324
351#define __NR_prlimit64 325
352#define __NR_socket 326
353#define __NR_bind 327
354#define __NR_connect 328
355#define __NR_listen 329
356#define __NR_accept 330
357#define __NR_getsockname 331
358#define __NR_getpeername 332
359#define __NR_socketpair 333
360#define __NR_send 334
361#define __NR_sendto 335
362#define __NR_recv 336
363#define __NR_recvfrom 337
364#define __NR_shutdown 338
365#define __NR_setsockopt 339
366#define __NR_getsockopt 340
367#define __NR_sendmsg 341
368#define __NR_recvmsg 342
369#define __NR_recvmmsg 343
370#define __NR_accept4 344
371#define __NR_name_to_handle_at 345
372#define __NR_open_by_handle_at 346
373#define __NR_clock_adjtime 347
374#define __NR_syncfs 348
375#define __NR_sendmmsg 349
376#define __NR_setns 350
377#define __NR_process_vm_readv 351
378#define __NR_process_vm_writev 352
379#define __NR_finit_module 353
380#define __NR_kcmp 354
381#define __NR_sched_setattr 355
382#define __NR_sched_getattr 356
383#define __NR_renameat2 357
384#define __NR_seccomp 358
385#define __NR_getrandom 359
386#define __NR_memfd_create 360
387#define __NR_bpf 361
388#define __NR_execveat 362
389#define __NR_switch_endian 363
390#define __NR_userfaultfd 364
391#define __NR_membarrier 365
392#define __NR_mlock2 378
393#define __NR_copy_file_range 379
394#define __NR_preadv2 380
395#define __NR_pwritev2 381
396#define __NR_kexec_file_load 382
397#define __NR_statx 383
398#define __NR_pkey_alloc 384
399#define __NR_pkey_free 385
400#define __NR_pkey_mprotect 386
401
402#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index b24afc0e6e81..6a12bbf39f7b 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -315,12 +315,8 @@ int filename__read_int(const char *filename, int *value)
315 return err; 315 return err;
316} 316}
317 317
318/* 318static int filename__read_ull_base(const char *filename,
319 * Parses @value out of @filename with strtoull. 319 unsigned long long *value, int base)
320 * By using 0 for base, the strtoull detects the
321 * base automatically (see man strtoull).
322 */
323int filename__read_ull(const char *filename, unsigned long long *value)
324{ 320{
325 char line[64]; 321 char line[64];
326 int fd = open(filename, O_RDONLY), err = -1; 322 int fd = open(filename, O_RDONLY), err = -1;
@@ -329,7 +325,7 @@ int filename__read_ull(const char *filename, unsigned long long *value)
329 return -1; 325 return -1;
330 326
331 if (read(fd, line, sizeof(line)) > 0) { 327 if (read(fd, line, sizeof(line)) > 0) {
332 *value = strtoull(line, NULL, 0); 328 *value = strtoull(line, NULL, base);
333 if (*value != ULLONG_MAX) 329 if (*value != ULLONG_MAX)
334 err = 0; 330 err = 0;
335 } 331 }
@@ -338,6 +334,25 @@ int filename__read_ull(const char *filename, unsigned long long *value)
338 return err; 334 return err;
339} 335}
340 336
337/*
338 * Parses @value out of @filename with strtoull.
339 * By using 16 for base to treat the number as hex.
340 */
341int filename__read_xll(const char *filename, unsigned long long *value)
342{
343 return filename__read_ull_base(filename, value, 16);
344}
345
346/*
347 * Parses @value out of @filename with strtoull.
348 * By using 0 for base, the strtoull detects the
349 * base automatically (see man strtoull).
350 */
351int filename__read_ull(const char *filename, unsigned long long *value)
352{
353 return filename__read_ull_base(filename, value, 0);
354}
355
341#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */ 356#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */
342 357
343int filename__read_str(const char *filename, char **buf, size_t *sizep) 358int filename__read_str(const char *filename, char **buf, size_t *sizep)
@@ -417,7 +432,8 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep)
417 return filename__read_str(path, buf, sizep); 432 return filename__read_str(path, buf, sizep);
418} 433}
419 434
420int sysfs__read_ull(const char *entry, unsigned long long *value) 435static int sysfs__read_ull_base(const char *entry,
436 unsigned long long *value, int base)
421{ 437{
422 char path[PATH_MAX]; 438 char path[PATH_MAX];
423 const char *sysfs = sysfs__mountpoint(); 439 const char *sysfs = sysfs__mountpoint();
@@ -427,7 +443,17 @@ int sysfs__read_ull(const char *entry, unsigned long long *value)
427 443
428 snprintf(path, sizeof(path), "%s/%s", sysfs, entry); 444 snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
429 445
430 return filename__read_ull(path, value); 446 return filename__read_ull_base(path, value, base);
447}
448
449int sysfs__read_xll(const char *entry, unsigned long long *value)
450{
451 return sysfs__read_ull_base(entry, value, 16);
452}
453
454int sysfs__read_ull(const char *entry, unsigned long long *value)
455{
456 return sysfs__read_ull_base(entry, value, 0);
431} 457}
432 458
433int sysfs__read_int(const char *entry, int *value) 459int sysfs__read_int(const char *entry, int *value)
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index dda49deefb52..92d03b8396b1 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -30,6 +30,7 @@ FS(bpf_fs)
30 30
31int filename__read_int(const char *filename, int *value); 31int filename__read_int(const char *filename, int *value);
32int filename__read_ull(const char *filename, unsigned long long *value); 32int filename__read_ull(const char *filename, unsigned long long *value);
33int filename__read_xll(const char *filename, unsigned long long *value);
33int filename__read_str(const char *filename, char **buf, size_t *sizep); 34int filename__read_str(const char *filename, char **buf, size_t *sizep);
34 35
35int filename__write_int(const char *filename, int value); 36int filename__write_int(const char *filename, int value);
@@ -39,6 +40,7 @@ int procfs__read_str(const char *entry, char **buf, size_t *sizep);
39int sysctl__read_int(const char *sysctl, int *value); 40int sysctl__read_int(const char *sysctl, int *value);
40int sysfs__read_int(const char *entry, int *value); 41int sysfs__read_int(const char *entry, int *value);
41int sysfs__read_ull(const char *entry, unsigned long long *value); 42int sysfs__read_ull(const char *entry, unsigned long long *value);
43int sysfs__read_xll(const char *entry, unsigned long long *value);
42int sysfs__read_str(const char *entry, char **buf, size_t *sizep); 44int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
43int sysfs__read_bool(const char *entry, bool *value); 45int sysfs__read_bool(const char *entry, bool *value);
44 46
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 914cb8e3d40b..689b6a130dd7 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -38,6 +38,10 @@ int kallsyms__parse(const char *filename, void *arg,
38 38
39 len = hex2u64(line, &start); 39 len = hex2u64(line, &start);
40 40
41 /* Skip the line if we failed to parse the address. */
42 if (!len)
43 continue;
44
41 len++; 45 len++;
42 if (len + 2 >= line_len) 46 if (len + 2 >= line_len)
43 continue; 47 continue;
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index c635eab6af54..292809c3c0ca 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -21,7 +21,7 @@ If there is no debug info in the object, then annotated assembly is displayed.
21OPTIONS 21OPTIONS
22------- 22-------
23-i:: 23-i::
24--input=:: 24--input=<file>::
25 Input file name. (default: perf.data unless stdin is a fifo) 25 Input file name. (default: perf.data unless stdin is a fifo)
26 26
27-d:: 27-d::
@@ -69,7 +69,7 @@ OPTIONS
69 69
70--stdio:: Use the stdio interface. 70--stdio:: Use the stdio interface.
71 71
72--stdio-color:: 72--stdio-color=<mode>::
73 'always', 'never' or 'auto', allowing configuring color output 73 'always', 'never' or 'auto', allowing configuring color output
74 via the command line, in addition to via "color.ui" .perfconfig. 74 via the command line, in addition to via "color.ui" .perfconfig.
75 Use '--stdio-color always' to generate color even when redirecting 75 Use '--stdio-color always' to generate color even when redirecting
@@ -84,7 +84,7 @@ OPTIONS
84--gtk:: Use the GTK interface. 84--gtk:: Use the GTK interface.
85 85
86-C:: 86-C::
87--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can 87--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can
88 be provided as a comma-separated list with no space: 0,1. Ranges of 88 be provided as a comma-separated list with no space: 0,1. Ranges of
89 CPUs are specified with -: 0-2. Default is to report samples on all 89 CPUs are specified with -: 0-2. Default is to report samples on all
90 CPUs. 90 CPUs.
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 479fc3261a50..85b8ac695c87 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -25,6 +25,10 @@ OPTIONS
25--input=<file>:: 25--input=<file>::
26 Select the input file (default: perf.data unless stdin is a fifo) 26 Select the input file (default: perf.data unless stdin is a fifo)
27 27
28-f::
29--force::
30 Don't do ownership validation
31
28-v:: 32-v::
29--verbose:: 33--verbose::
30 Be more verbose. (show symbol address, etc) 34 Be more verbose. (show symbol address, etc)
@@ -61,7 +65,7 @@ OPTIONS
61 default, but this option shows live (currently allocated) pages 65 default, but this option shows live (currently allocated) pages
62 instead. (This option works with --page option only) 66 instead. (This option works with --page option only)
63 67
64--time:: 68--time=<start>,<stop>::
65 Only analyze samples within given time window: <start>,<stop>. Times 69 Only analyze samples within given time window: <start>,<stop>. Times
66 have the format seconds.microseconds. If start is not given (i.e., time 70 have the format seconds.microseconds. If start is not given (i.e., time
67 string is ',x.y') then analysis starts at the beginning of the file. If 71 string is ',x.y') then analysis starts at the beginning of the file. If
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 4be08a1e3f8d..b0211410969b 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -28,6 +28,10 @@ OPTIONS
28<command>...:: 28<command>...::
29 Any command you can specify in a shell. 29 Any command you can specify in a shell.
30 30
31-f::
32--force::
33 Don't do ownership validation
34
31-t:: 35-t::
32--type=:: 36--type=::
33 Select the memory operation type: load or store (default: load,store) 37 Select the memory operation type: load or store (default: load,store)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 907e505b6309..cba16d8a970e 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -354,7 +354,8 @@ OPTIONS
354 Path to objdump binary. 354 Path to objdump binary.
355 355
356--group:: 356--group::
357 Show event group information together. 357 Show event group information together. It forces group output also
358 if there are no groups defined in data file.
358 359
359--demangle:: 360--demangle::
360 Demangle symbol names to human readable form. It's enabled by default, 361 Demangle symbol names to human readable form. It's enabled by default,
@@ -367,7 +368,7 @@ OPTIONS
367 Use the data addresses of samples in addition to instruction addresses 368 Use the data addresses of samples in addition to instruction addresses
368 to build the histograms. To generate meaningful output, the perf.data 369 to build the histograms. To generate meaningful output, the perf.data
369 file must have been obtained using perf record -d -W and using a 370 file must have been obtained using perf record -d -W and using a
370 special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See 371 special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See
371 'perf mem' for simpler access. 372 'perf mem' for simpler access.
372 373
373--percent-limit:: 374--percent-limit::
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 7730c1d2b5d3..36ec0257f8d3 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -303,6 +303,9 @@ OPTIONS
303--show-lost-events 303--show-lost-events
304 Display lost events i.e. events of type PERF_RECORD_LOST. 304 Display lost events i.e. events of type PERF_RECORD_LOST.
305 305
306--show-round-events
307 Display finished round events i.e. events of type PERF_RECORD_FINISHED_ROUND.
308
306--demangle:: 309--demangle::
307 Demangle symbol names to human readable form. It's enabled by default, 310 Demangle symbol names to human readable form. It's enabled by default,
308 disable with --no-demangle. 311 disable with --no-demangle.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 823fce7674bb..2bbe79a50d3c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -146,6 +146,16 @@ Print count deltas every N milliseconds (minimum: 10ms)
146The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution. 146The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
147 example: 'perf stat -I 1000 -e cycles -a sleep 5' 147 example: 'perf stat -I 1000 -e cycles -a sleep 5'
148 148
149--interval-count times::
150Print count deltas for fixed number of times.
151This option should be used together with "-I" option.
152 example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
153
154--timeout msecs::
155Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
156This option is not supported with the "-I" option.
157 example: 'perf stat --time 2000 -e cycles -a'
158
149--metric-only:: 159--metric-only::
150Only print computed metrics. Print them in a single line. 160Only print computed metrics. Print them in a single line.
151Don't show any raw values. Not supported with --per-thread. 161Don't show any raw values. Not supported with --per-thread.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0dfdaa9fa81e..577a5d2988fe 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -27,6 +27,8 @@ NO_SYSCALL_TABLE := 1
27# Additional ARCH settings for ppc 27# Additional ARCH settings for ppc
28ifeq ($(SRCARCH),powerpc) 28ifeq ($(SRCARCH),powerpc)
29 NO_PERF_REGS := 0 29 NO_PERF_REGS := 0
30 NO_SYSCALL_TABLE := 0
31 CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
30 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64 32 LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
31endif 33endif
32 34
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 2323581b157d..fa639e3e52ac 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -68,7 +68,7 @@ struct auxtrace_record
68 bool found_spe = false; 68 bool found_spe = false;
69 static struct perf_pmu **arm_spe_pmus = NULL; 69 static struct perf_pmu **arm_spe_pmus = NULL;
70 static int nr_spes = 0; 70 static int nr_spes = 0;
71 int i; 71 int i = 0;
72 72
73 if (!evlist) 73 if (!evlist)
74 return NULL; 74 return NULL;
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index fbfc055d3f4d..5c655ad4621e 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -298,12 +298,17 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
298{ 298{
299 int i; 299 int i;
300 int etmv3 = 0, etmv4 = 0; 300 int etmv3 = 0, etmv4 = 0;
301 const struct cpu_map *cpus = evlist->cpus; 301 struct cpu_map *event_cpus = evlist->cpus;
302 struct cpu_map *online_cpus = cpu_map__new(NULL);
302 303
303 /* cpu map is not empty, we have specific CPUs to work with */ 304 /* cpu map is not empty, we have specific CPUs to work with */
304 if (!cpu_map__empty(cpus)) { 305 if (!cpu_map__empty(event_cpus)) {
305 for (i = 0; i < cpu_map__nr(cpus); i++) { 306 for (i = 0; i < cpu__max_cpu(); i++) {
306 if (cs_etm_is_etmv4(itr, cpus->map[i])) 307 if (!cpu_map__has(event_cpus, i) ||
308 !cpu_map__has(online_cpus, i))
309 continue;
310
311 if (cs_etm_is_etmv4(itr, i))
307 etmv4++; 312 etmv4++;
308 else 313 else
309 etmv3++; 314 etmv3++;
@@ -311,6 +316,9 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
311 } else { 316 } else {
312 /* get configuration for all CPUs in the system */ 317 /* get configuration for all CPUs in the system */
313 for (i = 0; i < cpu__max_cpu(); i++) { 318 for (i = 0; i < cpu__max_cpu(); i++) {
319 if (!cpu_map__has(online_cpus, i))
320 continue;
321
314 if (cs_etm_is_etmv4(itr, i)) 322 if (cs_etm_is_etmv4(itr, i))
315 etmv4++; 323 etmv4++;
316 else 324 else
@@ -318,6 +326,8 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
318 } 326 }
319 } 327 }
320 328
329 cpu_map__put(online_cpus);
330
321 return (CS_ETM_HEADER_SIZE + 331 return (CS_ETM_HEADER_SIZE +
322 (etmv4 * CS_ETMV4_PRIV_SIZE) + 332 (etmv4 * CS_ETMV4_PRIV_SIZE) +
323 (etmv3 * CS_ETMV3_PRIV_SIZE)); 333 (etmv3 * CS_ETMV3_PRIV_SIZE));
@@ -447,7 +457,9 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
447 int i; 457 int i;
448 u32 offset; 458 u32 offset;
449 u64 nr_cpu, type; 459 u64 nr_cpu, type;
450 const struct cpu_map *cpus = session->evlist->cpus; 460 struct cpu_map *cpu_map;
461 struct cpu_map *event_cpus = session->evlist->cpus;
462 struct cpu_map *online_cpus = cpu_map__new(NULL);
451 struct cs_etm_recording *ptr = 463 struct cs_etm_recording *ptr =
452 container_of(itr, struct cs_etm_recording, itr); 464 container_of(itr, struct cs_etm_recording, itr);
453 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu; 465 struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
@@ -458,8 +470,21 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
458 if (!session->evlist->nr_mmaps) 470 if (!session->evlist->nr_mmaps)
459 return -EINVAL; 471 return -EINVAL;
460 472
461 /* If the cpu_map is empty all CPUs are involved */ 473 /* If the cpu_map is empty all online CPUs are involved */
462 nr_cpu = cpu_map__empty(cpus) ? cpu__max_cpu() : cpu_map__nr(cpus); 474 if (cpu_map__empty(event_cpus)) {
475 cpu_map = online_cpus;
476 } else {
477 /* Make sure all specified CPUs are online */
478 for (i = 0; i < cpu_map__nr(event_cpus); i++) {
479 if (cpu_map__has(event_cpus, i) &&
480 !cpu_map__has(online_cpus, i))
481 return -EINVAL;
482 }
483
484 cpu_map = event_cpus;
485 }
486
487 nr_cpu = cpu_map__nr(cpu_map);
463 /* Get PMU type as dynamically assigned by the core */ 488 /* Get PMU type as dynamically assigned by the core */
464 type = cs_etm_pmu->type; 489 type = cs_etm_pmu->type;
465 490
@@ -472,15 +497,11 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
472 497
473 offset = CS_ETM_SNAPSHOT + 1; 498 offset = CS_ETM_SNAPSHOT + 1;
474 499
475 /* cpu map is not empty, we have specific CPUs to work with */ 500 for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
476 if (!cpu_map__empty(cpus)) { 501 if (cpu_map__has(cpu_map, i))
477 for (i = 0; i < cpu_map__nr(cpus) && offset < priv_size; i++)
478 cs_etm_get_metadata(cpus->map[i], &offset, itr, info);
479 } else {
480 /* get configuration for all CPUs in the system */
481 for (i = 0; i < cpu__max_cpu(); i++)
482 cs_etm_get_metadata(i, &offset, itr, info); 502 cs_etm_get_metadata(i, &offset, itr, info);
483 } 503
504 cpu_map__put(online_cpus);
484 505
485 return 0; 506 return 0;
486} 507}
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
index 42dab7c8f508..a111239df182 100644
--- a/tools/perf/arch/powerpc/Makefile
+++ b/tools/perf/arch/powerpc/Makefile
@@ -6,3 +6,28 @@ endif
6HAVE_KVM_STAT_SUPPORT := 1 6HAVE_KVM_STAT_SUPPORT := 1
7PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 7PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
8PERF_HAVE_JITDUMP := 1 8PERF_HAVE_JITDUMP := 1
9
10#
11# Syscall table generation for perf
12#
13
14out := $(OUTPUT)arch/powerpc/include/generated/asm
15header32 := $(out)/syscalls_32.c
16header64 := $(out)/syscalls_64.c
17sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h
18sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/
19systbl := $(sysprf)/mksyscalltbl
20
21# Create output directory if not already present
22_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
23
24$(header64): $(sysdef) $(systbl)
25 $(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@
26
27$(header32): $(sysdef) $(systbl)
28 $(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@
29
30clean::
31 $(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
32
33archheaders: $(header32) $(header64)
diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
new file mode 100755
index 000000000000..ef52e1dd694b
--- /dev/null
+++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
@@ -0,0 +1,37 @@
1#!/bin/sh
2# SPDX-License-Identifier: GPL-2.0
3#
4# Generate system call table for perf. Derived from
5# s390 script.
6#
7# Copyright IBM Corp. 2017
8# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
9# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
10
11wordsize=$1
12gcc=$2
13input=$3
14
15if ! test -r $input; then
16 echo "Could not read input file" >&2
17 exit 1
18fi
19
20create_table()
21{
22 local wordsize=$1
23 local max_nr
24
25 echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
26 while read sc nr; do
27 printf '\t[%d] = "%s",\n' $nr $sc
28 max_nr=$nr
29 done
30 echo '};'
31 echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
32}
33
34$gcc -m${wordsize} -E -dM -x c $input \
35 |sed -ne 's/^#define __NR_//p' \
36 |sort -t' ' -k2 -nu \
37 |create_table ${wordsize}
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
index 8c72b44444cb..01df9d8303e1 100644
--- a/tools/perf/arch/s390/annotate/instructions.c
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -23,12 +23,37 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na
23 return ops; 23 return ops;
24} 24}
25 25
26static int s390__cpuid_parse(struct arch *arch, char *cpuid)
27{
28 unsigned int family;
29 char model[16], model_c[16], cpumf_v[16], cpumf_a[16];
30 int ret;
31
32 /*
33 * cpuid string format:
34 * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]"
35 */
36 ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%s", &family, model_c,
37 model, cpumf_v, cpumf_a);
38 if (ret >= 2) {
39 arch->family = family;
40 arch->model = 0;
41 return 0;
42 }
43
44 return -1;
45}
46
26static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) 47static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
27{ 48{
49 int err = 0;
50
28 if (!arch->initialized) { 51 if (!arch->initialized) {
29 arch->initialized = true; 52 arch->initialized = true;
30 arch->associate_instruction_ops = s390__associate_ins_ops; 53 arch->associate_instruction_ops = s390__associate_ins_ops;
54 if (cpuid)
55 err = s390__cpuid_parse(arch, cpuid);
31 } 56 }
32 57
33 return 0; 58 return err;
34} 59}
diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c
index 9fa6c3e5782c..231294b80dc4 100644
--- a/tools/perf/arch/s390/util/header.c
+++ b/tools/perf/arch/s390/util/header.c
@@ -1,8 +1,9 @@
1/* 1/*
2 * Implementation of get_cpuid(). 2 * Implementation of get_cpuid().
3 * 3 *
4 * Copyright 2014 IBM Corp. 4 * Copyright IBM Corp. 2014, 2018
5 * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> 5 * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
6 * Thomas Richter <tmricht@linux.vnet.ibm.com>
6 * 7 *
7 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License (version 2 only) 9 * it under the terms of the GNU General Public License (version 2 only)
@@ -13,16 +14,153 @@
13#include <unistd.h> 14#include <unistd.h>
14#include <stdio.h> 15#include <stdio.h>
15#include <string.h> 16#include <string.h>
17#include <ctype.h>
16 18
17#include "../../util/header.h" 19#include "../../util/header.h"
20#include "../../util/util.h"
21
22#define SYSINFO_MANU "Manufacturer:"
23#define SYSINFO_TYPE "Type:"
24#define SYSINFO_MODEL "Model:"
25#define SRVLVL_CPUMF "CPU-MF:"
26#define SRVLVL_VERSION "version="
27#define SRVLVL_AUTHORIZATION "authorization="
28#define SYSINFO "/proc/sysinfo"
29#define SRVLVL "/proc/service_levels"
18 30
19int get_cpuid(char *buffer, size_t sz) 31int get_cpuid(char *buffer, size_t sz)
20{ 32{
21 const char *cpuid = "IBM/S390"; 33 char *cp, *line = NULL, *line2;
34 char type[8], model[33], version[8], manufacturer[32], authorization[8];
35 int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0;
36 int read;
37 unsigned long line_sz;
38 size_t nbytes;
39 FILE *sysinfo;
40
41 /*
42 * Scan /proc/sysinfo line by line and read out values for
43 * Manufacturer:, Type: and Model:, for example:
44 * Manufacturer: IBM
45 * Type: 2964
46 * Model: 702 N96
47 * The first word is the Model Capacity and the second word is
48 * Model (can be omitted). Both words have a maximum size of 16
49 * bytes.
50 */
51 memset(manufacturer, 0, sizeof(manufacturer));
52 memset(type, 0, sizeof(type));
53 memset(model, 0, sizeof(model));
54 memset(version, 0, sizeof(version));
55 memset(authorization, 0, sizeof(authorization));
56
57 sysinfo = fopen(SYSINFO, "r");
58 if (sysinfo == NULL)
59 return -1;
60
61 while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
62 if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
63 line2 = line + strlen(SYSINFO_MANU);
64
65 while ((cp = strtok_r(line2, "\n ", &line2))) {
66 mfsize += scnprintf(manufacturer + mfsize,
67 sizeof(manufacturer) - mfsize, "%s", cp);
68 }
69 }
70
71 if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) {
72 line2 = line + strlen(SYSINFO_TYPE);
22 73
23 if (strlen(cpuid) + 1 > sz) 74 while ((cp = strtok_r(line2, "\n ", &line2))) {
75 tpsize += scnprintf(type + tpsize,
76 sizeof(type) - tpsize, "%s", cp);
77 }
78 }
79
80 if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) {
81 line2 = line + strlen(SYSINFO_MODEL);
82
83 while ((cp = strtok_r(line2, "\n ", &line2))) {
84 mdsize += scnprintf(model + mdsize, sizeof(type) - mdsize,
85 "%s%s", model[0] ? "," : "", cp);
86 }
87 break;
88 }
89 }
90 fclose(sysinfo);
91
92 /* Missing manufacturer, type or model information should not happen */
93 if (!manufacturer[0] || !type[0] || !model[0])
24 return -1; 94 return -1;
25 95
26 strcpy(buffer, cpuid); 96 /*
27 return 0; 97 * Scan /proc/service_levels and return the CPU-MF counter facility
98 * version number and authorization level.
99 * Optional, does not exist on z/VM guests.
100 */
101 sysinfo = fopen(SRVLVL, "r");
102 if (sysinfo == NULL)
103 goto skip_sysinfo;
104 while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
105 if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF)))
106 continue;
107
108 line2 = line + strlen(SRVLVL_CPUMF);
109 while ((cp = strtok_r(line2, "\n ", &line2))) {
110 if (!strncmp(cp, SRVLVL_VERSION,
111 strlen(SRVLVL_VERSION))) {
112 char *sep = strchr(cp, '=');
113
114 vssize += scnprintf(version + vssize,
115 sizeof(version) - vssize, "%s", sep + 1);
116 }
117 if (!strncmp(cp, SRVLVL_AUTHORIZATION,
118 strlen(SRVLVL_AUTHORIZATION))) {
119 char *sep = strchr(cp, '=');
120
121 atsize += scnprintf(authorization + atsize,
122 sizeof(authorization) - atsize, "%s", sep + 1);
123 }
124 }
125 }
126 fclose(sysinfo);
127
128skip_sysinfo:
129 free(line);
130
131 if (version[0] && authorization[0] )
132 nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s",
133 manufacturer, type, model, version,
134 authorization);
135 else
136 nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
137 model);
138 return (nbytes >= sz) ? -1 : 0;
139}
140
141char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
142{
143 char *buf = malloc(128);
144
145 if (buf && get_cpuid(buf, 128) < 0)
146 zfree(&buf);
147 return buf;
148}
149
150/*
151 * Compare the cpuid string returned by get_cpuid() function
152 * with the name generated by the jevents file read from
153 * pmu-events/arch/s390/mapfile.csv.
154 *
155 * Parameter mapcpuid is the cpuid as stored in the
156 * pmu-events/arch/s390/mapfile.csv. This is just the type number.
157 * Parameter cpuid is the cpuid returned by function get_cpuid().
158 */
159int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
160{
161 char *cp = strchr(cpuid, ',');
162
163 if (cp == NULL)
164 return -1;
165 return strncmp(cp + 1, mapcpuid, strlen(mapcpuid));
28} 166}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bf4ca749d1ac..907267206973 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1803,7 +1803,7 @@ int cmd_record(int argc, const char **argv)
1803 err = target__validate(&rec->opts.target); 1803 err = target__validate(&rec->opts.target);
1804 if (err) { 1804 if (err) {
1805 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ); 1805 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
1806 ui__warning("%s", errbuf); 1806 ui__warning("%s\n", errbuf);
1807 } 1807 }
1808 1808
1809 err = target__parse_uid(&rec->opts.target); 1809 err = target__parse_uid(&rec->opts.target);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4ad5dc649716..1eedb1815c4c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -614,6 +614,7 @@ static int stats_print(struct report *rep)
614static void tasks_setup(struct report *rep) 614static void tasks_setup(struct report *rep)
615{ 615{
616 memset(&rep->tool, 0, sizeof(rep->tool)); 616 memset(&rep->tool, 0, sizeof(rep->tool));
617 rep->tool.ordered_events = true;
617 if (rep->mmaps_mode) { 618 if (rep->mmaps_mode) {
618 rep->tool.mmap = perf_event__process_mmap; 619 rep->tool.mmap = perf_event__process_mmap;
619 rep->tool.mmap2 = perf_event__process_mmap2; 620 rep->tool.mmap2 = perf_event__process_mmap2;
@@ -937,6 +938,7 @@ int cmd_report(int argc, const char **argv)
937 "perf report [<options>]", 938 "perf report [<options>]",
938 NULL 939 NULL
939 }; 940 };
941 bool group_set = false;
940 struct report report = { 942 struct report report = {
941 .tool = { 943 .tool = {
942 .sample = process_sample_event, 944 .sample = process_sample_event,
@@ -1056,7 +1058,7 @@ int cmd_report(int argc, const char **argv)
1056 "Specify disassembler style (e.g. -M intel for intel syntax)"), 1058 "Specify disassembler style (e.g. -M intel for intel syntax)"),
1057 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, 1059 OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
1058 "Show a column with the sum of periods"), 1060 "Show a column with the sum of periods"),
1059 OPT_BOOLEAN(0, "group", &symbol_conf.event_group, 1061 OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &group_set,
1060 "Show event group information together"), 1062 "Show event group information together"),
1061 OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", 1063 OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
1062 "use branch records for per branch histogram filling", 1064 "use branch records for per branch histogram filling",
@@ -1173,6 +1175,9 @@ repeat:
1173 has_br_stack = perf_header__has_feat(&session->header, 1175 has_br_stack = perf_header__has_feat(&session->header,
1174 HEADER_BRANCH_STACK); 1176 HEADER_BRANCH_STACK);
1175 1177
1178 if (group_set && !session->evlist->nr_groups)
1179 perf_evlist__set_leader(session->evlist);
1180
1176 if (itrace_synth_opts.last_branch) 1181 if (itrace_synth_opts.last_branch)
1177 has_br_stack = true; 1182 has_br_stack = true;
1178 1183
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index ab19a6ee4093..cce926aeb0c0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1489,6 +1489,7 @@ struct perf_script {
1489 bool show_switch_events; 1489 bool show_switch_events;
1490 bool show_namespace_events; 1490 bool show_namespace_events;
1491 bool show_lost_events; 1491 bool show_lost_events;
1492 bool show_round_events;
1492 bool allocated; 1493 bool allocated;
1493 bool per_event_dump; 1494 bool per_event_dump;
1494 struct cpu_map *cpus; 1495 struct cpu_map *cpus;
@@ -2104,6 +2105,16 @@ process_lost_event(struct perf_tool *tool,
2104 return 0; 2105 return 0;
2105} 2106}
2106 2107
2108static int
2109process_finished_round_event(struct perf_tool *tool __maybe_unused,
2110 union perf_event *event,
2111 struct ordered_events *oe __maybe_unused)
2112
2113{
2114 perf_event__fprintf(event, stdout);
2115 return 0;
2116}
2117
2107static void sig_handler(int sig __maybe_unused) 2118static void sig_handler(int sig __maybe_unused)
2108{ 2119{
2109 session_done = 1; 2120 session_done = 1;
@@ -2200,6 +2211,10 @@ static int __cmd_script(struct perf_script *script)
2200 script->tool.namespaces = process_namespaces_event; 2211 script->tool.namespaces = process_namespaces_event;
2201 if (script->show_lost_events) 2212 if (script->show_lost_events)
2202 script->tool.lost = process_lost_event; 2213 script->tool.lost = process_lost_event;
2214 if (script->show_round_events) {
2215 script->tool.ordered_events = false;
2216 script->tool.finished_round = process_finished_round_event;
2217 }
2203 2218
2204 if (perf_script__setup_per_event_dump(script)) { 2219 if (perf_script__setup_per_event_dump(script)) {
2205 pr_err("Couldn't create the per event dump files\n"); 2220 pr_err("Couldn't create the per event dump files\n");
@@ -3139,6 +3154,8 @@ int cmd_script(int argc, const char **argv)
3139 "Show namespace events (if recorded)"), 3154 "Show namespace events (if recorded)"),
3140 OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, 3155 OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
3141 "Show lost events (if recorded)"), 3156 "Show lost events (if recorded)"),
3157 OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
3158 "Show round events (if recorded)"),
3142 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, 3159 OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
3143 "Dump trace output to files named by the monitored events"), 3160 "Dump trace output to files named by the monitored events"),
3144 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), 3161 OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 98bf9d32f222..2d49eccf98f2 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -168,6 +168,7 @@ static struct timespec ref_time;
168static struct cpu_map *aggr_map; 168static struct cpu_map *aggr_map;
169static aggr_get_id_t aggr_get_id; 169static aggr_get_id_t aggr_get_id;
170static bool append_file; 170static bool append_file;
171static bool interval_count;
171static const char *output_name; 172static const char *output_name;
172static int output_fd; 173static int output_fd;
173static int print_free_counters_hint; 174static int print_free_counters_hint;
@@ -571,6 +572,8 @@ static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
571static int __run_perf_stat(int argc, const char **argv) 572static int __run_perf_stat(int argc, const char **argv)
572{ 573{
573 int interval = stat_config.interval; 574 int interval = stat_config.interval;
575 int times = stat_config.times;
576 int timeout = stat_config.timeout;
574 char msg[BUFSIZ]; 577 char msg[BUFSIZ];
575 unsigned long long t0, t1; 578 unsigned long long t0, t1;
576 struct perf_evsel *counter; 579 struct perf_evsel *counter;
@@ -584,6 +587,9 @@ static int __run_perf_stat(int argc, const char **argv)
584 if (interval) { 587 if (interval) {
585 ts.tv_sec = interval / USEC_PER_MSEC; 588 ts.tv_sec = interval / USEC_PER_MSEC;
586 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC; 589 ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
590 } else if (timeout) {
591 ts.tv_sec = timeout / USEC_PER_MSEC;
592 ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
587 } else { 593 } else {
588 ts.tv_sec = 1; 594 ts.tv_sec = 1;
589 ts.tv_nsec = 0; 595 ts.tv_nsec = 0;
@@ -696,10 +702,14 @@ try_again:
696 perf_evlist__start_workload(evsel_list); 702 perf_evlist__start_workload(evsel_list);
697 enable_counters(); 703 enable_counters();
698 704
699 if (interval) { 705 if (interval || timeout) {
700 while (!waitpid(child_pid, &status, WNOHANG)) { 706 while (!waitpid(child_pid, &status, WNOHANG)) {
701 nanosleep(&ts, NULL); 707 nanosleep(&ts, NULL);
708 if (timeout)
709 break;
702 process_interval(); 710 process_interval();
711 if (interval_count && !(--times))
712 break;
703 } 713 }
704 } 714 }
705 waitpid(child_pid, &status, 0); 715 waitpid(child_pid, &status, 0);
@@ -716,8 +726,13 @@ try_again:
716 enable_counters(); 726 enable_counters();
717 while (!done) { 727 while (!done) {
718 nanosleep(&ts, NULL); 728 nanosleep(&ts, NULL);
719 if (interval) 729 if (timeout)
730 break;
731 if (interval) {
720 process_interval(); 732 process_interval();
733 if (interval_count && !(--times))
734 break;
735 }
721 } 736 }
722 } 737 }
723 738
@@ -1891,6 +1906,10 @@ static const struct option stat_options[] = {
1891 "command to run after to the measured command"), 1906 "command to run after to the measured command"),
1892 OPT_UINTEGER('I', "interval-print", &stat_config.interval, 1907 OPT_UINTEGER('I', "interval-print", &stat_config.interval,
1893 "print counts at regular interval in ms (>= 10)"), 1908 "print counts at regular interval in ms (>= 10)"),
1909 OPT_INTEGER(0, "interval-count", &stat_config.times,
1910 "print counts for fixed number of times"),
1911 OPT_UINTEGER(0, "timeout", &stat_config.timeout,
1912 "stop workload and print counts after a timeout period in ms (>= 10ms)"),
1894 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, 1913 OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
1895 "aggregate counts per processor socket", AGGR_SOCKET), 1914 "aggregate counts per processor socket", AGGR_SOCKET),
1896 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, 1915 OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
@@ -2688,7 +2707,7 @@ int cmd_stat(int argc, const char **argv)
2688 int status = -EINVAL, run_idx; 2707 int status = -EINVAL, run_idx;
2689 const char *mode; 2708 const char *mode;
2690 FILE *output = stderr; 2709 FILE *output = stderr;
2691 unsigned int interval; 2710 unsigned int interval, timeout;
2692 const char * const stat_subcommands[] = { "record", "report" }; 2711 const char * const stat_subcommands[] = { "record", "report" };
2693 2712
2694 setlocale(LC_ALL, ""); 2713 setlocale(LC_ALL, "");
@@ -2719,6 +2738,7 @@ int cmd_stat(int argc, const char **argv)
2719 return __cmd_report(argc, argv); 2738 return __cmd_report(argc, argv);
2720 2739
2721 interval = stat_config.interval; 2740 interval = stat_config.interval;
2741 timeout = stat_config.timeout;
2722 2742
2723 /* 2743 /*
2724 * For record command the -o is already taken care of. 2744 * For record command the -o is already taken care of.
@@ -2871,6 +2891,33 @@ int cmd_stat(int argc, const char **argv)
2871 "Please proceed with caution.\n"); 2891 "Please proceed with caution.\n");
2872 } 2892 }
2873 2893
2894 if (stat_config.times && interval)
2895 interval_count = true;
2896 else if (stat_config.times && !interval) {
2897 pr_err("interval-count option should be used together with "
2898 "interval-print.\n");
2899 parse_options_usage(stat_usage, stat_options, "interval-count", 0);
2900 parse_options_usage(stat_usage, stat_options, "I", 1);
2901 goto out;
2902 }
2903
2904 if (timeout && timeout < 100) {
2905 if (timeout < 10) {
2906 pr_err("timeout must be >= 10ms.\n");
2907 parse_options_usage(stat_usage, stat_options, "timeout", 0);
2908 goto out;
2909 } else
2910 pr_warning("timeout < 100ms. "
2911 "The overhead percentage could be high in some cases. "
2912 "Please proceed with caution.\n");
2913 }
2914 if (timeout && interval) {
2915 pr_err("timeout option is not supported with interval-print.\n");
2916 parse_options_usage(stat_usage, stat_options, "timeout", 0);
2917 parse_options_usage(stat_usage, stat_options, "I", 1);
2918 goto out;
2919 }
2920
2874 if (perf_evlist__alloc_stats(evsel_list, interval)) 2921 if (perf_evlist__alloc_stats(evsel_list, interval))
2875 goto out; 2922 goto out;
2876 2923
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 790ec25919a0..bf206ffe5c45 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -42,6 +42,7 @@ arch/parisc/include/uapi/asm/errno.h
42arch/powerpc/include/uapi/asm/errno.h 42arch/powerpc/include/uapi/asm/errno.h
43arch/sparc/include/uapi/asm/errno.h 43arch/sparc/include/uapi/asm/errno.h
44arch/x86/include/uapi/asm/errno.h 44arch/x86/include/uapi/asm/errno.h
45arch/powerpc/include/uapi/asm/unistd.h
45include/asm-generic/bitops/arch_hweight.h 46include/asm-generic/bitops/arch_hweight.h
46include/asm-generic/bitops/const_hweight.h 47include/asm-generic/bitops/const_hweight.h
47include/asm-generic/bitops/__fls.h 48include/asm-generic/bitops/__fls.h
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 3bf7b145b826..c7115d369511 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -482,6 +482,34 @@ static void fs_something(void)
482 } 482 }
483} 483}
484 484
485static const char *do_determine_event(bool excl_kernel)
486{
487 const char *event = excl_kernel ? "cycles:u" : "cycles";
488
489#ifdef __s390x__
490 char cpuid[128], model[16], model_c[16], cpum_cf_v[16];
491 unsigned int family;
492 int ret, cpum_cf_a;
493
494 if (get_cpuid(cpuid, sizeof(cpuid)))
495 goto out_clocks;
496 ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c,
497 model, cpum_cf_v, &cpum_cf_a);
498 if (ret != 5) /* Not available */
499 goto out_clocks;
500 if (excl_kernel && (cpum_cf_a & 4))
501 return event;
502 if (!excl_kernel && (cpum_cf_a & 2))
503 return event;
504
505 /* Fall through: missing authorization */
506out_clocks:
507 event = excl_kernel ? "cpu-clock:u" : "cpu-clock";
508
509#endif
510 return event;
511}
512
485static void do_something(void) 513static void do_something(void)
486{ 514{
487 fs_something(); 515 fs_something();
@@ -592,10 +620,7 @@ static int do_test_code_reading(bool try_kcore)
592 620
593 perf_evlist__set_maps(evlist, cpus, threads); 621 perf_evlist__set_maps(evlist, cpus, threads);
594 622
595 if (excl_kernel) 623 str = do_determine_event(excl_kernel);
596 str = "cycles:u";
597 else
598 str = "cycles";
599 pr_debug("Parsing event '%s'\n", str); 624 pr_debug("Parsing event '%s'\n", str);
600 ret = parse_events(evlist, str, NULL); 625 ret = parse_events(evlist, str, NULL);
601 if (ret < 0) { 626 if (ret < 0) {
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 260418969120..2f008067d989 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -37,6 +37,19 @@ static int init_live_machine(struct machine *machine)
37 mmap_handler, machine, true, 500); 37 mmap_handler, machine, true, 500);
38} 38}
39 39
40/*
41 * We need to keep these functions global, despite the
42 * fact that they are used only locally in this object,
43 * in order to keep them around even if the binary is
44 * stripped. If they are gone, the unwind check for
45 * symbol fails.
46 */
47int test_dwarf_unwind__thread(struct thread *thread);
48int test_dwarf_unwind__compare(void *p1, void *p2);
49int test_dwarf_unwind__krava_3(struct thread *thread);
50int test_dwarf_unwind__krava_2(struct thread *thread);
51int test_dwarf_unwind__krava_1(struct thread *thread);
52
40#define MAX_STACK 8 53#define MAX_STACK 8
41 54
42static int unwind_entry(struct unwind_entry *entry, void *arg) 55static int unwind_entry(struct unwind_entry *entry, void *arg)
@@ -45,12 +58,12 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
45 char *symbol = entry->sym ? entry->sym->name : NULL; 58 char *symbol = entry->sym ? entry->sym->name : NULL;
46 static const char *funcs[MAX_STACK] = { 59 static const char *funcs[MAX_STACK] = {
47 "test__arch_unwind_sample", 60 "test__arch_unwind_sample",
48 "unwind_thread", 61 "test_dwarf_unwind__thread",
49 "compare", 62 "test_dwarf_unwind__compare",
50 "bsearch", 63 "bsearch",
51 "krava_3", 64 "test_dwarf_unwind__krava_3",
52 "krava_2", 65 "test_dwarf_unwind__krava_2",
53 "krava_1", 66 "test_dwarf_unwind__krava_1",
54 "test__dwarf_unwind" 67 "test__dwarf_unwind"
55 }; 68 };
56 /* 69 /*
@@ -77,7 +90,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
77 return strcmp((const char *) symbol, funcs[idx]); 90 return strcmp((const char *) symbol, funcs[idx]);
78} 91}
79 92
80static noinline int unwind_thread(struct thread *thread) 93noinline int test_dwarf_unwind__thread(struct thread *thread)
81{ 94{
82 struct perf_sample sample; 95 struct perf_sample sample;
83 unsigned long cnt = 0; 96 unsigned long cnt = 0;
@@ -108,7 +121,7 @@ static noinline int unwind_thread(struct thread *thread)
108 121
109static int global_unwind_retval = -INT_MAX; 122static int global_unwind_retval = -INT_MAX;
110 123
111static noinline int compare(void *p1, void *p2) 124noinline int test_dwarf_unwind__compare(void *p1, void *p2)
112{ 125{
113 /* Any possible value should be 'thread' */ 126 /* Any possible value should be 'thread' */
114 struct thread *thread = *(struct thread **)p1; 127 struct thread *thread = *(struct thread **)p1;
@@ -117,17 +130,17 @@ static noinline int compare(void *p1, void *p2)
117 /* Call unwinder twice for both callchain orders. */ 130 /* Call unwinder twice for both callchain orders. */
118 callchain_param.order = ORDER_CALLER; 131 callchain_param.order = ORDER_CALLER;
119 132
120 global_unwind_retval = unwind_thread(thread); 133 global_unwind_retval = test_dwarf_unwind__thread(thread);
121 if (!global_unwind_retval) { 134 if (!global_unwind_retval) {
122 callchain_param.order = ORDER_CALLEE; 135 callchain_param.order = ORDER_CALLEE;
123 global_unwind_retval = unwind_thread(thread); 136 global_unwind_retval = test_dwarf_unwind__thread(thread);
124 } 137 }
125 } 138 }
126 139
127 return p1 - p2; 140 return p1 - p2;
128} 141}
129 142
130static noinline int krava_3(struct thread *thread) 143noinline int test_dwarf_unwind__krava_3(struct thread *thread)
131{ 144{
132 struct thread *array[2] = {thread, thread}; 145 struct thread *array[2] = {thread, thread};
133 void *fp = &bsearch; 146 void *fp = &bsearch;
@@ -141,18 +154,19 @@ static noinline int krava_3(struct thread *thread)
141 size_t, int (*)(void *, void *)); 154 size_t, int (*)(void *, void *));
142 155
143 _bsearch = fp; 156 _bsearch = fp;
144 _bsearch(array, &thread, 2, sizeof(struct thread **), compare); 157 _bsearch(array, &thread, 2, sizeof(struct thread **),
158 test_dwarf_unwind__compare);
145 return global_unwind_retval; 159 return global_unwind_retval;
146} 160}
147 161
148static noinline int krava_2(struct thread *thread) 162noinline int test_dwarf_unwind__krava_2(struct thread *thread)
149{ 163{
150 return krava_3(thread); 164 return test_dwarf_unwind__krava_3(thread);
151} 165}
152 166
153static noinline int krava_1(struct thread *thread) 167noinline int test_dwarf_unwind__krava_1(struct thread *thread)
154{ 168{
155 return krava_2(thread); 169 return test_dwarf_unwind__krava_2(thread);
156} 170}
157 171
158int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) 172int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
@@ -189,7 +203,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu
189 goto out; 203 goto out;
190 } 204 }
191 205
192 err = krava_1(thread); 206 err = test_dwarf_unwind__krava_1(thread);
193 thread__put(thread); 207 thread__put(thread);
194 208
195 out: 209 out:
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index 30a950c9d407..1c16e56cd93e 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -5,7 +5,7 @@ had_vfs_getname=$?
5 5
6cleanup_probe_vfs_getname() { 6cleanup_probe_vfs_getname() {
7 if [ $had_vfs_getname -eq 1 ] ; then 7 if [ $had_vfs_getname -eq 1 ] ; then
8 perf probe -q -d probe:vfs_getname 8 perf probe -q -d probe:vfs_getname*
9 fi 9 fi
10} 10}
11 11
diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
index c446c894b297..8c4ab0b390c0 100755
--- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh
@@ -21,12 +21,12 @@ trace_libc_inet_pton_backtrace() {
21 expected[3]=".*packets transmitted.*" 21 expected[3]=".*packets transmitted.*"
22 expected[4]="rtt min.*" 22 expected[4]="rtt min.*"
23 expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)" 23 expected[5]="[0-9]+\.[0-9]+[[:space:]]+probe_libc:inet_pton:\([[:xdigit:]]+\)"
24 expected[6]=".*inet_pton[[:space:]]\($libc\)$" 24 expected[6]=".*inet_pton[[:space:]]\($libc|inlined\)$"
25 case "$(uname -m)" in 25 case "$(uname -m)" in
26 s390x) 26 s390x)
27 eventattr='call-graph=dwarf' 27 eventattr='call-graph=dwarf'
28 expected[7]="gaih_inet[[:space:]]\(inlined\)$" 28 expected[7]="gaih_inet.*[[:space:]]\($libc|inlined\)$"
29 expected[8]="__GI_getaddrinfo[[:space:]]\(inlined\)$" 29 expected[8]="__GI_getaddrinfo[[:space:]]\($libc|inlined\)$"
30 expected[9]="main[[:space:]]\(.*/bin/ping.*\)$" 30 expected[9]="main[[:space:]]\(.*/bin/ping.*\)$"
31 expected[10]="__libc_start_main[[:space:]]\($libc\)$" 31 expected[10]="__libc_start_main[[:space:]]\($libc\)$"
32 expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$" 32 expected[11]="_start[[:space:]]\(.*/bin/ping.*\)$"
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index f6789fb029d6..1e5adb65632a 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -56,7 +56,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
56 * be compacted against the list of modules found in the "vmlinux" 56 * be compacted against the list of modules found in the "vmlinux"
57 * code and with the one got from /proc/modules from the "kallsyms" code. 57 * code and with the one got from /proc/modules from the "kallsyms" code.
58 */ 58 */
59 if (__machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type, true) <= 0) { 59 if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms", type) <= 0) {
60 pr_debug("dso__load_kallsyms "); 60 pr_debug("dso__load_kallsyms ");
61 goto out; 61 goto out;
62 } 62 }
@@ -125,7 +125,7 @@ int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest
125 125
126 if (pair && UM(pair->start) == mem_start) { 126 if (pair && UM(pair->start) == mem_start) {
127next_pair: 127next_pair:
128 if (strcmp(sym->name, pair->name) == 0) { 128 if (arch__compare_symbol_names(sym->name, pair->name) == 0) {
129 /* 129 /*
130 * kallsyms don't have the symbol end, so we 130 * kallsyms don't have the symbol end, so we
131 * set that by using the next symbol start - 1, 131 * set that by using the next symbol start - 1,
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 286427975112..e2f666391ac4 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -319,6 +319,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
319 struct map_symbol *ms = ab->b.priv; 319 struct map_symbol *ms = ab->b.priv;
320 struct symbol *sym = ms->sym; 320 struct symbol *sym = ms->sym;
321 u8 pcnt_width = annotate_browser__pcnt_width(ab); 321 u8 pcnt_width = annotate_browser__pcnt_width(ab);
322 int width = 0;
322 323
323 /* PLT symbols contain external offsets */ 324 /* PLT symbols contain external offsets */
324 if (strstr(sym->name, "@plt")) 325 if (strstr(sym->name, "@plt"))
@@ -340,13 +341,17 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
340 to = (u64)btarget->idx; 341 to = (u64)btarget->idx;
341 } 342 }
342 343
344 if (ab->have_cycles)
345 width = IPC_WIDTH + CYCLES_WIDTH;
346
343 ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS); 347 ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
344 __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, 348 __ui_browser__line_arrow(browser,
349 pcnt_width + 2 + ab->addr_width + width,
345 from, to); 350 from, to);
346 351
347 if (is_fused(ab, cursor)) { 352 if (is_fused(ab, cursor)) {
348 ui_browser__mark_fused(browser, 353 ui_browser__mark_fused(browser,
349 pcnt_width + 3 + ab->addr_width, 354 pcnt_width + 3 + ab->addr_width + width,
350 from - 1, 355 from - 1,
351 to > from ? true : false); 356 to > from ? true : false);
352 } 357 }
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 7f8553630c4d..537eadd81914 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -316,7 +316,6 @@ static int machine__write_buildid_table(struct machine *machine,
316 struct feat_fd *fd) 316 struct feat_fd *fd)
317{ 317{
318 int err = 0; 318 int err = 0;
319 char nm[PATH_MAX];
320 struct dso *pos; 319 struct dso *pos;
321 u16 kmisc = PERF_RECORD_MISC_KERNEL, 320 u16 kmisc = PERF_RECORD_MISC_KERNEL,
322 umisc = PERF_RECORD_MISC_USER; 321 umisc = PERF_RECORD_MISC_USER;
@@ -338,9 +337,8 @@ static int machine__write_buildid_table(struct machine *machine,
338 name = pos->short_name; 337 name = pos->short_name;
339 name_len = pos->short_name_len; 338 name_len = pos->short_name_len;
340 } else if (dso__is_kcore(pos)) { 339 } else if (dso__is_kcore(pos)) {
341 machine__mmap_name(machine, nm, sizeof(nm)); 340 name = machine->mmap_name;
342 name = nm; 341 name_len = strlen(name);
343 name_len = strlen(nm);
344 } else { 342 } else {
345 name = pos->long_name; 343 name = pos->long_name;
346 name_len = pos->long_name_len; 344 name_len = pos->long_name_len;
@@ -813,12 +811,10 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine)
813 bool is_kallsyms = dso__is_kallsyms(dso); 811 bool is_kallsyms = dso__is_kallsyms(dso);
814 bool is_vdso = dso__is_vdso(dso); 812 bool is_vdso = dso__is_vdso(dso);
815 const char *name = dso->long_name; 813 const char *name = dso->long_name;
816 char nm[PATH_MAX];
817 814
818 if (dso__is_kcore(dso)) { 815 if (dso__is_kcore(dso)) {
819 is_kallsyms = true; 816 is_kallsyms = true;
820 machine__mmap_name(machine, nm, sizeof(nm)); 817 name = machine->mmap_name;
821 name = nm;
822 } 818 }
823 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, 819 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
824 dso->nsinfo, is_kallsyms, is_vdso); 820 dso->nsinfo, is_kallsyms, is_vdso);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 1fb01849f1c7..640af88331b4 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -78,6 +78,8 @@ int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
78{ 78{
79 ocsd_datapath_resp_t dp_ret; 79 ocsd_datapath_resp_t dp_ret;
80 80
81 decoder->prev_return = OCSD_RESP_CONT;
82
81 dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, 83 dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
82 0, 0, NULL, NULL); 84 0, 0, NULL, NULL);
83 if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) 85 if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
@@ -253,16 +255,16 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
253 decoder->packet_count = 0; 255 decoder->packet_count = 0;
254 for (i = 0; i < MAX_BUFFER; i++) { 256 for (i = 0; i < MAX_BUFFER; i++) {
255 decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; 257 decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL;
256 decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; 258 decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL;
257 decoder->packet_buffer[i].exc = false; 259 decoder->packet_buffer[i].last_instr_taken_branch = false;
258 decoder->packet_buffer[i].exc_ret = false; 260 decoder->packet_buffer[i].exc = false;
259 decoder->packet_buffer[i].cpu = INT_MIN; 261 decoder->packet_buffer[i].exc_ret = false;
262 decoder->packet_buffer[i].cpu = INT_MIN;
260 } 263 }
261} 264}
262 265
263static ocsd_datapath_resp_t 266static ocsd_datapath_resp_t
264cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, 267cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
265 const ocsd_generic_trace_elem *elem,
266 const u8 trace_chan_id, 268 const u8 trace_chan_id,
267 enum cs_etm_sample_type sample_type) 269 enum cs_etm_sample_type sample_type)
268{ 270{
@@ -278,18 +280,16 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
278 return OCSD_RESP_FATAL_SYS_ERR; 280 return OCSD_RESP_FATAL_SYS_ERR;
279 281
280 et = decoder->tail; 282 et = decoder->tail;
283 et = (et + 1) & (MAX_BUFFER - 1);
284 decoder->tail = et;
285 decoder->packet_count++;
286
281 decoder->packet_buffer[et].sample_type = sample_type; 287 decoder->packet_buffer[et].sample_type = sample_type;
282 decoder->packet_buffer[et].start_addr = elem->st_addr;
283 decoder->packet_buffer[et].end_addr = elem->en_addr;
284 decoder->packet_buffer[et].exc = false; 288 decoder->packet_buffer[et].exc = false;
285 decoder->packet_buffer[et].exc_ret = false; 289 decoder->packet_buffer[et].exc_ret = false;
286 decoder->packet_buffer[et].cpu = *((int *)inode->priv); 290 decoder->packet_buffer[et].cpu = *((int *)inode->priv);
287 291 decoder->packet_buffer[et].start_addr = 0xdeadbeefdeadbeefUL;
288 /* Wrap around if need be */ 292 decoder->packet_buffer[et].end_addr = 0xdeadbeefdeadbeefUL;
289 et = (et + 1) & (MAX_BUFFER - 1);
290
291 decoder->tail = et;
292 decoder->packet_count++;
293 293
294 if (decoder->packet_count == MAX_BUFFER - 1) 294 if (decoder->packet_count == MAX_BUFFER - 1)
295 return OCSD_RESP_WAIT; 295 return OCSD_RESP_WAIT;
@@ -297,6 +297,47 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
297 return OCSD_RESP_CONT; 297 return OCSD_RESP_CONT;
298} 298}
299 299
300static ocsd_datapath_resp_t
301cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
302 const ocsd_generic_trace_elem *elem,
303 const uint8_t trace_chan_id)
304{
305 int ret = 0;
306 struct cs_etm_packet *packet;
307
308 ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
309 CS_ETM_RANGE);
310 if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
311 return ret;
312
313 packet = &decoder->packet_buffer[decoder->tail];
314
315 packet->start_addr = elem->st_addr;
316 packet->end_addr = elem->en_addr;
317 switch (elem->last_i_type) {
318 case OCSD_INSTR_BR:
319 case OCSD_INSTR_BR_INDIRECT:
320 packet->last_instr_taken_branch = elem->last_instr_exec;
321 break;
322 case OCSD_INSTR_ISB:
323 case OCSD_INSTR_DSB_DMB:
324 case OCSD_INSTR_OTHER:
325 default:
326 packet->last_instr_taken_branch = false;
327 break;
328 }
329
330 return ret;
331}
332
333static ocsd_datapath_resp_t
334cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder,
335 const uint8_t trace_chan_id)
336{
337 return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
338 CS_ETM_TRACE_ON);
339}
340
300static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( 341static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
301 const void *context, 342 const void *context,
302 const ocsd_trc_index_t indx __maybe_unused, 343 const ocsd_trc_index_t indx __maybe_unused,
@@ -313,12 +354,13 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
313 decoder->trace_on = false; 354 decoder->trace_on = false;
314 break; 355 break;
315 case OCSD_GEN_TRC_ELEM_TRACE_ON: 356 case OCSD_GEN_TRC_ELEM_TRACE_ON:
357 resp = cs_etm_decoder__buffer_trace_on(decoder,
358 trace_chan_id);
316 decoder->trace_on = true; 359 decoder->trace_on = true;
317 break; 360 break;
318 case OCSD_GEN_TRC_ELEM_INSTR_RANGE: 361 case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
319 resp = cs_etm_decoder__buffer_packet(decoder, elem, 362 resp = cs_etm_decoder__buffer_range(decoder, elem,
320 trace_chan_id, 363 trace_chan_id);
321 CS_ETM_RANGE);
322 break; 364 break;
323 case OCSD_GEN_TRC_ELEM_EXCEPTION: 365 case OCSD_GEN_TRC_ELEM_EXCEPTION:
324 decoder->packet_buffer[decoder->tail].exc = true; 366 decoder->packet_buffer[decoder->tail].exc = true;
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 3d2e6205d186..743f5f444304 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -24,12 +24,14 @@ struct cs_etm_buffer {
24 24
25enum cs_etm_sample_type { 25enum cs_etm_sample_type {
26 CS_ETM_RANGE = 1 << 0, 26 CS_ETM_RANGE = 1 << 0,
27 CS_ETM_TRACE_ON = 1 << 1,
27}; 28};
28 29
29struct cs_etm_packet { 30struct cs_etm_packet {
30 enum cs_etm_sample_type sample_type; 31 enum cs_etm_sample_type sample_type;
31 u64 start_addr; 32 u64 start_addr;
32 u64 end_addr; 33 u64 end_addr;
34 u8 last_instr_taken_branch;
33 u8 exc; 35 u8 exc;
34 u8 exc_ret; 36 u8 exc_ret;
35 int cpu; 37 int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index b9f0a53dfa65..1b0d422373be 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -32,6 +32,14 @@
32 32
33#define MAX_TIMESTAMP (~0ULL) 33#define MAX_TIMESTAMP (~0ULL)
34 34
35/*
36 * A64 instructions are always 4 bytes
37 *
38 * Only A64 is supported, so can use this constant for converting between
39 * addresses and instruction counts, calculting offsets etc
40 */
41#define A64_INSTR_SIZE 4
42
35struct cs_etm_auxtrace { 43struct cs_etm_auxtrace {
36 struct auxtrace auxtrace; 44 struct auxtrace auxtrace;
37 struct auxtrace_queues queues; 45 struct auxtrace_queues queues;
@@ -45,11 +53,15 @@ struct cs_etm_auxtrace {
45 u8 snapshot_mode; 53 u8 snapshot_mode;
46 u8 data_queued; 54 u8 data_queued;
47 u8 sample_branches; 55 u8 sample_branches;
56 u8 sample_instructions;
48 57
49 int num_cpu; 58 int num_cpu;
50 u32 auxtrace_type; 59 u32 auxtrace_type;
51 u64 branches_sample_type; 60 u64 branches_sample_type;
52 u64 branches_id; 61 u64 branches_id;
62 u64 instructions_sample_type;
63 u64 instructions_sample_period;
64 u64 instructions_id;
53 u64 **metadata; 65 u64 **metadata;
54 u64 kernel_start; 66 u64 kernel_start;
55 unsigned int pmu_type; 67 unsigned int pmu_type;
@@ -68,6 +80,12 @@ struct cs_etm_queue {
68 u64 time; 80 u64 time;
69 u64 timestamp; 81 u64 timestamp;
70 u64 offset; 82 u64 offset;
83 u64 period_instructions;
84 struct branch_stack *last_branch;
85 struct branch_stack *last_branch_rb;
86 size_t last_branch_pos;
87 struct cs_etm_packet *prev_packet;
88 struct cs_etm_packet *packet;
71}; 89};
72 90
73static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); 91static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
@@ -174,6 +192,16 @@ static void cs_etm__free_queue(void *priv)
174{ 192{
175 struct cs_etm_queue *etmq = priv; 193 struct cs_etm_queue *etmq = priv;
176 194
195 if (!etmq)
196 return;
197
198 thread__zput(etmq->thread);
199 cs_etm_decoder__free(etmq->decoder);
200 zfree(&etmq->event_buf);
201 zfree(&etmq->last_branch);
202 zfree(&etmq->last_branch_rb);
203 zfree(&etmq->prev_packet);
204 zfree(&etmq->packet);
177 free(etmq); 205 free(etmq);
178} 206}
179 207
@@ -270,11 +298,35 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
270 struct cs_etm_decoder_params d_params; 298 struct cs_etm_decoder_params d_params;
271 struct cs_etm_trace_params *t_params; 299 struct cs_etm_trace_params *t_params;
272 struct cs_etm_queue *etmq; 300 struct cs_etm_queue *etmq;
301 size_t szp = sizeof(struct cs_etm_packet);
273 302
274 etmq = zalloc(sizeof(*etmq)); 303 etmq = zalloc(sizeof(*etmq));
275 if (!etmq) 304 if (!etmq)
276 return NULL; 305 return NULL;
277 306
307 etmq->packet = zalloc(szp);
308 if (!etmq->packet)
309 goto out_free;
310
311 if (etm->synth_opts.last_branch || etm->sample_branches) {
312 etmq->prev_packet = zalloc(szp);
313 if (!etmq->prev_packet)
314 goto out_free;
315 }
316
317 if (etm->synth_opts.last_branch) {
318 size_t sz = sizeof(struct branch_stack);
319
320 sz += etm->synth_opts.last_branch_sz *
321 sizeof(struct branch_entry);
322 etmq->last_branch = zalloc(sz);
323 if (!etmq->last_branch)
324 goto out_free;
325 etmq->last_branch_rb = zalloc(sz);
326 if (!etmq->last_branch_rb)
327 goto out_free;
328 }
329
278 etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); 330 etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
279 if (!etmq->event_buf) 331 if (!etmq->event_buf)
280 goto out_free; 332 goto out_free;
@@ -329,6 +381,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
329 goto out_free_decoder; 381 goto out_free_decoder;
330 382
331 etmq->offset = 0; 383 etmq->offset = 0;
384 etmq->period_instructions = 0;
332 385
333 return etmq; 386 return etmq;
334 387
@@ -336,6 +389,10 @@ out_free_decoder:
336 cs_etm_decoder__free(etmq->decoder); 389 cs_etm_decoder__free(etmq->decoder);
337out_free: 390out_free:
338 zfree(&etmq->event_buf); 391 zfree(&etmq->event_buf);
392 zfree(&etmq->last_branch);
393 zfree(&etmq->last_branch_rb);
394 zfree(&etmq->prev_packet);
395 zfree(&etmq->packet);
339 free(etmq); 396 free(etmq);
340 397
341 return NULL; 398 return NULL;
@@ -389,6 +446,129 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
389 return 0; 446 return 0;
390} 447}
391 448
449static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
450{
451 struct branch_stack *bs_src = etmq->last_branch_rb;
452 struct branch_stack *bs_dst = etmq->last_branch;
453 size_t nr = 0;
454
455 /*
456 * Set the number of records before early exit: ->nr is used to
457 * determine how many branches to copy from ->entries.
458 */
459 bs_dst->nr = bs_src->nr;
460
461 /*
462 * Early exit when there is nothing to copy.
463 */
464 if (!bs_src->nr)
465 return;
466
467 /*
468 * As bs_src->entries is a circular buffer, we need to copy from it in
469 * two steps. First, copy the branches from the most recently inserted
470 * branch ->last_branch_pos until the end of bs_src->entries buffer.
471 */
472 nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
473 memcpy(&bs_dst->entries[0],
474 &bs_src->entries[etmq->last_branch_pos],
475 sizeof(struct branch_entry) * nr);
476
477 /*
478 * If we wrapped around at least once, the branches from the beginning
479 * of the bs_src->entries buffer and until the ->last_branch_pos element
480 * are older valid branches: copy them over. The total number of
481 * branches copied over will be equal to the number of branches asked by
482 * the user in last_branch_sz.
483 */
484 if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
485 memcpy(&bs_dst->entries[nr],
486 &bs_src->entries[0],
487 sizeof(struct branch_entry) * etmq->last_branch_pos);
488 }
489}
490
491static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
492{
493 etmq->last_branch_pos = 0;
494 etmq->last_branch_rb->nr = 0;
495}
496
497static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
498{
499 /*
500 * The packet records the execution range with an exclusive end address
501 *
502 * A64 instructions are constant size, so the last executed
503 * instruction is A64_INSTR_SIZE before the end address
504 * Will need to do instruction level decode for T32 instructions as
505 * they can be variable size (not yet supported).
506 */
507 return packet->end_addr - A64_INSTR_SIZE;
508}
509
510static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
511{
512 /*
513 * Only A64 instructions are currently supported, so can get
514 * instruction count by dividing.
515 * Will need to do instruction level decode for T32 instructions as
516 * they can be variable size (not yet supported).
517 */
518 return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
519}
520
521static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
522 u64 offset)
523{
524 /*
525 * Only A64 instructions are currently supported, so can get
526 * instruction address by muliplying.
527 * Will need to do instruction level decode for T32 instructions as
528 * they can be variable size (not yet supported).
529 */
530 return packet->start_addr + offset * A64_INSTR_SIZE;
531}
532
533static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
534{
535 struct branch_stack *bs = etmq->last_branch_rb;
536 struct branch_entry *be;
537
538 /*
539 * The branches are recorded in a circular buffer in reverse
540 * chronological order: we start recording from the last element of the
541 * buffer down. After writing the first element of the stack, move the
542 * insert position back to the end of the buffer.
543 */
544 if (!etmq->last_branch_pos)
545 etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
546
547 etmq->last_branch_pos -= 1;
548
549 be = &bs->entries[etmq->last_branch_pos];
550 be->from = cs_etm__last_executed_instr(etmq->prev_packet);
551 be->to = etmq->packet->start_addr;
552 /* No support for mispredict */
553 be->flags.mispred = 0;
554 be->flags.predicted = 1;
555
556 /*
557 * Increment bs->nr until reaching the number of last branches asked by
558 * the user on the command line.
559 */
560 if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
561 bs->nr += 1;
562}
563
564static int cs_etm__inject_event(union perf_event *event,
565 struct perf_sample *sample, u64 type)
566{
567 event->header.size = perf_event__sample_event_size(sample, type, 0);
568 return perf_event__synthesize_sample(event, type, 0, sample);
569}
570
571
392static int 572static int
393cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) 573cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
394{ 574{
@@ -453,35 +633,105 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
453 } 633 }
454} 634}
455 635
636static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
637 u64 addr, u64 period)
638{
639 int ret = 0;
640 struct cs_etm_auxtrace *etm = etmq->etm;
641 union perf_event *event = etmq->event_buf;
642 struct perf_sample sample = {.ip = 0,};
643
644 event->sample.header.type = PERF_RECORD_SAMPLE;
645 event->sample.header.misc = PERF_RECORD_MISC_USER;
646 event->sample.header.size = sizeof(struct perf_event_header);
647
648 sample.ip = addr;
649 sample.pid = etmq->pid;
650 sample.tid = etmq->tid;
651 sample.id = etmq->etm->instructions_id;
652 sample.stream_id = etmq->etm->instructions_id;
653 sample.period = period;
654 sample.cpu = etmq->packet->cpu;
655 sample.flags = 0;
656 sample.insn_len = 1;
657 sample.cpumode = event->header.misc;
658
659 if (etm->synth_opts.last_branch) {
660 cs_etm__copy_last_branch_rb(etmq);
661 sample.branch_stack = etmq->last_branch;
662 }
663
664 if (etm->synth_opts.inject) {
665 ret = cs_etm__inject_event(event, &sample,
666 etm->instructions_sample_type);
667 if (ret)
668 return ret;
669 }
670
671 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
672
673 if (ret)
674 pr_err(
675 "CS ETM Trace: failed to deliver instruction event, error %d\n",
676 ret);
677
678 if (etm->synth_opts.last_branch)
679 cs_etm__reset_last_branch_rb(etmq);
680
681 return ret;
682}
683
456/* 684/*
457 * The cs etm packet encodes an instruction range between a branch target 685 * The cs etm packet encodes an instruction range between a branch target
458 * and the next taken branch. Generate sample accordingly. 686 * and the next taken branch. Generate sample accordingly.
459 */ 687 */
460static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, 688static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
461 struct cs_etm_packet *packet)
462{ 689{
463 int ret = 0; 690 int ret = 0;
464 struct cs_etm_auxtrace *etm = etmq->etm; 691 struct cs_etm_auxtrace *etm = etmq->etm;
465 struct perf_sample sample = {.ip = 0,}; 692 struct perf_sample sample = {.ip = 0,};
466 union perf_event *event = etmq->event_buf; 693 union perf_event *event = etmq->event_buf;
467 u64 start_addr = packet->start_addr; 694 struct dummy_branch_stack {
468 u64 end_addr = packet->end_addr; 695 u64 nr;
696 struct branch_entry entries;
697 } dummy_bs;
469 698
470 event->sample.header.type = PERF_RECORD_SAMPLE; 699 event->sample.header.type = PERF_RECORD_SAMPLE;
471 event->sample.header.misc = PERF_RECORD_MISC_USER; 700 event->sample.header.misc = PERF_RECORD_MISC_USER;
472 event->sample.header.size = sizeof(struct perf_event_header); 701 event->sample.header.size = sizeof(struct perf_event_header);
473 702
474 sample.ip = start_addr; 703 sample.ip = cs_etm__last_executed_instr(etmq->prev_packet);
475 sample.pid = etmq->pid; 704 sample.pid = etmq->pid;
476 sample.tid = etmq->tid; 705 sample.tid = etmq->tid;
477 sample.addr = end_addr; 706 sample.addr = etmq->packet->start_addr;
478 sample.id = etmq->etm->branches_id; 707 sample.id = etmq->etm->branches_id;
479 sample.stream_id = etmq->etm->branches_id; 708 sample.stream_id = etmq->etm->branches_id;
480 sample.period = 1; 709 sample.period = 1;
481 sample.cpu = packet->cpu; 710 sample.cpu = etmq->packet->cpu;
482 sample.flags = 0; 711 sample.flags = 0;
483 sample.cpumode = PERF_RECORD_MISC_USER; 712 sample.cpumode = PERF_RECORD_MISC_USER;
484 713
714 /*
715 * perf report cannot handle events without a branch stack
716 */
717 if (etm->synth_opts.last_branch) {
718 dummy_bs = (struct dummy_branch_stack){
719 .nr = 1,
720 .entries = {
721 .from = sample.ip,
722 .to = sample.addr,
723 },
724 };
725 sample.branch_stack = (struct branch_stack *)&dummy_bs;
726 }
727
728 if (etm->synth_opts.inject) {
729 ret = cs_etm__inject_event(event, &sample,
730 etm->branches_sample_type);
731 if (ret)
732 return ret;
733 }
734
485 ret = perf_session__deliver_synth_event(etm->session, event, &sample); 735 ret = perf_session__deliver_synth_event(etm->session, event, &sample);
486 736
487 if (ret) 737 if (ret)
@@ -578,6 +828,24 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
578 etm->sample_branches = true; 828 etm->sample_branches = true;
579 etm->branches_sample_type = attr.sample_type; 829 etm->branches_sample_type = attr.sample_type;
580 etm->branches_id = id; 830 etm->branches_id = id;
831 id += 1;
832 attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
833 }
834
835 if (etm->synth_opts.last_branch)
836 attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
837
838 if (etm->synth_opts.instructions) {
839 attr.config = PERF_COUNT_HW_INSTRUCTIONS;
840 attr.sample_period = etm->synth_opts.period;
841 etm->instructions_sample_period = attr.sample_period;
842 err = cs_etm__synth_event(session, &attr, id);
843 if (err)
844 return err;
845 etm->sample_instructions = true;
846 etm->instructions_sample_type = attr.sample_type;
847 etm->instructions_id = id;
848 id += 1;
581 } 849 }
582 850
583 return 0; 851 return 0;
@@ -585,25 +853,108 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
585 853
586static int cs_etm__sample(struct cs_etm_queue *etmq) 854static int cs_etm__sample(struct cs_etm_queue *etmq)
587{ 855{
856 struct cs_etm_auxtrace *etm = etmq->etm;
857 struct cs_etm_packet *tmp;
588 int ret; 858 int ret;
589 struct cs_etm_packet packet; 859 u64 instrs_executed;
590 860
591 while (1) { 861 instrs_executed = cs_etm__instr_count(etmq->packet);
592 ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); 862 etmq->period_instructions += instrs_executed;
593 if (ret <= 0) 863
864 /*
865 * Record a branch when the last instruction in
866 * PREV_PACKET is a branch.
867 */
868 if (etm->synth_opts.last_branch &&
869 etmq->prev_packet &&
870 etmq->prev_packet->sample_type == CS_ETM_RANGE &&
871 etmq->prev_packet->last_instr_taken_branch)
872 cs_etm__update_last_branch_rb(etmq);
873
874 if (etm->sample_instructions &&
875 etmq->period_instructions >= etm->instructions_sample_period) {
876 /*
877 * Emit instruction sample periodically
878 * TODO: allow period to be defined in cycles and clock time
879 */
880
881 /* Get number of instructions executed after the sample point */
882 u64 instrs_over = etmq->period_instructions -
883 etm->instructions_sample_period;
884
885 /*
886 * Calculate the address of the sampled instruction (-1 as
887 * sample is reported as though instruction has just been
888 * executed, but PC has not advanced to next instruction)
889 */
890 u64 offset = (instrs_executed - instrs_over - 1);
891 u64 addr = cs_etm__instr_addr(etmq->packet, offset);
892
893 ret = cs_etm__synth_instruction_sample(
894 etmq, addr, etm->instructions_sample_period);
895 if (ret)
896 return ret;
897
898 /* Carry remaining instructions into next sample period */
899 etmq->period_instructions = instrs_over;
900 }
901
902 if (etm->sample_branches &&
903 etmq->prev_packet &&
904 etmq->prev_packet->sample_type == CS_ETM_RANGE &&
905 etmq->prev_packet->last_instr_taken_branch) {
906 ret = cs_etm__synth_branch_sample(etmq);
907 if (ret)
594 return ret; 908 return ret;
909 }
595 910
911 if (etm->sample_branches || etm->synth_opts.last_branch) {
596 /* 912 /*
597 * If the packet contains an instruction range, generate an 913 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
598 * instruction sequence event. 914 * the next incoming packet.
599 */ 915 */
600 if (packet.sample_type & CS_ETM_RANGE) 916 tmp = etmq->packet;
601 cs_etm__synth_branch_sample(etmq, &packet); 917 etmq->packet = etmq->prev_packet;
918 etmq->prev_packet = tmp;
602 } 919 }
603 920
604 return 0; 921 return 0;
605} 922}
606 923
924static int cs_etm__flush(struct cs_etm_queue *etmq)
925{
926 int err = 0;
927 struct cs_etm_packet *tmp;
928
929 if (etmq->etm->synth_opts.last_branch &&
930 etmq->prev_packet &&
931 etmq->prev_packet->sample_type == CS_ETM_RANGE) {
932 /*
933 * Generate a last branch event for the branches left in the
934 * circular buffer at the end of the trace.
935 *
936 * Use the address of the end of the last reported execution
937 * range
938 */
939 u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
940
941 err = cs_etm__synth_instruction_sample(
942 etmq, addr,
943 etmq->period_instructions);
944 etmq->period_instructions = 0;
945
946 /*
947 * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
948 * the next incoming packet.
949 */
950 tmp = etmq->packet;
951 etmq->packet = etmq->prev_packet;
952 etmq->prev_packet = tmp;
953 }
954
955 return err;
956}
957
607static int cs_etm__run_decoder(struct cs_etm_queue *etmq) 958static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
608{ 959{
609 struct cs_etm_auxtrace *etm = etmq->etm; 960 struct cs_etm_auxtrace *etm = etmq->etm;
@@ -615,45 +966,72 @@ static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
615 etm->kernel_start = machine__kernel_start(etm->machine); 966 etm->kernel_start = machine__kernel_start(etm->machine);
616 967
617 /* Go through each buffer in the queue and decode them one by one */ 968 /* Go through each buffer in the queue and decode them one by one */
618more: 969 while (1) {
619 buffer_used = 0; 970 buffer_used = 0;
620 memset(&buffer, 0, sizeof(buffer)); 971 memset(&buffer, 0, sizeof(buffer));
621 err = cs_etm__get_trace(&buffer, etmq); 972 err = cs_etm__get_trace(&buffer, etmq);
622 if (err <= 0) 973 if (err <= 0)
623 return err;
624 /*
625 * We cannot assume consecutive blocks in the data file are contiguous,
626 * reset the decoder to force re-sync.
627 */
628 err = cs_etm_decoder__reset(etmq->decoder);
629 if (err != 0)
630 return err;
631
632 /* Run trace decoder until buffer consumed or end of trace */
633 do {
634 processed = 0;
635
636 err = cs_etm_decoder__process_data_block(
637 etmq->decoder,
638 etmq->offset,
639 &buffer.buf[buffer_used],
640 buffer.len - buffer_used,
641 &processed);
642
643 if (err)
644 return err; 974 return err;
645
646 etmq->offset += processed;
647 buffer_used += processed;
648
649 /* 975 /*
650 * Nothing to do with an error condition, let's hope the next 976 * We cannot assume consecutive blocks in the data file are
651 * chunk will be better. 977 * contiguous, reset the decoder to force re-sync.
652 */ 978 */
653 err = cs_etm__sample(etmq); 979 err = cs_etm_decoder__reset(etmq->decoder);
654 } while (buffer.len > buffer_used); 980 if (err != 0)
981 return err;
982
983 /* Run trace decoder until buffer consumed or end of trace */
984 do {
985 processed = 0;
986 err = cs_etm_decoder__process_data_block(
987 etmq->decoder,
988 etmq->offset,
989 &buffer.buf[buffer_used],
990 buffer.len - buffer_used,
991 &processed);
992 if (err)
993 return err;
994
995 etmq->offset += processed;
996 buffer_used += processed;
997
998 /* Process each packet in this chunk */
999 while (1) {
1000 err = cs_etm_decoder__get_packet(etmq->decoder,
1001 etmq->packet);
1002 if (err <= 0)
1003 /*
1004 * Stop processing this chunk on
1005 * end of data or error
1006 */
1007 break;
1008
1009 switch (etmq->packet->sample_type) {
1010 case CS_ETM_RANGE:
1011 /*
1012 * If the packet contains an instruction
1013 * range, generate instruction sequence
1014 * events.
1015 */
1016 cs_etm__sample(etmq);
1017 break;
1018 case CS_ETM_TRACE_ON:
1019 /*
1020 * Discontinuity in trace, flush
1021 * previous branch stack
1022 */
1023 cs_etm__flush(etmq);
1024 break;
1025 default:
1026 break;
1027 }
1028 }
1029 } while (buffer.len > buffer_used);
655 1030
656goto more; 1031 if (err == 0)
1032 /* Flush any remaining branch stack entries */
1033 err = cs_etm__flush(etmq);
1034 }
657 1035
658 return err; 1036 return err;
659} 1037}
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 44e603c27944..f0a6cbd033cc 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -894,8 +894,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
894 struct machine *machine) 894 struct machine *machine)
895{ 895{
896 size_t size; 896 size_t size;
897 const char *mmap_name;
898 char name_buff[PATH_MAX];
899 struct map *map = machine__kernel_map(machine); 897 struct map *map = machine__kernel_map(machine);
900 struct kmap *kmap; 898 struct kmap *kmap;
901 int err; 899 int err;
@@ -918,7 +916,6 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
918 return -1; 916 return -1;
919 } 917 }
920 918
921 mmap_name = machine__mmap_name(machine, name_buff, sizeof(name_buff));
922 if (machine__is_host(machine)) { 919 if (machine__is_host(machine)) {
923 /* 920 /*
924 * kernel uses PERF_RECORD_MISC_USER for user space maps, 921 * kernel uses PERF_RECORD_MISC_USER for user space maps,
@@ -931,7 +928,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
931 928
932 kmap = map__kmap(map); 929 kmap = map__kmap(map);
933 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename), 930 size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
934 "%s%s", mmap_name, kmap->ref_reloc_sym->name) + 1; 931 "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
935 size = PERF_ALIGN(size, sizeof(u64)); 932 size = PERF_ALIGN(size, sizeof(u64));
936 event->mmap.header.type = PERF_RECORD_MMAP; 933 event->mmap.header.type = PERF_RECORD_MMAP;
937 event->mmap.header.size = (sizeof(event->mmap) - 934 event->mmap.header.size = (sizeof(event->mmap) -
@@ -1591,17 +1588,6 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
1591 return -1; 1588 return -1;
1592 1589
1593 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); 1590 dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
1594 /*
1595 * Have we already created the kernel maps for this machine?
1596 *
1597 * This should have happened earlier, when we processed the kernel MMAP
1598 * events, but for older perf.data files there was no such thing, so do
1599 * it now.
1600 */
1601 if (sample->cpumode == PERF_RECORD_MISC_KERNEL &&
1602 machine__kernel_map(machine) == NULL)
1603 machine__create_kernel_maps(machine);
1604
1605 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al); 1591 thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
1606 dump_printf(" ...... dso: %s\n", 1592 dump_printf(" ...... dso: %s\n",
1607 al->map ? al->map->dso->long_name : 1593 al->map ? al->map->dso->long_name :
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e5fc14e53c05..7b7d535396f7 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1086,11 +1086,30 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
1086 1086
1087int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) 1087int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
1088{ 1088{
1089 bool all_threads = (target->per_thread && target->system_wide);
1089 struct cpu_map *cpus; 1090 struct cpu_map *cpus;
1090 struct thread_map *threads; 1091 struct thread_map *threads;
1091 1092
1093 /*
1094 * If specify '-a' and '--per-thread' to perf record, perf record
1095 * will override '--per-thread'. target->per_thread = false and
1096 * target->system_wide = true.
1097 *
1098 * If specify '--per-thread' only to perf record,
1099 * target->per_thread = true and target->system_wide = false.
1100 *
1101 * So target->per_thread && target->system_wide is false.
1102 * For perf record, thread_map__new_str doesn't call
1103 * thread_map__new_all_cpus. That will keep perf record's
1104 * current behavior.
1105 *
1106 * For perf stat, it allows the case that target->per_thread and
1107 * target->system_wide are all true. It means to collect system-wide
1108 * per-thread data. thread_map__new_str will call
1109 * thread_map__new_all_cpus to enumerate all threads.
1110 */
1092 threads = thread_map__new_str(target->pid, target->tid, target->uid, 1111 threads = thread_map__new_str(target->pid, target->tid, target->uid,
1093 target->per_thread); 1112 all_threads);
1094 1113
1095 if (!threads) 1114 if (!threads)
1096 return -1; 1115 return -1;
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index f28aaaa3a440..942bdec6d70d 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -174,4 +174,5 @@ int write_padded(struct feat_fd *fd, const void *bf,
174int get_cpuid(char *buffer, size_t sz); 174int get_cpuid(char *buffer, size_t sz);
175 175
176char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); 176char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
177int strcmp_cpuid_str(const char *s1, const char *s2);
177#endif /* __PERF_HEADER_H */ 178#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index b6140950301e..44a8456cea10 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -879,7 +879,7 @@ iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
879 * cumulated only one time to prevent entries more than 100% 879 * cumulated only one time to prevent entries more than 100%
880 * overhead. 880 * overhead.
881 */ 881 */
882 he_cache = malloc(sizeof(*he_cache) * (iter->max_stack + 1)); 882 he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
883 if (he_cache == NULL) 883 if (he_cache == NULL)
884 return -ENOMEM; 884 return -ENOMEM;
885 885
@@ -1045,8 +1045,6 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
1045 if (err) 1045 if (err)
1046 return err; 1046 return err;
1047 1047
1048 iter->max_stack = max_stack_depth;
1049
1050 err = iter->ops->prepare_entry(iter, al); 1048 err = iter->ops->prepare_entry(iter, al);
1051 if (err) 1049 if (err)
1052 goto out; 1050 goto out;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 02721b579746..e869cad4d89f 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -107,7 +107,6 @@ struct hist_entry_iter {
107 int curr; 107 int curr;
108 108
109 bool hide_unresolved; 109 bool hide_unresolved;
110 int max_stack;
111 110
112 struct perf_evsel *evsel; 111 struct perf_evsel *evsel;
113 struct perf_sample *sample; 112 struct perf_sample *sample;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index b05a67464c03..fe27ef55cbb9 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -48,8 +48,31 @@ static void machine__threads_init(struct machine *machine)
48 } 48 }
49} 49}
50 50
51static int machine__set_mmap_name(struct machine *machine)
52{
53 if (machine__is_host(machine)) {
54 if (symbol_conf.vmlinux_name)
55 machine->mmap_name = strdup(symbol_conf.vmlinux_name);
56 else
57 machine->mmap_name = strdup("[kernel.kallsyms]");
58 } else if (machine__is_default_guest(machine)) {
59 if (symbol_conf.default_guest_vmlinux_name)
60 machine->mmap_name = strdup(symbol_conf.default_guest_vmlinux_name);
61 else
62 machine->mmap_name = strdup("[guest.kernel.kallsyms]");
63 } else {
64 if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
65 machine->pid) < 0)
66 machine->mmap_name = NULL;
67 }
68
69 return machine->mmap_name ? 0 : -ENOMEM;
70}
71
51int machine__init(struct machine *machine, const char *root_dir, pid_t pid) 72int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
52{ 73{
74 int err = -ENOMEM;
75
53 memset(machine, 0, sizeof(*machine)); 76 memset(machine, 0, sizeof(*machine));
54 map_groups__init(&machine->kmaps, machine); 77 map_groups__init(&machine->kmaps, machine);
55 RB_CLEAR_NODE(&machine->rb_node); 78 RB_CLEAR_NODE(&machine->rb_node);
@@ -73,13 +96,16 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
73 if (machine->root_dir == NULL) 96 if (machine->root_dir == NULL)
74 return -ENOMEM; 97 return -ENOMEM;
75 98
99 if (machine__set_mmap_name(machine))
100 goto out;
101
76 if (pid != HOST_KERNEL_ID) { 102 if (pid != HOST_KERNEL_ID) {
77 struct thread *thread = machine__findnew_thread(machine, -1, 103 struct thread *thread = machine__findnew_thread(machine, -1,
78 pid); 104 pid);
79 char comm[64]; 105 char comm[64];
80 106
81 if (thread == NULL) 107 if (thread == NULL)
82 return -ENOMEM; 108 goto out;
83 109
84 snprintf(comm, sizeof(comm), "[guest/%d]", pid); 110 snprintf(comm, sizeof(comm), "[guest/%d]", pid);
85 thread__set_comm(thread, comm, 0); 111 thread__set_comm(thread, comm, 0);
@@ -87,7 +113,13 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
87 } 113 }
88 114
89 machine->current_tid = NULL; 115 machine->current_tid = NULL;
116 err = 0;
90 117
118out:
119 if (err) {
120 zfree(&machine->root_dir);
121 zfree(&machine->mmap_name);
122 }
91 return 0; 123 return 0;
92} 124}
93 125
@@ -119,7 +151,7 @@ struct machine *machine__new_kallsyms(void)
119 * ask for not using the kcore parsing code, once this one is fixed 151 * ask for not using the kcore parsing code, once this one is fixed
120 * to create a map per module. 152 * to create a map per module.
121 */ 153 */
122 if (machine && __machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION, true) <= 0) { 154 if (machine && machine__load_kallsyms(machine, "/proc/kallsyms", MAP__FUNCTION) <= 0) {
123 machine__delete(machine); 155 machine__delete(machine);
124 machine = NULL; 156 machine = NULL;
125 } 157 }
@@ -180,6 +212,7 @@ void machine__exit(struct machine *machine)
180 dsos__exit(&machine->dsos); 212 dsos__exit(&machine->dsos);
181 machine__exit_vdso(machine); 213 machine__exit_vdso(machine);
182 zfree(&machine->root_dir); 214 zfree(&machine->root_dir);
215 zfree(&machine->mmap_name);
183 zfree(&machine->current_tid); 216 zfree(&machine->current_tid);
184 217
185 for (i = 0; i < THREADS__TABLE_SIZE; i++) { 218 for (i = 0; i < THREADS__TABLE_SIZE; i++) {
@@ -322,20 +355,6 @@ void machines__process_guests(struct machines *machines,
322 } 355 }
323} 356}
324 357
325char *machine__mmap_name(struct machine *machine, char *bf, size_t size)
326{
327 if (machine__is_host(machine))
328 snprintf(bf, size, "[%s]", "kernel.kallsyms");
329 else if (machine__is_default_guest(machine))
330 snprintf(bf, size, "[%s]", "guest.kernel.kallsyms");
331 else {
332 snprintf(bf, size, "[%s.%d]", "guest.kernel.kallsyms",
333 machine->pid);
334 }
335
336 return bf;
337}
338
339void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size) 358void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
340{ 359{
341 struct rb_node *node; 360 struct rb_node *node;
@@ -771,25 +790,13 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
771 790
772static struct dso *machine__get_kernel(struct machine *machine) 791static struct dso *machine__get_kernel(struct machine *machine)
773{ 792{
774 const char *vmlinux_name = NULL; 793 const char *vmlinux_name = machine->mmap_name;
775 struct dso *kernel; 794 struct dso *kernel;
776 795
777 if (machine__is_host(machine)) { 796 if (machine__is_host(machine)) {
778 vmlinux_name = symbol_conf.vmlinux_name;
779 if (!vmlinux_name)
780 vmlinux_name = DSO__NAME_KALLSYMS;
781
782 kernel = machine__findnew_kernel(machine, vmlinux_name, 797 kernel = machine__findnew_kernel(machine, vmlinux_name,
783 "[kernel]", DSO_TYPE_KERNEL); 798 "[kernel]", DSO_TYPE_KERNEL);
784 } else { 799 } else {
785 char bf[PATH_MAX];
786
787 if (machine__is_default_guest(machine))
788 vmlinux_name = symbol_conf.default_guest_vmlinux_name;
789 if (!vmlinux_name)
790 vmlinux_name = machine__mmap_name(machine, bf,
791 sizeof(bf));
792
793 kernel = machine__findnew_kernel(machine, vmlinux_name, 800 kernel = machine__findnew_kernel(machine, vmlinux_name,
794 "[guest.kernel]", 801 "[guest.kernel]",
795 DSO_TYPE_GUEST_KERNEL); 802 DSO_TYPE_GUEST_KERNEL);
@@ -849,13 +856,10 @@ static int machine__get_running_kernel_start(struct machine *machine,
849 return 0; 856 return 0;
850} 857}
851 858
852int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) 859static int
860__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
853{ 861{
854 int type; 862 int type;
855 u64 start = 0;
856
857 if (machine__get_running_kernel_start(machine, NULL, &start))
858 return -1;
859 863
860 /* In case of renewal the kernel map, destroy previous one */ 864 /* In case of renewal the kernel map, destroy previous one */
861 machine__destroy_kernel_maps(machine); 865 machine__destroy_kernel_maps(machine);
@@ -864,7 +868,7 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
864 struct kmap *kmap; 868 struct kmap *kmap;
865 struct map *map; 869 struct map *map;
866 870
867 machine->vmlinux_maps[type] = map__new2(start, kernel, type); 871 machine->vmlinux_maps[type] = map__new2(0, kernel, type);
868 if (machine->vmlinux_maps[type] == NULL) 872 if (machine->vmlinux_maps[type] == NULL)
869 return -1; 873 return -1;
870 874
@@ -987,11 +991,11 @@ int machines__create_kernel_maps(struct machines *machines, pid_t pid)
987 return machine__create_kernel_maps(machine); 991 return machine__create_kernel_maps(machine);
988} 992}
989 993
990int __machine__load_kallsyms(struct machine *machine, const char *filename, 994int machine__load_kallsyms(struct machine *machine, const char *filename,
991 enum map_type type, bool no_kcore) 995 enum map_type type)
992{ 996{
993 struct map *map = machine__kernel_map(machine); 997 struct map *map = machine__kernel_map(machine);
994 int ret = __dso__load_kallsyms(map->dso, filename, map, no_kcore); 998 int ret = __dso__load_kallsyms(map->dso, filename, map, true);
995 999
996 if (ret > 0) { 1000 if (ret > 0) {
997 dso__set_loaded(map->dso, type); 1001 dso__set_loaded(map->dso, type);
@@ -1006,12 +1010,6 @@ int __machine__load_kallsyms(struct machine *machine, const char *filename,
1006 return ret; 1010 return ret;
1007} 1011}
1008 1012
1009int machine__load_kallsyms(struct machine *machine, const char *filename,
1010 enum map_type type)
1011{
1012 return __machine__load_kallsyms(machine, filename, type, false);
1013}
1014
1015int machine__load_vmlinux_path(struct machine *machine, enum map_type type) 1013int machine__load_vmlinux_path(struct machine *machine, enum map_type type)
1016{ 1014{
1017 struct map *map = machine__kernel_map(machine); 1015 struct map *map = machine__kernel_map(machine);
@@ -1215,6 +1213,24 @@ static int machine__create_modules(struct machine *machine)
1215 return 0; 1213 return 0;
1216} 1214}
1217 1215
1216static void machine__set_kernel_mmap(struct machine *machine,
1217 u64 start, u64 end)
1218{
1219 int i;
1220
1221 for (i = 0; i < MAP__NR_TYPES; i++) {
1222 machine->vmlinux_maps[i]->start = start;
1223 machine->vmlinux_maps[i]->end = end;
1224
1225 /*
1226 * Be a bit paranoid here, some perf.data file came with
1227 * a zero sized synthesized MMAP event for the kernel.
1228 */
1229 if (machine->vmlinux_maps[i]->end == 0)
1230 machine->vmlinux_maps[i]->end = ~0ULL;
1231 }
1232}
1233
1218int machine__create_kernel_maps(struct machine *machine) 1234int machine__create_kernel_maps(struct machine *machine)
1219{ 1235{
1220 struct dso *kernel = machine__get_kernel(machine); 1236 struct dso *kernel = machine__get_kernel(machine);
@@ -1239,40 +1255,22 @@ int machine__create_kernel_maps(struct machine *machine)
1239 "continuing anyway...\n", machine->pid); 1255 "continuing anyway...\n", machine->pid);
1240 } 1256 }
1241 1257
1242 /*
1243 * Now that we have all the maps created, just set the ->end of them:
1244 */
1245 map_groups__fixup_end(&machine->kmaps);
1246
1247 if (!machine__get_running_kernel_start(machine, &name, &addr)) { 1258 if (!machine__get_running_kernel_start(machine, &name, &addr)) {
1248 if (name && 1259 if (name &&
1249 maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) { 1260 maps__set_kallsyms_ref_reloc_sym(machine->vmlinux_maps, name, addr)) {
1250 machine__destroy_kernel_maps(machine); 1261 machine__destroy_kernel_maps(machine);
1251 return -1; 1262 return -1;
1252 } 1263 }
1264 machine__set_kernel_mmap(machine, addr, 0);
1253 } 1265 }
1254 1266
1267 /*
1268 * Now that we have all the maps created, just set the ->end of them:
1269 */
1270 map_groups__fixup_end(&machine->kmaps);
1255 return 0; 1271 return 0;
1256} 1272}
1257 1273
1258static void machine__set_kernel_mmap_len(struct machine *machine,
1259 union perf_event *event)
1260{
1261 int i;
1262
1263 for (i = 0; i < MAP__NR_TYPES; i++) {
1264 machine->vmlinux_maps[i]->start = event->mmap.start;
1265 machine->vmlinux_maps[i]->end = (event->mmap.start +
1266 event->mmap.len);
1267 /*
1268 * Be a bit paranoid here, some perf.data file came with
1269 * a zero sized synthesized MMAP event for the kernel.
1270 */
1271 if (machine->vmlinux_maps[i]->end == 0)
1272 machine->vmlinux_maps[i]->end = ~0ULL;
1273 }
1274}
1275
1276static bool machine__uses_kcore(struct machine *machine) 1274static bool machine__uses_kcore(struct machine *machine)
1277{ 1275{
1278 struct dso *dso; 1276 struct dso *dso;
@@ -1289,7 +1287,6 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1289 union perf_event *event) 1287 union perf_event *event)
1290{ 1288{
1291 struct map *map; 1289 struct map *map;
1292 char kmmap_prefix[PATH_MAX];
1293 enum dso_kernel_type kernel_type; 1290 enum dso_kernel_type kernel_type;
1294 bool is_kernel_mmap; 1291 bool is_kernel_mmap;
1295 1292
@@ -1297,15 +1294,14 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1297 if (machine__uses_kcore(machine)) 1294 if (machine__uses_kcore(machine))
1298 return 0; 1295 return 0;
1299 1296
1300 machine__mmap_name(machine, kmmap_prefix, sizeof(kmmap_prefix));
1301 if (machine__is_host(machine)) 1297 if (machine__is_host(machine))
1302 kernel_type = DSO_TYPE_KERNEL; 1298 kernel_type = DSO_TYPE_KERNEL;
1303 else 1299 else
1304 kernel_type = DSO_TYPE_GUEST_KERNEL; 1300 kernel_type = DSO_TYPE_GUEST_KERNEL;
1305 1301
1306 is_kernel_mmap = memcmp(event->mmap.filename, 1302 is_kernel_mmap = memcmp(event->mmap.filename,
1307 kmmap_prefix, 1303 machine->mmap_name,
1308 strlen(kmmap_prefix) - 1) == 0; 1304 strlen(machine->mmap_name) - 1) == 0;
1309 if (event->mmap.filename[0] == '/' || 1305 if (event->mmap.filename[0] == '/' ||
1310 (!is_kernel_mmap && event->mmap.filename[0] == '[')) { 1306 (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
1311 map = machine__findnew_module_map(machine, event->mmap.start, 1307 map = machine__findnew_module_map(machine, event->mmap.start,
@@ -1316,7 +1312,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1316 map->end = map->start + event->mmap.len; 1312 map->end = map->start + event->mmap.len;
1317 } else if (is_kernel_mmap) { 1313 } else if (is_kernel_mmap) {
1318 const char *symbol_name = (event->mmap.filename + 1314 const char *symbol_name = (event->mmap.filename +
1319 strlen(kmmap_prefix)); 1315 strlen(machine->mmap_name));
1320 /* 1316 /*
1321 * Should be there already, from the build-id table in 1317 * Should be there already, from the build-id table in
1322 * the header. 1318 * the header.
@@ -1357,7 +1353,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1357 up_read(&machine->dsos.lock); 1353 up_read(&machine->dsos.lock);
1358 1354
1359 if (kernel == NULL) 1355 if (kernel == NULL)
1360 kernel = machine__findnew_dso(machine, kmmap_prefix); 1356 kernel = machine__findnew_dso(machine, machine->mmap_name);
1361 if (kernel == NULL) 1357 if (kernel == NULL)
1362 goto out_problem; 1358 goto out_problem;
1363 1359
@@ -1370,7 +1366,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
1370 if (strstr(kernel->long_name, "vmlinux")) 1366 if (strstr(kernel->long_name, "vmlinux"))
1371 dso__set_short_name(kernel, "[kernel.vmlinux]", false); 1367 dso__set_short_name(kernel, "[kernel.vmlinux]", false);
1372 1368
1373 machine__set_kernel_mmap_len(machine, event); 1369 machine__set_kernel_mmap(machine, event->mmap.start,
1370 event->mmap.start + event->mmap.len);
1374 1371
1375 /* 1372 /*
1376 * Avoid using a zero address (kptr_restrict) for the ref reloc 1373 * Avoid using a zero address (kptr_restrict) for the ref reloc
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 5ce860b64c74..66cc200ef86f 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -43,6 +43,7 @@ struct machine {
43 bool comm_exec; 43 bool comm_exec;
44 bool kptr_restrict_warned; 44 bool kptr_restrict_warned;
45 char *root_dir; 45 char *root_dir;
46 char *mmap_name;
46 struct threads threads[THREADS__TABLE_SIZE]; 47 struct threads threads[THREADS__TABLE_SIZE];
47 struct vdso_info *vdso_info; 48 struct vdso_info *vdso_info;
48 struct perf_env *env; 49 struct perf_env *env;
@@ -142,8 +143,6 @@ struct machine *machines__find(struct machines *machines, pid_t pid);
142struct machine *machines__findnew(struct machines *machines, pid_t pid); 143struct machine *machines__findnew(struct machines *machines, pid_t pid);
143 144
144void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size); 145void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
145char *machine__mmap_name(struct machine *machine, char *bf, size_t size);
146
147void machines__set_comm_exec(struct machines *machines, bool comm_exec); 146void machines__set_comm_exec(struct machines *machines, bool comm_exec);
148 147
149struct machine *machine__new_host(void); 148struct machine *machine__new_host(void);
@@ -226,8 +225,6 @@ struct map *machine__findnew_module_map(struct machine *machine, u64 start,
226 const char *filename); 225 const char *filename);
227int arch__fix_module_text_start(u64 *start, const char *name); 226int arch__fix_module_text_start(u64 *start, const char *name);
228 227
229int __machine__load_kallsyms(struct machine *machine, const char *filename,
230 enum map_type type, bool no_kcore);
231int machine__load_kallsyms(struct machine *machine, const char *filename, 228int machine__load_kallsyms(struct machine *machine, const char *filename,
232 enum map_type type); 229 enum map_type type);
233int machine__load_vmlinux_path(struct machine *machine, enum map_type type); 230int machine__load_vmlinux_path(struct machine *machine, enum map_type type);
@@ -239,7 +236,6 @@ size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
239 bool (skip)(struct dso *dso, int parm), int parm); 236 bool (skip)(struct dso *dso, int parm), int parm);
240 237
241void machine__destroy_kernel_maps(struct machine *machine); 238void machine__destroy_kernel_maps(struct machine *machine);
242int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel);
243int machine__create_kernel_maps(struct machine *machine); 239int machine__create_kernel_maps(struct machine *machine);
244 240
245int machines__create_kernel_maps(struct machines *machines, pid_t pid); 241int machines__create_kernel_maps(struct machines *machines, pid_t pid);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 57e38fdf0b34..1111d5bf15ca 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -576,6 +576,34 @@ char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
576 return NULL; 576 return NULL;
577} 577}
578 578
579/* Return zero when the cpuid from the mapfile.csv matches the
580 * cpuid string generated on this platform.
581 * Otherwise return non-zero.
582 */
583int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
584{
585 regex_t re;
586 regmatch_t pmatch[1];
587 int match;
588
589 if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
590 /* Warn unable to generate match particular string. */
591 pr_info("Invalid regular expression %s\n", mapcpuid);
592 return 1;
593 }
594
595 match = !regexec(&re, cpuid, 1, pmatch, 0);
596 regfree(&re);
597 if (match) {
598 size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
599
600 /* Verify the entire string matched. */
601 if (match_len == strlen(cpuid))
602 return 0;
603 }
604 return 1;
605}
606
579static char *perf_pmu__getcpuid(struct perf_pmu *pmu) 607static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
580{ 608{
581 char *cpuid; 609 char *cpuid;
@@ -610,31 +638,14 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
610 638
611 i = 0; 639 i = 0;
612 for (;;) { 640 for (;;) {
613 regex_t re;
614 regmatch_t pmatch[1];
615 int match;
616
617 map = &pmu_events_map[i++]; 641 map = &pmu_events_map[i++];
618 if (!map->table) { 642 if (!map->table) {
619 map = NULL; 643 map = NULL;
620 break; 644 break;
621 } 645 }
622 646
623 if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { 647 if (!strcmp_cpuid_str(map->cpuid, cpuid))
624 /* Warn unable to generate match particular string. */
625 pr_info("Invalid regular expression %s\n", map->cpuid);
626 break; 648 break;
627 }
628
629 match = !regexec(&re, cpuid, 1, pmatch, 0);
630 regfree(&re);
631 if (match) {
632 size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
633
634 /* Verify the entire string matched. */
635 if (match_len == strlen(cpuid))
636 break;
637 }
638 } 649 }
639 free(cpuid); 650 free(cpuid);
640 return map; 651 return map;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2da4d0456a03..e8514f651865 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -111,17 +111,20 @@ struct sort_entry sort_thread = {
111 111
112/* --sort comm */ 112/* --sort comm */
113 113
114/*
115 * We can't use pointer comparison in functions below,
116 * because it gives different results based on pointer
117 * values, which could break some sorting assumptions.
118 */
114static int64_t 119static int64_t
115sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) 120sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
116{ 121{
117 /* Compare the addr that should be unique among comm */
118 return strcmp(comm__str(right->comm), comm__str(left->comm)); 122 return strcmp(comm__str(right->comm), comm__str(left->comm));
119} 123}
120 124
121static int64_t 125static int64_t
122sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) 126sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
123{ 127{
124 /* Compare the addr that should be unique among comm */
125 return strcmp(comm__str(right->comm), comm__str(left->comm)); 128 return strcmp(comm__str(right->comm), comm__str(left->comm));
126} 129}
127 130
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index dbc6f7134f61..2f44e386a0e8 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -90,6 +90,8 @@ struct perf_stat_config {
90 bool scale; 90 bool scale;
91 FILE *output; 91 FILE *output;
92 unsigned int interval; 92 unsigned int interval;
93 unsigned int timeout;
94 int times;
93 struct runtime_stat *stats; 95 struct runtime_stat *stats;
94 int stats_num; 96 int stats_num;
95}; 97};
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index cc065d4bfafc..a1a312d99f30 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1582,7 +1582,7 @@ int dso__load(struct dso *dso, struct map *map)
1582 bool next_slot = false; 1582 bool next_slot = false;
1583 bool is_reg; 1583 bool is_reg;
1584 bool nsexit; 1584 bool nsexit;
1585 int sirc; 1585 int sirc = -1;
1586 1586
1587 enum dso_binary_type symtab_type = binary_type_symtab[i]; 1587 enum dso_binary_type symtab_type = binary_type_symtab[i];
1588 1588
@@ -1600,16 +1600,14 @@ int dso__load(struct dso *dso, struct map *map)
1600 nsinfo__mountns_exit(&nsc); 1600 nsinfo__mountns_exit(&nsc);
1601 1601
1602 is_reg = is_regular_file(name); 1602 is_reg = is_regular_file(name);
1603 sirc = symsrc__init(ss, dso, name, symtab_type); 1603 if (is_reg)
1604 sirc = symsrc__init(ss, dso, name, symtab_type);
1604 1605
1605 if (nsexit) 1606 if (nsexit)
1606 nsinfo__mountns_enter(dso->nsinfo, &nsc); 1607 nsinfo__mountns_enter(dso->nsinfo, &nsc);
1607 1608
1608 if (!is_reg || sirc < 0) { 1609 if (!is_reg || sirc < 0)
1609 if (sirc >= 0)
1610 symsrc__destroy(ss);
1611 continue; 1610 continue;
1612 }
1613 1611
1614 if (!syms_ss && symsrc__has_symtab(ss)) { 1612 if (!syms_ss && symsrc__has_symtab(ss)) {
1615 syms_ss = ss; 1613 syms_ss = ss;
@@ -1960,8 +1958,7 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
1960 pr_debug("Using %s for symbols\n", kallsyms_filename); 1958 pr_debug("Using %s for symbols\n", kallsyms_filename);
1961 if (err > 0 && !dso__is_kcore(dso)) { 1959 if (err > 0 && !dso__is_kcore(dso)) {
1962 dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS; 1960 dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
1963 machine__mmap_name(machine, path, sizeof(path)); 1961 dso__set_long_name(dso, machine->mmap_name, false);
1964 dso__set_long_name(dso, strdup(path), true);
1965 map__fixup_start(map); 1962 map__fixup_start(map);
1966 map__fixup_end(map); 1963 map__fixup_end(map);
1967 } 1964 }
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 303bdb84ab5a..895122d638dd 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -30,6 +30,14 @@ static const char **syscalltbl_native = syscalltbl_x86_64;
30#include <asm/syscalls_64.c> 30#include <asm/syscalls_64.c>
31const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; 31const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
32static const char **syscalltbl_native = syscalltbl_s390_64; 32static const char **syscalltbl_native = syscalltbl_s390_64;
33#elif defined(__powerpc64__)
34#include <asm/syscalls_64.c>
35const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
36static const char **syscalltbl_native = syscalltbl_powerpc_64;
37#elif defined(__powerpc__)
38#include <asm/syscalls_32.c>
39const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
40static const char **syscalltbl_native = syscalltbl_powerpc_32;
33#endif 41#endif
34 42
35struct syscall { 43struct syscall {
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index 3e1038f6491c..729dad8f412d 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -323,7 +323,7 @@ out_free_threads:
323} 323}
324 324
325struct thread_map *thread_map__new_str(const char *pid, const char *tid, 325struct thread_map *thread_map__new_str(const char *pid, const char *tid,
326 uid_t uid, bool per_thread) 326 uid_t uid, bool all_threads)
327{ 327{
328 if (pid) 328 if (pid)
329 return thread_map__new_by_pid_str(pid); 329 return thread_map__new_by_pid_str(pid);
@@ -331,7 +331,7 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid,
331 if (!tid && uid != UINT_MAX) 331 if (!tid && uid != UINT_MAX)
332 return thread_map__new_by_uid(uid); 332 return thread_map__new_by_uid(uid);
333 333
334 if (per_thread) 334 if (all_threads)
335 return thread_map__new_all_cpus(); 335 return thread_map__new_all_cpus();
336 336
337 return thread_map__new_by_tid_str(tid); 337 return thread_map__new_by_tid_str(tid);
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 0a806b99e73c..5ec91cfd1869 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map);
31void thread_map__put(struct thread_map *map); 31void thread_map__put(struct thread_map *map);
32 32
33struct thread_map *thread_map__new_str(const char *pid, 33struct thread_map *thread_map__new_str(const char *pid,
34 const char *tid, uid_t uid, bool per_thread); 34 const char *tid, uid_t uid, bool all_threads);
35 35
36struct thread_map *thread_map__new_by_tid_str(const char *tid_str); 36struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
37 37