aboutsummaryrefslogtreecommitdiffstats
path: root/tools/perf
diff options
context:
space:
mode:
Diffstat (limited to 'tools/perf')
-rw-r--r--tools/perf/Documentation/Makefile2
-rw-r--r--tools/perf/Documentation/examples.txt225
-rw-r--r--tools/perf/Documentation/perf-record.txt60
-rw-r--r--tools/perf/Documentation/perf-stat.txt2
-rw-r--r--tools/perf/Documentation/perf-top.txt112
-rw-r--r--tools/perf/Makefile40
-rw-r--r--tools/perf/builtin-annotate.c14
-rw-r--r--tools/perf/builtin-list.c3
-rw-r--r--tools/perf/builtin-record.c108
-rw-r--r--tools/perf/builtin-report.c146
-rw-r--r--tools/perf/builtin-stat.c2
-rw-r--r--tools/perf/builtin-top.c553
-rw-r--r--tools/perf/util/callchain.c32
-rw-r--r--tools/perf/util/callchain.h8
-rw-r--r--tools/perf/util/header.c5
-rw-r--r--tools/perf/util/parse-events.c36
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/quote.c2
-rw-r--r--tools/perf/util/symbol.c133
-rw-r--r--tools/perf/util/symbol.h26
20 files changed, 1347 insertions, 163 deletions
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 5457192e1b41..bdd3b7ecad0a 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -35,7 +35,7 @@ man7dir=$(mandir)/man7
35# DESTDIR= 35# DESTDIR=
36 36
37ASCIIDOC=asciidoc 37ASCIIDOC=asciidoc
38ASCIIDOC_EXTRA = 38ASCIIDOC_EXTRA = --unsafe
39MANPAGE_XSL = manpage-normal.xsl 39MANPAGE_XSL = manpage-normal.xsl
40XMLTO_EXTRA = 40XMLTO_EXTRA =
41INSTALL?=install 41INSTALL?=install
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
new file mode 100644
index 000000000000..8eb6c489fb15
--- /dev/null
+++ b/tools/perf/Documentation/examples.txt
@@ -0,0 +1,225 @@
1
2 ------------------------------
3 ****** perf by examples ******
4 ------------------------------
5
6[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
7
8
9First, discovery/enumeration of available counters can be done via
10'perf list':
11
12titan:~> perf list
13 [...]
14 kmem:kmalloc [Tracepoint event]
15 kmem:kmem_cache_alloc [Tracepoint event]
16 kmem:kmalloc_node [Tracepoint event]
17 kmem:kmem_cache_alloc_node [Tracepoint event]
18 kmem:kfree [Tracepoint event]
19 kmem:kmem_cache_free [Tracepoint event]
20 kmem:mm_page_free_direct [Tracepoint event]
21 kmem:mm_pagevec_free [Tracepoint event]
22 kmem:mm_page_alloc [Tracepoint event]
23 kmem:mm_page_alloc_zone_locked [Tracepoint event]
24 kmem:mm_page_pcpu_drain [Tracepoint event]
25 kmem:mm_page_alloc_extfrag [Tracepoint event]
26
27Then any (or all) of the above event sources can be activated and
28measured. For example the page alloc/free properties of a 'hackbench
29run' are:
30
31 titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
32 -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
33 Time: 0.575
34
35 Performance counter stats for './hackbench 10':
36
37 13857 kmem:mm_page_pcpu_drain
38 27576 kmem:mm_page_alloc
39 6025 kmem:mm_pagevec_free
40 20934 kmem:mm_page_free_direct
41
42 0.613972165 seconds time elapsed
43
44You can observe the statistical properties as well, by using the
45'repeat the workload N times' feature of perf stat:
46
47 titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
48 kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
49 kmem:mm_page_free_direct ./hackbench 10
50 Time: 0.627
51 Time: 0.644
52 Time: 0.564
53 Time: 0.559
54 Time: 0.626
55
56 Performance counter stats for './hackbench 10' (5 runs):
57
58 12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
59 25035 kmem:mm_page_alloc ( +- 3.783% )
60 6104 kmem:mm_pagevec_free ( +- 0.934% )
61 18376 kmem:mm_page_free_direct ( +- 4.941% )
62
63 0.643954516 seconds time elapsed ( +- 2.363% )
64
65Furthermore, these tracepoints can be used to sample the workload as
66well. For example the page allocations done by a 'git gc' can be
67captured the following way:
68
69 titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
70 Counting objects: 1148, done.
71 Delta compression using up to 2 threads.
72 Compressing objects: 100% (450/450), done.
73 Writing objects: 100% (1148/1148), done.
74 Total 1148 (delta 690), reused 1148 (delta 690)
75 [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
76
77To check which functions generated page allocations:
78
79 titan:~/git> perf report
80 # Samples: 10646
81 #
82 # Overhead Command Shared Object
83 # ........ ............... ..........................
84 #
85 23.57% git-repack /lib64/libc-2.5.so
86 21.81% git /lib64/libc-2.5.so
87 14.59% git ./git
88 11.79% git-repack ./git
89 7.12% git /lib64/ld-2.5.so
90 3.16% git-repack /lib64/libpthread-2.5.so
91 2.09% git-repack /bin/bash
92 1.97% rm /lib64/libc-2.5.so
93 1.39% mv /lib64/ld-2.5.so
94 1.37% mv /lib64/libc-2.5.so
95 1.12% git-repack /lib64/ld-2.5.so
96 0.95% rm /lib64/ld-2.5.so
97 0.90% git-update-serv /lib64/libc-2.5.so
98 0.73% git-update-serv /lib64/ld-2.5.so
99 0.68% perf /lib64/libpthread-2.5.so
100 0.64% git-repack /usr/lib64/libz.so.1.2.3
101
102Or to see it on a more finegrained level:
103
104titan:~/git> perf report --sort comm,dso,symbol
105# Samples: 10646
106#
107# Overhead Command Shared Object Symbol
108# ........ ............... .......................... ......
109#
110 9.35% git-repack ./git [.] insert_obj_hash
111 9.12% git ./git [.] insert_obj_hash
112 7.31% git /lib64/libc-2.5.so [.] memcpy
113 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
114 6.24% git-repack /lib64/libc-2.5.so [.] memcpy
115 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
116 5.47% git /lib64/libc-2.5.so [.] _int_malloc
117 2.99% git /lib64/libc-2.5.so [.] memset
118
119Furthermore, call-graph sampling can be done too, of page
120allocations - to see precisely what kind of page allocations there
121are:
122
123 titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
124 Counting objects: 1148, done.
125 Delta compression using up to 2 threads.
126 Compressing objects: 100% (450/450), done.
127 Writing objects: 100% (1148/1148), done.
128 Total 1148 (delta 690), reused 1148 (delta 690)
129 [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
130
131 titan:~/git> perf report -g
132 # Samples: 10686
133 #
134 # Overhead Command Shared Object
135 # ........ ............... ..........................
136 #
137 23.25% git-repack /lib64/libc-2.5.so
138 |
139 |--50.00%-- _int_free
140 |
141 |--37.50%-- __GI___fork
142 | make_child
143 |
144 |--12.50%-- ptmalloc_unlock_all2
145 | make_child
146 |
147 --6.25%-- __GI_strcpy
148 21.61% git /lib64/libc-2.5.so
149 |
150 |--30.00%-- __GI_read
151 | |
152 | --83.33%-- git_config_from_file
153 | git_config
154 | |
155 [...]
156
157Or you can observe the whole system's page allocations for 10
158seconds:
159
160titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
161kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
162kmem:mm_page_free_direct sleep 10
163
164 Performance counter stats for 'sleep 10':
165
166 171585 kmem:mm_page_pcpu_drain
167 322114 kmem:mm_page_alloc
168 73623 kmem:mm_pagevec_free
169 254115 kmem:mm_page_free_direct
170
171 10.000591410 seconds time elapsed
172
173Or observe how fluctuating the page allocations are, via statistical
174analysis done over ten 1-second intervals:
175
176 titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
177 kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
178 kmem:mm_page_free_direct sleep 1
179
180 Performance counter stats for 'sleep 1' (10 runs):
181
182 17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
183 34394 kmem:mm_page_alloc ( +- 4.617% )
184 7509 kmem:mm_pagevec_free ( +- 4.820% )
185 25653 kmem:mm_page_free_direct ( +- 3.672% )
186
187 1.058135029 seconds time elapsed ( +- 3.089% )
188
189Or you can annotate the recorded 'git gc' run on a per symbol basis
190and check which instructions/source-code generated page allocations:
191
192 titan:~/git> perf annotate __GI___fork
193 ------------------------------------------------
194 Percent | Source code & Disassembly of libc-2.5.so
195 ------------------------------------------------
196 :
197 :
198 : Disassembly of section .plt:
199 : Disassembly of section .text:
200 :
201 : 00000031a2e95560 <__fork>:
202 [...]
203 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
204 0.00 : 31a2e95607: 0f 05 syscall
205 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
206 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
207 0.00 : 31a2e95615: 85 c0 test %eax,%eax
208
209( this shows that 83.42% of __GI___fork's page allocations come from
210 the 0x38 system call it performs. )
211
212etc. etc. - a lot more is possible. I could list a dozen of
213other different usecases straight away - neither of which is
214possible via /proc/vmstat.
215
216/proc/vmstat is not in the same league really, in terms of
217expressive power of system analysis and performance
218analysis.
219
220All that the above results needed were those new tracepoints
221in include/tracing/events/kmem.h.
222
223 Ingo
224
225
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 1dbc1eeb4c01..6be696b0a2bb 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -29,13 +29,67 @@ OPTIONS
29 Select the PMU event. Selection can be a symbolic event name 29 Select the PMU event. Selection can be a symbolic event name
30 (use 'perf list' to list all events) or a raw PMU 30 (use 'perf list' to list all events) or a raw PMU
31 event (eventsel+umask) in the form of rNNN where NNN is a 31 event (eventsel+umask) in the form of rNNN where NNN is a
32 hexadecimal event descriptor. 32 hexadecimal event descriptor.
33 33
34-a:: 34-a::
35 system-wide collection 35 System-wide collection.
36 36
37-l:: 37-l::
38 scale counter values 38 Scale counter values.
39
40-p::
41--pid=::
42 Record events on existing pid.
43
44-r::
45--realtime=::
46 Collect data with this RT SCHED_FIFO priority.
47-A::
48--append::
49 Append to the output file to do incremental profiling.
50
51-f::
52--force::
53 Overwrite existing data file.
54
55-c::
56--count=::
57 Event period to sample.
58
59-o::
60--output=::
61 Output file name.
62
63-i::
64--inherit::
65 Child tasks inherit counters.
66-F::
67--freq=::
68 Profile at this frequency.
69
70-m::
71--mmap-pages=::
72 Number of mmap data pages.
73
74-g::
75--call-graph::
76 Do call-graph (stack chain/backtrace) recording.
77
78-v::
79--verbose::
80 Be more verbose (show counter open errors, etc).
81
82-s::
83--stat::
84 Per thread counts.
85
86-d::
87--data::
88 Sample addresses.
89
90-n::
91--no-samples::
92 Don't sample.
39 93
40SEE ALSO 94SEE ALSO
41-------- 95--------
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 0d74346d21ab..484080dd5b6f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -40,7 +40,7 @@ OPTIONS
40-a:: 40-a::
41 system-wide collection 41 system-wide collection
42 42
43-S:: 43-c::
44 scale counter values 44 scale counter values
45 45
46EXAMPLES 46EXAMPLES
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 539d01289725..4a7d558dc309 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -3,36 +3,122 @@ perf-top(1)
3 3
4NAME 4NAME
5---- 5----
6perf-top - Run a command and profile it 6perf-top - System profiling tool.
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> 11'perf top' [-e <EVENT> | --event=EVENT] [<options>]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command runs a command and gathers a performance counter profile 15This command generates and displays a performance counter profile in realtime.
16from it.
17 16
18 17
19OPTIONS 18OPTIONS
20------- 19-------
21<command>...:: 20-a::
22 Any command you can specify in a shell. 21--all-cpus::
22 System-wide collection. (default)
23
24-c <count>::
25--count=<count>::
26 Event period to sample.
27
28-C <cpu>::
29--CPU=<cpu>::
30 CPU to profile.
31
32-d <seconds>::
33--delay=<seconds>::
34 Number of seconds to delay between refreshes.
23 35
24-e:: 36-e <event>::
25--event=:: 37--event=<event>::
26 Select the PMU event. Selection can be a symbolic event name 38 Select the PMU event. Selection can be a symbolic event name
27 (use 'perf list' to list all events) or a raw PMU 39 (use 'perf list' to list all events) or a raw PMU
28 event (eventsel+umask) in the form of rNNN where NNN is a 40 event (eventsel+umask) in the form of rNNN where NNN is a
29 hexadecimal event descriptor. 41 hexadecimal event descriptor.
30 42
31-a:: 43-E <entries>::
32 system-wide collection 44--entries=<entries>::
45 Display this many functions.
46
47-f <count>::
48--count-filter=<count>::
49 Only display functions with more events than this.
50
51-F <freq>::
52--freq=<freq>::
53 Profile at this frequency.
54
55-i::
56--inherit::
57 Child tasks inherit counters, only makes sens with -p option.
58
59-k <path>::
60--vmlinux=<path>::
61 Path to vmlinux. Required for annotation functionality.
62
63-m <pages>::
64--mmap-pages=<pages>::
65 Number of mmapped data pages.
66
67-p <pid>::
68--pid=<pid>::
69 Profile events on existing pid.
70
71-r <priority>::
72--realtime=<priority>::
73 Collect data with this RT SCHED_FIFO priority.
74
75-s <symbol>::
76--sym-annotate=<symbol>::
77 Annotate this symbol. Requires -k option.
78
79-v::
80--verbose::
81 Be more verbose (show counter open errors, etc).
82
83-z::
84--zero::
85 Zero history across display updates.
86
87INTERACTIVE PROMPTING KEYS
88--------------------------
89
90[d]::
91 Display refresh delay.
92
93[e]::
94 Number of entries to display.
95
96[E]::
97 Event to display when multiple counters are active.
98
99[f]::
100 Profile display filter (>= hit count).
101
102[F]::
103 Annotation display filter (>= % of total).
104
105[s]::
106 Annotate symbol.
107
108[S]::
109 Stop annotation, return to full profile display.
110
111[w]::
112 Toggle between weighted sum and individual count[E]r profile.
113
114[z]::
115 Toggle event count zeroing across display updates.
116
117[qQ]::
118 Quit.
119
120Pressing any unmapped key displays a menu, and prompts for input.
33 121
34-l::
35 scale counter values
36 122
37SEE ALSO 123SEE ALSO
38-------- 124--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index a5e9b876ca09..c045b4271e57 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -158,8 +158,10 @@ uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
158uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') 158uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
159 159
160# If we're on a 64-bit kernel, use -m64 160# If we're on a 64-bit kernel, use -m64
161ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) 161ifndef NO_64BIT
162 M64 := -m64 162 ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
163 M64 := -m64
164 endif
163endif 165endif
164 166
165# CFLAGS and LDFLAGS are for the users to override from the command line. 167# CFLAGS and LDFLAGS are for the users to override from the command line.
@@ -345,7 +347,6 @@ BUILTIN_OBJS += builtin-stat.o
345BUILTIN_OBJS += builtin-top.o 347BUILTIN_OBJS += builtin-top.o
346 348
347PERFLIBS = $(LIB_FILE) 349PERFLIBS = $(LIB_FILE)
348EXTLIBS = -lbfd
349 350
350# 351#
351# Platform specific tweaks 352# Platform specific tweaks
@@ -374,6 +375,39 @@ ifeq ($(uname_S),Darwin)
374 PTHREAD_LIBS = 375 PTHREAD_LIBS =
375endif 376endif
376 377
378ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) > /dev/null 2>&1 && echo y"), y)
379 msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel);
380endif
381
382ifdef NO_DEMANGLE
383 BASIC_CFLAGS += -DNO_DEMANGLE
384else
385 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
386
387 ifeq ($(has_bfd),y)
388 EXTLIBS += -lbfd
389 else
390 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
391 ifeq ($(has_bfd_iberty),y)
392 EXTLIBS += -lbfd -liberty
393 else
394 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
395 ifeq ($(has_bfd_iberty_z),y)
396 EXTLIBS += -lbfd -liberty -lz
397 else
398 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
399 ifeq ($(has_cplus_demangle),y)
400 EXTLIBS += -liberty
401 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
402 else
403 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
404 BASIC_CFLAGS += -DNO_DEMANGLE
405 endif
406 endif
407 endif
408 endif
409endif
410
377ifndef CC_LD_DYNPATH 411ifndef CC_LD_DYNPATH
378 ifdef NO_R_TO_GCC_LINKER 412 ifdef NO_R_TO_GCC_LINKER
379 # Some gcc does not accept and pass -R to the linker to specify 413 # Some gcc does not accept and pass -R to the linker to specify
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 1dba568e1941..5e17de984dc8 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -31,6 +31,7 @@ static char *vmlinux = "vmlinux";
31static char default_sort_order[] = "comm,symbol"; 31static char default_sort_order[] = "comm,symbol";
32static char *sort_order = default_sort_order; 32static char *sort_order = default_sort_order;
33 33
34static int force;
34static int input; 35static int input;
35static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; 36static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
36 37
@@ -980,6 +981,13 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head)
980 (void *)(long)(event->header.size), 981 (void *)(long)(event->header.size),
981 event->fork.pid, event->fork.ppid); 982 event->fork.pid, event->fork.ppid);
982 983
984 /*
985 * A thread clone will have the same PID for both
986 * parent and child.
987 */
988 if (thread == parent)
989 return 0;
990
983 if (!thread || !parent || thread__fork(thread, parent)) { 991 if (!thread || !parent || thread__fork(thread, parent)) {
984 dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); 992 dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
985 return -1; 993 return -1;
@@ -1327,6 +1335,11 @@ static int __cmd_annotate(void)
1327 exit(-1); 1335 exit(-1);
1328 } 1336 }
1329 1337
1338 if (!force && (stat.st_uid != geteuid())) {
1339 fprintf(stderr, "file: %s not owned by current user\n", input_name);
1340 exit(-1);
1341 }
1342
1330 if (!stat.st_size) { 1343 if (!stat.st_size) {
1331 fprintf(stderr, "zero-sized file, nothing to do!\n"); 1344 fprintf(stderr, "zero-sized file, nothing to do!\n");
1332 exit(0); 1345 exit(0);
@@ -1432,6 +1445,7 @@ static const struct option options[] = {
1432 "input file name"), 1445 "input file name"),
1433 OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", 1446 OPT_STRING('s', "symbol", &sym_hist_filter, "symbol",
1434 "symbol to annotate"), 1447 "symbol to annotate"),
1448 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
1435 OPT_BOOLEAN('v', "verbose", &verbose, 1449 OPT_BOOLEAN('v', "verbose", &verbose,
1436 "be more verbose (show symbol address, etc)"), 1450 "be more verbose (show symbol address, etc)"),
1437 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 1451 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8a35c9..d88c6961274c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
10 10
11#include "perf.h" 11#include "perf.h"
12 12
13#include "util/parse-options.h"
14#include "util/parse-events.h" 13#include "util/parse-events.h"
14#include "util/cache.h"
15 15
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
17{ 17{
18 setup_pager();
18 print_events(); 19 print_events();
19 return 0; 20 return 0;
20} 21}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6da09928130f..89a5ddcd1ded 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,7 +34,9 @@ static int output;
34static const char *output_name = "perf.data"; 34static const char *output_name = "perf.data";
35static int group = 0; 35static int group = 0;
36static unsigned int realtime_prio = 0; 36static unsigned int realtime_prio = 0;
37static int raw_samples = 0;
37static int system_wide = 0; 38static int system_wide = 0;
39static int profile_cpu = -1;
38static pid_t target_pid = -1; 40static pid_t target_pid = -1;
39static int inherit = 1; 41static int inherit = 1;
40static int force = 0; 42static int force = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
203 kill(getpid(), signr); 205 kill(getpid(), signr);
204} 206}
205 207
206static void pid_synthesize_comm_event(pid_t pid, int full) 208static pid_t pid_synthesize_comm_event(pid_t pid, int full)
207{ 209{
208 struct comm_event comm_ev; 210 struct comm_event comm_ev;
209 char filename[PATH_MAX]; 211 char filename[PATH_MAX];
210 char bf[BUFSIZ]; 212 char bf[BUFSIZ];
211 int fd; 213 FILE *fp;
212 size_t size; 214 size_t size = 0;
213 char *field, *sep;
214 DIR *tasks; 215 DIR *tasks;
215 struct dirent dirent, *next; 216 struct dirent dirent, *next;
217 pid_t tgid = 0;
216 218
217 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); 219 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
218 220
219 fd = open(filename, O_RDONLY); 221 fp = fopen(filename, "r");
220 if (fd < 0) { 222 if (fp == NULL) {
221 /* 223 /*
222 * We raced with a task exiting - just return: 224 * We raced with a task exiting - just return:
223 */ 225 */
224 if (verbose) 226 if (verbose)
225 fprintf(stderr, "couldn't open %s\n", filename); 227 fprintf(stderr, "couldn't open %s\n", filename);
226 return; 228 return 0;
227 }
228 if (read(fd, bf, sizeof(bf)) < 0) {
229 fprintf(stderr, "couldn't read %s\n", filename);
230 exit(EXIT_FAILURE);
231 } 229 }
232 close(fd);
233 230
234 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
235 memset(&comm_ev, 0, sizeof(comm_ev)); 231 memset(&comm_ev, 0, sizeof(comm_ev));
236 field = strchr(bf, '('); 232 while (!comm_ev.comm[0] || !comm_ev.pid) {
237 if (field == NULL) 233 if (fgets(bf, sizeof(bf), fp) == NULL)
238 goto out_failure; 234 goto out_failure;
239 sep = strchr(++field, ')'); 235
240 if (sep == NULL) 236 if (memcmp(bf, "Name:", 5) == 0) {
241 goto out_failure; 237 char *name = bf + 5;
242 size = sep - field; 238 while (*name && isspace(*name))
243 memcpy(comm_ev.comm, field, size++); 239 ++name;
244 240 size = strlen(name) - 1;
245 comm_ev.pid = pid; 241 memcpy(comm_ev.comm, name, size++);
242 } else if (memcmp(bf, "Tgid:", 5) == 0) {
243 char *tgids = bf + 5;
244 while (*tgids && isspace(*tgids))
245 ++tgids;
246 tgid = comm_ev.pid = atoi(tgids);
247 }
248 }
249
246 comm_ev.header.type = PERF_EVENT_COMM; 250 comm_ev.header.type = PERF_EVENT_COMM;
247 size = ALIGN(size, sizeof(u64)); 251 size = ALIGN(size, sizeof(u64));
248 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); 252 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
251 comm_ev.tid = pid; 255 comm_ev.tid = pid;
252 256
253 write_output(&comm_ev, comm_ev.header.size); 257 write_output(&comm_ev, comm_ev.header.size);
254 return; 258 goto out_fclose;
255 } 259 }
256 260
257 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 261 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
268 write_output(&comm_ev, comm_ev.header.size); 272 write_output(&comm_ev, comm_ev.header.size);
269 } 273 }
270 closedir(tasks); 274 closedir(tasks);
271 return; 275
276out_fclose:
277 fclose(fp);
278 return tgid;
272 279
273out_failure: 280out_failure:
274 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", 281 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
276 exit(EXIT_FAILURE); 283 exit(EXIT_FAILURE);
277} 284}
278 285
279static void pid_synthesize_mmap_samples(pid_t pid) 286static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
280{ 287{
281 char filename[PATH_MAX]; 288 char filename[PATH_MAX];
282 FILE *fp; 289 FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
328 mmap_ev.len -= mmap_ev.start; 335 mmap_ev.len -= mmap_ev.start;
329 mmap_ev.header.size = (sizeof(mmap_ev) - 336 mmap_ev.header.size = (sizeof(mmap_ev) -
330 (sizeof(mmap_ev.filename) - size)); 337 (sizeof(mmap_ev.filename) - size));
331 mmap_ev.pid = pid; 338 mmap_ev.pid = tgid;
332 mmap_ev.tid = pid; 339 mmap_ev.tid = pid;
333 340
334 write_output(&mmap_ev, mmap_ev.header.size); 341 write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
347 354
348 while (!readdir_r(proc, &dirent, &next) && next) { 355 while (!readdir_r(proc, &dirent, &next) && next) {
349 char *end; 356 char *end;
350 pid_t pid; 357 pid_t pid, tgid;
351 358
352 pid = strtol(dirent.d_name, &end, 10); 359 pid = strtol(dirent.d_name, &end, 10);
353 if (*end) /* only interested in proper numerical dirents */ 360 if (*end) /* only interested in proper numerical dirents */
354 continue; 361 continue;
355 362
356 pid_synthesize_comm_event(pid, 1); 363 tgid = pid_synthesize_comm_event(pid, 1);
357 pid_synthesize_mmap_samples(pid); 364 pid_synthesize_mmap_samples(pid, tgid);
358 } 365 }
359 366
360 closedir(proc); 367 closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
392 PERF_FORMAT_TOTAL_TIME_RUNNING | 399 PERF_FORMAT_TOTAL_TIME_RUNNING |
393 PERF_FORMAT_ID; 400 PERF_FORMAT_ID;
394 401
395 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 402 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
396 403
397 if (freq) { 404 if (freq) {
398 attr->sample_type |= PERF_SAMPLE_PERIOD; 405 attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,9 @@ static void create_counter(int counter, int cpu, pid_t pid)
412 if (call_graph) 419 if (call_graph)
413 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 420 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
414 421
422 if (raw_samples)
423 attr->sample_type |= PERF_SAMPLE_RAW;
424
415 attr->mmap = track; 425 attr->mmap = track;
416 attr->comm = track; 426 attr->comm = track;
417 attr->inherit = (cpu < 0) && inherit; 427 attr->inherit = (cpu < 0) && inherit;
@@ -425,6 +435,8 @@ try_again:
425 435
426 if (err == EPERM) 436 if (err == EPERM)
427 die("Permission error - are you root?\n"); 437 die("Permission error - are you root?\n");
438 else if (err == ENODEV && profile_cpu != -1)
439 die("No such device - did you specify an out-of-range profile CPU?\n");
428 440
429 /* 441 /*
430 * If it's cycles then fall back to hrtimer 442 * If it's cycles then fall back to hrtimer
@@ -524,10 +536,14 @@ static int __cmd_record(int argc, const char **argv)
524 signal(SIGCHLD, sig_handler); 536 signal(SIGCHLD, sig_handler);
525 signal(SIGINT, sig_handler); 537 signal(SIGINT, sig_handler);
526 538
527 if (!stat(output_name, &st) && !force && !append_file) { 539 if (!stat(output_name, &st) && st.st_size) {
528 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", 540 if (!force && !append_file) {
529 output_name); 541 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
530 exit(-1); 542 output_name);
543 exit(-1);
544 }
545 } else {
546 append_file = 0;
531 } 547 }
532 548
533 flags = O_CREAT|O_RDWR; 549 flags = O_CREAT|O_RDWR;
@@ -554,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
554 if (pid == -1) 570 if (pid == -1)
555 pid = getpid(); 571 pid = getpid();
556 572
557 open_counters(-1, pid); 573 open_counters(profile_cpu, pid);
558 } else for (i = 0; i < nr_cpus; i++) 574 } else {
559 open_counters(i, target_pid); 575 if (profile_cpu != -1) {
576 open_counters(profile_cpu, target_pid);
577 } else {
578 for (i = 0; i < nr_cpus; i++)
579 open_counters(i, target_pid);
580 }
581 }
560 582
561 if (file_new) 583 if (file_new)
562 perf_header__write(header, output); 584 perf_header__write(header, output);
563 585
564 if (!system_wide) { 586 if (!system_wide) {
565 pid_synthesize_comm_event(pid, 0); 587 pid_t tgid = pid_synthesize_comm_event(pid, 0);
566 pid_synthesize_mmap_samples(pid); 588 pid_synthesize_mmap_samples(pid, tgid);
567 } else 589 } else
568 synthesize_all(); 590 synthesize_all();
569 591
@@ -631,10 +653,14 @@ static const struct option options[] = {
631 "record events on existing pid"), 653 "record events on existing pid"),
632 OPT_INTEGER('r', "realtime", &realtime_prio, 654 OPT_INTEGER('r', "realtime", &realtime_prio,
633 "collect data with this RT SCHED_FIFO priority"), 655 "collect data with this RT SCHED_FIFO priority"),
656 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
657 "collect raw sample records from all opened counters"),
634 OPT_BOOLEAN('a', "all-cpus", &system_wide, 658 OPT_BOOLEAN('a', "all-cpus", &system_wide,
635 "system-wide collection from all CPUs"), 659 "system-wide collection from all CPUs"),
636 OPT_BOOLEAN('A', "append", &append_file, 660 OPT_BOOLEAN('A', "append", &append_file,
637 "append to the output file to do incremental profiling"), 661 "append to the output file to do incremental profiling"),
662 OPT_INTEGER('C', "profile_cpu", &profile_cpu,
663 "CPU to profile on"),
638 OPT_BOOLEAN('f', "force", &force, 664 OPT_BOOLEAN('f', "force", &force,
639 "overwrite existing data file"), 665 "overwrite existing data file"),
640 OPT_LONG('c', "count", &default_interval, 666 OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b20a4b6e31b7..8b2ec882e6e0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -31,13 +31,14 @@
31static char const *input_name = "perf.data"; 31static char const *input_name = "perf.data";
32static char *vmlinux = NULL; 32static char *vmlinux = NULL;
33 33
34static char default_sort_order[] = "comm,dso"; 34static char default_sort_order[] = "comm,dso,symbol";
35static char *sort_order = default_sort_order; 35static char *sort_order = default_sort_order;
36static char *dso_list_str, *comm_list_str, *sym_list_str, 36static char *dso_list_str, *comm_list_str, *sym_list_str,
37 *col_width_list_str; 37 *col_width_list_str;
38static struct strlist *dso_list, *comm_list, *sym_list; 38static struct strlist *dso_list, *comm_list, *sym_list;
39static char *field_sep; 39static char *field_sep;
40 40
41static int force;
41static int input; 42static int input;
42static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; 43static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV;
43 44
@@ -68,7 +69,7 @@ static int callchain;
68 69
69static 70static
70struct callchain_param callchain_param = { 71struct callchain_param callchain_param = {
71 .mode = CHAIN_GRAPH_ABS, 72 .mode = CHAIN_GRAPH_REL,
72 .min_percent = 0.5 73 .min_percent = 0.5
73}; 74};
74 75
@@ -99,6 +100,7 @@ struct comm_event {
99struct fork_event { 100struct fork_event {
100 struct perf_event_header header; 101 struct perf_event_header header;
101 u32 pid, ppid; 102 u32 pid, ppid;
103 u32 tid, ptid;
102}; 104};
103 105
104struct lost_event { 106struct lost_event {
@@ -111,7 +113,9 @@ struct read_event {
111 struct perf_event_header header; 113 struct perf_event_header header;
112 u32 pid,tid; 114 u32 pid,tid;
113 u64 value; 115 u64 value;
114 u64 format[3]; 116 u64 time_enabled;
117 u64 time_running;
118 u64 id;
115}; 119};
116 120
117typedef union event_union { 121typedef union event_union {
@@ -252,7 +256,7 @@ static int strcommon(const char *pathname)
252{ 256{
253 int n = 0; 257 int n = 0;
254 258
255 while (pathname[n] == cwd[n] && n < cwdlen) 259 while (n < cwdlen && pathname[n] == cwd[n])
256 ++n; 260 ++n;
257 261
258 return n; 262 return n;
@@ -697,7 +701,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
697 size_t ret = 0; 701 size_t ret = 0;
698 702
699 if (verbose) 703 if (verbose)
700 ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); 704 ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
705 dso__symtab_origin(self->dso));
701 706
702 ret += repsep_fprintf(fp, "[%c] ", self->level); 707 ret += repsep_fprintf(fp, "[%c] ", self->level);
703 if (self->sym) { 708 if (self->sym) {
@@ -887,6 +892,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
887 return ret; 892 return ret;
888} 893}
889 894
895static struct symbol *rem_sq_bracket;
896static struct callchain_list rem_hits;
897
898static void init_rem_hits(void)
899{
900 rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
901 if (!rem_sq_bracket) {
902 fprintf(stderr, "Not enough memory to display remaining hits\n");
903 return;
904 }
905
906 strcpy(rem_sq_bracket->name, "[...]");
907 rem_hits.sym = rem_sq_bracket;
908}
909
890static size_t 910static size_t
891callchain__fprintf_graph(FILE *fp, struct callchain_node *self, 911callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
892 u64 total_samples, int depth, int depth_mask) 912 u64 total_samples, int depth, int depth_mask)
@@ -896,25 +916,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
896 struct callchain_list *chain; 916 struct callchain_list *chain;
897 int new_depth_mask = depth_mask; 917 int new_depth_mask = depth_mask;
898 u64 new_total; 918 u64 new_total;
919 u64 remaining;
899 size_t ret = 0; 920 size_t ret = 0;
900 int i; 921 int i;
901 922
902 if (callchain_param.mode == CHAIN_GRAPH_REL) 923 if (callchain_param.mode == CHAIN_GRAPH_REL)
903 new_total = self->cumul_hit; 924 new_total = self->children_hit;
904 else 925 else
905 new_total = total_samples; 926 new_total = total_samples;
906 927
928 remaining = new_total;
929
907 node = rb_first(&self->rb_root); 930 node = rb_first(&self->rb_root);
908 while (node) { 931 while (node) {
932 u64 cumul;
933
909 child = rb_entry(node, struct callchain_node, rb_node); 934 child = rb_entry(node, struct callchain_node, rb_node);
935 cumul = cumul_hits(child);
936 remaining -= cumul;
910 937
911 /* 938 /*
912 * The depth mask manages the output of pipes that show 939 * The depth mask manages the output of pipes that show
913 * the depth. We don't want to keep the pipes of the current 940 * the depth. We don't want to keep the pipes of the current
914 * level for the last child of this depth 941 * level for the last child of this depth.
942 * Except if we have remaining filtered hits. They will
943 * supersede the last child
915 */ 944 */
916 next = rb_next(node); 945 next = rb_next(node);
917 if (!next) 946 if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
918 new_depth_mask &= ~(1 << (depth - 1)); 947 new_depth_mask &= ~(1 << (depth - 1));
919 948
920 /* 949 /*
@@ -929,7 +958,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
929 ret += ipchain__fprintf_graph(fp, chain, depth, 958 ret += ipchain__fprintf_graph(fp, chain, depth,
930 new_depth_mask, i++, 959 new_depth_mask, i++,
931 new_total, 960 new_total,
932 child->cumul_hit); 961 cumul);
933 } 962 }
934 ret += callchain__fprintf_graph(fp, child, new_total, 963 ret += callchain__fprintf_graph(fp, child, new_total,
935 depth + 1, 964 depth + 1,
@@ -937,6 +966,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
937 node = next; 966 node = next;
938 } 967 }
939 968
969 if (callchain_param.mode == CHAIN_GRAPH_REL &&
970 remaining && remaining != new_total) {
971
972 if (!rem_sq_bracket)
973 return ret;
974
975 new_depth_mask &= ~(1 << (depth - 1));
976
977 ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
978 new_depth_mask, 0, new_total,
979 remaining);
980 }
981
940 return ret; 982 return ret;
941} 983}
942 984
@@ -1357,6 +1399,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
1357 unsigned int width; 1399 unsigned int width;
1358 char *col_width = col_width_list_str; 1400 char *col_width = col_width_list_str;
1359 1401
1402 init_rem_hits();
1403
1360 fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); 1404 fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
1361 fprintf(fp, "#\n"); 1405 fprintf(fp, "#\n");
1362 1406
@@ -1423,11 +1467,13 @@ print_entries:
1423 if (sort_order == default_sort_order && 1467 if (sort_order == default_sort_order &&
1424 parent_pattern == default_parent_pattern) { 1468 parent_pattern == default_parent_pattern) {
1425 fprintf(fp, "#\n"); 1469 fprintf(fp, "#\n");
1426 fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); 1470 fprintf(fp, "# (For a higher level overview, try: perf report --sort comm,dso)\n");
1427 fprintf(fp, "#\n"); 1471 fprintf(fp, "#\n");
1428 } 1472 }
1429 fprintf(fp, "\n"); 1473 fprintf(fp, "\n");
1430 1474
1475 free(rem_sq_bracket);
1476
1431 return ret; 1477 return ret;
1432} 1478}
1433 1479
@@ -1481,11 +1527,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1481 more_data += sizeof(u64); 1527 more_data += sizeof(u64);
1482 } 1528 }
1483 1529
1484 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", 1530 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
1485 (void *)(offset + head), 1531 (void *)(offset + head),
1486 (void *)(long)(event->header.size), 1532 (void *)(long)(event->header.size),
1487 event->header.misc, 1533 event->header.misc,
1488 event->ip.pid, 1534 event->ip.pid, event->ip.tid,
1489 (void *)(long)ip, 1535 (void *)(long)ip,
1490 (long long)period); 1536 (long long)period);
1491 1537
@@ -1545,10 +1591,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1545 if (show & show_mask) { 1591 if (show & show_mask) {
1546 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); 1592 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
1547 1593
1548 if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) 1594 if (dso_list && (!dso || !dso->name ||
1595 !strlist__has_entry(dso_list, dso->name)))
1549 return 0; 1596 return 0;
1550 1597
1551 if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) 1598 if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
1552 return 0; 1599 return 0;
1553 1600
1554 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { 1601 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1567,10 +1614,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
1567 struct thread *thread = threads__findnew(event->mmap.pid); 1614 struct thread *thread = threads__findnew(event->mmap.pid);
1568 struct map *map = map__new(&event->mmap); 1615 struct map *map = map__new(&event->mmap);
1569 1616
1570 dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", 1617 dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
1571 (void *)(offset + head), 1618 (void *)(offset + head),
1572 (void *)(long)(event->header.size), 1619 (void *)(long)(event->header.size),
1573 event->mmap.pid, 1620 event->mmap.pid,
1621 event->mmap.tid,
1574 (void *)(long)event->mmap.start, 1622 (void *)(long)event->mmap.start,
1575 (void *)(long)event->mmap.len, 1623 (void *)(long)event->mmap.len,
1576 (void *)(long)event->mmap.pgoff, 1624 (void *)(long)event->mmap.pgoff,
@@ -1608,15 +1656,27 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
1608} 1656}
1609 1657
1610static int 1658static int
1611process_fork_event(event_t *event, unsigned long offset, unsigned long head) 1659process_task_event(event_t *event, unsigned long offset, unsigned long head)
1612{ 1660{
1613 struct thread *thread = threads__findnew(event->fork.pid); 1661 struct thread *thread = threads__findnew(event->fork.pid);
1614 struct thread *parent = threads__findnew(event->fork.ppid); 1662 struct thread *parent = threads__findnew(event->fork.ppid);
1615 1663
1616 dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", 1664 dprintf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n",
1617 (void *)(offset + head), 1665 (void *)(offset + head),
1618 (void *)(long)(event->header.size), 1666 (void *)(long)(event->header.size),
1619 event->fork.pid, event->fork.ppid); 1667 event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT",
1668 event->fork.pid, event->fork.tid,
1669 event->fork.ppid, event->fork.ptid);
1670
1671 /*
1672 * A thread clone will have the same PID for both
1673 * parent and child.
1674 */
1675 if (thread == parent)
1676 return 0;
1677
1678 if (event->header.type == PERF_EVENT_EXIT)
1679 return 0;
1620 1680
1621 if (!thread || !parent || thread__fork(thread, parent)) { 1681 if (!thread || !parent || thread__fork(thread, parent)) {
1622 dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); 1682 dprintf("problem processing PERF_EVENT_FORK, skipping event.\n");
@@ -1677,14 +1737,37 @@ static void trace_event(event_t *event)
1677 dprintf(".\n"); 1737 dprintf(".\n");
1678} 1738}
1679 1739
1740static struct perf_header *header;
1741
1742static struct perf_counter_attr *perf_header__find_attr(u64 id)
1743{
1744 int i;
1745
1746 for (i = 0; i < header->attrs; i++) {
1747 struct perf_header_attr *attr = header->attr[i];
1748 int j;
1749
1750 for (j = 0; j < attr->ids; j++) {
1751 if (attr->id[j] == id)
1752 return &attr->attr;
1753 }
1754 }
1755
1756 return NULL;
1757}
1758
1680static int 1759static int
1681process_read_event(event_t *event, unsigned long offset, unsigned long head) 1760process_read_event(event_t *event, unsigned long offset, unsigned long head)
1682{ 1761{
1683 dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", 1762 struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
1763
1764 dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
1684 (void *)(offset + head), 1765 (void *)(offset + head),
1685 (void *)(long)(event->header.size), 1766 (void *)(long)(event->header.size),
1686 event->read.pid, 1767 event->read.pid,
1687 event->read.tid, 1768 event->read.tid,
1769 attr ? __event_name(attr->type, attr->config)
1770 : "FAIL",
1688 event->read.value); 1771 event->read.value);
1689 1772
1690 return 0; 1773 return 0;
@@ -1706,7 +1789,8 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
1706 return process_comm_event(event, offset, head); 1789 return process_comm_event(event, offset, head);
1707 1790
1708 case PERF_EVENT_FORK: 1791 case PERF_EVENT_FORK:
1709 return process_fork_event(event, offset, head); 1792 case PERF_EVENT_EXIT:
1793 return process_task_event(event, offset, head);
1710 1794
1711 case PERF_EVENT_LOST: 1795 case PERF_EVENT_LOST:
1712 return process_lost_event(event, offset, head); 1796 return process_lost_event(event, offset, head);
@@ -1729,8 +1813,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
1729 return 0; 1813 return 0;
1730} 1814}
1731 1815
1732static struct perf_header *header;
1733
1734static u64 perf_header__sample_type(void) 1816static u64 perf_header__sample_type(void)
1735{ 1817{
1736 u64 sample_type = 0; 1818 u64 sample_type = 0;
@@ -1775,6 +1857,11 @@ static int __cmd_report(void)
1775 exit(-1); 1857 exit(-1);
1776 } 1858 }
1777 1859
1860 if (!force && (stat.st_uid != geteuid())) {
1861 fprintf(stderr, "file: %s not owned by current user\n", input_name);
1862 exit(-1);
1863 }
1864
1778 if (!stat.st_size) { 1865 if (!stat.st_size) {
1779 fprintf(stderr, "zero-sized file, nothing to do!\n"); 1866 fprintf(stderr, "zero-sized file, nothing to do!\n");
1780 exit(0); 1867 exit(0);
@@ -1798,6 +1885,13 @@ static int __cmd_report(void)
1798 " -g?\n"); 1885 " -g?\n");
1799 exit(-1); 1886 exit(-1);
1800 } 1887 }
1888 } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
1889 callchain = 1;
1890 if (register_callchain_param(&callchain_param) < 0) {
1891 fprintf(stderr, "Can't register callchain"
1892 " params\n");
1893 exit(-1);
1894 }
1801 } 1895 }
1802 1896
1803 if (load_kernel() < 0) { 1897 if (load_kernel() < 0) {
@@ -1936,6 +2030,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
1936 else if (!strncmp(tok, "fractal", strlen(arg))) 2030 else if (!strncmp(tok, "fractal", strlen(arg)))
1937 callchain_param.mode = CHAIN_GRAPH_REL; 2031 callchain_param.mode = CHAIN_GRAPH_REL;
1938 2032
2033 else if (!strncmp(tok, "none", strlen(arg))) {
2034 callchain_param.mode = CHAIN_NONE;
2035 callchain = 0;
2036
2037 return 0;
2038 }
2039
1939 else 2040 else
1940 return -1; 2041 return -1;
1941 2042
@@ -1969,6 +2070,7 @@ static const struct option options[] = {
1969 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, 2070 OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
1970 "dump raw trace in ASCII"), 2071 "dump raw trace in ASCII"),
1971 OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), 2072 OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"),
2073 OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
1972 OPT_BOOLEAN('m', "modules", &modules, 2074 OPT_BOOLEAN('m', "modules", &modules,
1973 "load module symbols - WARNING: use only with -k and LIVE kernel"), 2075 "load module symbols - WARNING: use only with -k and LIVE kernel"),
1974 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, 2076 OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f9510eeeb6c7..b4b06c7903e1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -496,7 +496,7 @@ static const struct option options[] = {
496 "stat events on existing pid"), 496 "stat events on existing pid"),
497 OPT_BOOLEAN('a', "all-cpus", &system_wide, 497 OPT_BOOLEAN('a', "all-cpus", &system_wide,
498 "system-wide collection from all CPUs"), 498 "system-wide collection from all CPUs"),
499 OPT_BOOLEAN('S', "scale", &scale, 499 OPT_BOOLEAN('c', "scale", &scale,
500 "scale/normalize counters"), 500 "scale/normalize counters"),
501 OPT_BOOLEAN('v', "verbose", &verbose, 501 OPT_BOOLEAN('v', "verbose", &verbose,
502 "be more verbose (show counter open errors, etc)"), 502 "be more verbose (show counter open errors, etc)"),
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index c0a423004e15..7de28ce9ca26 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -31,6 +31,8 @@
31#include <fcntl.h> 31#include <fcntl.h>
32 32
33#include <stdio.h> 33#include <stdio.h>
34#include <termios.h>
35#include <unistd.h>
34 36
35#include <errno.h> 37#include <errno.h>
36#include <time.h> 38#include <time.h>
@@ -54,7 +56,7 @@ static int system_wide = 0;
54 56
55static int default_interval = 100000; 57static int default_interval = 100000;
56 58
57static u64 count_filter = 5; 59static int count_filter = 5;
58static int print_entries = 15; 60static int print_entries = 15;
59 61
60static int target_pid = -1; 62static int target_pid = -1;
@@ -69,15 +71,28 @@ static int freq = 0;
69static int verbose = 0; 71static int verbose = 0;
70static char *vmlinux = NULL; 72static char *vmlinux = NULL;
71 73
72static char *sym_filter;
73static unsigned long filter_start;
74static unsigned long filter_end;
75
76static int delay_secs = 2; 74static int delay_secs = 2;
77static int zero; 75static int zero;
78static int dump_symtab; 76static int dump_symtab;
79 77
80/* 78/*
79 * Source
80 */
81
82struct source_line {
83 u64 eip;
84 unsigned long count[MAX_COUNTERS];
85 char *line;
86 struct source_line *next;
87};
88
89static char *sym_filter = NULL;
90struct sym_entry *sym_filter_entry = NULL;
91static int sym_pcnt_filter = 5;
92static int sym_counter = 0;
93static int display_weighted = -1;
94
95/*
81 * Symbols 96 * Symbols
82 */ 97 */
83 98
@@ -91,9 +106,237 @@ struct sym_entry {
91 unsigned long snap_count; 106 unsigned long snap_count;
92 double weight; 107 double weight;
93 int skip; 108 int skip;
109 struct source_line *source;
110 struct source_line *lines;
111 struct source_line **lines_tail;
112 pthread_mutex_t source_lock;
94}; 113};
95 114
96struct sym_entry *sym_filter_entry; 115/*
116 * Source functions
117 */
118
119static void parse_source(struct sym_entry *syme)
120{
121 struct symbol *sym;
122 struct module *module;
123 struct section *section = NULL;
124 FILE *file;
125 char command[PATH_MAX*2], *path = vmlinux;
126 u64 start, end, len;
127
128 if (!syme)
129 return;
130
131 if (syme->lines) {
132 pthread_mutex_lock(&syme->source_lock);
133 goto out_assign;
134 }
135
136 sym = (struct symbol *)(syme + 1);
137 module = sym->module;
138
139 if (module)
140 path = module->path;
141 if (!path)
142 return;
143
144 start = sym->obj_start;
145 if (!start)
146 start = sym->start;
147
148 if (module) {
149 section = module->sections->find_section(module->sections, ".text");
150 if (section)
151 start -= section->vma;
152 }
153
154 end = start + sym->end - sym->start + 1;
155 len = sym->end - sym->start;
156
157 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
158
159 file = popen(command, "r");
160 if (!file)
161 return;
162
163 pthread_mutex_lock(&syme->source_lock);
164 syme->lines_tail = &syme->lines;
165 while (!feof(file)) {
166 struct source_line *src;
167 size_t dummy = 0;
168 char *c;
169
170 src = malloc(sizeof(struct source_line));
171 assert(src != NULL);
172 memset(src, 0, sizeof(struct source_line));
173
174 if (getline(&src->line, &dummy, file) < 0)
175 break;
176 if (!src->line)
177 break;
178
179 c = strchr(src->line, '\n');
180 if (c)
181 *c = 0;
182
183 src->next = NULL;
184 *syme->lines_tail = src;
185 syme->lines_tail = &src->next;
186
187 if (strlen(src->line)>8 && src->line[8] == ':') {
188 src->eip = strtoull(src->line, NULL, 16);
189 if (section)
190 src->eip += section->vma;
191 }
192 if (strlen(src->line)>8 && src->line[16] == ':') {
193 src->eip = strtoull(src->line, NULL, 16);
194 if (section)
195 src->eip += section->vma;
196 }
197 }
198 pclose(file);
199out_assign:
200 sym_filter_entry = syme;
201 pthread_mutex_unlock(&syme->source_lock);
202}
203
204static void __zero_source_counters(struct sym_entry *syme)
205{
206 int i;
207 struct source_line *line;
208
209 line = syme->lines;
210 while (line) {
211 for (i = 0; i < nr_counters; i++)
212 line->count[i] = 0;
213 line = line->next;
214 }
215}
216
217static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
218{
219 struct source_line *line;
220
221 if (syme != sym_filter_entry)
222 return;
223
224 if (pthread_mutex_trylock(&syme->source_lock))
225 return;
226
227 if (!syme->source)
228 goto out_unlock;
229
230 for (line = syme->lines; line; line = line->next) {
231 if (line->eip == ip) {
232 line->count[counter]++;
233 break;
234 }
235 if (line->eip > ip)
236 break;
237 }
238out_unlock:
239 pthread_mutex_unlock(&syme->source_lock);
240}
241
242static void lookup_sym_source(struct sym_entry *syme)
243{
244 struct symbol *symbol = (struct symbol *)(syme + 1);
245 struct source_line *line;
246 char pattern[PATH_MAX];
247 char *idx;
248
249 sprintf(pattern, "<%s>:", symbol->name);
250
251 if (symbol->module) {
252 idx = strstr(pattern, "\t");
253 if (idx)
254 *idx = 0;
255 }
256
257 pthread_mutex_lock(&syme->source_lock);
258 for (line = syme->lines; line; line = line->next) {
259 if (strstr(line->line, pattern)) {
260 syme->source = line;
261 break;
262 }
263 }
264 pthread_mutex_unlock(&syme->source_lock);
265}
266
267static void show_lines(struct source_line *queue, int count, int total)
268{
269 int i;
270 struct source_line *line;
271
272 line = queue;
273 for (i = 0; i < count; i++) {
274 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
275
276 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
277 line = line->next;
278 }
279}
280
281#define TRACE_COUNT 3
282
283static void show_details(struct sym_entry *syme)
284{
285 struct symbol *symbol;
286 struct source_line *line;
287 struct source_line *line_queue = NULL;
288 int displayed = 0;
289 int line_queue_count = 0, total = 0, more = 0;
290
291 if (!syme)
292 return;
293
294 if (!syme->source)
295 lookup_sym_source(syme);
296
297 if (!syme->source)
298 return;
299
300 symbol = (struct symbol *)(syme + 1);
301 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
302 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
303
304 pthread_mutex_lock(&syme->source_lock);
305 line = syme->source;
306 while (line) {
307 total += line->count[sym_counter];
308 line = line->next;
309 }
310
311 line = syme->source;
312 while (line) {
313 float pcnt = 0.0;
314
315 if (!line_queue_count)
316 line_queue = line;
317 line_queue_count++;
318
319 if (line->count[sym_counter])
320 pcnt = 100.0 * line->count[sym_counter] / (float)total;
321 if (pcnt >= (float)sym_pcnt_filter) {
322 if (displayed <= print_entries)
323 show_lines(line_queue, line_queue_count, total);
324 else more++;
325 displayed += line_queue_count;
326 line_queue_count = 0;
327 line_queue = NULL;
328 } else if (line_queue_count > TRACE_COUNT) {
329 line_queue = line_queue->next;
330 line_queue_count--;
331 }
332
333 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
334 line = line->next;
335 }
336 pthread_mutex_unlock(&syme->source_lock);
337 if (more)
338 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
339}
97 340
98struct dso *kernel_dso; 341struct dso *kernel_dso;
99 342
@@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym)
112 double weight = sym->snap_count; 355 double weight = sym->snap_count;
113 int counter; 356 int counter;
114 357
358 if (!display_weighted)
359 return weight;
360
115 for (counter = 1; counter < nr_counters-1; counter++) 361 for (counter = 1; counter < nr_counters-1; counter++)
116 weight *= sym->count[counter]; 362 weight *= sym->count[counter];
117 363
@@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
159static void print_sym_table(void) 405static void print_sym_table(void)
160{ 406{
161 int printed = 0, j; 407 int printed = 0, j;
162 int counter; 408 int counter, snap = !display_weighted ? sym_counter : 0;
163 float samples_per_sec = samples/delay_secs; 409 float samples_per_sec = samples/delay_secs;
164 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 410 float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
165 float sum_ksamples = 0.0; 411 float sum_ksamples = 0.0;
@@ -175,7 +421,7 @@ static void print_sym_table(void)
175 pthread_mutex_unlock(&active_symbols_lock); 421 pthread_mutex_unlock(&active_symbols_lock);
176 422
177 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 423 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
178 syme->snap_count = syme->count[0]; 424 syme->snap_count = syme->count[snap];
179 if (syme->snap_count != 0) { 425 if (syme->snap_count != 0) {
180 syme->weight = sym_weight(syme); 426 syme->weight = sym_weight(syme);
181 rb_insert_active_sym(&tmp, syme); 427 rb_insert_active_sym(&tmp, syme);
@@ -195,7 +441,7 @@ static void print_sym_table(void)
195 samples_per_sec, 441 samples_per_sec,
196 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 442 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
197 443
198 if (nr_counters == 1) { 444 if (nr_counters == 1 || !display_weighted) {
199 printf("%Ld", (u64)attrs[0].sample_period); 445 printf("%Ld", (u64)attrs[0].sample_period);
200 if (freq) 446 if (freq)
201 printf("Hz "); 447 printf("Hz ");
@@ -203,7 +449,9 @@ static void print_sym_table(void)
203 printf(" "); 449 printf(" ");
204 } 450 }
205 451
206 for (counter = 0; counter < nr_counters; counter++) { 452 if (!display_weighted)
453 printf("%s", event_name(sym_counter));
454 else for (counter = 0; counter < nr_counters; counter++) {
207 if (counter) 455 if (counter)
208 printf("/"); 456 printf("/");
209 457
@@ -228,6 +476,11 @@ static void print_sym_table(void)
228 476
229 printf("------------------------------------------------------------------------------\n\n"); 477 printf("------------------------------------------------------------------------------\n\n");
230 478
479 if (sym_filter_entry) {
480 show_details(sym_filter_entry);
481 return;
482 }
483
231 if (nr_counters == 1) 484 if (nr_counters == 1)
232 printf(" samples pcnt"); 485 printf(" samples pcnt");
233 else 486 else
@@ -242,13 +495,13 @@ static void print_sym_table(void)
242 struct symbol *sym = (struct symbol *)(syme + 1); 495 struct symbol *sym = (struct symbol *)(syme + 1);
243 double pcnt; 496 double pcnt;
244 497
245 if (++printed > print_entries || syme->snap_count < count_filter) 498 if (++printed > print_entries || (int)syme->snap_count < count_filter)
246 continue; 499 continue;
247 500
248 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 501 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
249 sum_ksamples)); 502 sum_ksamples));
250 503
251 if (nr_counters == 1) 504 if (nr_counters == 1 || !display_weighted)
252 printf("%20.2f - ", syme->weight); 505 printf("%20.2f - ", syme->weight);
253 else 506 else
254 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); 507 printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
@@ -261,19 +514,250 @@ static void print_sym_table(void)
261 } 514 }
262} 515}
263 516
517static void prompt_integer(int *target, const char *msg)
518{
519 char *buf = malloc(0), *p;
520 size_t dummy = 0;
521 int tmp;
522
523 fprintf(stdout, "\n%s: ", msg);
524 if (getline(&buf, &dummy, stdin) < 0)
525 return;
526
527 p = strchr(buf, '\n');
528 if (p)
529 *p = 0;
530
531 p = buf;
532 while(*p) {
533 if (!isdigit(*p))
534 goto out_free;
535 p++;
536 }
537 tmp = strtoul(buf, NULL, 10);
538 *target = tmp;
539out_free:
540 free(buf);
541}
542
543static void prompt_percent(int *target, const char *msg)
544{
545 int tmp = 0;
546
547 prompt_integer(&tmp, msg);
548 if (tmp >= 0 && tmp <= 100)
549 *target = tmp;
550}
551
552static void prompt_symbol(struct sym_entry **target, const char *msg)
553{
554 char *buf = malloc(0), *p;
555 struct sym_entry *syme = *target, *n, *found = NULL;
556 size_t dummy = 0;
557
558 /* zero counters of active symbol */
559 if (syme) {
560 pthread_mutex_lock(&syme->source_lock);
561 __zero_source_counters(syme);
562 *target = NULL;
563 pthread_mutex_unlock(&syme->source_lock);
564 }
565
566 fprintf(stdout, "\n%s: ", msg);
567 if (getline(&buf, &dummy, stdin) < 0)
568 goto out_free;
569
570 p = strchr(buf, '\n');
571 if (p)
572 *p = 0;
573
574 pthread_mutex_lock(&active_symbols_lock);
575 syme = list_entry(active_symbols.next, struct sym_entry, node);
576 pthread_mutex_unlock(&active_symbols_lock);
577
578 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
579 struct symbol *sym = (struct symbol *)(syme + 1);
580
581 if (!strcmp(buf, sym->name)) {
582 found = syme;
583 break;
584 }
585 }
586
587 if (!found) {
588 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
589 sleep(1);
590 return;
591 } else
592 parse_source(found);
593
594out_free:
595 free(buf);
596}
597
598static void print_mapped_keys(void)
599{
600 char *name = NULL;
601
602 if (sym_filter_entry) {
603 struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
604 name = sym->name;
605 }
606
607 fprintf(stdout, "\nMapped keys:\n");
608 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
609 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
610
611 if (nr_counters > 1)
612 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter));
613
614 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
615
616 if (vmlinux) {
617 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
618 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
619 fprintf(stdout, "\t[S] stop annotation.\n");
620 }
621
622 if (nr_counters > 1)
623 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
624
625 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
626 fprintf(stdout, "\t[qQ] quit.\n");
627}
628
629static int key_mapped(int c)
630{
631 switch (c) {
632 case 'd':
633 case 'e':
634 case 'f':
635 case 'z':
636 case 'q':
637 case 'Q':
638 return 1;
639 case 'E':
640 case 'w':
641 return nr_counters > 1 ? 1 : 0;
642 case 'F':
643 case 's':
644 case 'S':
645 return vmlinux ? 1 : 0;
646 }
647
648 return 0;
649}
650
651static void handle_keypress(int c)
652{
653 if (!key_mapped(c)) {
654 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
655 struct termios tc, save;
656
657 print_mapped_keys();
658 fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
659 fflush(stdout);
660
661 tcgetattr(0, &save);
662 tc = save;
663 tc.c_lflag &= ~(ICANON | ECHO);
664 tc.c_cc[VMIN] = 0;
665 tc.c_cc[VTIME] = 0;
666 tcsetattr(0, TCSANOW, &tc);
667
668 poll(&stdin_poll, 1, -1);
669 c = getc(stdin);
670
671 tcsetattr(0, TCSAFLUSH, &save);
672 if (!key_mapped(c))
673 return;
674 }
675
676 switch (c) {
677 case 'd':
678 prompt_integer(&delay_secs, "Enter display delay");
679 break;
680 case 'e':
681 prompt_integer(&print_entries, "Enter display entries (lines)");
682 break;
683 case 'E':
684 if (nr_counters > 1) {
685 int i;
686
687 fprintf(stderr, "\nAvailable events:");
688 for (i = 0; i < nr_counters; i++)
689 fprintf(stderr, "\n\t%d %s", i, event_name(i));
690
691 prompt_integer(&sym_counter, "Enter details event counter");
692
693 if (sym_counter >= nr_counters) {
694 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
695 sym_counter = 0;
696 sleep(1);
697 }
698 } else sym_counter = 0;
699 break;
700 case 'f':
701 prompt_integer(&count_filter, "Enter display event count filter");
702 break;
703 case 'F':
704 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
705 break;
706 case 'q':
707 case 'Q':
708 printf("exiting.\n");
709 exit(0);
710 case 's':
711 prompt_symbol(&sym_filter_entry, "Enter details symbol");
712 break;
713 case 'S':
714 if (!sym_filter_entry)
715 break;
716 else {
717 struct sym_entry *syme = sym_filter_entry;
718
719 pthread_mutex_lock(&syme->source_lock);
720 sym_filter_entry = NULL;
721 __zero_source_counters(syme);
722 pthread_mutex_unlock(&syme->source_lock);
723 }
724 break;
725 case 'w':
726 display_weighted = ~display_weighted;
727 break;
728 case 'z':
729 zero = ~zero;
730 break;
731 }
732}
733
264static void *display_thread(void *arg __used) 734static void *display_thread(void *arg __used)
265{ 735{
266 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 736 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
267 int delay_msecs = delay_secs * 1000; 737 struct termios tc, save;
738 int delay_msecs, c;
739
740 tcgetattr(0, &save);
741 tc = save;
742 tc.c_lflag &= ~(ICANON | ECHO);
743 tc.c_cc[VMIN] = 0;
744 tc.c_cc[VTIME] = 0;
268 745
269 printf("PerfTop refresh period: %d seconds\n", delay_secs); 746repeat:
747 delay_msecs = delay_secs * 1000;
748 tcsetattr(0, TCSANOW, &tc);
749 /* trash return*/
750 getc(stdin);
270 751
271 do { 752 do {
272 print_sym_table(); 753 print_sym_table();
273 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 754 } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
274 755
275 printf("key pressed - exiting.\n"); 756 c = getc(stdin);
276 exit(0); 757 tcsetattr(0, TCSAFLUSH, &save);
758
759 handle_keypress(c);
760 goto repeat;
277 761
278 return NULL; 762 return NULL;
279} 763}
@@ -285,6 +769,7 @@ static const char *skip_symbols[] = {
285 "enter_idle", 769 "enter_idle",
286 "exit_idle", 770 "exit_idle",
287 "mwait_idle", 771 "mwait_idle",
772 "mwait_idle_with_hints",
288 "ppc64_runlatch_off", 773 "ppc64_runlatch_off",
289 "pseries_dedicated_idle_sleep", 774 "pseries_dedicated_idle_sleep",
290 NULL 775 NULL
@@ -292,7 +777,6 @@ static const char *skip_symbols[] = {
292 777
293static int symbol_filter(struct dso *self, struct symbol *sym) 778static int symbol_filter(struct dso *self, struct symbol *sym)
294{ 779{
295 static int filter_match;
296 struct sym_entry *syme; 780 struct sym_entry *syme;
297 const char *name = sym->name; 781 const char *name = sym->name;
298 int i; 782 int i;
@@ -314,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
314 return 1; 798 return 1;
315 799
316 syme = dso__sym_priv(self, sym); 800 syme = dso__sym_priv(self, sym);
801 pthread_mutex_init(&syme->source_lock, NULL);
802 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
803 sym_filter_entry = syme;
804
317 for (i = 0; skip_symbols[i]; i++) { 805 for (i = 0; skip_symbols[i]; i++) {
318 if (!strcmp(skip_symbols[i], name)) { 806 if (!strcmp(skip_symbols[i], name)) {
319 syme->skip = 1; 807 syme->skip = 1;
@@ -321,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
321 } 809 }
322 } 810 }
323 811
324 if (filter_match == 1) {
325 filter_end = sym->start;
326 filter_match = -1;
327 if (filter_end - filter_start > 10000) {
328 fprintf(stderr,
329 "hm, too large filter symbol <%s> - skipping.\n",
330 sym_filter);
331 fprintf(stderr, "symbol filter start: %016lx\n",
332 filter_start);
333 fprintf(stderr, " end: %016lx\n",
334 filter_end);
335 filter_end = filter_start = 0;
336 sym_filter = NULL;
337 sleep(1);
338 }
339 }
340
341 if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
342 filter_match = 1;
343 filter_start = sym->start;
344 }
345
346
347 return 0; 812 return 0;
348} 813}
349 814
@@ -379,8 +844,6 @@ out_delete_dso:
379 return -1; 844 return -1;
380} 845}
381 846
382#define TRACE_COUNT 3
383
384/* 847/*
385 * Binary search in the histogram table and record the hit: 848 * Binary search in the histogram table and record the hit:
386 */ 849 */
@@ -393,6 +856,7 @@ static void record_ip(u64 ip, int counter)
393 856
394 if (!syme->skip) { 857 if (!syme->skip) {
395 syme->count[counter]++; 858 syme->count[counter]++;
859 record_precise_ip(syme, counter, ip);
396 pthread_mutex_lock(&active_symbols_lock); 860 pthread_mutex_lock(&active_symbols_lock);
397 if (list_empty(&syme->node) || !syme->node.next) 861 if (list_empty(&syme->node) || !syme->node.next)
398 __list_insert_active_sym(syme); 862 __list_insert_active_sym(syme);
@@ -689,8 +1153,8 @@ static const struct option options[] = {
689 "put the counters into a counter group"), 1153 "put the counters into a counter group"),
690 OPT_BOOLEAN('i', "inherit", &inherit, 1154 OPT_BOOLEAN('i', "inherit", &inherit,
691 "child tasks inherit counters"), 1155 "child tasks inherit counters"),
692 OPT_STRING('s', "sym-filter", &sym_filter, "pattern", 1156 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
693 "only display symbols matchig this pattern"), 1157 "symbol to annotate - requires -k option"),
694 OPT_BOOLEAN('z', "zero", &zero, 1158 OPT_BOOLEAN('z', "zero", &zero,
695 "zero history across updates"), 1159 "zero history across updates"),
696 OPT_INTEGER('F', "freq", &freq, 1160 OPT_INTEGER('F', "freq", &freq,
@@ -733,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
733 delay_secs = 1; 1197 delay_secs = 1;
734 1198
735 parse_symbols(); 1199 parse_symbols();
1200 parse_source(sym_filter_entry);
736 1201
737 /* 1202 /*
738 * Fill in the ones not specifically initialized via -c: 1203 * Fill in the ones not specifically initialized via -c:
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9d3c8141b8c1..011473411642 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -13,6 +13,7 @@
13#include <stdio.h> 13#include <stdio.h>
14#include <stdbool.h> 14#include <stdbool.h>
15#include <errno.h> 15#include <errno.h>
16#include <math.h>
16 17
17#include "callchain.h" 18#include "callchain.h"
18 19
@@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
26 struct rb_node **p = &root->rb_node; 27 struct rb_node **p = &root->rb_node;
27 struct rb_node *parent = NULL; 28 struct rb_node *parent = NULL;
28 struct callchain_node *rnode; 29 struct callchain_node *rnode;
30 u64 chain_cumul = cumul_hits(chain);
29 31
30 while (*p) { 32 while (*p) {
33 u64 rnode_cumul;
34
31 parent = *p; 35 parent = *p;
32 rnode = rb_entry(parent, struct callchain_node, rb_node); 36 rnode = rb_entry(parent, struct callchain_node, rb_node);
37 rnode_cumul = cumul_hits(rnode);
33 38
34 switch (mode) { 39 switch (mode) {
35 case CHAIN_FLAT: 40 case CHAIN_FLAT:
@@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
40 break; 45 break;
41 case CHAIN_GRAPH_ABS: /* Falldown */ 46 case CHAIN_GRAPH_ABS: /* Falldown */
42 case CHAIN_GRAPH_REL: 47 case CHAIN_GRAPH_REL:
43 if (rnode->cumul_hit < chain->cumul_hit) 48 if (rnode_cumul < chain_cumul)
44 p = &(*p)->rb_left; 49 p = &(*p)->rb_left;
45 else 50 else
46 p = &(*p)->rb_right; 51 p = &(*p)->rb_right;
@@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
87 92
88 chain_for_each_child(child, node) { 93 chain_for_each_child(child, node) {
89 __sort_chain_graph_abs(child, min_hit); 94 __sort_chain_graph_abs(child, min_hit);
90 if (child->cumul_hit >= min_hit) 95 if (cumul_hits(child) >= min_hit)
91 rb_insert_callchain(&node->rb_root, child, 96 rb_insert_callchain(&node->rb_root, child,
92 CHAIN_GRAPH_ABS); 97 CHAIN_GRAPH_ABS);
93 } 98 }
@@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
108 u64 min_hit; 113 u64 min_hit;
109 114
110 node->rb_root = RB_ROOT; 115 node->rb_root = RB_ROOT;
111 min_hit = node->cumul_hit * min_percent / 100.0; 116 min_hit = ceil(node->children_hit * min_percent);
112 117
113 chain_for_each_child(child, node) { 118 chain_for_each_child(child, node) {
114 __sort_chain_graph_rel(child, min_percent); 119 __sort_chain_graph_rel(child, min_percent);
115 if (child->cumul_hit >= min_hit) 120 if (cumul_hits(child) >= min_hit)
116 rb_insert_callchain(&node->rb_root, child, 121 rb_insert_callchain(&node->rb_root, child,
117 CHAIN_GRAPH_REL); 122 CHAIN_GRAPH_REL);
118 } 123 }
@@ -122,7 +127,7 @@ static void
122sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 127sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
123 u64 min_hit __used, struct callchain_param *param) 128 u64 min_hit __used, struct callchain_param *param)
124{ 129{
125 __sort_chain_graph_rel(chain_root, param->min_percent); 130 __sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
126 rb_root->rb_node = chain_root->rb_root.rb_node; 131 rb_root->rb_node = chain_root->rb_root.rb_node;
127} 132}
128 133
@@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
211 new = create_child(parent, false); 216 new = create_child(parent, false);
212 fill_node(new, chain, start, syms); 217 fill_node(new, chain, start, syms);
213 218
214 new->cumul_hit = new->hit = 1; 219 new->children_hit = 0;
220 new->hit = 1;
215} 221}
216 222
217/* 223/*
@@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
241 247
242 /* split the hits */ 248 /* split the hits */
243 new->hit = parent->hit; 249 new->hit = parent->hit;
244 new->cumul_hit = parent->cumul_hit; 250 new->children_hit = parent->children_hit;
251 parent->children_hit = cumul_hits(new);
245 new->val_nr = parent->val_nr - idx_local; 252 new->val_nr = parent->val_nr - idx_local;
246 parent->val_nr = idx_local; 253 parent->val_nr = idx_local;
247 254
@@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
249 if (idx_total < chain->nr) { 256 if (idx_total < chain->nr) {
250 parent->hit = 0; 257 parent->hit = 0;
251 add_child(parent, chain, idx_total, syms); 258 add_child(parent, chain, idx_total, syms);
259 parent->children_hit++;
252 } else { 260 } else {
253 parent->hit = 1; 261 parent->hit = 1;
254 } 262 }
@@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain,
269 unsigned int ret = __append_chain(rnode, chain, start, syms); 277 unsigned int ret = __append_chain(rnode, chain, start, syms);
270 278
271 if (!ret) 279 if (!ret)
272 goto cumul; 280 goto inc_children_hit;
273 } 281 }
274 /* nothing in children, add to the current node */ 282 /* nothing in children, add to the current node */
275 add_child(root, chain, start, syms); 283 add_child(root, chain, start, syms);
276 284
277cumul: 285inc_children_hit:
278 root->cumul_hit++; 286 root->children_hit++;
279} 287}
280 288
281static int 289static int
@@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
317 /* we match 100% of the path, increment the hit */ 325 /* we match 100% of the path, increment the hit */
318 if (i - start == root->val_nr && i == chain->nr) { 326 if (i - start == root->val_nr && i == chain->nr) {
319 root->hit++; 327 root->hit++;
320 root->cumul_hit++;
321
322 return 0; 328 return 0;
323 } 329 }
324 330
@@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
331void append_chain(struct callchain_node *root, struct ip_callchain *chain, 337void append_chain(struct callchain_node *root, struct ip_callchain *chain,
332 struct symbol **syms) 338 struct symbol **syms)
333{ 339{
340 if (!chain->nr)
341 return;
334 __append_chain_children(root, chain, syms, 0); 342 __append_chain_children(root, chain, syms, 0);
335} 343}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7812122bea1d..a926ae4f5a16 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -7,6 +7,7 @@
7#include "symbol.h" 7#include "symbol.h"
8 8
9enum chain_mode { 9enum chain_mode {
10 CHAIN_NONE,
10 CHAIN_FLAT, 11 CHAIN_FLAT,
11 CHAIN_GRAPH_ABS, 12 CHAIN_GRAPH_ABS,
12 CHAIN_GRAPH_REL 13 CHAIN_GRAPH_REL
@@ -21,7 +22,7 @@ struct callchain_node {
21 struct rb_root rb_root; /* sorted tree of children */ 22 struct rb_root rb_root; /* sorted tree of children */
22 unsigned int val_nr; 23 unsigned int val_nr;
23 u64 hit; 24 u64 hit;
24 u64 cumul_hit; /* hit + hits of children */ 25 u64 children_hit;
25}; 26};
26 27
27struct callchain_param; 28struct callchain_param;
@@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node)
48 INIT_LIST_HEAD(&node->val); 49 INIT_LIST_HEAD(&node->val);
49} 50}
50 51
52static inline u64 cumul_hits(struct callchain_node *node)
53{
54 return node->hit + node->children_hit;
55}
56
51int register_callchain_param(struct callchain_param *param); 57int register_callchain_param(struct callchain_param *param);
52void append_chain(struct callchain_node *root, struct ip_callchain *chain, 58void append_chain(struct callchain_node *root, struct ip_callchain *chain,
53 struct symbol **syms); 59 struct symbol **syms);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 450384b3bbe5..b92a457ca32e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size)
185 185
186 if (ret < 0) 186 if (ret < 0)
187 die("failed to read"); 187 die("failed to read");
188 if (ret == 0)
189 die("failed to read: missing data");
188 190
189 size -= ret; 191 size -= ret;
190 buf += ret; 192 buf += ret;
@@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd)
213 215
214 for (i = 0; i < nr_attrs; i++) { 216 for (i = 0; i < nr_attrs; i++) {
215 struct perf_header_attr *attr; 217 struct perf_header_attr *attr;
216 off_t tmp = lseek(fd, 0, SEEK_CUR); 218 off_t tmp;
217 219
218 do_read(fd, &f_attr, sizeof(f_attr)); 220 do_read(fd, &f_attr, sizeof(f_attr));
221 tmp = lseek(fd, 0, SEEK_CUR);
219 222
220 attr = perf_header_attr__new(&f_attr.attr); 223 attr = perf_header_attr__new(&f_attr.attr);
221 224
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7bdad8df22a6..044178408783 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = {
121 (strcmp(sys_dirent.d_name, ".")) && \ 121 (strcmp(sys_dirent.d_name, ".")) && \
122 (strcmp(sys_dirent.d_name, ".."))) 122 (strcmp(sys_dirent.d_name, "..")))
123 123
124static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
125{
126 char evt_path[MAXPATHLEN];
127 int fd;
128
129 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
130 sys_dir->d_name, evt_dir->d_name);
131 fd = open(evt_path, O_RDONLY);
132 if (fd < 0)
133 return -EINVAL;
134 close(fd);
135
136 return 0;
137}
138
124#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ 139#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \
125 while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ 140 while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \
126 if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ 141 if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \
127 sys_dirent.d_name, evt_dirent.d_name) && \ 142 sys_dirent.d_name, evt_dirent.d_name) && \
128 (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ 143 (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \
129 (strcmp(evt_dirent.d_name, ".")) && \ 144 (strcmp(evt_dirent.d_name, ".")) && \
130 (strcmp(evt_dirent.d_name, ".."))) 145 (strcmp(evt_dirent.d_name, "..")) && \
146 (!tp_event_has_id(&sys_dirent, &evt_dirent)))
131 147
132#define MAX_EVENT_LENGTH 30 148#define MAX_EVENT_LENGTH 30
133 149
@@ -223,9 +239,15 @@ char *event_name(int counter)
223{ 239{
224 u64 config = attrs[counter].config; 240 u64 config = attrs[counter].config;
225 int type = attrs[counter].type; 241 int type = attrs[counter].type;
242
243 return __event_name(type, config);
244}
245
246char *__event_name(int type, u64 config)
247{
226 static char buf[32]; 248 static char buf[32];
227 249
228 if (attrs[counter].type == PERF_TYPE_RAW) { 250 if (type == PERF_TYPE_RAW) {
229 sprintf(buf, "raw 0x%llx", config); 251 sprintf(buf, "raw 0x%llx", config);
230 return buf; 252 return buf;
231 } 253 }
@@ -357,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
357 struct perf_counter_attr *attr) 379 struct perf_counter_attr *attr)
358{ 380{
359 const char *evt_name; 381 const char *evt_name;
382 char *flags;
360 char sys_name[MAX_EVENT_LENGTH]; 383 char sys_name[MAX_EVENT_LENGTH];
361 char id_buf[4]; 384 char id_buf[4];
362 int fd; 385 int fd;
@@ -378,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
378 strncpy(sys_name, *strp, sys_length); 401 strncpy(sys_name, *strp, sys_length);
379 sys_name[sys_length] = '\0'; 402 sys_name[sys_length] = '\0';
380 evt_name = evt_name + 1; 403 evt_name = evt_name + 1;
404
405 flags = strchr(evt_name, ':');
406 if (flags) {
407 *flags = '\0';
408 flags++;
409 if (!strncmp(flags, "record", strlen(flags)))
410 attr->sample_type |= PERF_SAMPLE_RAW;
411 }
412
381 evt_length = strlen(evt_name); 413 evt_length = strlen(evt_name);
382 if (evt_length >= MAX_EVENT_LENGTH) 414 if (evt_length >= MAX_EVENT_LENGTH)
383 return 0; 415 return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1ea5d09b6eb1..192a962e3a0f 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -10,6 +10,7 @@ extern int nr_counters;
10extern struct perf_counter_attr attrs[MAX_COUNTERS]; 10extern struct perf_counter_attr attrs[MAX_COUNTERS];
11 11
12extern char *event_name(int ctr); 12extern char *event_name(int ctr);
13extern char *__event_name(int type, u64 config);
13 14
14extern int parse_events(const struct option *opt, const char *str, int unset); 15extern int parse_events(const struct option *opt, const char *str, int unset);
15 16
diff --git a/tools/perf/util/quote.c b/tools/perf/util/quote.c
index c6e5dc0dc82f..2726fe40eb5d 100644
--- a/tools/perf/util/quote.c
+++ b/tools/perf/util/quote.c
@@ -318,7 +318,7 @@ char *quote_path_relative(const char *in, int len,
318 strbuf_addch(out, '"'); 318 strbuf_addch(out, '"');
319 if (prefix) { 319 if (prefix) {
320 int off = 0; 320 int off = 0;
321 while (prefix[off] && off < len && prefix[off] == in[off]) 321 while (off < len && prefix[off] && prefix[off] == in[off])
322 if (prefix[off] == '/') { 322 if (prefix[off] == '/') {
323 prefix += off + 1; 323 prefix += off + 1;
324 in += off + 1; 324 in += off + 1;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 28106059bf12..5c0f42e6b33b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -6,14 +6,18 @@
6#include <libelf.h> 6#include <libelf.h>
7#include <gelf.h> 7#include <gelf.h>
8#include <elf.h> 8#include <elf.h>
9#include <bfd.h>
10 9
11const char *sym_hist_filter; 10const char *sym_hist_filter;
12 11
13#ifndef DMGL_PARAMS 12enum dso_origin {
14#define DMGL_PARAMS (1 << 0) /* Include function args */ 13 DSO__ORIG_KERNEL = 0,
15#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ 14 DSO__ORIG_JAVA_JIT,
16#endif 15 DSO__ORIG_FEDORA,
16 DSO__ORIG_UBUNTU,
17 DSO__ORIG_BUILDID,
18 DSO__ORIG_DSO,
19 DSO__ORIG_NOT_FOUND,
20};
17 21
18static struct symbol *symbol__new(u64 start, u64 len, 22static struct symbol *symbol__new(u64 start, u64 len,
19 const char *name, unsigned int priv_size, 23 const char *name, unsigned int priv_size,
@@ -72,6 +76,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
72 self->sym_priv_size = sym_priv_size; 76 self->sym_priv_size = sym_priv_size;
73 self->find_symbol = dso__find_symbol; 77 self->find_symbol = dso__find_symbol;
74 self->slen_calculated = 0; 78 self->slen_calculated = 0;
79 self->origin = DSO__ORIG_NOT_FOUND;
75 } 80 }
76 81
77 return self; 82 return self;
@@ -565,7 +570,7 @@ static int dso__load_sym(struct dso *self, int fd, const char *name,
565 goto out_elf_end; 570 goto out_elf_end;
566 571
567 secstrs = elf_getdata(sec_strndx, NULL); 572 secstrs = elf_getdata(sec_strndx, NULL);
568 if (symstrs == NULL) 573 if (secstrs == NULL)
569 goto out_elf_end; 574 goto out_elf_end;
570 575
571 nr_syms = shdr.sh_size / shdr.sh_entsize; 576 nr_syms = shdr.sh_size / shdr.sh_entsize;
@@ -652,11 +657,85 @@ out_close:
652 return err; 657 return err;
653} 658}
654 659
660#define BUILD_ID_SIZE 128
661
662static char *dso__read_build_id(struct dso *self, int verbose)
663{
664 int i;
665 GElf_Ehdr ehdr;
666 GElf_Shdr shdr;
667 Elf_Data *build_id_data;
668 Elf_Scn *sec;
669 char *build_id = NULL, *bid;
670 unsigned char *raw;
671 Elf *elf;
672 int fd = open(self->name, O_RDONLY);
673
674 if (fd < 0)
675 goto out;
676
677 elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
678 if (elf == NULL) {
679 if (verbose)
680 fprintf(stderr, "%s: cannot read %s ELF file.\n",
681 __func__, self->name);
682 goto out_close;
683 }
684
685 if (gelf_getehdr(elf, &ehdr) == NULL) {
686 if (verbose)
687 fprintf(stderr, "%s: cannot get elf header.\n", __func__);
688 goto out_elf_end;
689 }
690
691 sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL);
692 if (sec == NULL)
693 goto out_elf_end;
694
695 build_id_data = elf_getdata(sec, NULL);
696 if (build_id_data == NULL)
697 goto out_elf_end;
698 build_id = malloc(BUILD_ID_SIZE);
699 if (build_id == NULL)
700 goto out_elf_end;
701 raw = build_id_data->d_buf + 16;
702 bid = build_id;
703
704 for (i = 0; i < 20; ++i) {
705 sprintf(bid, "%02x", *raw);
706 ++raw;
707 bid += 2;
708 }
709 if (verbose >= 2)
710 printf("%s(%s): %s\n", __func__, self->name, build_id);
711out_elf_end:
712 elf_end(elf);
713out_close:
714 close(fd);
715out:
716 return build_id;
717}
718
719char dso__symtab_origin(const struct dso *self)
720{
721 static const char origin[] = {
722 [DSO__ORIG_KERNEL] = 'k',
723 [DSO__ORIG_JAVA_JIT] = 'j',
724 [DSO__ORIG_FEDORA] = 'f',
725 [DSO__ORIG_UBUNTU] = 'u',
726 [DSO__ORIG_BUILDID] = 'b',
727 [DSO__ORIG_DSO] = 'd',
728 };
729
730 if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
731 return '!';
732 return origin[self->origin];
733}
734
655int dso__load(struct dso *self, symbol_filter_t filter, int verbose) 735int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
656{ 736{
657 int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug"); 737 int size = PATH_MAX;
658 char *name = malloc(size); 738 char *name = malloc(size), *build_id = NULL;
659 int variant = 0;
660 int ret = -1; 739 int ret = -1;
661 int fd; 740 int fd;
662 741
@@ -665,26 +744,43 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
665 744
666 self->adjust_symbols = 0; 745 self->adjust_symbols = 0;
667 746
668 if (strncmp(self->name, "/tmp/perf-", 10) == 0) 747 if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
669 return dso__load_perf_map(self, filter, verbose); 748 ret = dso__load_perf_map(self, filter, verbose);
749 self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
750 DSO__ORIG_NOT_FOUND;
751 return ret;
752 }
753
754 self->origin = DSO__ORIG_FEDORA - 1;
670 755
671more: 756more:
672 do { 757 do {
673 switch (variant) { 758 self->origin++;
674 case 0: /* Fedora */ 759 switch (self->origin) {
760 case DSO__ORIG_FEDORA:
675 snprintf(name, size, "/usr/lib/debug%s.debug", self->name); 761 snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
676 break; 762 break;
677 case 1: /* Ubuntu */ 763 case DSO__ORIG_UBUNTU:
678 snprintf(name, size, "/usr/lib/debug%s", self->name); 764 snprintf(name, size, "/usr/lib/debug%s", self->name);
679 break; 765 break;
680 case 2: /* Sane people */ 766 case DSO__ORIG_BUILDID:
767 build_id = dso__read_build_id(self, verbose);
768 if (build_id != NULL) {
769 snprintf(name, size,
770 "/usr/lib/debug/.build-id/%.2s/%s.debug",
771 build_id, build_id + 2);
772 free(build_id);
773 break;
774 }
775 self->origin++;
776 /* Fall thru */
777 case DSO__ORIG_DSO:
681 snprintf(name, size, "%s", self->name); 778 snprintf(name, size, "%s", self->name);
682 break; 779 break;
683 780
684 default: 781 default:
685 goto out; 782 goto out;
686 } 783 }
687 variant++;
688 784
689 fd = open(name, O_RDONLY); 785 fd = open(name, O_RDONLY);
690 } while (fd < 0); 786 } while (fd < 0);
@@ -705,6 +801,8 @@ more:
705 } 801 }
706out: 802out:
707 free(name); 803 free(name);
804 if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
805 return 0;
708 return ret; 806 return ret;
709} 807}
710 808
@@ -820,6 +918,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
820 if (err <= 0) 918 if (err <= 0)
821 err = dso__load_kallsyms(self, filter, verbose); 919 err = dso__load_kallsyms(self, filter, verbose);
822 920
921 if (err > 0)
922 self->origin = DSO__ORIG_KERNEL;
923
823 return err; 924 return err;
824} 925}
825 926
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2f92b21c712d..b53bf0125c1b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -7,6 +7,30 @@
7#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include "module.h" 8#include "module.h"
9 9
10#ifdef HAVE_CPLUS_DEMANGLE
11extern char *cplus_demangle(const char *, int);
12
13static inline char *bfd_demangle(void __used *v, const char *c, int i)
14{
15 return cplus_demangle(c, i);
16}
17#else
18#ifdef NO_DEMANGLE
19static inline char *bfd_demangle(void __used *v, const char __used *c,
20 int __used i)
21{
22 return NULL;
23}
24#else
25#include <bfd.h>
26#endif
27#endif
28
29#ifndef DMGL_PARAMS
30#define DMGL_PARAMS (1 << 0) /* Include function args */
31#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
32#endif
33
10struct symbol { 34struct symbol {
11 struct rb_node rb_node; 35 struct rb_node rb_node;
12 u64 start; 36 u64 start;
@@ -26,6 +50,7 @@ struct dso {
26 unsigned int sym_priv_size; 50 unsigned int sym_priv_size;
27 unsigned char adjust_symbols; 51 unsigned char adjust_symbols;
28 unsigned char slen_calculated; 52 unsigned char slen_calculated;
53 unsigned char origin;
29 char name[0]; 54 char name[0];
30}; 55};
31 56
@@ -49,6 +74,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
49int dso__load(struct dso *self, symbol_filter_t filter, int verbose); 74int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
50 75
51size_t dso__fprintf(struct dso *self, FILE *fp); 76size_t dso__fprintf(struct dso *self, FILE *fp);
77char dso__symtab_origin(const struct dso *self);
52 78
53void symbol__init(void); 79void symbol__init(void);
54#endif /* _PERF_SYMBOL_ */ 80#endif /* _PERF_SYMBOL_ */