author     Ingo Molnar <mingo@kernel.org>    2015-10-14 10:05:18 -0400
committer  Ingo Molnar <mingo@kernel.org>    2015-10-14 10:05:18 -0400
commit     c7d77a7980e434c3af17de19e3348157f9b9ccce (patch)
tree       b32c5988ce8239b80c83e94c22d68f5eb0fb84da /tools/perf
parent     0ce423b6492a02be11662bfaa837dd16945aad3e (diff)
parent     8a53554e12e98d1759205afd7b8e9e2ea0936f48 (diff)
Merge branch 'x86/urgent' into core/efi, to pick up a pending EFI fix
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf')
176 files changed, 16665 insertions, 1488 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 09db62ba5786..3d1bb802dbf4 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -29,3 +29,4 @@ config.mak.autogen | |||
29 | *.pyc | 29 | *.pyc |
30 | *.pyo | 30 | *.pyo |
31 | .config-detected | 31 | .config-detected |
32 | util/intel-pt-decoder/inat-tables.c | ||
diff --git a/tools/perf/Build b/tools/perf/Build
index b77370ef7005..72237455b400 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -35,6 +35,7 @@ paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" | |||
35 | CFLAGS_builtin-help.o += $(paths) | 35 | CFLAGS_builtin-help.o += $(paths) |
36 | CFLAGS_builtin-timechart.o += $(paths) | 36 | CFLAGS_builtin-timechart.o += $(paths) |
37 | CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE | 37 | CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE |
38 | CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))" | ||
38 | 39 | ||
39 | libperf-y += util/ | 40 | libperf-y += util/ |
40 | libperf-y += arch/ | 41 | libperf-y += arch/ |
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 000000000000..8bdc93bd7fdb
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@ | |||
1 | Intel Branch Trace Store | ||
2 | ======================== | ||
3 | |||
4 | Overview | ||
5 | ======== | ||
6 | |||
7 | Intel BTS could be regarded as a predecessor to Intel PT and has some | ||
8 | similarities because it can also identify every branch a program takes. A | ||
9 | notable difference is that Intel BTS has no timing information and as a | ||
10 | consequence the present implementation is limited to per-thread recording. | ||
11 | |||
12 | While decoding Intel BTS does not require walking the object code, the object | ||
13 | code is still needed to pair up calls and returns correctly; consequently, much | ||
14 | of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT | ||
15 | documentation and consider that the PMU 'intel_bts' can usually be used in | ||
16 | place of 'intel_pt' in the examples provided, with the proviso that per-thread | ||
17 | recording must also be stipulated i.e. the --per-thread option for | ||
18 | 'perf record'. | ||
19 | |||
20 | |||
21 | perf record | ||
22 | =========== | ||
23 | |||
24 | new event | ||
25 | --------- | ||
26 | |||
27 | The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record | ||
28 | option is: | ||
29 | |||
30 | -e intel_bts// | ||
31 | |||
32 | Currently Intel BTS is limited to per-thread tracing so the --per-thread option | ||
33 | is also needed. | ||
34 | |||
35 | |||
36 | snapshot option | ||
37 | --------------- | ||
38 | |||
39 | The snapshot option is the same as for Intel PT (refer to the Intel PT documentation). | ||
40 | |||
41 | |||
42 | auxtrace mmap size option | ||
43 | ------------------------- | ||
44 | |||
45 | The mmap size option is the same as for Intel PT (refer to the Intel PT documentation). | ||
46 | |||
47 | |||
48 | perf script | ||
49 | =========== | ||
50 | |||
51 | By default, perf script will decode trace data found in the perf.data file. | ||
52 | This can be further controlled by option --itrace. The --itrace option is | ||
53 | the same as for Intel PT (refer to the Intel PT documentation) except that neither | ||
54 | "instructions" events nor "transactions" events (and consequently call | ||
55 | chains) are supported. | ||
56 | |||
57 | To disable trace decoding entirely, use the option --no-itrace. | ||
58 | |||
59 | |||
60 | dump option | ||
61 | ----------- | ||
62 | |||
63 | perf script has an option (-D) to "dump" the events i.e. display the binary | ||
64 | data. | ||
65 | |||
66 | When -D is used, Intel BTS packets are displayed. | ||
67 | |||
68 | To disable the display of Intel BTS packets, combine the -D option with | ||
69 | --no-itrace. | ||
70 | |||
71 | |||
72 | perf report | ||
73 | =========== | ||
74 | |||
75 | By default, perf report will decode trace data found in the perf.data file. | ||
76 | This can be further controlled by new option --itrace exactly the same as | ||
77 | perf script. | ||
78 | |||
79 | |||
80 | perf inject | ||
81 | =========== | ||
82 | |||
83 | perf inject also accepts the --itrace option in which case tracing data is | ||
84 | removed and replaced with the synthesized events. e.g. | ||
85 | |||
86 | perf inject --itrace -i perf.data -o perf.data.new | ||
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
new file mode 100644
index 000000000000..c94c9de3173e
--- /dev/null
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -0,0 +1,751 @@ | |||
1 | Intel Processor Trace | ||
2 | ===================== | ||
3 | |||
4 | Overview | ||
5 | ======== | ||
6 | |||
7 | Intel Processor Trace (Intel PT) is an extension of Intel Architecture that | ||
8 | collects information about software execution such as control flow, execution | ||
9 | modes and timings and formats it into highly compressed binary packets. | ||
10 | Technical details are documented in the Intel 64 and IA-32 Architectures | ||
11 | Software Developer Manuals, Chapter 36 Intel Processor Trace. | ||
12 | |||
13 | Intel PT is first supported in Intel Core M and 5th generation Intel Core | ||
14 | processors that are based on the Intel micro-architecture code name Broadwell. | ||
15 | |||
16 | Trace data is collected by 'perf record' and stored within the perf.data file. | ||
17 | See below for options to 'perf record'. | ||
18 | |||
19 | Trace data must be 'decoded' which involves walking the object code and matching | ||
20 | the trace data packets. For example a TNT packet only tells whether a | ||
21 | conditional branch was taken or not taken, so to make use of that packet the | ||
22 | decoder must know precisely which instruction was being executed. | ||
23 | |||
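To make the role of TNT packets concrete, here is a small illustrative sketch (Python; the addresses and branch order are invented for the example and this is not perf's decoder) of how taken/not-taken bits are paired with the conditional branches found by walking the object code:

    # Illustrative only - addresses and branch order are invented for the example.
    cond_branches = [0x401000, 0x401020, 0x401044]   # conditional branches in execution order
    tnt_bits = [True, False, True]                   # one taken/not-taken bit per branch (TNT packet)

    for addr, taken in zip(cond_branches, tnt_bits):
        print(f"{addr:#x}: {'taken' if taken else 'not taken'}")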
24 | Decoding is done on-the-fly. The decoder outputs samples in the same format as | ||
25 | samples output by perf hardware events, for example as though the "instructions" | ||
26 | or "branches" events had been recorded. Presently 3 tools support this: | ||
27 | 'perf script', 'perf report' and 'perf inject'. See below for more information | ||
28 | on using those tools. | ||
29 | |||
30 | The main distinguishing feature of Intel PT is that the decoder can determine | ||
31 | the exact flow of software execution. Intel PT can be used to understand why | ||
32 | and how software got to a certain point, or behaved a certain way. The | ||
33 | software does not have to be recompiled, so Intel PT works with debug or release | ||
34 | builds; however, the executed images are needed - which makes use in JIT-compiled | ||
35 | environments, or with self-modified code, a challenge. Also symbols need to be | ||
36 | provided to make sense of addresses. | ||
37 | |||
38 | A limitation of Intel PT is that it produces huge amounts of trace data | ||
39 | (hundreds of megabytes per second per core) which takes a long time to decode, | ||
40 | for example two or three orders of magnitude longer than it took to collect. | ||
41 | Another limitation is the performance impact of tracing, something that will | ||
42 | vary depending on the use-case and architecture. | ||
43 | |||
44 | |||
45 | Quickstart | ||
46 | ========== | ||
47 | |||
48 | It is important to start small. That is because it is easy to capture vastly | ||
49 | more data than can possibly be processed. | ||
50 | |||
51 | The simplest thing to do with Intel PT is userspace profiling of small programs. | ||
52 | Data is captured with 'perf record' e.g. to trace 'ls' userspace-only: | ||
53 | |||
54 | perf record -e intel_pt//u ls | ||
55 | |||
56 | And profiled with 'perf report' e.g. | ||
57 | |||
58 | perf report | ||
59 | |||
60 | To also trace kernel space presents a problem, namely kernel self-modifying | ||
61 | code. A fairly good kernel image is available in /proc/kcore but to get an | ||
62 | accurate image a copy of /proc/kcore needs to be made under the same conditions | ||
63 | as the data capture. A script perf-with-kcore can do that, but beware that the | ||
64 | script makes use of 'sudo' to copy /proc/kcore. If you have perf installed | ||
65 | locally from the source tree you can do: | ||
66 | |||
67 | ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls | ||
68 | |||
69 | which will create a directory named 'pt_ls' and put the perf.data file and | ||
70 | copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use | ||
71 | 'perf report' becomes: | ||
72 | |||
73 | ~/libexec/perf-core/perf-with-kcore report pt_ls | ||
74 | |||
75 | Because samples are synthesized after-the-fact, the sampling period can be | ||
76 | selected for reporting. e.g. sample every microsecond | ||
77 | |||
78 | ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge | ||
79 | |||
80 | See the sections below for more information about the --itrace option. | ||
81 | |||
82 | Beware the smaller the period, the more samples that are produced, and the | ||
83 | longer it takes to process them. | ||
84 | |||
85 | Also note that the coarseness of Intel PT timing information will start to | ||
86 | distort the statistical value of the sampling as the sampling period becomes | ||
87 | smaller. | ||
88 | |||
89 | To represent software control flow, "branches" samples are produced. By default | ||
90 | a branch sample is synthesized for every single branch. To get an idea what | ||
91 | data is available you can use the 'perf script' tool with no parameters, which | ||
92 | will list all the samples. | ||
93 | |||
94 | perf record -e intel_pt//u ls | ||
95 | perf script | ||
96 | |||
97 | An interesting field that is not printed by default is 'flags' which can be | ||
98 | displayed as follows: | ||
99 | |||
100 | perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags | ||
101 | |||
102 | The flags are "bcrosyiABEx" which stand for branch, call, return, conditional, | ||
103 | system, asynchronous, interrupt, transaction abort, trace begin, trace end, and | ||
104 | in transaction, respectively. | ||
105 | |||
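As an illustrative aid (a sketch only, not part of perf), the letter-to-meaning mapping described above can be written out as a small Python table for decoding a flags string:

    # Mapping of the one-letter flags listed above to their meanings (sketch only).
    FLAG_MEANINGS = {
        "b": "branch",      "c": "call",              "r": "return",
        "o": "conditional", "s": "system",            "y": "asynchronous",
        "i": "interrupt",   "A": "transaction abort", "B": "trace begin",
        "E": "trace end",   "x": "in transaction",
    }

    def describe_flags(flags):
        return [FLAG_MEANINGS[f] for f in flags if f in FLAG_MEANINGS]

    print(describe_flags("br"))   # ['branch', 'return'] - example flags string only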
106 | While it is possible to create scripts to analyze the data, an alternative | ||
107 | approach is available to export the data to a postgresql database. Refer to | ||
108 | script export-to-postgresql.py for more details, and to script | ||
109 | call-graph-from-postgresql.py for an example of using the database. | ||
110 | |||
111 | As mentioned above, it is easy to capture too much data. One way to limit the | ||
112 | data captured is to use 'snapshot' mode which is explained further below. | ||
113 | Refer to 'new snapshot option' and 'Intel PT modes of operation' further below. | ||
114 | |||
115 | Another problem that will be experienced is decoder errors. They can be caused | ||
116 | by inability to access the executed image, self-modified or JIT-ed code, or the | ||
117 | inability to match side-band information (such as context switches and mmaps) | ||
118 | which results in the decoder not knowing what code was executed. | ||
119 | |||
120 | There is also the problem of perf not being able to copy the data fast enough, | ||
121 | resulting in data lost because the buffer was full. See 'Buffer handling' below | ||
122 | for more details. | ||
123 | |||
124 | |||
125 | perf record | ||
126 | =========== | ||
127 | |||
128 | new event | ||
129 | --------- | ||
130 | |||
131 | The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are | ||
132 | selected by providing the PMU name followed by the "config" separated by slashes. | ||
133 | An enhancement has been made to allow default "config" e.g. the option | ||
134 | |||
135 | -e intel_pt// | ||
136 | |||
137 | will use a default config value. Currently that is the same as | ||
138 | |||
139 | -e intel_pt/tsc,noretcomp=0/ | ||
140 | |||
141 | which is the same as | ||
142 | |||
143 | -e intel_pt/tsc=1,noretcomp=0/ | ||
144 | |||
145 | Note there are now new config terms - see section 'config terms' further below. | ||
146 | |||
147 | The config terms are listed in /sys/devices/intel_pt/format. They are bit | ||
148 | fields within the config member of the struct perf_event_attr which is | ||
149 | passed to the kernel by the perf_event_open system call. They correspond to bit | ||
150 | fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions: | ||
151 | |||
152 | $ grep -H . /sys/bus/event_source/devices/intel_pt/format/* | ||
153 | /sys/bus/event_source/devices/intel_pt/format/cyc:config:1 | ||
154 | /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22 | ||
155 | /sys/bus/event_source/devices/intel_pt/format/mtc:config:9 | ||
156 | /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17 | ||
157 | /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11 | ||
158 | /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27 | ||
159 | /sys/bus/event_source/devices/intel_pt/format/tsc:config:10 | ||
160 | |||
161 | Note that the default config must be overridden for each term i.e. | ||
162 | |||
163 | -e intel_pt/noretcomp=0/ | ||
164 | |||
165 | is the same as: | ||
166 | |||
167 | -e intel_pt/tsc=1,noretcomp=0/ | ||
168 | |||
169 | So, to disable TSC packets use: | ||
170 | |||
171 | -e intel_pt/tsc=0/ | ||
172 | |||
173 | It is also possible to specify the config value explicitly: | ||
174 | |||
175 | -e intel_pt/config=0x400/ | ||
176 | |||
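As a cross-check of the bit positions listed in the format files above, here is a small illustrative sketch (Python, not part of perf) that composes a config value from the named terms; selecting only tsc (bit 10) yields 0x400, the value used in the explicit example above and shown in the perf_event_attr dump below:

    # Bit positions taken from the format files listed above (sketch only).
    FORMAT_BITS = {
        "cyc": (1, 1),           # config:1
        "mtc": (9, 9),           # config:9
        "tsc": (10, 10),         # config:10
        "noretcomp": (11, 11),   # config:11
        "mtc_period": (14, 17),  # config:14-17
        "cyc_thresh": (19, 22),  # config:19-22
        "psb_period": (24, 27),  # config:24-27
    }

    def make_config(**terms):
        config = 0
        for name, value in terms.items():
            lo, hi = FORMAT_BITS[name]
            assert value < (1 << (hi - lo + 1)), "value does not fit in the bit field"
            config |= value << lo
        return config

    print(hex(make_config(tsc=1)))               # 0x400
    print(hex(make_config(tsc=1, noretcomp=1)))  # 0xc00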
177 | Note that, as with all events, the event is suffixed with event modifiers: | ||
178 | |||
179 | u userspace | ||
180 | k kernel | ||
181 | h hypervisor | ||
182 | G guest | ||
183 | H host | ||
184 | p precise ip | ||
185 | |||
186 | 'h', 'G' and 'H' are for virtualization which is not supported by Intel PT. | ||
187 | 'p' is also not relevant to Intel PT. So only options 'u' and 'k' are | ||
188 | meaningful for Intel PT. | ||
189 | |||
190 | perf_event_attr is displayed if the -vv option is used e.g. | ||
191 | |||
192 | ------------------------------------------------------------ | ||
193 | perf_event_attr: | ||
194 | type 6 | ||
195 | size 112 | ||
196 | config 0x400 | ||
197 | { sample_period, sample_freq } 1 | ||
198 | sample_type IP|TID|TIME|CPU|IDENTIFIER | ||
199 | read_format ID | ||
200 | disabled 1 | ||
201 | inherit 1 | ||
202 | exclude_kernel 1 | ||
203 | exclude_hv 1 | ||
204 | enable_on_exec 1 | ||
205 | sample_id_all 1 | ||
206 | ------------------------------------------------------------ | ||
207 | sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 | ||
208 | sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 | ||
209 | sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 | ||
210 | sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 | ||
211 | ------------------------------------------------------------ | ||
212 | |||
213 | |||
214 | config terms | ||
215 | ------------ | ||
216 | |||
217 | The June 2015 version of Intel 64 and IA-32 Architectures Software Developer | ||
218 | Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features. | ||
219 | Some of the features are reflected in new config terms. All the config terms are | ||
220 | described below. | ||
221 | |||
222 | tsc Always supported. Produces TSC timestamp packets to provide | ||
223 | timing information. In some cases it is possible to decode | ||
224 | without timing information, for example a per-thread context | ||
225 | that does not overlap executable memory maps. | ||
226 | |||
227 | The default config selects tsc (i.e. tsc=1). | ||
228 | |||
229 | noretcomp Always supported. Disables "return compression" so a TIP packet | ||
230 | is produced when a function returns. Causes more packets to be | ||
231 | produced but might make decoding more reliable. | ||
232 | |||
233 | The default config does not select noretcomp (i.e. noretcomp=0). | ||
234 | |||
235 | psb_period Allows the frequency of PSB packets to be specified. | ||
236 | |||
237 | The PSB packet is a synchronization packet that provides a | ||
238 | starting point for decoding or recovery from errors. | ||
239 | |||
240 | Support for psb_period is indicated by: | ||
241 | |||
242 | /sys/bus/event_source/devices/intel_pt/caps/psb_cyc | ||
243 | |||
244 | which contains "1" if the feature is supported and "0" | ||
245 | otherwise. | ||
246 | |||
247 | Valid values are given by: | ||
248 | |||
249 | /sys/bus/event_source/devices/intel_pt/caps/psb_periods | ||
250 | |||
251 | which contains a hexadecimal value, the bits of which represent | ||
252 | valid values e.g. bit 2 set means value 2 is valid. | ||
253 | |||
254 | The psb_period value is converted to the approximate number of | ||
255 | trace bytes between PSB packets as: | ||
256 | |||
257 | 2 ^ (value + 11) | ||
258 | |||
259 | e.g. value 3 means 16KiB between PSBs | ||
260 | |||
261 | If an invalid value is entered, the error message | ||
262 | will give a list of valid values e.g. | ||
263 | |||
264 | $ perf record -e intel_pt/psb_period=15/u uname | ||
265 | Invalid psb_period for intel_pt. Valid values are: 0-5 | ||
266 | |||
267 | If MTC packets are selected, the default config selects a value | ||
268 | of 3 (i.e. psb_period=3) or the nearest lower value that is | ||
269 | supported (0 is always supported). Otherwise the default is 0. | ||
270 | |||
271 | If decoding is expected to be reliable and the buffer is large | ||
272 | then a large PSB period can be used. | ||
273 | |||
274 | Because a TSC packet is produced with PSB, the PSB period can | ||
275 | also affect the granularity of timing information in the absence | ||
276 | of MTC or CYC. | ||
277 | |||
278 | mtc Produces MTC timing packets. | ||
279 | |||
280 | MTC packets provide finer grain timestamp information than TSC | ||
281 | packets. MTC packets record time using the hardware crystal | ||
282 | clock (CTC) which is related to TSC packets using a TMA packet. | ||
283 | |||
284 | Support for this feature is indicated by: | ||
285 | |||
286 | /sys/bus/event_source/devices/intel_pt/caps/mtc | ||
287 | |||
288 | which contains "1" if the feature is supported and | ||
289 | "0" otherwise. | ||
290 | |||
291 | The frequency of MTC packets can also be specified - see | ||
292 | mtc_period below. | ||
293 | |||
294 | mtc_period Specifies how frequently MTC packets are produced - see mtc | ||
295 | above for how to determine if MTC packets are supported. | ||
296 | |||
297 | Valid values are given by: | ||
298 | |||
299 | /sys/bus/event_source/devices/intel_pt/caps/mtc_periods | ||
300 | |||
301 | which contains a hexadecimal value, the bits of which represent | ||
302 | valid values e.g. bit 2 set means value 2 is valid. | ||
303 | |||
304 | The mtc_period value is converted to the MTC frequency as: | ||
305 | |||
306 | CTC-frequency / (2 ^ value) | ||
307 | |||
308 | e.g. value 3 means one eighth of CTC-frequency | ||
309 | |||
310 | Where CTC is the hardware crystal clock, the frequency of which | ||
311 | can be related to TSC via values provided in cpuid leaf 0x15. | ||
312 | |||
313 | If an invalid value is entered, the error message | ||
314 | will give a list of valid values e.g. | ||
315 | |||
316 | $ perf record -e intel_pt/mtc_period=15/u uname | ||
317 | Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9 | ||
318 | |||
319 | The default value is 3 or the nearest lower value | ||
320 | that is supported (0 is always supported). | ||
321 | |||
322 | cyc Produces CYC timing packets. | ||
323 | |||
324 | CYC packets provide even finer grain timestamp information than | ||
325 | MTC and TSC packets. A CYC packet contains the number of CPU | ||
326 | cycles since the last CYC packet. Unlike MTC and TSC packets, | ||
327 | CYC packets are only sent when another packet is also sent. | ||
328 | |||
329 | Support for this feature is indicated by: | ||
330 | |||
331 | /sys/bus/event_source/devices/intel_pt/caps/psb_cyc | ||
332 | |||
333 | which contains "1" if the feature is supported and | ||
334 | "0" otherwise. | ||
335 | |||
336 | The number of CYC packets produced can be reduced by specifying | ||
337 | a threshold - see cyc_thresh below. | ||
338 | |||
339 | cyc_thresh Specifies how frequently CYC packets are produced - see cyc | ||
340 | above for how to determine if CYC packets are supported. | ||
341 | |||
342 | Valid cyc_thresh values are given by: | ||
343 | |||
344 | /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds | ||
345 | |||
346 | which contains a hexadecimal value, the bits of which represent | ||
347 | valid values e.g. bit 2 set means value 2 is valid. | ||
348 | |||
349 | The cyc_thresh value represents the minimum number of CPU cycles | ||
350 | that must have passed before a CYC packet can be sent. The | ||
351 | number of CPU cycles is: | ||
352 | |||
353 | 2 ^ (value - 1) | ||
354 | |||
355 | e.g. value 4 means 8 CPU cycles must pass before a CYC packet | ||
356 | can be sent. Note a CYC packet is still only sent when another | ||
357 | packet is sent, not, e.g., at every 8 CPU cycles. | ||
358 | |||
359 | If an invalid value is entered, the error message | ||
360 | will give a list of valid values e.g. | ||
361 | |||
362 | $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname | ||
363 | Invalid cyc_thresh for intel_pt. Valid values are: 0-12 | ||
364 | |||
365 | CYC packets are not requested by default. | ||
366 | |||
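The period and threshold conversions described in the config terms above can be summarised in a short illustrative sketch (Python; the 24MHz crystal clock value is an assumption for the example, not something stated by this document):

    # Conversions quoted in the config terms above (sketch only).

    def psb_period_bytes(value):
        # approximate number of trace bytes between PSB packets
        return 2 ** (value + 11)

    def mtc_frequency(ctc_hz, value):
        # MTC packet frequency derived from the crystal clock (CTC)
        return ctc_hz / (2 ** value)

    def cyc_thresh_cycles(value):
        # minimum CPU cycles that must pass before a CYC packet can be sent (value >= 1)
        return 2 ** (value - 1)

    print(psb_period_bytes(3))            # 16384 -> 16KiB between PSBs
    print(mtc_frequency(24_000_000, 3))   # 3000000.0 -> one eighth of an assumed 24MHz CTC
    print(cyc_thresh_cycles(4))           # 8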
367 | |||
368 | new snapshot option | ||
369 | ------------------- | ||
370 | |||
371 | The difference between full trace and snapshot from the kernel's perspective is | ||
372 | that in full trace we don't overwrite trace data that the user hasn't collected | ||
373 | yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let | ||
374 | the trace run and overwrite older data in the buffer so that whenever something | ||
375 | interesting happens, we can stop it and grab a snapshot of what was going on | ||
376 | around that interesting moment. | ||
377 | |||
378 | To select snapshot mode a new option has been added: | ||
379 | |||
380 | -S | ||
381 | |||
382 | Optionally it can be followed by the snapshot size e.g. | ||
383 | |||
384 | -S0x100000 | ||
385 | |||
386 | The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size | ||
387 | nor snapshot size is specified, then the default is 4MiB for privileged users | ||
388 | (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. | ||
389 | If an unprivileged user does not specify mmap pages, the mmap pages will be | ||
390 | reduced as described in the 'new auxtrace mmap size option' section below. | ||
391 | |||
392 | The snapshot size is displayed if the option -vv is used e.g. | ||
393 | |||
394 | Intel PT snapshot size: %zu | ||
395 | |||
396 | |||
397 | new auxtrace mmap size option | ||
398 | ----------------------------- | ||
399 | |||
400 | Intel PT buffer size is specified by an addition to the -m option e.g. | ||
401 | |||
402 | -m,16 | ||
403 | |||
404 | selects a buffer size of 16 pages i.e. 64KiB. | ||
405 | |||
406 | Note that the existing functionality of -m is unchanged. The auxtrace mmap size | ||
407 | is specified by the optional addition of a comma and the value. | ||
408 | |||
409 | The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users | ||
410 | (or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users. | ||
411 | If an unprivileged user does not specify mmap pages, the mmap pages will be | ||
412 | reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the | ||
413 | user is likely to get an error as they exceed their mlock limit (Max locked | ||
414 | memory as shown in /proc/self/limits). Note that perf does not count the first | ||
415 | 512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu | ||
416 | against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus | ||
417 | their mlock limit (which defaults to 64KiB but is not multiplied by the number | ||
418 | of cpus). | ||
419 | |||
420 | In full-trace mode, powers of two are allowed for buffer size, with a minimum | ||
421 | size of 2 pages. In snapshot mode, it is the same but the minimum size is | ||
422 | 1 page. | ||
423 | |||
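A rough worked example of the sizing rules above (Python sketch; a 4KiB page size and the default values quoted in the text are assumed):

    PAGE_SIZE = 4 * 1024

    print(16 * PAGE_SIZE)                    # "-m,16" selects 16 pages, i.e. 65536 bytes (64KiB)

    # Unprivileged defaults: 256KiB mmap plus 128KiB auxtrace mmap per cpu.
    per_cpu_request = 256 * 1024 + 128 * 1024
    not_counted_per_cpu = 512 * 1024         # approx. perf_event_mlock_kb minus one page
    charged_to_mlock = max(0, per_cpu_request - not_counted_per_cpu)
    print(per_cpu_request, charged_to_mlock) # 393216 0 -> nothing charged against the 64KiB mlock limit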
424 | The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g. | ||
425 | |||
426 | mmap length 528384 | ||
427 | auxtrace mmap length 4198400 | ||
428 | |||
429 | |||
430 | Intel PT modes of operation | ||
431 | --------------------------- | ||
432 | |||
433 | Intel PT can be used in 2 modes: | ||
434 | full-trace mode | ||
435 | snapshot mode | ||
436 | |||
437 | Full-trace mode traces continuously e.g. | ||
438 | |||
439 | perf record -e intel_pt//u uname | ||
440 | |||
441 | Snapshot mode captures the available data when a signal is sent e.g. | ||
442 | |||
443 | perf record -v -e intel_pt//u -S ./loopy 1000000000 & | ||
444 | [1] 11435 | ||
445 | kill -USR2 11435 | ||
446 | Recording AUX area tracing snapshot | ||
447 | |||
448 | Note that the signal sent is SIGUSR2. | ||
449 | Note that "Recording AUX area tracing snapshot" is displayed because the -v | ||
450 | option is used. | ||
451 | |||
452 | The 2 modes cannot be used together. | ||
453 | |||
454 | |||
455 | Buffer handling | ||
456 | --------------- | ||
457 | |||
458 | There may be buffer limitations (i.e. single ToPa entry) which means that actual | ||
459 | buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to | ||
460 | provide other sizes, and in particular an arbitrarily large size, multiple | ||
461 | buffers are logically concatenated. However an interrupt must be used to switch | ||
462 | between buffers. That has two potential problems: | ||
463 | a) the interrupt may not be handled in time so that the current buffer | ||
464 | becomes full and some trace data is lost. | ||
465 | b) the interrupts may slow the system and affect the performance | ||
466 | results. | ||
467 | |||
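One way to picture the "logically concatenated" buffers mentioned above is the following sketch (Python; the splitting strategy is illustrative only and not necessarily how the driver lays out its buffers):

    MAX_SUB_BUFFER = 4 * 1024 * 1024      # power-of-two limit quoted above (MAX_ORDER)

    def split_into_subbuffers(total_bytes):
        # assumes total_bytes is a multiple of the page size
        chunks, remaining, size = [], total_bytes, MAX_SUB_BUFFER
        while remaining:
            while size > remaining:
                size //= 2
            chunks.append(size)
            remaining -= size
        return chunks

    print(split_into_subbuffers(10 * 1024 * 1024))   # [4194304, 4194304, 2097152]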
468 | If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event | ||
469 | which the tools report as an error. | ||
470 | |||
471 | In full-trace mode, the driver waits for data to be copied out before allowing | ||
472 | the (logical) buffer to wrap-around. If data is not copied out quickly enough, | ||
473 | again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to | ||
474 | wait, the intel_pt event gets disabled. Because it is difficult to know when | ||
475 | that happens, perf tools always re-enable the intel_pt event after copying out | ||
476 | data. | ||
477 | |||
478 | |||
479 | Intel PT and build ids | ||
480 | ---------------------- | ||
481 | |||
482 | By default "perf record" post-processes the event stream to find all build ids | ||
483 | for executables for all addresses sampled. Deliberately, Intel PT is not | ||
484 | decoded for that purpose (it would take too long). Instead the build ids for | ||
485 | all executables encountered (due to mmap, comm or task events) are included | ||
486 | in the perf.data file. | ||
487 | |||
488 | To see buildids included in the perf.data file use the command: | ||
489 | |||
490 | perf buildid-list | ||
491 | |||
492 | If the perf.data file contains Intel PT data, that is the same as: | ||
493 | |||
494 | perf buildid-list --with-hits | ||
495 | |||
496 | |||
497 | Snapshot mode and event disabling | ||
498 | --------------------------------- | ||
499 | |||
500 | In order to make a snapshot, the intel_pt event is disabled using an IOCTL, | ||
501 | namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the | ||
502 | collection of side-band information. In order to prevent that, a dummy | ||
503 | software event has been introduced that permits tracking events (like mmaps) to | ||
504 | continue to be recorded while intel_pt is disabled. That is important to ensure | ||
505 | there is complete side-band information to allow the decoding of subsequent | ||
506 | snapshots. | ||
507 | |||
508 | A test has been created for that. To find the test: | ||
509 | |||
510 | perf test list | ||
511 | ... | ||
512 | 23: Test using a dummy software event to keep tracking | ||
513 | |||
514 | To run the test: | ||
515 | |||
516 | perf test 23 | ||
517 | 23: Test using a dummy software event to keep tracking : Ok | ||
518 | |||
519 | |||
520 | perf record modes (nothing new here) | ||
521 | ------------------------------------ | ||
522 | |||
523 | perf record essentially operates in one of three modes: | ||
524 | per thread | ||
525 | per cpu | ||
526 | workload only | ||
527 | |||
528 | "per thread" mode is selected by -t or by --per-thread (with -p or -u or just a | ||
529 | workload). | ||
530 | "per cpu" is selected by -C or -a. | ||
531 | "workload only" mode is selected by not using the other options but providing a | ||
532 | command to run (i.e. the workload). | ||
533 | |||
534 | In per-thread mode an exact list of threads is traced. There is no inheritance. | ||
535 | Each thread has its own event buffer. | ||
536 | |||
537 | In per-cpu mode all processes (or processes from the selected cgroup i.e. -G | ||
538 | option, or processes selected with -p or -u) are traced. Each cpu has its own | ||
539 | buffer. Inheritance is allowed. | ||
540 | |||
541 | In workload-only mode, the workload is traced but with per-cpu buffers. | ||
542 | Inheritance is allowed. Note that you can now trace a workload in per-thread | ||
543 | mode by using the --per-thread option. | ||
544 | |||
545 | |||
546 | Privileged vs non-privileged users | ||
547 | ---------------------------------- | ||
548 | |||
549 | Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users | ||
550 | have memory limits imposed upon them. That affects what buffer sizes they can | ||
551 | have as outlined above. | ||
552 | |||
553 | Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are | ||
554 | not permitted to use tracepoints which means there is insufficient side-band | ||
555 | information to decode Intel PT in per-cpu mode, and potentially workload-only | ||
556 | mode too if the workload creates new processes. | ||
557 | |||
558 | Note also that, to use tracepoints, read-access to debugfs is required. So if | ||
559 | debugfs is not mounted or the user does not have read-access, it will again not | ||
560 | be possible to decode Intel PT in per-cpu mode. | ||
561 | |||
562 | |||
563 | sched_switch tracepoint | ||
564 | ----------------------- | ||
565 | |||
566 | The sched_switch tracepoint is used to provide side-band data for Intel PT | ||
567 | decoding. sched_switch events are automatically added. e.g. the second event | ||
568 | shown below | ||
569 | |||
570 | $ perf record -vv -e intel_pt//u uname | ||
571 | ------------------------------------------------------------ | ||
572 | perf_event_attr: | ||
573 | type 6 | ||
574 | size 112 | ||
575 | config 0x400 | ||
576 | { sample_period, sample_freq } 1 | ||
577 | sample_type IP|TID|TIME|CPU|IDENTIFIER | ||
578 | read_format ID | ||
579 | disabled 1 | ||
580 | inherit 1 | ||
581 | exclude_kernel 1 | ||
582 | exclude_hv 1 | ||
583 | enable_on_exec 1 | ||
584 | sample_id_all 1 | ||
585 | ------------------------------------------------------------ | ||
586 | sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 | ||
587 | sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 | ||
588 | sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 | ||
589 | sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 | ||
590 | ------------------------------------------------------------ | ||
591 | perf_event_attr: | ||
592 | type 2 | ||
593 | size 112 | ||
594 | config 0x108 | ||
595 | { sample_period, sample_freq } 1 | ||
596 | sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER | ||
597 | read_format ID | ||
598 | inherit 1 | ||
599 | sample_id_all 1 | ||
600 | exclude_guest 1 | ||
601 | ------------------------------------------------------------ | ||
602 | sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8 | ||
603 | sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8 | ||
604 | sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8 | ||
605 | sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8 | ||
606 | ------------------------------------------------------------ | ||
607 | perf_event_attr: | ||
608 | type 1 | ||
609 | size 112 | ||
610 | config 0x9 | ||
611 | { sample_period, sample_freq } 1 | ||
612 | sample_type IP|TID|TIME|IDENTIFIER | ||
613 | read_format ID | ||
614 | disabled 1 | ||
615 | inherit 1 | ||
616 | exclude_kernel 1 | ||
617 | exclude_hv 1 | ||
618 | mmap 1 | ||
619 | comm 1 | ||
620 | enable_on_exec 1 | ||
621 | task 1 | ||
622 | sample_id_all 1 | ||
623 | mmap2 1 | ||
624 | comm_exec 1 | ||
625 | ------------------------------------------------------------ | ||
626 | sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8 | ||
627 | sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8 | ||
628 | sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8 | ||
629 | sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8 | ||
630 | mmap size 528384B | ||
631 | AUX area mmap length 4194304 | ||
632 | perf event ring buffer mmapped per cpu | ||
633 | Synthesizing auxtrace information | ||
634 | Linux | ||
635 | [ perf record: Woken up 1 times to write data ] | ||
636 | [ perf record: Captured and wrote 0.042 MB perf.data ] | ||
637 | |||
638 | Note, the sched_switch event is only added if the user is permitted to use it | ||
639 | and only in per-cpu mode. | ||
640 | |||
641 | Note also, the sched_switch event is only added if TSC packets are requested. | ||
642 | That is because, in the absence of timing information, the sched_switch events | ||
643 | cannot be matched against the Intel PT trace. | ||
644 | |||
645 | |||
646 | perf script | ||
647 | =========== | ||
648 | |||
649 | By default, perf script will decode trace data found in the perf.data file. | ||
650 | This can be further controlled by new option --itrace. | ||
651 | |||
652 | |||
653 | New --itrace option | ||
654 | ------------------- | ||
655 | |||
656 | Having no option is the same as | ||
657 | |||
658 | --itrace | ||
659 | |||
660 | which, in turn, is the same as | ||
661 | |||
662 | --itrace=ibxe | ||
663 | |||
664 | The letters are: | ||
665 | |||
666 | i synthesize "instructions" events | ||
667 | b synthesize "branches" events | ||
668 | x synthesize "transactions" events | ||
669 | c synthesize branches events (calls only) | ||
670 | r synthesize branches events (returns only) | ||
671 | e synthesize tracing error events | ||
672 | d create a debug log | ||
673 | g synthesize a call chain (use with i or x) | ||
674 | |||
675 | "Instructions" events look like they were recorded by "perf record -e | ||
676 | instructions". | ||
677 | |||
678 | "Branches" events look like they were recorded by "perf record -e branches". "c" | ||
679 | and "r" can be combined to get calls and returns. | ||
680 | |||
681 | "Transactions" events correspond to the start or end of transactions. The | ||
682 | 'flags' field can be used in perf script to determine whether the event is a | ||
683 | transaction start, commit or abort. | ||
684 | |||
685 | Error events are new. They show where the decoder lost the trace. Error events | ||
686 | are quite important. Users must know if what they are seeing is a complete | ||
687 | picture or not. | ||
688 | |||
689 | The "d" option will cause the creation of a file "intel_pt.log" containing all | ||
690 | decoded packets and instructions. Note that this option slows down the decoder | ||
691 | and that the resulting file may be very large. | ||
692 | |||
693 | In addition, the period of the "instructions" event can be specified. e.g. | ||
694 | |||
695 | --itrace=i10us | ||
696 | |||
697 | sets the period to 10us i.e. one instruction sample is synthesized for each 10 | ||
698 | microseconds of trace. Alternatives to "us" are "ms" (milliseconds), | ||
699 | "ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions). | ||
700 | |||
701 | "ms", "us" and "ns" are converted to TSC ticks. | ||
702 | |||
703 | The timing information included with Intel PT does not give the time of every | ||
704 | instruction. Consequently, for the purpose of sampling, the decoder estimates | ||
705 | the time since the last timing packet based on 1 tick per instruction. The time | ||
706 | on the sample is *not* adjusted and reflects the last known value of TSC. | ||
707 | |||
708 | For Intel PT, the default period is 100us. | ||
709 | |||
710 | Also the call chain size (default 16, max. 1024) for instructions or | ||
711 | transactions events can be specified. e.g. | ||
712 | |||
713 | --itrace=ig32 | ||
714 | --itrace=xg32 | ||
715 | |||
716 | To disable trace decoding entirely, use the option --no-itrace. | ||
717 | |||
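Putting the pieces of the --itrace syntax described above together, here is a small illustrative parser (Python; it covers only the forms shown in this document, and perf's real parser may accept more):

    import re

    def parse_itrace(spec="ibxe"):
        # e.g. "ibxe", "i10us", "ig32" - grammar assumed from the description above
        events, period, unit, callchain = set(), None, None, None
        for letter, number, suffix in re.findall(r"([ibcrxedg])(\d*)(ms|us|ns|t|i)?", spec):
            events.add(letter)
            if letter == "i" and number:
                period, unit = int(number), suffix or None
            if letter == "g" and number:
                callchain = int(number)
        return sorted(events), period, unit, callchain

    print(parse_itrace("i10us"))   # (['i'], 10, 'us', None)
    print(parse_itrace("ig32"))    # (['g', 'i'], None, None, 32)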
718 | |||
719 | dump option | ||
720 | ----------- | ||
721 | |||
722 | perf script has an option (-D) to "dump" the events i.e. display the binary | ||
723 | data. | ||
724 | |||
725 | When -D is used, Intel PT packets are displayed. The packet decoder does not | ||
726 | pay attention to PSB packets, but just decodes the bytes - so the packets seen | ||
727 | by the actual decoder may not be identical in places where the data is corrupt. | ||
728 | One example of that would be when the buffer-switching interrupt has been too | ||
729 | slow, and the buffer has been filled completely. In that case, the last packet | ||
730 | in the buffer might be truncated and immediately followed by a PSB as the trace | ||
731 | continues in the next buffer. | ||
732 | |||
733 | To disable the display of Intel PT packets, combine the -D option with | ||
734 | --no-itrace. | ||
735 | |||
736 | |||
737 | perf report | ||
738 | =========== | ||
739 | |||
740 | By default, perf report will decode trace data found in the perf.data file. | ||
741 | This can be further controlled by new option --itrace exactly the same as | ||
742 | perf script, with the exception that the default is --itrace=igxe. | ||
743 | |||
744 | |||
745 | perf inject | ||
746 | =========== | ||
747 | |||
748 | perf inject also accepts the --itrace option in which case tracing data is | ||
749 | removed and replaced with the synthesized events. e.g. | ||
750 | |||
751 | perf inject --itrace -i perf.data -o perf.data.new | ||
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
new file mode 100644
index 000000000000..2ff946677e3b
--- /dev/null
+++ b/tools/perf/Documentation/itrace.txt
@@ -0,0 +1,22 @@ | |||
1 | i synthesize instructions events | ||
2 | b synthesize branches events | ||
3 | c synthesize branches events (calls only) | ||
4 | r synthesize branches events (returns only) | ||
5 | x synthesize transactions events | ||
6 | e synthesize error events | ||
7 | d create a debug log | ||
8 | g synthesize a call chain (use with i or x) | ||
9 | |||
10 | The default is all events i.e. the same as --itrace=ibxe | ||
11 | |||
12 | In addition, the period (default 100000) for instructions events | ||
13 | can be specified in units of: | ||
14 | |||
15 | i instructions | ||
16 | t ticks | ||
17 | ms milliseconds | ||
18 | us microseconds | ||
19 | ns nanoseconds (default) | ||
20 | |||
21 | Also the call chain size (default 16, max. 1024) for instructions or | ||
22 | transactions events can be specified. | ||
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bf3d0644bf10..ab632d9fbd7d 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -216,6 +216,10 @@ Suite for evaluating parallel wake calls. | |||
216 | *requeue*:: | 216 | *requeue*:: |
217 | Suite for evaluating requeue calls. | 217 | Suite for evaluating requeue calls. |
218 | 218 | ||
219 | *lock-pi*:: | ||
220 | Suite for evaluating futex lock_pi calls. | ||
221 | |||
222 | |||
219 | SEE ALSO | 223 | SEE ALSO |
220 | -------- | 224 | -------- |
221 | linkperf:perf[1] | 225 | linkperf:perf[1] |
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index b876ae312699..0c721c3e37e1 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -48,28 +48,7 @@ OPTIONS | |||
48 | Decode Instruction Tracing data, replacing it with synthesized events. | 48 | Decode Instruction Tracing data, replacing it with synthesized events. |
49 | Options are: | 49 | Options are: |
50 | 50 | ||
51 | i synthesize instructions events | 51 | include::itrace.txt[] |
52 | b synthesize branches events | ||
53 | c synthesize branches events (calls only) | ||
54 | r synthesize branches events (returns only) | ||
55 | x synthesize transactions events | ||
56 | e synthesize error events | ||
57 | d create a debug log | ||
58 | g synthesize a call chain (use with i or x) | ||
59 | |||
60 | The default is all events i.e. the same as --itrace=ibxe | ||
61 | |||
62 | In addition, the period (default 100000) for instructions events | ||
63 | can be specified in units of: | ||
64 | |||
65 | i instructions | ||
66 | t ticks | ||
67 | ms milliseconds | ||
68 | us microseconds | ||
69 | ns nanoseconds (default) | ||
70 | |||
71 | Also the call chain size (default 16, max. 1024) for instructions or | ||
72 | transactions events can be specified. | ||
73 | 52 | ||
74 | SEE ALSO | 53 | SEE ALSO |
75 | -------- | 54 | -------- |
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9b9d9d086680..2e9ce77b5e14 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -45,6 +45,21 @@ OPTIONS | |||
45 | param1 and param2 are defined as formats for the PMU in: | 45 | param1 and param2 are defined as formats for the PMU in: |
46 | /sys/bus/event_sources/devices/<pmu>/format/* | 46 | /sys/bus/event_sources/devices/<pmu>/format/* |
47 | 47 | ||
48 | There are also some params which are not defined in .../<pmu>/format/*. | ||
49 | These params can be used to override default config values per event. | ||
50 | Here is a list of the params. | ||
51 | - 'period': Set event sampling period | ||
52 | - 'freq': Set event sampling frequency | ||
53 | - 'time': Disable/enable time stamping. Acceptable values are 1 to | ||
54 | enable time stamping and 0 to disable it. | ||
55 | The default is 1. | ||
56 | - 'call-graph': Disable/enable callgraph. Acceptable values are "fp" for | ||
57 | FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and | ||
58 | "no" to disable the callgraph. | ||
59 | - 'stack-size': user stack size for dwarf mode | ||
60 | Note: If user explicitly sets options which conflict with the params, | ||
61 | the value set by the params will be overridden. | ||
62 | |||
48 | - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' | 63 | - a hardware breakpoint event in the form of '\mem:addr[/len][:access]' |
49 | where addr is the address in memory you want to break in. | 64 | where addr is the address in memory you want to break in. |
50 | Access is the memory access type (read, write, execute) it can | 65 | Access is the memory access type (read, write, execute) it can |
@@ -61,7 +76,16 @@ OPTIONS | |||
61 | "perf report" to view group events together. | 76 | "perf report" to view group events together. |
62 | 77 | ||
63 | --filter=<filter>:: | 78 | --filter=<filter>:: |
64 | Event filter. | 79 | Event filter. This option should follow an event selector (-e) which
80 | selects tracepoint event(s). Multiple '--filter' options are combined | ||
81 | using '&&'. | ||
82 | |||
83 | --exclude-perf:: | ||
84 | Don't record events issued by perf itself. This option should follow | ||
85 | a event selector (-e) which selects tracepoint event(s). It adds a | ||
86 | filter expression 'common_pid != $PERFPID' to filters. If other | ||
87 | '--filter' exists, the new filter expression will be combined with | ||
88 | them by '&&'. | ||
65 | 89 | ||
66 | -a:: | 90 | -a:: |
67 | --all-cpus:: | 91 | --all-cpus:: |
@@ -252,7 +276,11 @@ filter out the startup phase of the program, which is often very different. | |||
252 | --intr-regs:: | 276 | --intr-regs:: |
253 | Capture machine state (registers) at interrupt, i.e., on counter overflows for | 277 | Capture machine state (registers) at interrupt, i.e., on counter overflows for |
254 | each sample. List of captured registers depends on the architecture. This option | 278 | each sample. List of captured registers depends on the architecture. This option |
255 | is off by default. | 279 | is off by default. It is possible to select the registers to sample using their |
280 | symbolic names, e.g. on x86, ax, si. To list the available registers use | ||
281 | --intr-regs=\?. To name registers, pass a comma separated list such as | ||
282 | --intr-regs=ax,bx. The list of registers is architecture dependent. | ||
283 | |||
256 | 284 | ||
257 | --running-time:: | 285 | --running-time:: |
258 | Record running and enabled time for read events (:S) | 286 | Record running and enabled time for read events (:S) |
@@ -276,6 +304,10 @@ When processing pre-existing threads /proc/XXX/mmap, it may take a long time, | |||
276 | because the file may be huge. A time out is needed in such cases. | 304 | because the file may be huge. A time out is needed in such cases. |
277 | This option sets the time out limit. The default value is 500 ms. | 305 | This option sets the time out limit. The default value is 500 ms. |
278 | 306 | ||
307 | --switch-events:: | ||
308 | Record context switch events i.e. events of type PERF_RECORD_SWITCH or | ||
309 | PERF_RECORD_SWITCH_CPU_WIDE. | ||
310 | |||
279 | SEE ALSO | 311 | SEE ALSO |
280 | -------- | 312 | -------- |
281 | linkperf:perf-stat[1], linkperf:perf-list[1] | 313 | linkperf:perf-stat[1], linkperf:perf-list[1] |
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index c33b69f3374f..9c7981bfddad 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -81,6 +81,8 @@ OPTIONS | |||
81 | - cpu: cpu number the task ran at the time of sample | 81 | - cpu: cpu number the task ran at the time of sample |
82 | - srcline: filename and line number executed at the time of sample. The | 82 | - srcline: filename and line number executed at the time of sample. The |
83 | DWARF debugging info must be provided. | 83 | DWARF debugging info must be provided. |
84 | - srcfile: file name of the source file of the sample. Requires dwarf | ||
85 | information. | ||
84 | - weight: Event specific weight, e.g. memory latency or transaction | 86 | - weight: Event specific weight, e.g. memory latency or transaction |
85 | abort cost. This is the global weight. | 87 | abort cost. This is the global weight. |
86 | - local_weight: Local weight version of the weight above. | 88 | - local_weight: Local weight version of the weight above. |
@@ -109,6 +111,7 @@ OPTIONS | |||
109 | - mispredict: "N" for predicted branch, "Y" for mispredicted branch | 111 | - mispredict: "N" for predicted branch, "Y" for mispredicted branch |
110 | - in_tx: branch in TSX transaction | 112 | - in_tx: branch in TSX transaction |
111 | - abort: TSX transaction abort. | 113 | - abort: TSX transaction abort. |
114 | - cycles: Cycles in basic block | ||
112 | 115 | ||
113 | And default sort keys are changed to comm, dso_from, symbol_from, dso_to | 116 | And default sort keys are changed to comm, dso_from, symbol_from, dso_to |
114 | and symbol_to, see '--branch-stack'. | 117 | and symbol_to, see '--branch-stack'. |
@@ -328,31 +331,23 @@ OPTIONS | |||
328 | --itrace:: | 331 | --itrace:: |
329 | Options for decoding instruction tracing data. The options are: | 332 | Options for decoding instruction tracing data. The options are: |
330 | 333 | ||
331 | i synthesize instructions events | 334 | include::itrace.txt[] |
332 | b synthesize branches events | ||
333 | c synthesize branches events (calls only) | ||
334 | r synthesize branches events (returns only) | ||
335 | x synthesize transactions events | ||
336 | e synthesize error events | ||
337 | d create a debug log | ||
338 | g synthesize a call chain (use with i or x) | ||
339 | |||
340 | The default is all events i.e. the same as --itrace=ibxe | ||
341 | |||
342 | In addition, the period (default 100000) for instructions events | ||
343 | can be specified in units of: | ||
344 | |||
345 | i instructions | ||
346 | t ticks | ||
347 | ms milliseconds | ||
348 | us microseconds | ||
349 | ns nanoseconds (default) | ||
350 | |||
351 | Also the call chain size (default 16, max. 1024) for instructions or | ||
352 | transactions events can be specified. | ||
353 | 335 | ||
354 | To disable decoding entirely, use --no-itrace. | 336 | To disable decoding entirely, use --no-itrace. |
355 | 337 | ||
338 | --full-source-path:: | ||
339 | Show the full path for source files for srcline output. | ||
340 | |||
341 | --show-ref-call-graph:: | ||
342 | When multiple events are sampled, it may not be needed to collect | ||
343 | callgraphs for all of them. The sample sites are usually nearby, | ||
344 | and it's enough to collect the callgraphs on a reference event. | ||
345 | So the user can use the "call-graph=no" event modifier to disable callgraphs | ||
346 | for other events and reduce the overhead. | ||
347 | However, perf report cannot show callgraphs for events which | ||
348 | have the callgraph disabled. | ||
349 | This option extends perf report to show reference callgraphs, | ||
350 | collected by the reference event, for events without callgraphs. | ||
356 | 351 | ||
357 | include::callchain-overhead-calculation.txt[] | 352 | include::callchain-overhead-calculation.txt[] |
358 | 353 | ||
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c82df572fac2..dc3ec783b7bd 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS | |||
116 | --fields:: | 116 | --fields:: |
117 | Comma separated list of fields to print. Options are: | 117 | Comma separated list of fields to print. Options are: |
118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, | 118 | comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, |
119 | srcline, period, flags. | 119 | srcline, period, iregs, flags. |
120 | Field list can be prepended with the type, trace, sw or hw, | 120 | Field list can be prepended with the type, trace, sw or hw, |
121 | to indicate to which event type the field list applies. | 121 | to indicate to which event type the field list applies. |
122 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace | 122 | e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace |
@@ -222,6 +222,17 @@ OPTIONS | |||
222 | --show-mmap-events | 222 | --show-mmap-events |
223 | Display mmap related events (e.g. MMAP, MMAP2). | 223 | Display mmap related events (e.g. MMAP, MMAP2). |
224 | 224 | ||
225 | --show-switch-events | ||
226 | Display context switch events i.e. events of type PERF_RECORD_SWITCH or | ||
227 | PERF_RECORD_SWITCH_CPU_WIDE. | ||
228 | |||
229 | --demangle:: | ||
230 | Demangle symbol names to human readable form. It's enabled by default, | ||
231 | disable with --no-demangle. | ||
232 | |||
233 | --demangle-kernel:: | ||
234 | Demangle kernel symbol names to human readable form (for C++ kernels). | ||
235 | |||
225 | --header | 236 | --header |
226 | Show perf.data header. | 237 | Show perf.data header. |
227 | 238 | ||
@@ -231,31 +242,13 @@ OPTIONS | |||
231 | --itrace:: | 242 | --itrace:: |
232 | Options for decoding instruction tracing data. The options are: | 243 | Options for decoding instruction tracing data. The options are: |
233 | 244 | ||
234 | i synthesize instructions events | 245 | include::itrace.txt[] |
235 | b synthesize branches events | ||
236 | c synthesize branches events (calls only) | ||
237 | r synthesize branches events (returns only) | ||
238 | x synthesize transactions events | ||
239 | e synthesize error events | ||
240 | d create a debug log | ||
241 | g synthesize a call chain (use with i or x) | ||
242 | |||
243 | The default is all events i.e. the same as --itrace=ibxe | ||
244 | |||
245 | In addition, the period (default 100000) for instructions events | ||
246 | can be specified in units of: | ||
247 | |||
248 | i instructions | ||
249 | t ticks | ||
250 | ms milliseconds | ||
251 | us microseconds | ||
252 | ns nanoseconds (default) | ||
253 | |||
254 | Also the call chain size (default 16, max. 1024) for instructions or | ||
255 | transactions events can be specified. | ||
256 | 246 | ||
257 | To disable decoding entirely, use --no-itrace. | 247 | To disable decoding entirely, use --no-itrace. |
258 | 248 | ||
249 | --full-source-path:: | ||
250 | Show the full path for source files for srcline output. | ||
251 | |||
259 | SEE ALSO | 252 | SEE ALSO |
260 | -------- | 253 | -------- |
261 | linkperf:perf-record[1], linkperf:perf-script-perl[1], | 254 | linkperf:perf-record[1], linkperf:perf-script-perl[1], |
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 776aec4d0927..f6a23eb294e7 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -208,6 +208,27 @@ Default is to monitor all CPUS. | |||
208 | This option sets the time out limit. The default value is 500 ms. | 208 | This option sets the time out limit. The default value is 500 ms. |
209 | 209 | ||
210 | 210 | ||
211 | -b:: | ||
212 | --branch-any:: | ||
213 | Enable taken branch stack sampling. Any type of taken branch may be sampled. | ||
214 | This is a shortcut for --branch-filter any. See --branch-filter for more infos. | ||
215 | |||
216 | -j:: | ||
217 | --branch-filter:: | ||
218 | Enable taken branch stack sampling. Each sample captures a series of consecutive | ||
219 | taken branches. The number of branches captured with each sample depends on the | ||
220 | underlying hardware, the type of branches of interest, and the executed code. | ||
221 | It is possible to select the types of branches captured by enabling filters. | ||
222 | For a full list of modifiers please see the perf record manpage. | ||
223 | |||
224 | The option requires at least one branch type among any, any_call, any_ret, ind_call, cond. | ||
225 | The privilege levels may be omitted, in which case, the privilege levels of the associated | ||
226 | event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege | ||
227 | levels are subject to permissions. When sampling on multiple events, branch stack sampling | ||
228 | is enabled for all the sampling events. The sampled branch type is the same for all events. | ||
229 | The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k | ||
230 | Note that this feature may not be available on all processors. | ||
231 | |||
211 | INTERACTIVE PROMPTING KEYS | 232 | INTERACTIVE PROMPTING KEYS |
212 | -------------------------- | 233 | -------------------------- |
213 | 234 | ||
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index d01a0aad5a01..af009bd6e6b7 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h | |||
18 | tools/arch/x86/include/asm/rmwcc.h | 18 | tools/arch/x86/include/asm/rmwcc.h |
19 | tools/lib/traceevent | 19 | tools/lib/traceevent |
20 | tools/lib/api | 20 | tools/lib/api |
21 | tools/lib/bpf | ||
21 | tools/lib/hweight.c | 22 | tools/lib/hweight.c |
22 | tools/lib/rbtree.c | 23 | tools/lib/rbtree.c |
23 | tools/lib/symbol/kallsyms.c | 24 | tools/lib/symbol/kallsyms.c |
@@ -40,7 +41,6 @@ tools/include/asm-generic/bitops.h | |||
40 | tools/include/linux/atomic.h | 41 | tools/include/linux/atomic.h |
41 | tools/include/linux/bitops.h | 42 | tools/include/linux/bitops.h |
42 | tools/include/linux/compiler.h | 43 | tools/include/linux/compiler.h |
43 | tools/include/linux/export.h | ||
44 | tools/include/linux/hash.h | 44 | tools/include/linux/hash.h |
45 | tools/include/linux/kernel.h | 45 | tools/include/linux/kernel.h |
46 | tools/include/linux/list.h | 46 | tools/include/linux/list.h |
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index bba34636b733..d9863cb96f59 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf | |||
@@ -76,6 +76,12 @@ include config/utilities.mak | |||
76 | # | 76 | # |
77 | # Define NO_AUXTRACE if you do not want AUX area tracing support | 77 | # Define NO_AUXTRACE if you do not want AUX area tracing support |
78 | 78 | ||
79 | # As per kernel Makefile, avoid funny character set dependencies | ||
80 | unexport LC_ALL | ||
81 | LC_COLLATE=C | ||
82 | LC_NUMERIC=C | ||
83 | export LC_COLLATE LC_NUMERIC | ||
84 | |||
79 | ifeq ($(srctree),) | 85 | ifeq ($(srctree),) |
80 | srctree := $(patsubst %/,%,$(dir $(shell pwd))) | 86 | srctree := $(patsubst %/,%,$(dir $(shell pwd))) |
81 | srctree := $(patsubst %/,%,$(dir $(srctree))) | 87 | srctree := $(patsubst %/,%,$(dir $(srctree))) |
@@ -135,6 +141,7 @@ INSTALL = install | |||
135 | FLEX = flex | 141 | FLEX = flex |
136 | BISON = bison | 142 | BISON = bison |
137 | STRIP = strip | 143 | STRIP = strip |
144 | AWK = awk | ||
138 | 145 | ||
139 | LIB_DIR = $(srctree)/tools/lib/api/ | 146 | LIB_DIR = $(srctree)/tools/lib/api/ |
140 | TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ | 147 | TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ |
@@ -289,7 +296,7 @@ strip: $(PROGRAMS) $(OUTPUT)perf | |||
289 | 296 | ||
290 | PERF_IN := $(OUTPUT)perf-in.o | 297 | PERF_IN := $(OUTPUT)perf-in.o |
291 | 298 | ||
292 | export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX | 299 | export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX AWK |
293 | build := -f $(srctree)/tools/build/Makefile.build dir=. obj | 300 | build := -f $(srctree)/tools/build/Makefile.build dir=. obj |
294 | 301 | ||
295 | $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE | 302 | $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE |
@@ -507,6 +514,11 @@ endif | |||
507 | $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' | 514 | $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' |
508 | $(call QUIET_INSTALL, perf-with-kcore) \ | 515 | $(call QUIET_INSTALL, perf-with-kcore) \ |
509 | $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' | 516 | $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' |
517 | ifndef NO_LIBAUDIT | ||
518 | $(call QUIET_INSTALL, strace/groups) \ | ||
519 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \ | ||
520 | $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)' | ||
521 | endif | ||
510 | ifndef NO_LIBPERL | 522 | ifndef NO_LIBPERL |
511 | $(call QUIET_INSTALL, perl-scripts) \ | 523 | $(call QUIET_INSTALL, perl-scripts) \ |
512 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ | 524 | $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \ |
@@ -560,7 +572,8 @@ clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean | |||
560 | $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete | 572 | $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete |
561 | $(Q)$(RM) $(OUTPUT).config-detected | 573 | $(Q)$(RM) $(OUTPUT).config-detected |
562 | $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 | 574 | $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 |
563 | $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* | 575 | $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \ |
576 | $(OUTPUT)util/intel-pt-decoder/inat-tables.c | ||
564 | $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean | 577 | $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean |
565 | $(python-clean) | 578 | $(python-clean) |
566 | 579 | ||
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/alpha/Build | |||
@@ -0,0 +1 @@ | |||
# empty | |||
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index b7bb42c44694..b00dfd92ea73 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c | |||
@@ -128,7 +128,7 @@ static const char *normalize_arch(char *arch) | |||
128 | return arch; | 128 | return arch; |
129 | } | 129 | } |
130 | 130 | ||
131 | static int perf_session_env__lookup_binutils_path(struct perf_session_env *env, | 131 | static int perf_session_env__lookup_binutils_path(struct perf_env *env, |
132 | const char *name, | 132 | const char *name, |
133 | const char **path) | 133 | const char **path) |
134 | { | 134 | { |
@@ -206,7 +206,7 @@ out_error: | |||
206 | return -1; | 206 | return -1; |
207 | } | 207 | } |
208 | 208 | ||
209 | int perf_session_env__lookup_objdump(struct perf_session_env *env) | 209 | int perf_session_env__lookup_objdump(struct perf_env *env) |
210 | { | 210 | { |
211 | /* | 211 | /* |
212 | * For live mode, env->arch will be NULL and we can use | 212 | * For live mode, env->arch will be NULL and we can use |
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index ede246eda9be..20176df69fc8 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h | |||
@@ -5,6 +5,6 @@ | |||
5 | 5 | ||
6 | extern const char *objdump_path; | 6 | extern const char *objdump_path; |
7 | 7 | ||
8 | int perf_session_env__lookup_objdump(struct perf_session_env *env); | 8 | int perf_session_env__lookup_objdump(struct perf_env *env); |
9 | 9 | ||
10 | #endif /* ARCH_PERF_COMMON_H */ | 10 | #endif /* ARCH_PERF_COMMON_H */ |
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/mips/Build | |||
@@ -0,0 +1 @@ | |||
# empty | |||
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build new file mode 100644 index 000000000000..1bb8bf6d7fd4 --- /dev/null +++ b/tools/perf/arch/parisc/Build | |||
@@ -0,0 +1 @@ | |||
# empty | |||
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c index 0d0897f57a10..f8dfa89696f4 100644 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ b/tools/perf/arch/sh/util/dwarf-regs.c | |||
@@ -51,5 +51,5 @@ const char *sh_regs_table[SH_MAX_REGS] = { | |||
51 | /* Return architecture dependent register string (for kprobe-tracer) */ | 51 | /* Return architecture dependent register string (for kprobe-tracer) */ |
52 | const char *get_arch_regstr(unsigned int n) | 52 | const char *get_arch_regstr(unsigned int n) |
53 | { | 53 | { |
54 | return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL; | 54 | return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; |
55 | } | 55 | } |
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c index 92eda412fed3..b704fdb9237a 100644 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ b/tools/perf/arch/sparc/util/dwarf-regs.c | |||
@@ -39,5 +39,5 @@ const char *sparc_regs_table[SPARC_MAX_REGS] = { | |||
39 | */ | 39 | */ |
40 | const char *get_arch_regstr(unsigned int n) | 40 | const char *get_arch_regstr(unsigned int n) |
41 | { | 41 | { |
42 | return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; | 42 | return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; |
43 | } | 43 | } |
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index cfbccc4e3187..ff63649fa9ac 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build | |||
@@ -1,8 +1,14 @@ | |||
1 | libperf-y += header.o | 1 | libperf-y += header.o |
2 | libperf-y += tsc.o | 2 | libperf-y += tsc.o |
3 | libperf-y += pmu.o | ||
3 | libperf-y += kvm-stat.o | 4 | libperf-y += kvm-stat.o |
5 | libperf-y += perf_regs.o | ||
4 | 6 | ||
5 | libperf-$(CONFIG_DWARF) += dwarf-regs.o | 7 | libperf-$(CONFIG_DWARF) += dwarf-regs.o |
6 | 8 | ||
7 | libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o | 9 | libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o |
8 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o | 10 | libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o |
11 | |||
12 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o | ||
13 | libperf-$(CONFIG_AUXTRACE) += intel-pt.o | ||
14 | libperf-$(CONFIG_AUXTRACE) += intel-bts.o | ||
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c new file mode 100644 index 000000000000..7a7805583e3f --- /dev/null +++ b/tools/perf/arch/x86/util/auxtrace.c | |||
@@ -0,0 +1,83 @@ | |||
1 | /* | ||
2 | * auxtrace.c: AUX area tracing support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <stdbool.h> | ||
17 | |||
18 | #include "../../util/header.h" | ||
19 | #include "../../util/debug.h" | ||
20 | #include "../../util/pmu.h" | ||
21 | #include "../../util/auxtrace.h" | ||
22 | #include "../../util/intel-pt.h" | ||
23 | #include "../../util/intel-bts.h" | ||
24 | #include "../../util/evlist.h" | ||
25 | |||
26 | static | ||
27 | struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist, | ||
28 | int *err) | ||
29 | { | ||
30 | struct perf_pmu *intel_pt_pmu; | ||
31 | struct perf_pmu *intel_bts_pmu; | ||
32 | struct perf_evsel *evsel; | ||
33 | bool found_pt = false; | ||
34 | bool found_bts = false; | ||
35 | |||
36 | intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); | ||
37 | intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); | ||
38 | |||
39 | if (evlist) { | ||
40 | evlist__for_each(evlist, evsel) { | ||
41 | if (intel_pt_pmu && | ||
42 | evsel->attr.type == intel_pt_pmu->type) | ||
43 | found_pt = true; | ||
44 | if (intel_bts_pmu && | ||
45 | evsel->attr.type == intel_bts_pmu->type) | ||
46 | found_bts = true; | ||
47 | } | ||
48 | } | ||
49 | |||
50 | if (found_pt && found_bts) { | ||
51 | pr_err("intel_pt and intel_bts may not be used together\n"); | ||
52 | *err = -EINVAL; | ||
53 | return NULL; | ||
54 | } | ||
55 | |||
56 | if (found_pt) | ||
57 | return intel_pt_recording_init(err); | ||
58 | |||
59 | if (found_bts) | ||
60 | return intel_bts_recording_init(err); | ||
61 | |||
62 | return NULL; | ||
63 | } | ||
64 | |||
65 | struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, | ||
66 | int *err) | ||
67 | { | ||
68 | char buffer[64]; | ||
69 | int ret; | ||
70 | |||
71 | *err = 0; | ||
72 | |||
73 | ret = get_cpuid(buffer, sizeof(buffer)); | ||
74 | if (ret) { | ||
75 | *err = ret; | ||
76 | return NULL; | ||
77 | } | ||
78 | |||
79 | if (!strncmp(buffer, "GenuineIntel,", 13)) | ||
80 | return auxtrace_record__init_intel(evlist, err); | ||
81 | |||
82 | return NULL; | ||
83 | } | ||
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index be22dd463232..a08de0a35b83 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c | |||
@@ -71,5 +71,5 @@ const char *x86_64_regs_table[X86_64_MAX_REGS] = { | |||
71 | /* Return architecture dependent register string (for kprobe-tracer) */ | 71 | /* Return architecture dependent register string (for kprobe-tracer) */ |
72 | const char *get_arch_regstr(unsigned int n) | 72 | const char *get_arch_regstr(unsigned int n) |
73 | { | 73 | { |
74 | return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; | 74 | return (n < ARCH_MAX_REGS) ? arch_regs_table[n] : NULL; |
75 | } | 75 | } |
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c new file mode 100644 index 000000000000..9b94ce520917 --- /dev/null +++ b/tools/perf/arch/x86/util/intel-bts.c | |||
@@ -0,0 +1,458 @@ | |||
1 | /* | ||
2 | * intel-bts.c: Intel Branch Trace Store (BTS) support | ||
3 | * Copyright (c) 2013-2015, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/bitops.h> | ||
19 | #include <linux/log2.h> | ||
20 | |||
21 | #include "../../util/cpumap.h" | ||
22 | #include "../../util/evsel.h" | ||
23 | #include "../../util/evlist.h" | ||
24 | #include "../../util/session.h" | ||
25 | #include "../../util/util.h" | ||
26 | #include "../../util/pmu.h" | ||
27 | #include "../../util/debug.h" | ||
28 | #include "../../util/tsc.h" | ||
29 | #include "../../util/auxtrace.h" | ||
30 | #include "../../util/intel-bts.h" | ||
31 | |||
32 | #define KiB(x) ((x) * 1024) | ||
33 | #define MiB(x) ((x) * 1024 * 1024) | ||
34 | #define KiB_MASK(x) (KiB(x) - 1) | ||
35 | #define MiB_MASK(x) (MiB(x) - 1) | ||
36 | |||
37 | #define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4) | ||
38 | |||
39 | #define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60) | ||
40 | |||
41 | struct intel_bts_snapshot_ref { | ||
42 | void *ref_buf; | ||
43 | size_t ref_offset; | ||
44 | bool wrapped; | ||
45 | }; | ||
46 | |||
47 | struct intel_bts_recording { | ||
48 | struct auxtrace_record itr; | ||
49 | struct perf_pmu *intel_bts_pmu; | ||
50 | struct perf_evlist *evlist; | ||
51 | bool snapshot_mode; | ||
52 | size_t snapshot_size; | ||
53 | int snapshot_ref_cnt; | ||
54 | struct intel_bts_snapshot_ref *snapshot_refs; | ||
55 | }; | ||
56 | |||
57 | struct branch { | ||
58 | u64 from; | ||
59 | u64 to; | ||
60 | u64 misc; | ||
61 | }; | ||
62 | |||
63 | static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused) | ||
64 | { | ||
65 | return INTEL_BTS_AUXTRACE_PRIV_SIZE; | ||
66 | } | ||
67 | |||
68 | static int intel_bts_info_fill(struct auxtrace_record *itr, | ||
69 | struct perf_session *session, | ||
70 | struct auxtrace_info_event *auxtrace_info, | ||
71 | size_t priv_size) | ||
72 | { | ||
73 | struct intel_bts_recording *btsr = | ||
74 | container_of(itr, struct intel_bts_recording, itr); | ||
75 | struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; | ||
76 | struct perf_event_mmap_page *pc; | ||
77 | struct perf_tsc_conversion tc = { .time_mult = 0, }; | ||
78 | bool cap_user_time_zero = false; | ||
79 | int err; | ||
80 | |||
81 | if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE) | ||
82 | return -EINVAL; | ||
83 | |||
84 | if (!session->evlist->nr_mmaps) | ||
85 | return -EINVAL; | ||
86 | |||
87 | pc = session->evlist->mmap[0].base; | ||
88 | if (pc) { | ||
89 | err = perf_read_tsc_conversion(pc, &tc); | ||
90 | if (err) { | ||
91 | if (err != -EOPNOTSUPP) | ||
92 | return err; | ||
93 | } else { | ||
94 | cap_user_time_zero = tc.time_mult != 0; | ||
95 | } | ||
96 | if (!cap_user_time_zero) | ||
97 | ui__warning("Intel BTS: TSC not available\n"); | ||
98 | } | ||
99 | |||
100 | auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS; | ||
101 | auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type; | ||
102 | auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift; | ||
103 | auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult; | ||
104 | auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero; | ||
105 | auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero; | ||
106 | auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode; | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static int intel_bts_recording_options(struct auxtrace_record *itr, | ||
112 | struct perf_evlist *evlist, | ||
113 | struct record_opts *opts) | ||
114 | { | ||
115 | struct intel_bts_recording *btsr = | ||
116 | container_of(itr, struct intel_bts_recording, itr); | ||
117 | struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu; | ||
118 | struct perf_evsel *evsel, *intel_bts_evsel = NULL; | ||
119 | const struct cpu_map *cpus = evlist->cpus; | ||
120 | bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; | ||
121 | |||
122 | btsr->evlist = evlist; | ||
123 | btsr->snapshot_mode = opts->auxtrace_snapshot_mode; | ||
124 | |||
125 | evlist__for_each(evlist, evsel) { | ||
126 | if (evsel->attr.type == intel_bts_pmu->type) { | ||
127 | if (intel_bts_evsel) { | ||
128 | pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n"); | ||
129 | return -EINVAL; | ||
130 | } | ||
131 | evsel->attr.freq = 0; | ||
132 | evsel->attr.sample_period = 1; | ||
133 | intel_bts_evsel = evsel; | ||
134 | opts->full_auxtrace = true; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { | ||
139 | pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n"); | ||
140 | return -EINVAL; | ||
141 | } | ||
142 | |||
143 | if (!opts->full_auxtrace) | ||
144 | return 0; | ||
145 | |||
146 | if (opts->full_auxtrace && !cpu_map__empty(cpus)) { | ||
147 | pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n"); | ||
148 | return -EINVAL; | ||
149 | } | ||
150 | |||
151 | /* Set default sizes for snapshot mode */ | ||
152 | if (opts->auxtrace_snapshot_mode) { | ||
153 | if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { | ||
154 | if (privileged) { | ||
155 | opts->auxtrace_mmap_pages = MiB(4) / page_size; | ||
156 | } else { | ||
157 | opts->auxtrace_mmap_pages = KiB(128) / page_size; | ||
158 | if (opts->mmap_pages == UINT_MAX) | ||
159 | opts->mmap_pages = KiB(256) / page_size; | ||
160 | } | ||
161 | } else if (!opts->auxtrace_mmap_pages && !privileged && | ||
162 | opts->mmap_pages == UINT_MAX) { | ||
163 | opts->mmap_pages = KiB(256) / page_size; | ||
164 | } | ||
165 | if (!opts->auxtrace_snapshot_size) | ||
166 | opts->auxtrace_snapshot_size = | ||
167 | opts->auxtrace_mmap_pages * (size_t)page_size; | ||
168 | if (!opts->auxtrace_mmap_pages) { | ||
169 | size_t sz = opts->auxtrace_snapshot_size; | ||
170 | |||
171 | sz = round_up(sz, page_size) / page_size; | ||
172 | opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); | ||
173 | } | ||
174 | if (opts->auxtrace_snapshot_size > | ||
175 | opts->auxtrace_mmap_pages * (size_t)page_size) { | ||
176 | pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", | ||
177 | opts->auxtrace_snapshot_size, | ||
178 | opts->auxtrace_mmap_pages * (size_t)page_size); | ||
179 | return -EINVAL; | ||
180 | } | ||
181 | if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { | ||
182 | pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); | ||
183 | return -EINVAL; | ||
184 | } | ||
185 | pr_debug2("Intel BTS snapshot size: %zu\n", | ||
186 | opts->auxtrace_snapshot_size); | ||
187 | } | ||
188 | |||
189 | /* Set default sizes for full trace mode */ | ||
190 | if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { | ||
191 | if (privileged) { | ||
192 | opts->auxtrace_mmap_pages = MiB(4) / page_size; | ||
193 | } else { | ||
194 | opts->auxtrace_mmap_pages = KiB(128) / page_size; | ||
195 | if (opts->mmap_pages == UINT_MAX) | ||
196 | opts->mmap_pages = KiB(256) / page_size; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | /* Validate auxtrace_mmap_pages */ | ||
201 | if (opts->auxtrace_mmap_pages) { | ||
202 | size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; | ||
203 | size_t min_sz; | ||
204 | |||
205 | if (opts->auxtrace_snapshot_mode) | ||
206 | min_sz = KiB(4); | ||
207 | else | ||
208 | min_sz = KiB(8); | ||
209 | |||
210 | if (sz < min_sz || !is_power_of_2(sz)) { | ||
211 | pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n", | ||
212 | min_sz / 1024); | ||
213 | return -EINVAL; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | if (intel_bts_evsel) { | ||
218 | /* | ||
219 | * To obtain the auxtrace buffer file descriptor, the auxtrace event | ||
220 | * must come first. | ||
221 | */ | ||
222 | perf_evlist__to_front(evlist, intel_bts_evsel); | ||
223 | /* | ||
224 | * In the case of per-cpu mmaps, we need the CPU on the | ||
225 | * AUX event. | ||
226 | */ | ||
227 | if (!cpu_map__empty(cpus)) | ||
228 | perf_evsel__set_sample_bit(intel_bts_evsel, CPU); | ||
229 | } | ||
230 | |||
231 | /* Add dummy event to keep tracking */ | ||
232 | if (opts->full_auxtrace) { | ||
233 | struct perf_evsel *tracking_evsel; | ||
234 | int err; | ||
235 | |||
236 | err = parse_events(evlist, "dummy:u", NULL); | ||
237 | if (err) | ||
238 | return err; | ||
239 | |||
240 | tracking_evsel = perf_evlist__last(evlist); | ||
241 | |||
242 | perf_evlist__set_tracking_event(evlist, tracking_evsel); | ||
243 | |||
244 | tracking_evsel->attr.freq = 0; | ||
245 | tracking_evsel->attr.sample_period = 1; | ||
246 | } | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr, | ||
252 | struct record_opts *opts, | ||
253 | const char *str) | ||
254 | { | ||
255 | struct intel_bts_recording *btsr = | ||
256 | container_of(itr, struct intel_bts_recording, itr); | ||
257 | unsigned long long snapshot_size = 0; | ||
258 | char *endptr; | ||
259 | |||
260 | if (str) { | ||
261 | snapshot_size = strtoull(str, &endptr, 0); | ||
262 | if (*endptr || snapshot_size > SIZE_MAX) | ||
263 | return -1; | ||
264 | } | ||
265 | |||
266 | opts->auxtrace_snapshot_mode = true; | ||
267 | opts->auxtrace_snapshot_size = snapshot_size; | ||
268 | |||
269 | btsr->snapshot_size = snapshot_size; | ||
270 | |||
271 | return 0; | ||
272 | } | ||
273 | |||
274 | static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused) | ||
275 | { | ||
276 | return rdtsc(); | ||
277 | } | ||
278 | |||
279 | static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr, | ||
280 | int idx) | ||
281 | { | ||
282 | const size_t sz = sizeof(struct intel_bts_snapshot_ref); | ||
283 | int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2; | ||
284 | struct intel_bts_snapshot_ref *refs; | ||
285 | |||
286 | if (!new_cnt) | ||
287 | new_cnt = 16; | ||
288 | |||
289 | while (new_cnt <= idx) | ||
290 | new_cnt *= 2; | ||
291 | |||
292 | refs = calloc(new_cnt, sz); | ||
293 | if (!refs) | ||
294 | return -ENOMEM; | ||
295 | |||
296 | memcpy(refs, btsr->snapshot_refs, cnt * sz); | ||
297 | |||
298 | btsr->snapshot_refs = refs; | ||
299 | btsr->snapshot_ref_cnt = new_cnt; | ||
300 | |||
301 | return 0; | ||
302 | } | ||
303 | |||
304 | static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr) | ||
305 | { | ||
306 | int i; | ||
307 | |||
308 | for (i = 0; i < btsr->snapshot_ref_cnt; i++) | ||
309 | zfree(&btsr->snapshot_refs[i].ref_buf); | ||
310 | zfree(&btsr->snapshot_refs); | ||
311 | } | ||
312 | |||
313 | static void intel_bts_recording_free(struct auxtrace_record *itr) | ||
314 | { | ||
315 | struct intel_bts_recording *btsr = | ||
316 | container_of(itr, struct intel_bts_recording, itr); | ||
317 | |||
318 | intel_bts_free_snapshot_refs(btsr); | ||
319 | free(btsr); | ||
320 | } | ||
321 | |||
322 | static int intel_bts_snapshot_start(struct auxtrace_record *itr) | ||
323 | { | ||
324 | struct intel_bts_recording *btsr = | ||
325 | container_of(itr, struct intel_bts_recording, itr); | ||
326 | struct perf_evsel *evsel; | ||
327 | |||
328 | evlist__for_each(btsr->evlist, evsel) { | ||
329 | if (evsel->attr.type == btsr->intel_bts_pmu->type) | ||
330 | return perf_evlist__disable_event(btsr->evlist, evsel); | ||
331 | } | ||
332 | return -EINVAL; | ||
333 | } | ||
334 | |||
335 | static int intel_bts_snapshot_finish(struct auxtrace_record *itr) | ||
336 | { | ||
337 | struct intel_bts_recording *btsr = | ||
338 | container_of(itr, struct intel_bts_recording, itr); | ||
339 | struct perf_evsel *evsel; | ||
340 | |||
341 | evlist__for_each(btsr->evlist, evsel) { | ||
342 | if (evsel->attr.type == btsr->intel_bts_pmu->type) | ||
343 | return perf_evlist__enable_event(btsr->evlist, evsel); | ||
344 | } | ||
345 | return -EINVAL; | ||
346 | } | ||
347 | |||
348 | static bool intel_bts_first_wrap(u64 *data, size_t buf_size) | ||
349 | { | ||
350 | int i, a, b; | ||
351 | |||
352 | b = buf_size >> 3; | ||
353 | a = b - 512; | ||
354 | if (a < 0) | ||
355 | a = 0; | ||
356 | |||
357 | for (i = a; i < b; i++) { | ||
358 | if (data[i]) | ||
359 | return true; | ||
360 | } | ||
361 | |||
362 | return false; | ||
363 | } | ||
364 | |||
365 | static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx, | ||
366 | struct auxtrace_mmap *mm, unsigned char *data, | ||
367 | u64 *head, u64 *old) | ||
368 | { | ||
369 | struct intel_bts_recording *btsr = | ||
370 | container_of(itr, struct intel_bts_recording, itr); | ||
371 | bool wrapped; | ||
372 | int err; | ||
373 | |||
374 | pr_debug3("%s: mmap index %d old head %zu new head %zu\n", | ||
375 | __func__, idx, (size_t)*old, (size_t)*head); | ||
376 | |||
377 | if (idx >= btsr->snapshot_ref_cnt) { | ||
378 | err = intel_bts_alloc_snapshot_refs(btsr, idx); | ||
379 | if (err) | ||
380 | goto out_err; | ||
381 | } | ||
382 | |||
383 | wrapped = btsr->snapshot_refs[idx].wrapped; | ||
384 | if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) { | ||
385 | btsr->snapshot_refs[idx].wrapped = true; | ||
386 | wrapped = true; | ||
387 | } | ||
388 | |||
389 | /* | ||
390 | * In full trace mode 'head' continually increases. However in snapshot | ||
391 | * mode 'head' is an offset within the buffer. Here 'old' and 'head' | ||
392 | * are adjusted to match the full trace case which expects that 'old' is | ||
393 | * always less than 'head'. | ||
394 | */ | ||
395 | if (wrapped) { | ||
396 | *old = *head; | ||
397 | *head += mm->len; | ||
398 | } else { | ||
399 | if (mm->mask) | ||
400 | *old &= mm->mask; | ||
401 | else | ||
402 | *old %= mm->len; | ||
403 | if (*old > *head) | ||
404 | *head += mm->len; | ||
405 | } | ||
406 | |||
407 | pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", | ||
408 | __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); | ||
409 | |||
410 | return 0; | ||
411 | |||
412 | out_err: | ||
413 | pr_err("%s: failed, error %d\n", __func__, err); | ||
414 | return err; | ||
415 | } | ||
416 | |||
417 | static int intel_bts_read_finish(struct auxtrace_record *itr, int idx) | ||
418 | { | ||
419 | struct intel_bts_recording *btsr = | ||
420 | container_of(itr, struct intel_bts_recording, itr); | ||
421 | struct perf_evsel *evsel; | ||
422 | |||
423 | evlist__for_each(btsr->evlist, evsel) { | ||
424 | if (evsel->attr.type == btsr->intel_bts_pmu->type) | ||
425 | return perf_evlist__enable_event_idx(btsr->evlist, | ||
426 | evsel, idx); | ||
427 | } | ||
428 | return -EINVAL; | ||
429 | } | ||
430 | |||
431 | struct auxtrace_record *intel_bts_recording_init(int *err) | ||
432 | { | ||
433 | struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME); | ||
434 | struct intel_bts_recording *btsr; | ||
435 | |||
436 | if (!intel_bts_pmu) | ||
437 | return NULL; | ||
438 | |||
439 | btsr = zalloc(sizeof(struct intel_bts_recording)); | ||
440 | if (!btsr) { | ||
441 | *err = -ENOMEM; | ||
442 | return NULL; | ||
443 | } | ||
444 | |||
445 | btsr->intel_bts_pmu = intel_bts_pmu; | ||
446 | btsr->itr.recording_options = intel_bts_recording_options; | ||
447 | btsr->itr.info_priv_size = intel_bts_info_priv_size; | ||
448 | btsr->itr.info_fill = intel_bts_info_fill; | ||
449 | btsr->itr.free = intel_bts_recording_free; | ||
450 | btsr->itr.snapshot_start = intel_bts_snapshot_start; | ||
451 | btsr->itr.snapshot_finish = intel_bts_snapshot_finish; | ||
452 | btsr->itr.find_snapshot = intel_bts_find_snapshot; | ||
453 | btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options; | ||
454 | btsr->itr.reference = intel_bts_reference; | ||
455 | btsr->itr.read_finish = intel_bts_read_finish; | ||
456 | btsr->itr.alignment = sizeof(struct branch); | ||
457 | return &btsr->itr; | ||
458 | } | ||
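The comment in intel_bts_find_snapshot() above explains how, in snapshot mode, 'old' and 'head' are rewritten so the data looks like a continually growing full trace. A standalone sketch of that adjustment with example numbers (a 64 KiB buffer that has already wrapped; simplified, not part of the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long len = 64 * 1024;   /* AUX buffer size */
            unsigned long long head = 12345;      /* in-buffer offset at snapshot time */
            unsigned long long old = 0;           /* previously reported position */
            int wrapped = 1;                      /* outcome of the wrap heuristic */

            if (wrapped) {
                    /* Whole buffer is valid: report exactly one buffer's worth. */
                    old = head;
                    head += len;
            } else {
                    old %= len;                   /* treat 'old' as an in-buffer offset too */
                    if (old > head)
                            head += len;          /* head has lapped 'old' */
            }

            printf("consume %llu bytes: [%llu, %llu)\n", head - old, old, head);
            return 0;
    }

With these numbers the consumer sees exactly one buffer's worth of data, [12345, 77881), i.e. 65536 bytes.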
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c new file mode 100644 index 000000000000..2ca10d796c0b --- /dev/null +++ b/tools/perf/arch/x86/util/intel-pt.c | |||
@@ -0,0 +1,1007 @@ | |||
1 | /* | ||
2 | * intel_pt.c: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2015, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <stdbool.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/bitops.h> | ||
20 | #include <linux/log2.h> | ||
21 | #include <cpuid.h> | ||
22 | |||
23 | #include "../../perf.h" | ||
24 | #include "../../util/session.h" | ||
25 | #include "../../util/event.h" | ||
26 | #include "../../util/evlist.h" | ||
27 | #include "../../util/evsel.h" | ||
28 | #include "../../util/cpumap.h" | ||
29 | #include "../../util/parse-options.h" | ||
30 | #include "../../util/parse-events.h" | ||
31 | #include "../../util/pmu.h" | ||
32 | #include "../../util/debug.h" | ||
33 | #include "../../util/auxtrace.h" | ||
34 | #include "../../util/tsc.h" | ||
35 | #include "../../util/intel-pt.h" | ||
36 | |||
37 | #define KiB(x) ((x) * 1024) | ||
38 | #define MiB(x) ((x) * 1024 * 1024) | ||
39 | #define KiB_MASK(x) (KiB(x) - 1) | ||
40 | #define MiB_MASK(x) (MiB(x) - 1) | ||
41 | |||
42 | #define INTEL_PT_DEFAULT_SAMPLE_SIZE KiB(4) | ||
43 | |||
44 | #define INTEL_PT_MAX_SAMPLE_SIZE KiB(60) | ||
45 | |||
46 | #define INTEL_PT_PSB_PERIOD_NEAR 256 | ||
47 | |||
48 | struct intel_pt_snapshot_ref { | ||
49 | void *ref_buf; | ||
50 | size_t ref_offset; | ||
51 | bool wrapped; | ||
52 | }; | ||
53 | |||
54 | struct intel_pt_recording { | ||
55 | struct auxtrace_record itr; | ||
56 | struct perf_pmu *intel_pt_pmu; | ||
57 | int have_sched_switch; | ||
58 | struct perf_evlist *evlist; | ||
59 | bool snapshot_mode; | ||
60 | bool snapshot_init_done; | ||
61 | size_t snapshot_size; | ||
62 | size_t snapshot_ref_buf_size; | ||
63 | int snapshot_ref_cnt; | ||
64 | struct intel_pt_snapshot_ref *snapshot_refs; | ||
65 | }; | ||
66 | |||
67 | static int intel_pt_parse_terms_with_default(struct list_head *formats, | ||
68 | const char *str, | ||
69 | u64 *config) | ||
70 | { | ||
71 | struct list_head *terms; | ||
72 | struct perf_event_attr attr = { .size = 0, }; | ||
73 | int err; | ||
74 | |||
75 | terms = malloc(sizeof(struct list_head)); | ||
76 | if (!terms) | ||
77 | return -ENOMEM; | ||
78 | |||
79 | INIT_LIST_HEAD(terms); | ||
80 | |||
81 | err = parse_events_terms(terms, str); | ||
82 | if (err) | ||
83 | goto out_free; | ||
84 | |||
85 | attr.config = *config; | ||
86 | err = perf_pmu__config_terms(formats, &attr, terms, true, NULL); | ||
87 | if (err) | ||
88 | goto out_free; | ||
89 | |||
90 | *config = attr.config; | ||
91 | out_free: | ||
92 | parse_events__free_terms(terms); | ||
93 | return err; | ||
94 | } | ||
95 | |||
96 | static int intel_pt_parse_terms(struct list_head *formats, const char *str, | ||
97 | u64 *config) | ||
98 | { | ||
99 | *config = 0; | ||
100 | return intel_pt_parse_terms_with_default(formats, str, config); | ||
101 | } | ||
102 | |||
103 | static u64 intel_pt_masked_bits(u64 mask, u64 bits) | ||
104 | { | ||
105 | const u64 top_bit = 1ULL << 63; | ||
106 | u64 res = 0; | ||
107 | int i; | ||
108 | |||
109 | for (i = 0; i < 64; i++) { | ||
110 | if (mask & top_bit) { | ||
111 | res <<= 1; | ||
112 | if (bits & top_bit) | ||
113 | res |= 1; | ||
114 | } | ||
115 | mask <<= 1; | ||
116 | bits <<= 1; | ||
117 | } | ||
118 | |||
119 | return res; | ||
120 | } | ||
121 | |||
122 | static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str, | ||
123 | struct perf_evlist *evlist, u64 *res) | ||
124 | { | ||
125 | struct perf_evsel *evsel; | ||
126 | u64 mask; | ||
127 | |||
128 | *res = 0; | ||
129 | |||
130 | mask = perf_pmu__format_bits(&intel_pt_pmu->format, str); | ||
131 | if (!mask) | ||
132 | return -EINVAL; | ||
133 | |||
134 | evlist__for_each(evlist, evsel) { | ||
135 | if (evsel->attr.type == intel_pt_pmu->type) { | ||
136 | *res = intel_pt_masked_bits(mask, evsel->attr.config); | ||
137 | return 0; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | return -EINVAL; | ||
142 | } | ||
143 | |||
144 | static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu, | ||
145 | struct perf_evlist *evlist) | ||
146 | { | ||
147 | u64 val; | ||
148 | int err, topa_multiple_entries; | ||
149 | size_t psb_period; | ||
150 | |||
151 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries", | ||
152 | "%d", &topa_multiple_entries) != 1) | ||
153 | topa_multiple_entries = 0; | ||
154 | |||
155 | /* | ||
156 | * Use caps/topa_multiple_entries to indicate early hardware that had | ||
157 | * extra frequent PSBs. | ||
158 | */ | ||
159 | if (!topa_multiple_entries) { | ||
160 | psb_period = 256; | ||
161 | goto out; | ||
162 | } | ||
163 | |||
164 | err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val); | ||
165 | if (err) | ||
166 | val = 0; | ||
167 | |||
168 | psb_period = 1 << (val + 11); | ||
169 | out: | ||
170 | pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period); | ||
171 | return psb_period; | ||
172 | } | ||
173 | |||
174 | static int intel_pt_pick_bit(int bits, int target) | ||
175 | { | ||
176 | int pos, pick = -1; | ||
177 | |||
178 | for (pos = 0; bits; bits >>= 1, pos++) { | ||
179 | if (bits & 1) { | ||
180 | if (pos <= target || pick < 0) | ||
181 | pick = pos; | ||
182 | if (pos >= target) | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | |||
187 | return pick; | ||
188 | } | ||
189 | |||
190 | static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu) | ||
191 | { | ||
192 | char buf[256]; | ||
193 | int mtc, mtc_periods = 0, mtc_period; | ||
194 | int psb_cyc, psb_periods, psb_period; | ||
195 | int pos = 0; | ||
196 | u64 config; | ||
197 | |||
198 | pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc"); | ||
199 | |||
200 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d", | ||
201 | &mtc) != 1) | ||
202 | mtc = 1; | ||
203 | |||
204 | if (mtc) { | ||
205 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x", | ||
206 | &mtc_periods) != 1) | ||
207 | mtc_periods = 0; | ||
208 | if (mtc_periods) { | ||
209 | mtc_period = intel_pt_pick_bit(mtc_periods, 3); | ||
210 | pos += scnprintf(buf + pos, sizeof(buf) - pos, | ||
211 | ",mtc,mtc_period=%d", mtc_period); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d", | ||
216 | &psb_cyc) != 1) | ||
217 | psb_cyc = 1; | ||
218 | |||
219 | if (psb_cyc && mtc_periods) { | ||
220 | if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x", | ||
221 | &psb_periods) != 1) | ||
222 | psb_periods = 0; | ||
223 | if (psb_periods) { | ||
224 | psb_period = intel_pt_pick_bit(psb_periods, 3); | ||
225 | pos += scnprintf(buf + pos, sizeof(buf) - pos, | ||
226 | ",psb_period=%d", psb_period); | ||
227 | } | ||
228 | } | ||
229 | |||
230 | pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf); | ||
231 | |||
232 | intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config); | ||
233 | |||
234 | return config; | ||
235 | } | ||
236 | |||
237 | static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr, | ||
238 | struct record_opts *opts, | ||
239 | const char *str) | ||
240 | { | ||
241 | struct intel_pt_recording *ptr = | ||
242 | container_of(itr, struct intel_pt_recording, itr); | ||
243 | unsigned long long snapshot_size = 0; | ||
244 | char *endptr; | ||
245 | |||
246 | if (str) { | ||
247 | snapshot_size = strtoull(str, &endptr, 0); | ||
248 | if (*endptr || snapshot_size > SIZE_MAX) | ||
249 | return -1; | ||
250 | } | ||
251 | |||
252 | opts->auxtrace_snapshot_mode = true; | ||
253 | opts->auxtrace_snapshot_size = snapshot_size; | ||
254 | |||
255 | ptr->snapshot_size = snapshot_size; | ||
256 | |||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | struct perf_event_attr * | ||
261 | intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu) | ||
262 | { | ||
263 | struct perf_event_attr *attr; | ||
264 | |||
265 | attr = zalloc(sizeof(struct perf_event_attr)); | ||
266 | if (!attr) | ||
267 | return NULL; | ||
268 | |||
269 | attr->config = intel_pt_default_config(intel_pt_pmu); | ||
270 | |||
271 | intel_pt_pmu->selectable = true; | ||
272 | |||
273 | return attr; | ||
274 | } | ||
275 | |||
276 | static size_t intel_pt_info_priv_size(struct auxtrace_record *itr __maybe_unused) | ||
277 | { | ||
278 | return INTEL_PT_AUXTRACE_PRIV_SIZE; | ||
279 | } | ||
280 | |||
281 | static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d) | ||
282 | { | ||
283 | unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; | ||
284 | |||
285 | __get_cpuid(0x15, &eax, &ebx, &ecx, &edx); | ||
286 | *n = ebx; | ||
287 | *d = eax; | ||
288 | } | ||
289 | |||
290 | static int intel_pt_info_fill(struct auxtrace_record *itr, | ||
291 | struct perf_session *session, | ||
292 | struct auxtrace_info_event *auxtrace_info, | ||
293 | size_t priv_size) | ||
294 | { | ||
295 | struct intel_pt_recording *ptr = | ||
296 | container_of(itr, struct intel_pt_recording, itr); | ||
297 | struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; | ||
298 | struct perf_event_mmap_page *pc; | ||
299 | struct perf_tsc_conversion tc = { .time_mult = 0, }; | ||
300 | bool cap_user_time_zero = false, per_cpu_mmaps; | ||
301 | u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit; | ||
302 | u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d; | ||
303 | int err; | ||
304 | |||
305 | if (priv_size != INTEL_PT_AUXTRACE_PRIV_SIZE) | ||
306 | return -EINVAL; | ||
307 | |||
308 | intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); | ||
309 | intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp", | ||
310 | &noretcomp_bit); | ||
311 | intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit); | ||
312 | mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format, | ||
313 | "mtc_period"); | ||
314 | intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit); | ||
315 | |||
316 | intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d); | ||
317 | |||
318 | if (!session->evlist->nr_mmaps) | ||
319 | return -EINVAL; | ||
320 | |||
321 | pc = session->evlist->mmap[0].base; | ||
322 | if (pc) { | ||
323 | err = perf_read_tsc_conversion(pc, &tc); | ||
324 | if (err) { | ||
325 | if (err != -EOPNOTSUPP) | ||
326 | return err; | ||
327 | } else { | ||
328 | cap_user_time_zero = tc.time_mult != 0; | ||
329 | } | ||
330 | if (!cap_user_time_zero) | ||
331 | ui__warning("Intel Processor Trace: TSC not available\n"); | ||
332 | } | ||
333 | |||
334 | per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus); | ||
335 | |||
336 | auxtrace_info->type = PERF_AUXTRACE_INTEL_PT; | ||
337 | auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type; | ||
338 | auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift; | ||
339 | auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult; | ||
340 | auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero; | ||
341 | auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero; | ||
342 | auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit; | ||
343 | auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit; | ||
344 | auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch; | ||
345 | auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode; | ||
346 | auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps; | ||
347 | auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit; | ||
348 | auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits; | ||
349 | auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n; | ||
350 | auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d; | ||
351 | auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit; | ||
352 | |||
353 | return 0; | ||
354 | } | ||
355 | |||
356 | static int intel_pt_track_switches(struct perf_evlist *evlist) | ||
357 | { | ||
358 | const char *sched_switch = "sched:sched_switch"; | ||
359 | struct perf_evsel *evsel; | ||
360 | int err; | ||
361 | |||
362 | if (!perf_evlist__can_select_event(evlist, sched_switch)) | ||
363 | return -EPERM; | ||
364 | |||
365 | err = parse_events(evlist, sched_switch, NULL); | ||
366 | if (err) { | ||
367 | pr_debug2("%s: failed to parse %s, error %d\n", | ||
368 | __func__, sched_switch, err); | ||
369 | return err; | ||
370 | } | ||
371 | |||
372 | evsel = perf_evlist__last(evlist); | ||
373 | |||
374 | perf_evsel__set_sample_bit(evsel, CPU); | ||
375 | perf_evsel__set_sample_bit(evsel, TIME); | ||
376 | |||
377 | evsel->system_wide = true; | ||
378 | evsel->no_aux_samples = true; | ||
379 | evsel->immediate = true; | ||
380 | |||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | static void intel_pt_valid_str(char *str, size_t len, u64 valid) | ||
385 | { | ||
386 | unsigned int val, last = 0, state = 1; | ||
387 | int p = 0; | ||
388 | |||
389 | str[0] = '\0'; | ||
390 | |||
391 | for (val = 0; val <= 64; val++, valid >>= 1) { | ||
392 | if (valid & 1) { | ||
393 | last = val; | ||
394 | switch (state) { | ||
395 | case 0: | ||
396 | p += scnprintf(str + p, len - p, ","); | ||
397 | /* Fall through */ | ||
398 | case 1: | ||
399 | p += scnprintf(str + p, len - p, "%u", val); | ||
400 | state = 2; | ||
401 | break; | ||
402 | case 2: | ||
403 | state = 3; | ||
404 | break; | ||
405 | case 3: | ||
406 | state = 4; | ||
407 | break; | ||
408 | default: | ||
409 | break; | ||
410 | } | ||
411 | } else { | ||
412 | switch (state) { | ||
413 | case 3: | ||
414 | p += scnprintf(str + p, len - p, ",%u", last); | ||
415 | state = 0; | ||
416 | break; | ||
417 | case 4: | ||
418 | p += scnprintf(str + p, len - p, "-%u", last); | ||
419 | state = 0; | ||
420 | break; | ||
421 | default: | ||
422 | break; | ||
423 | } | ||
424 | if (state != 1) | ||
425 | state = 0; | ||
426 | } | ||
427 | } | ||
428 | } | ||
429 | |||
430 | static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu, | ||
431 | const char *caps, const char *name, | ||
432 | const char *supported, u64 config) | ||
433 | { | ||
434 | char valid_str[256]; | ||
435 | unsigned int shift; | ||
436 | unsigned long long valid; | ||
437 | u64 bits; | ||
438 | int ok; | ||
439 | |||
440 | if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1) | ||
441 | valid = 0; | ||
442 | |||
443 | if (supported && | ||
444 | perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok) | ||
445 | valid = 0; | ||
446 | |||
447 | valid |= 1; | ||
448 | |||
449 | bits = perf_pmu__format_bits(&intel_pt_pmu->format, name); | ||
450 | |||
451 | config &= bits; | ||
452 | |||
453 | for (shift = 0; bits && !(bits & 1); shift++) | ||
454 | bits >>= 1; | ||
455 | |||
456 | config >>= shift; | ||
457 | |||
458 | if (config > 63) | ||
459 | goto out_err; | ||
460 | |||
461 | if (valid & (1 << config)) | ||
462 | return 0; | ||
463 | out_err: | ||
464 | intel_pt_valid_str(valid_str, sizeof(valid_str), valid); | ||
465 | pr_err("Invalid %s for %s. Valid values are: %s\n", | ||
466 | name, INTEL_PT_PMU_NAME, valid_str); | ||
467 | return -EINVAL; | ||
468 | } | ||
469 | |||
470 | static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu, | ||
471 | struct perf_evsel *evsel) | ||
472 | { | ||
473 | int err; | ||
474 | |||
475 | if (!evsel) | ||
476 | return 0; | ||
477 | |||
478 | err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds", | ||
479 | "cyc_thresh", "caps/psb_cyc", | ||
480 | evsel->attr.config); | ||
481 | if (err) | ||
482 | return err; | ||
483 | |||
484 | err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods", | ||
485 | "mtc_period", "caps/mtc", | ||
486 | evsel->attr.config); | ||
487 | if (err) | ||
488 | return err; | ||
489 | |||
490 | return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods", | ||
491 | "psb_period", "caps/psb_cyc", | ||
492 | evsel->attr.config); | ||
493 | } | ||
494 | |||
495 | static int intel_pt_recording_options(struct auxtrace_record *itr, | ||
496 | struct perf_evlist *evlist, | ||
497 | struct record_opts *opts) | ||
498 | { | ||
499 | struct intel_pt_recording *ptr = | ||
500 | container_of(itr, struct intel_pt_recording, itr); | ||
501 | struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; | ||
502 | bool have_timing_info; | ||
503 | struct perf_evsel *evsel, *intel_pt_evsel = NULL; | ||
504 | const struct cpu_map *cpus = evlist->cpus; | ||
505 | bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; | ||
506 | u64 tsc_bit; | ||
507 | int err; | ||
508 | |||
509 | ptr->evlist = evlist; | ||
510 | ptr->snapshot_mode = opts->auxtrace_snapshot_mode; | ||
511 | |||
512 | evlist__for_each(evlist, evsel) { | ||
513 | if (evsel->attr.type == intel_pt_pmu->type) { | ||
514 | if (intel_pt_evsel) { | ||
515 | pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n"); | ||
516 | return -EINVAL; | ||
517 | } | ||
518 | evsel->attr.freq = 0; | ||
519 | evsel->attr.sample_period = 1; | ||
520 | intel_pt_evsel = evsel; | ||
521 | opts->full_auxtrace = true; | ||
522 | } | ||
523 | } | ||
524 | |||
525 | if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) { | ||
526 | pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n"); | ||
527 | return -EINVAL; | ||
528 | } | ||
529 | |||
530 | if (opts->use_clockid) { | ||
531 | pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n"); | ||
532 | return -EINVAL; | ||
533 | } | ||
534 | |||
535 | if (!opts->full_auxtrace) | ||
536 | return 0; | ||
537 | |||
538 | err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel); | ||
539 | if (err) | ||
540 | return err; | ||
541 | |||
542 | /* Set default sizes for snapshot mode */ | ||
543 | if (opts->auxtrace_snapshot_mode) { | ||
544 | size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist); | ||
545 | |||
546 | if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) { | ||
547 | if (privileged) { | ||
548 | opts->auxtrace_mmap_pages = MiB(4) / page_size; | ||
549 | } else { | ||
550 | opts->auxtrace_mmap_pages = KiB(128) / page_size; | ||
551 | if (opts->mmap_pages == UINT_MAX) | ||
552 | opts->mmap_pages = KiB(256) / page_size; | ||
553 | } | ||
554 | } else if (!opts->auxtrace_mmap_pages && !privileged && | ||
555 | opts->mmap_pages == UINT_MAX) { | ||
556 | opts->mmap_pages = KiB(256) / page_size; | ||
557 | } | ||
558 | if (!opts->auxtrace_snapshot_size) | ||
559 | opts->auxtrace_snapshot_size = | ||
560 | opts->auxtrace_mmap_pages * (size_t)page_size; | ||
561 | if (!opts->auxtrace_mmap_pages) { | ||
562 | size_t sz = opts->auxtrace_snapshot_size; | ||
563 | |||
564 | sz = round_up(sz, page_size) / page_size; | ||
565 | opts->auxtrace_mmap_pages = roundup_pow_of_two(sz); | ||
566 | } | ||
567 | if (opts->auxtrace_snapshot_size > | ||
568 | opts->auxtrace_mmap_pages * (size_t)page_size) { | ||
569 | pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n", | ||
570 | opts->auxtrace_snapshot_size, | ||
571 | opts->auxtrace_mmap_pages * (size_t)page_size); | ||
572 | return -EINVAL; | ||
573 | } | ||
574 | if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) { | ||
575 | pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n"); | ||
576 | return -EINVAL; | ||
577 | } | ||
578 | pr_debug2("Intel PT snapshot size: %zu\n", | ||
579 | opts->auxtrace_snapshot_size); | ||
580 | if (psb_period && | ||
581 | opts->auxtrace_snapshot_size <= psb_period + | ||
582 | INTEL_PT_PSB_PERIOD_NEAR) | ||
583 | ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n", | ||
584 | opts->auxtrace_snapshot_size, psb_period); | ||
585 | } | ||
586 | |||
587 | /* Set default sizes for full trace mode */ | ||
588 | if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { | ||
589 | if (privileged) { | ||
590 | opts->auxtrace_mmap_pages = MiB(4) / page_size; | ||
591 | } else { | ||
592 | opts->auxtrace_mmap_pages = KiB(128) / page_size; | ||
593 | if (opts->mmap_pages == UINT_MAX) | ||
594 | opts->mmap_pages = KiB(256) / page_size; | ||
595 | } | ||
596 | } | ||
597 | |||
598 | /* Validate auxtrace_mmap_pages */ | ||
599 | if (opts->auxtrace_mmap_pages) { | ||
600 | size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; | ||
601 | size_t min_sz; | ||
602 | |||
603 | if (opts->auxtrace_snapshot_mode) | ||
604 | min_sz = KiB(4); | ||
605 | else | ||
606 | min_sz = KiB(8); | ||
607 | |||
608 | if (sz < min_sz || !is_power_of_2(sz)) { | ||
609 | pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n", | ||
610 | min_sz / 1024); | ||
611 | return -EINVAL; | ||
612 | } | ||
613 | } | ||
614 | |||
615 | intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit); | ||
616 | |||
617 | if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit)) | ||
618 | have_timing_info = true; | ||
619 | else | ||
620 | have_timing_info = false; | ||
621 | |||
622 | /* | ||
623 | * Per-cpu recording needs sched_switch events to distinguish different | ||
624 | * threads. | ||
625 | */ | ||
626 | if (have_timing_info && !cpu_map__empty(cpus)) { | ||
627 | err = intel_pt_track_switches(evlist); | ||
628 | if (err == -EPERM) | ||
629 | pr_debug2("Unable to select sched:sched_switch\n"); | ||
630 | else if (err) | ||
631 | return err; | ||
632 | else | ||
633 | ptr->have_sched_switch = 1; | ||
634 | } | ||
635 | |||
636 | if (intel_pt_evsel) { | ||
637 | /* | ||
638 | * To obtain the auxtrace buffer file descriptor, the auxtrace | ||
639 | * event must come first. | ||
640 | */ | ||
641 | perf_evlist__to_front(evlist, intel_pt_evsel); | ||
642 | /* | ||
643 | * In the case of per-cpu mmaps, we need the CPU on the | ||
644 | * AUX event. | ||
645 | */ | ||
646 | if (!cpu_map__empty(cpus)) | ||
647 | perf_evsel__set_sample_bit(intel_pt_evsel, CPU); | ||
648 | } | ||
649 | |||
650 | /* Add dummy event to keep tracking */ | ||
651 | if (opts->full_auxtrace) { | ||
652 | struct perf_evsel *tracking_evsel; | ||
653 | |||
654 | err = parse_events(evlist, "dummy:u", NULL); | ||
655 | if (err) | ||
656 | return err; | ||
657 | |||
658 | tracking_evsel = perf_evlist__last(evlist); | ||
659 | |||
660 | perf_evlist__set_tracking_event(evlist, tracking_evsel); | ||
661 | |||
662 | tracking_evsel->attr.freq = 0; | ||
663 | tracking_evsel->attr.sample_period = 1; | ||
664 | |||
665 | /* In per-cpu case, always need the time of mmap events etc */ | ||
666 | if (!cpu_map__empty(cpus)) | ||
667 | perf_evsel__set_sample_bit(tracking_evsel, TIME); | ||
668 | } | ||
669 | |||
670 | /* | ||
671 | * Warn the user when we do not have enough information to decode i.e. | ||
672 | * per-cpu with no sched_switch (except workload-only). | ||
673 | */ | ||
674 | if (!ptr->have_sched_switch && !cpu_map__empty(cpus) && | ||
675 | !target__none(&opts->target)) | ||
676 | ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n"); | ||
677 | |||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | static int intel_pt_snapshot_start(struct auxtrace_record *itr) | ||
682 | { | ||
683 | struct intel_pt_recording *ptr = | ||
684 | container_of(itr, struct intel_pt_recording, itr); | ||
685 | struct perf_evsel *evsel; | ||
686 | |||
687 | evlist__for_each(ptr->evlist, evsel) { | ||
688 | if (evsel->attr.type == ptr->intel_pt_pmu->type) | ||
689 | return perf_evlist__disable_event(ptr->evlist, evsel); | ||
690 | } | ||
691 | return -EINVAL; | ||
692 | } | ||
693 | |||
694 | static int intel_pt_snapshot_finish(struct auxtrace_record *itr) | ||
695 | { | ||
696 | struct intel_pt_recording *ptr = | ||
697 | container_of(itr, struct intel_pt_recording, itr); | ||
698 | struct perf_evsel *evsel; | ||
699 | |||
700 | evlist__for_each(ptr->evlist, evsel) { | ||
701 | if (evsel->attr.type == ptr->intel_pt_pmu->type) | ||
702 | return perf_evlist__enable_event(ptr->evlist, evsel); | ||
703 | } | ||
704 | return -EINVAL; | ||
705 | } | ||
706 | |||
707 | static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx) | ||
708 | { | ||
709 | const size_t sz = sizeof(struct intel_pt_snapshot_ref); | ||
710 | int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2; | ||
711 | struct intel_pt_snapshot_ref *refs; | ||
712 | |||
713 | if (!new_cnt) | ||
714 | new_cnt = 16; | ||
715 | |||
716 | while (new_cnt <= idx) | ||
717 | new_cnt *= 2; | ||
718 | |||
719 | refs = calloc(new_cnt, sz); | ||
720 | if (!refs) | ||
721 | return -ENOMEM; | ||
722 | |||
723 | memcpy(refs, ptr->snapshot_refs, cnt * sz); | ||
724 | |||
725 | ptr->snapshot_refs = refs; | ||
726 | ptr->snapshot_ref_cnt = new_cnt; | ||
727 | |||
728 | return 0; | ||
729 | } | ||
730 | |||
731 | static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr) | ||
732 | { | ||
733 | int i; | ||
734 | |||
735 | for (i = 0; i < ptr->snapshot_ref_cnt; i++) | ||
736 | zfree(&ptr->snapshot_refs[i].ref_buf); | ||
737 | zfree(&ptr->snapshot_refs); | ||
738 | } | ||
739 | |||
740 | static void intel_pt_recording_free(struct auxtrace_record *itr) | ||
741 | { | ||
742 | struct intel_pt_recording *ptr = | ||
743 | container_of(itr, struct intel_pt_recording, itr); | ||
744 | |||
745 | intel_pt_free_snapshot_refs(ptr); | ||
746 | free(ptr); | ||
747 | } | ||
748 | |||
749 | static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx, | ||
750 | size_t snapshot_buf_size) | ||
751 | { | ||
752 | size_t ref_buf_size = ptr->snapshot_ref_buf_size; | ||
753 | void *ref_buf; | ||
754 | |||
755 | ref_buf = zalloc(ref_buf_size); | ||
756 | if (!ref_buf) | ||
757 | return -ENOMEM; | ||
758 | |||
759 | ptr->snapshot_refs[idx].ref_buf = ref_buf; | ||
760 | ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size; | ||
761 | |||
762 | return 0; | ||
763 | } | ||
764 | |||
765 | static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr, | ||
766 | size_t snapshot_buf_size) | ||
767 | { | ||
768 | const size_t max_size = 256 * 1024; | ||
769 | size_t buf_size = 0, psb_period; | ||
770 | |||
771 | if (ptr->snapshot_size <= 64 * 1024) | ||
772 | return 0; | ||
773 | |||
774 | psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist); | ||
775 | if (psb_period) | ||
776 | buf_size = psb_period * 2; | ||
777 | |||
778 | if (!buf_size || buf_size > max_size) | ||
779 | buf_size = max_size; | ||
780 | |||
781 | if (buf_size >= snapshot_buf_size) | ||
782 | return 0; | ||
783 | |||
784 | if (buf_size >= ptr->snapshot_size / 2) | ||
785 | return 0; | ||
786 | |||
787 | return buf_size; | ||
788 | } | ||
789 | |||
790 | static int intel_pt_snapshot_init(struct intel_pt_recording *ptr, | ||
791 | size_t snapshot_buf_size) | ||
792 | { | ||
793 | if (ptr->snapshot_init_done) | ||
794 | return 0; | ||
795 | |||
796 | ptr->snapshot_init_done = true; | ||
797 | |||
798 | ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr, | ||
799 | snapshot_buf_size); | ||
800 | |||
801 | return 0; | ||
802 | } | ||
803 | |||
804 | /** | ||
805 | * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer. | ||
806 | * @buf1: first buffer | ||
807 | * @compare_size: number of bytes to compare | ||
808 | * @buf2: second buffer (a circular buffer) | ||
809 | * @offs2: offset in second buffer | ||
810 | * @buf2_size: size of second buffer | ||
811 | * | ||
812 | * The comparison allows for the possibility that the bytes to compare in the | ||
813 | * circular buffer are not contiguous. It is assumed that @compare_size <= | ||
814 | * @buf2_size. This function returns %false if the bytes are identical, %true | ||
815 | * otherwise. | ||
816 | */ | ||
817 | static bool intel_pt_compare_buffers(void *buf1, size_t compare_size, | ||
818 | void *buf2, size_t offs2, size_t buf2_size) | ||
819 | { | ||
820 | size_t end2 = offs2 + compare_size, part_size; | ||
821 | |||
822 | if (end2 <= buf2_size) | ||
823 | return memcmp(buf1, buf2 + offs2, compare_size); | ||
824 | |||
825 | part_size = end2 - buf2_size; | ||
826 | if (memcmp(buf1, buf2 + offs2, part_size)) | ||
827 | return true; | ||
828 | |||
829 | compare_size -= part_size; | ||
830 | |||
831 | return memcmp(buf1 + part_size, buf2, compare_size); | ||
832 | } | ||
833 | |||
834 | static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset, | ||
835 | size_t ref_size, size_t buf_size, | ||
836 | void *data, size_t head) | ||
837 | { | ||
838 | size_t ref_end = ref_offset + ref_size; | ||
839 | |||
840 | if (ref_end > buf_size) { | ||
841 | if (head > ref_offset || head < ref_end - buf_size) | ||
842 | return true; | ||
843 | } else if (head > ref_offset && head < ref_end) { | ||
844 | return true; | ||
845 | } | ||
846 | |||
847 | return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset, | ||
848 | buf_size); | ||
849 | } | ||
850 | |||
851 | static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size, | ||
852 | void *data, size_t head) | ||
853 | { | ||
854 | if (head >= ref_size) { | ||
855 | memcpy(ref_buf, data + head - ref_size, ref_size); | ||
856 | } else { | ||
857 | memcpy(ref_buf, data, head); | ||
858 | ref_size -= head; | ||
859 | memcpy(ref_buf + head, data + buf_size - ref_size, ref_size); | ||
860 | } | ||
861 | } | ||
862 | |||
863 | static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx, | ||
864 | struct auxtrace_mmap *mm, unsigned char *data, | ||
865 | u64 head) | ||
866 | { | ||
867 | struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx]; | ||
868 | bool wrapped; | ||
869 | |||
870 | wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset, | ||
871 | ptr->snapshot_ref_buf_size, mm->len, | ||
872 | data, head); | ||
873 | |||
874 | intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len, | ||
875 | data, head); | ||
876 | |||
877 | return wrapped; | ||
878 | } | ||
879 | |||
880 | static bool intel_pt_first_wrap(u64 *data, size_t buf_size) | ||
881 | { | ||
882 | int i, a, b; | ||
883 | |||
884 | b = buf_size >> 3; | ||
885 | a = b - 512; | ||
886 | if (a < 0) | ||
887 | a = 0; | ||
888 | |||
889 | for (i = a; i < b; i++) { | ||
890 | if (data[i]) | ||
891 | return true; | ||
892 | } | ||
893 | |||
894 | return false; | ||
895 | } | ||
896 | |||
897 | static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx, | ||
898 | struct auxtrace_mmap *mm, unsigned char *data, | ||
899 | u64 *head, u64 *old) | ||
900 | { | ||
901 | struct intel_pt_recording *ptr = | ||
902 | container_of(itr, struct intel_pt_recording, itr); | ||
903 | bool wrapped; | ||
904 | int err; | ||
905 | |||
906 | pr_debug3("%s: mmap index %d old head %zu new head %zu\n", | ||
907 | __func__, idx, (size_t)*old, (size_t)*head); | ||
908 | |||
909 | err = intel_pt_snapshot_init(ptr, mm->len); | ||
910 | if (err) | ||
911 | goto out_err; | ||
912 | |||
913 | if (idx >= ptr->snapshot_ref_cnt) { | ||
914 | err = intel_pt_alloc_snapshot_refs(ptr, idx); | ||
915 | if (err) | ||
916 | goto out_err; | ||
917 | } | ||
918 | |||
919 | if (ptr->snapshot_ref_buf_size) { | ||
920 | if (!ptr->snapshot_refs[idx].ref_buf) { | ||
921 | err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len); | ||
922 | if (err) | ||
923 | goto out_err; | ||
924 | } | ||
925 | wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head); | ||
926 | } else { | ||
927 | wrapped = ptr->snapshot_refs[idx].wrapped; | ||
928 | if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) { | ||
929 | ptr->snapshot_refs[idx].wrapped = true; | ||
930 | wrapped = true; | ||
931 | } | ||
932 | } | ||
933 | |||
934 | /* | ||
935 | * In full trace mode 'head' continually increases. However in snapshot | ||
936 | * mode 'head' is an offset within the buffer. Here 'old' and 'head' | ||
937 | * are adjusted to match the full trace case which expects that 'old' is | ||
938 | * always less than 'head'. | ||
939 | */ | ||
940 | if (wrapped) { | ||
941 | *old = *head; | ||
942 | *head += mm->len; | ||
943 | } else { | ||
944 | if (mm->mask) | ||
945 | *old &= mm->mask; | ||
946 | else | ||
947 | *old %= mm->len; | ||
948 | if (*old > *head) | ||
949 | *head += mm->len; | ||
950 | } | ||
951 | |||
952 | pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n", | ||
953 | __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head); | ||
954 | |||
955 | return 0; | ||
956 | |||
957 | out_err: | ||
958 | pr_err("%s: failed, error %d\n", __func__, err); | ||
959 | return err; | ||
960 | } | ||
961 | |||
962 | static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) | ||
963 | { | ||
964 | return rdtsc(); | ||
965 | } | ||
966 | |||
967 | static int intel_pt_read_finish(struct auxtrace_record *itr, int idx) | ||
968 | { | ||
969 | struct intel_pt_recording *ptr = | ||
970 | container_of(itr, struct intel_pt_recording, itr); | ||
971 | struct perf_evsel *evsel; | ||
972 | |||
973 | evlist__for_each(ptr->evlist, evsel) { | ||
974 | if (evsel->attr.type == ptr->intel_pt_pmu->type) | ||
975 | return perf_evlist__enable_event_idx(ptr->evlist, evsel, | ||
976 | idx); | ||
977 | } | ||
978 | return -EINVAL; | ||
979 | } | ||
980 | |||
981 | struct auxtrace_record *intel_pt_recording_init(int *err) | ||
982 | { | ||
983 | struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME); | ||
984 | struct intel_pt_recording *ptr; | ||
985 | |||
986 | if (!intel_pt_pmu) | ||
987 | return NULL; | ||
988 | |||
989 | ptr = zalloc(sizeof(struct intel_pt_recording)); | ||
990 | if (!ptr) { | ||
991 | *err = -ENOMEM; | ||
992 | return NULL; | ||
993 | } | ||
994 | |||
995 | ptr->intel_pt_pmu = intel_pt_pmu; | ||
996 | ptr->itr.recording_options = intel_pt_recording_options; | ||
997 | ptr->itr.info_priv_size = intel_pt_info_priv_size; | ||
998 | ptr->itr.info_fill = intel_pt_info_fill; | ||
999 | ptr->itr.free = intel_pt_recording_free; | ||
1000 | ptr->itr.snapshot_start = intel_pt_snapshot_start; | ||
1001 | ptr->itr.snapshot_finish = intel_pt_snapshot_finish; | ||
1002 | ptr->itr.find_snapshot = intel_pt_find_snapshot; | ||
1003 | ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options; | ||
1004 | ptr->itr.reference = intel_pt_reference; | ||
1005 | ptr->itr.read_finish = intel_pt_read_finish; | ||
1006 | return &ptr->itr; | ||
1007 | } | ||
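Editor's note: the snapshot support above keeps a small reference copy of the tail of the AUX buffer (intel_pt_alloc_snapshot_ref) and, on the next snapshot, checks whether that region still matches (intel_pt_compare_ref / intel_pt_compare_buffers); a mismatch, or non-zero data near the end on the first pass (intel_pt_first_wrap), means the buffer wrapped, and intel_pt_find_snapshot then adjusts 'old'/'head' to look like full-trace offsets. A standalone sketch of the wrap-tolerant comparison idea, with invented names and not taken from the perf sources, is:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    /*
     * Compare 'cmp_size' bytes of a linear reference buffer against a
     * circular buffer starting at 'offs', allowing the compared region to
     * wrap past the end. Returns false if identical, true otherwise (the
     * same convention as the kernel-doc comment above). Assumes
     * cmp_size <= circ_size.
     */
    static bool ref_differs(const void *ref, size_t cmp_size,
                            const void *circ, size_t offs, size_t circ_size)
    {
            size_t first = circ_size - offs;    /* bytes before the wrap point */

            if (cmp_size <= first)
                    return memcmp(ref, (const char *)circ + offs, cmp_size);

            if (memcmp(ref, (const char *)circ + offs, first))
                    return true;

            return memcmp((const char *)ref + first, circ, cmp_size - first);
    }

    int main(void)
    {
            char circ[8] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H' };
            char ref[4]  = { 'G', 'H', 'A', 'B' };  /* wraps from offset 6 to 1 */

            /* Prints "identical": the reference matches the wrapped region. */
            printf("%s\n", ref_differs(ref, sizeof(ref), circ, 6, sizeof(circ)) ?
                   "differs" : "identical");
            return 0;
    }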
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c new file mode 100644 index 000000000000..c5db14f36cc7 --- /dev/null +++ b/tools/perf/arch/x86/util/perf_regs.c | |||
@@ -0,0 +1,28 @@ | |||
1 | #include "../../perf.h" | ||
2 | #include "../../util/perf_regs.h" | ||
3 | |||
4 | const struct sample_reg sample_reg_masks[] = { | ||
5 | SMPL_REG(AX, PERF_REG_X86_AX), | ||
6 | SMPL_REG(BX, PERF_REG_X86_BX), | ||
7 | SMPL_REG(CX, PERF_REG_X86_CX), | ||
8 | SMPL_REG(DX, PERF_REG_X86_DX), | ||
9 | SMPL_REG(SI, PERF_REG_X86_SI), | ||
10 | SMPL_REG(DI, PERF_REG_X86_DI), | ||
11 | SMPL_REG(BP, PERF_REG_X86_BP), | ||
12 | SMPL_REG(SP, PERF_REG_X86_SP), | ||
13 | SMPL_REG(IP, PERF_REG_X86_IP), | ||
14 | SMPL_REG(FLAGS, PERF_REG_X86_FLAGS), | ||
15 | SMPL_REG(CS, PERF_REG_X86_CS), | ||
16 | SMPL_REG(SS, PERF_REG_X86_SS), | ||
17 | #ifdef HAVE_ARCH_X86_64_SUPPORT | ||
18 | SMPL_REG(R8, PERF_REG_X86_R8), | ||
19 | SMPL_REG(R9, PERF_REG_X86_R9), | ||
20 | SMPL_REG(R10, PERF_REG_X86_R10), | ||
21 | SMPL_REG(R11, PERF_REG_X86_R11), | ||
22 | SMPL_REG(R12, PERF_REG_X86_R12), | ||
23 | SMPL_REG(R13, PERF_REG_X86_R13), | ||
24 | SMPL_REG(R14, PERF_REG_X86_R14), | ||
25 | SMPL_REG(R15, PERF_REG_X86_R15), | ||
26 | #endif | ||
27 | SMPL_REG_END | ||
28 | }; | ||
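Editor's note: sample_reg_masks enumerates the registers x86 perf can capture with PERF_SAMPLE_REGS_INTR. Together with the parse-regs-options and builtin-script changes later in this series, it backs the new -I/--intr-regs record option and the 'iregs' script field. Illustrative usage (register names as listed by -I ?):

    perf record -I ?                         list the register names that can be sampled
    perf record -I AX,SP -- <workload>       sample selected registers on each interrupt
    perf script --fields ip,iregs            print the captured register values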
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c new file mode 100644 index 000000000000..79fe07158d00 --- /dev/null +++ b/tools/perf/arch/x86/util/pmu.c | |||
@@ -0,0 +1,18 @@ | |||
1 | #include <string.h> | ||
2 | |||
3 | #include <linux/perf_event.h> | ||
4 | |||
5 | #include "../../util/intel-pt.h" | ||
6 | #include "../../util/intel-bts.h" | ||
7 | #include "../../util/pmu.h" | ||
8 | |||
9 | struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) | ||
10 | { | ||
11 | #ifdef HAVE_AUXTRACE_SUPPORT | ||
12 | if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) | ||
13 | return intel_pt_pmu_default_config(pmu); | ||
14 | if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) | ||
15 | pmu->selectable = true; | ||
16 | #endif | ||
17 | return NULL; | ||
18 | } | ||
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/xtensa/Build | |||
@@ -0,0 +1 @@ | |||
libperf-y += util/ | |||
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile new file mode 100644 index 000000000000..7fbca175099e --- /dev/null +++ b/tools/perf/arch/xtensa/Makefile | |||
@@ -0,0 +1,3 @@ | |||
1 | ifndef NO_DWARF | ||
2 | PERF_HAVE_DWARF_REGS := 1 | ||
3 | endif | ||
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/xtensa/util/Build | |||
@@ -0,0 +1 @@ | |||
libperf-$(CONFIG_DWARF) += dwarf-regs.o | |||
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c new file mode 100644 index 000000000000..4dba76bfb4ce --- /dev/null +++ b/tools/perf/arch/xtensa/util/dwarf-regs.c | |||
@@ -0,0 +1,25 @@ | |||
1 | /* | ||
2 | * Mapping of DWARF debug register numbers into register names. | ||
3 | * | ||
4 | * Copyright (c) 2015 Cadence Design Systems Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; either version | ||
9 | * 2 of the License, or (at your option) any later version. | ||
10 | */ | ||
11 | |||
12 | #include <stddef.h> | ||
13 | #include <dwarf-regs.h> | ||
14 | |||
15 | #define XTENSA_MAX_REGS 16 | ||
16 | |||
17 | const char *xtensa_regs_table[XTENSA_MAX_REGS] = { | ||
18 | "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", | ||
19 | "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", | ||
20 | }; | ||
21 | |||
22 | const char *get_arch_regstr(unsigned int n) | ||
23 | { | ||
24 | return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; | ||
25 | } | ||
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index c3ab760e06b4..573e28896038 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build | |||
@@ -5,6 +5,7 @@ perf-y += futex-hash.o | |||
5 | perf-y += futex-wake.o | 5 | perf-y += futex-wake.o |
6 | perf-y += futex-wake-parallel.o | 6 | perf-y += futex-wake-parallel.o |
7 | perf-y += futex-requeue.o | 7 | perf-y += futex-requeue.o |
8 | perf-y += futex-lock-pi.o | ||
8 | 9 | ||
9 | perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o | 10 | perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o |
10 | perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o | 11 | perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o |
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 70b2f718cc21..a50df86f2b9b 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h | |||
@@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix); | |||
36 | extern int bench_futex_wake_parallel(int argc, const char **argv, | 36 | extern int bench_futex_wake_parallel(int argc, const char **argv, |
37 | const char *prefix); | 37 | const char *prefix); |
38 | extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); | 38 | extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); |
39 | /* pi futexes */ | ||
40 | extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix); | ||
39 | 41 | ||
40 | #define BENCH_FORMAT_DEFAULT_STR "default" | 42 | #define BENCH_FORMAT_DEFAULT_STR "default" |
41 | #define BENCH_FORMAT_DEFAULT 0 | 43 | #define BENCH_FORMAT_DEFAULT 0 |
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c new file mode 100644 index 000000000000..bc6a16adbca8 --- /dev/null +++ b/tools/perf/bench/futex-lock-pi.c | |||
@@ -0,0 +1,219 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015 Davidlohr Bueso. | ||
3 | */ | ||
4 | |||
5 | #include "../perf.h" | ||
6 | #include "../util/util.h" | ||
7 | #include "../util/stat.h" | ||
8 | #include "../util/parse-options.h" | ||
9 | #include "../util/header.h" | ||
10 | #include "bench.h" | ||
11 | #include "futex.h" | ||
12 | |||
13 | #include <err.h> | ||
14 | #include <stdlib.h> | ||
15 | #include <sys/time.h> | ||
16 | #include <pthread.h> | ||
17 | |||
18 | struct worker { | ||
19 | int tid; | ||
20 | u_int32_t *futex; | ||
21 | pthread_t thread; | ||
22 | unsigned long ops; | ||
23 | }; | ||
24 | |||
25 | static u_int32_t global_futex = 0; | ||
26 | static struct worker *worker; | ||
27 | static unsigned int nsecs = 10; | ||
28 | static bool silent = false, multi = false; | ||
29 | static bool done = false, fshared = false; | ||
30 | static unsigned int ncpus, nthreads = 0; | ||
31 | static int futex_flag = 0; | ||
32 | struct timeval start, end, runtime; | ||
33 | static pthread_mutex_t thread_lock; | ||
34 | static unsigned int threads_starting; | ||
35 | static struct stats throughput_stats; | ||
36 | static pthread_cond_t thread_parent, thread_worker; | ||
37 | |||
38 | static const struct option options[] = { | ||
39 | OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), | ||
40 | OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), | ||
41 | OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"), | ||
42 | OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), | ||
43 | OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), | ||
44 | OPT_END() | ||
45 | }; | ||
46 | |||
47 | static const char * const bench_futex_lock_pi_usage[] = { | ||
48 | "perf bench futex requeue <options>", | ||
49 | NULL | ||
50 | }; | ||
51 | |||
52 | static void print_summary(void) | ||
53 | { | ||
54 | unsigned long avg = avg_stats(&throughput_stats); | ||
55 | double stddev = stddev_stats(&throughput_stats); | ||
56 | |||
57 | printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", | ||
58 | !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), | ||
59 | (int) runtime.tv_sec); | ||
60 | } | ||
61 | |||
62 | static void toggle_done(int sig __maybe_unused, | ||
63 | siginfo_t *info __maybe_unused, | ||
64 | void *uc __maybe_unused) | ||
65 | { | ||
66 | /* inform all threads that we're done for the day */ | ||
67 | done = true; | ||
68 | gettimeofday(&end, NULL); | ||
69 | timersub(&end, &start, &runtime); | ||
70 | } | ||
71 | |||
72 | static void *workerfn(void *arg) | ||
73 | { | ||
74 | struct worker *w = (struct worker *) arg; | ||
75 | |||
76 | pthread_mutex_lock(&thread_lock); | ||
77 | threads_starting--; | ||
78 | if (!threads_starting) | ||
79 | pthread_cond_signal(&thread_parent); | ||
80 | pthread_cond_wait(&thread_worker, &thread_lock); | ||
81 | pthread_mutex_unlock(&thread_lock); | ||
82 | |||
83 | do { | ||
84 | int ret; | ||
85 | again: | ||
86 | ret = futex_lock_pi(w->futex, NULL, 0, futex_flag); | ||
87 | |||
88 | if (ret) { /* handle lock acquisition */ | ||
89 | if (!silent) | ||
90 | warn("thread %d: Could not lock pi-lock for %p (%d)", | ||
91 | w->tid, w->futex, ret); | ||
92 | if (done) | ||
93 | break; | ||
94 | |||
95 | goto again; | ||
96 | } | ||
97 | |||
98 | usleep(1); | ||
99 | ret = futex_unlock_pi(w->futex, futex_flag); | ||
100 | if (ret && !silent) | ||
101 | warn("thread %d: Could not unlock pi-lock for %p (%d)", | ||
102 | w->tid, w->futex, ret); | ||
103 | w->ops++; /* account for thread's share of work */ | ||
104 | } while (!done); | ||
105 | |||
106 | return NULL; | ||
107 | } | ||
108 | |||
109 | static void create_threads(struct worker *w, pthread_attr_t thread_attr) | ||
110 | { | ||
111 | cpu_set_t cpu; | ||
112 | unsigned int i; | ||
113 | |||
114 | threads_starting = nthreads; | ||
115 | |||
116 | for (i = 0; i < nthreads; i++) { | ||
117 | worker[i].tid = i; | ||
118 | |||
119 | if (multi) { | ||
120 | worker[i].futex = calloc(1, sizeof(u_int32_t)); | ||
121 | if (!worker[i].futex) | ||
122 | err(EXIT_FAILURE, "calloc"); | ||
123 | } else | ||
124 | worker[i].futex = &global_futex; | ||
125 | |||
126 | CPU_ZERO(&cpu); | ||
127 | CPU_SET(i % ncpus, &cpu); | ||
128 | |||
129 | if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) | ||
130 | err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); | ||
131 | |||
132 | if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) | ||
133 | err(EXIT_FAILURE, "pthread_create"); | ||
134 | } | ||
135 | } | ||
136 | |||
137 | int bench_futex_lock_pi(int argc, const char **argv, | ||
138 | const char *prefix __maybe_unused) | ||
139 | { | ||
140 | int ret = 0; | ||
141 | unsigned int i; | ||
142 | struct sigaction act; | ||
143 | pthread_attr_t thread_attr; | ||
144 | |||
145 | argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0); | ||
146 | if (argc) | ||
147 | goto err; | ||
148 | |||
149 | ncpus = sysconf(_SC_NPROCESSORS_ONLN); | ||
150 | |||
151 | sigfillset(&act.sa_mask); | ||
152 | act.sa_sigaction = toggle_done; | ||
153 | sigaction(SIGINT, &act, NULL); | ||
154 | |||
155 | if (!nthreads) | ||
156 | nthreads = ncpus; | ||
157 | |||
158 | worker = calloc(nthreads, sizeof(*worker)); | ||
159 | if (!worker) | ||
160 | err(EXIT_FAILURE, "calloc"); | ||
161 | |||
162 | if (!fshared) | ||
163 | futex_flag = FUTEX_PRIVATE_FLAG; | ||
164 | |||
165 | printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n", | ||
166 | getpid(), nthreads, nsecs); | ||
167 | |||
168 | init_stats(&throughput_stats); | ||
169 | pthread_mutex_init(&thread_lock, NULL); | ||
170 | pthread_cond_init(&thread_parent, NULL); | ||
171 | pthread_cond_init(&thread_worker, NULL); | ||
172 | |||
173 | threads_starting = nthreads; | ||
174 | pthread_attr_init(&thread_attr); | ||
175 | gettimeofday(&start, NULL); | ||
176 | |||
177 | create_threads(worker, thread_attr); | ||
178 | pthread_attr_destroy(&thread_attr); | ||
179 | |||
180 | pthread_mutex_lock(&thread_lock); | ||
181 | while (threads_starting) | ||
182 | pthread_cond_wait(&thread_parent, &thread_lock); | ||
183 | pthread_cond_broadcast(&thread_worker); | ||
184 | pthread_mutex_unlock(&thread_lock); | ||
185 | |||
186 | sleep(nsecs); | ||
187 | toggle_done(0, NULL, NULL); | ||
188 | |||
189 | for (i = 0; i < nthreads; i++) { | ||
190 | ret = pthread_join(worker[i].thread, NULL); | ||
191 | if (ret) | ||
192 | err(EXIT_FAILURE, "pthread_join"); | ||
193 | } | ||
194 | |||
195 | /* cleanup & report results */ | ||
196 | pthread_cond_destroy(&thread_parent); | ||
197 | pthread_cond_destroy(&thread_worker); | ||
198 | pthread_mutex_destroy(&thread_lock); | ||
199 | |||
200 | for (i = 0; i < nthreads; i++) { | ||
201 | unsigned long t = worker[i].ops/runtime.tv_sec; | ||
202 | |||
203 | update_stats(&throughput_stats, t); | ||
204 | if (!silent) | ||
205 | printf("[thread %3d] futex: %p [ %ld ops/sec ]\n", | ||
206 | worker[i].tid, worker[i].futex, t); | ||
207 | |||
208 | if (multi) | ||
209 | free(worker[i].futex); | ||
210 | } | ||
211 | |||
212 | print_summary(); | ||
213 | |||
214 | free(worker); | ||
215 | return ret; | ||
216 | err: | ||
217 | usage_with_options(bench_futex_lock_pi_usage, options); | ||
218 | exit(EXIT_FAILURE); | ||
219 | } | ||
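Editor's note: the benchmark is wired into 'perf bench futex' in builtin-bench.c further below. A typical run, using the options defined above (the defaults are one thread per online CPU and a 10 second runtime), might look like:

    perf bench futex lock-pi                 defaults: nthreads = #CPUs, 10s runtime
    perf bench futex lock-pi -t 8 -r 5 -M    8 threads, 5 seconds, one futex per thread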
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 7ed22ff1e1ac..d44de9f44281 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h | |||
@@ -56,6 +56,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) | |||
56 | } | 56 | } |
57 | 57 | ||
58 | /** | 58 | /** |
59 | * futex_lock_pi() - block on uaddr as a PI mutex | ||
60 | * @detect: whether (1) or not (0) to perform deadlock detection | ||
61 | */ | ||
62 | static inline int | ||
63 | futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect, | ||
64 | int opflags) | ||
65 | { | ||
66 | return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter | ||
71 | */ | ||
72 | static inline int | ||
73 | futex_unlock_pi(u_int32_t *uaddr, int opflags) | ||
74 | { | ||
75 | return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); | ||
76 | } | ||
77 | |||
78 | /** | ||
59 | * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 | 79 | * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 |
60 | * @nr_wake: wake up to this many tasks | 80 | * @nr_wake: wake up to this many tasks |
61 | * @nr_requeue: requeue up to this many tasks | 81 | * @nr_requeue: requeue up to this many tasks |
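Editor's note: a minimal single-threaded use of the two new wrappers, essentially the uncontended path that workerfn() in futex-lock-pi.c exercises, could look like the sketch below. It assumes this header's futex() helper plus <err.h> and <stdlib.h>, and is illustrative rather than part of the patch:

    #include <err.h>
    #include <stdlib.h>
    #include "futex.h"              /* the header shown above */

    static u_int32_t lock;          /* 0 == unowned */

    static void lock_unlock_once(void)
    {
            int flags = FUTEX_PRIVATE_FLAG;

            /* Uncontended acquire: the kernel stores our TID in 'lock'. */
            if (futex_lock_pi(&lock, NULL, 0, flags))
                    err(EXIT_FAILURE, "futex_lock_pi");

            /* ... critical section ... */

            /* Release, waking the highest-priority waiter if there is one. */
            if (futex_unlock_pi(&lock, flags))
                    err(EXIT_FAILURE, "futex_unlock_pi");
    }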
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 2c1bec39c30e..8edc205ff9a7 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c | |||
@@ -67,6 +67,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, | |||
67 | rb_erase(&al->sym->rb_node, | 67 | rb_erase(&al->sym->rb_node, |
68 | &al->map->dso->symbols[al->map->type]); | 68 | &al->map->dso->symbols[al->map->type]); |
69 | symbol__delete(al->sym); | 69 | symbol__delete(al->sym); |
70 | dso__reset_find_symbol_cache(al->map->dso); | ||
70 | } | 71 | } |
71 | return 0; | 72 | return 0; |
72 | } | 73 | } |
@@ -187,6 +188,7 @@ find_next: | |||
187 | * symbol, free he->ms.sym->src to signal we already | 188 | * symbol, free he->ms.sym->src to signal we already |
188 | * processed this symbol. | 189 | * processed this symbol. |
189 | */ | 190 | */ |
191 | zfree(¬es->src->cycles_hist); | ||
190 | zfree(¬es->src); | 192 | zfree(¬es->src); |
191 | } | 193 | } |
192 | } | 194 | } |
@@ -238,6 +240,8 @@ static int __cmd_annotate(struct perf_annotate *ann) | |||
238 | if (nr_samples > 0) { | 240 | if (nr_samples > 0) { |
239 | total_nr_samples += nr_samples; | 241 | total_nr_samples += nr_samples; |
240 | hists__collapse_resort(hists, NULL); | 242 | hists__collapse_resort(hists, NULL); |
243 | /* Don't sort callchain */ | ||
244 | perf_evsel__reset_sample_bit(pos, CALLCHAIN); | ||
241 | hists__output_resort(hists, NULL); | 245 | hists__output_resort(hists, NULL); |
242 | 246 | ||
243 | if (symbol_conf.event_group && | 247 | if (symbol_conf.event_group && |
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b5314e452ec7..f67934d46d40 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c | |||
@@ -60,6 +60,8 @@ static struct bench futex_benchmarks[] = { | |||
60 | { "wake", "Benchmark for futex wake calls", bench_futex_wake }, | 60 | { "wake", "Benchmark for futex wake calls", bench_futex_wake }, |
61 | { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, | 61 | { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, |
62 | { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, | 62 | { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, |
63 | /* pi-futexes */ | ||
64 | { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi }, | ||
63 | { "all", "Test all futex benchmarks", NULL }, | 65 | { "all", "Test all futex benchmarks", NULL }, |
64 | { NULL, NULL, NULL } | 66 | { NULL, NULL, NULL } |
65 | }; | 67 | }; |
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index d47a0cdc71c9..7b8450cd33c2 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c | |||
@@ -25,8 +25,6 @@ | |||
25 | static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) | 25 | static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) |
26 | { | 26 | { |
27 | char root_dir[PATH_MAX]; | 27 | char root_dir[PATH_MAX]; |
28 | char notes[PATH_MAX]; | ||
29 | u8 build_id[BUILD_ID_SIZE]; | ||
30 | char *p; | 28 | char *p; |
31 | 29 | ||
32 | strlcpy(root_dir, proc_dir, sizeof(root_dir)); | 30 | strlcpy(root_dir, proc_dir, sizeof(root_dir)); |
@@ -35,15 +33,7 @@ static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid) | |||
35 | if (!p) | 33 | if (!p) |
36 | return -1; | 34 | return -1; |
37 | *p = '\0'; | 35 | *p = '\0'; |
38 | 36 | return sysfs__sprintf_build_id(root_dir, sbuildid); | |
39 | scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); | ||
40 | |||
41 | if (sysfs__read_build_id(notes, build_id, sizeof(build_id))) | ||
42 | return -1; | ||
43 | |||
44 | build_id__sprintf(build_id, sizeof(build_id), sbuildid); | ||
45 | |||
46 | return 0; | ||
47 | } | 37 | } |
48 | 38 | ||
49 | static int build_id_cache__kcore_dir(char *dir, size_t sz) | 39 | static int build_id_cache__kcore_dir(char *dir, size_t sz) |
@@ -127,7 +117,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, | |||
127 | 117 | ||
128 | static int build_id_cache__add_kcore(const char *filename, bool force) | 118 | static int build_id_cache__add_kcore(const char *filename, bool force) |
129 | { | 119 | { |
130 | char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; | 120 | char dir[32], sbuildid[SBUILD_ID_SIZE]; |
131 | char from_dir[PATH_MAX], to_dir[PATH_MAX]; | 121 | char from_dir[PATH_MAX], to_dir[PATH_MAX]; |
132 | char *p; | 122 | char *p; |
133 | 123 | ||
@@ -138,7 +128,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) | |||
138 | return -1; | 128 | return -1; |
139 | *p = '\0'; | 129 | *p = '\0'; |
140 | 130 | ||
141 | if (build_id_cache__kcore_buildid(from_dir, sbuildid)) | 131 | if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0) |
142 | return -1; | 132 | return -1; |
143 | 133 | ||
144 | scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", | 134 | scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", |
@@ -184,7 +174,7 @@ static int build_id_cache__add_kcore(const char *filename, bool force) | |||
184 | 174 | ||
185 | static int build_id_cache__add_file(const char *filename) | 175 | static int build_id_cache__add_file(const char *filename) |
186 | { | 176 | { |
187 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 177 | char sbuild_id[SBUILD_ID_SIZE]; |
188 | u8 build_id[BUILD_ID_SIZE]; | 178 | u8 build_id[BUILD_ID_SIZE]; |
189 | int err; | 179 | int err; |
190 | 180 | ||
@@ -204,7 +194,7 @@ static int build_id_cache__add_file(const char *filename) | |||
204 | static int build_id_cache__remove_file(const char *filename) | 194 | static int build_id_cache__remove_file(const char *filename) |
205 | { | 195 | { |
206 | u8 build_id[BUILD_ID_SIZE]; | 196 | u8 build_id[BUILD_ID_SIZE]; |
207 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 197 | char sbuild_id[SBUILD_ID_SIZE]; |
208 | 198 | ||
209 | int err; | 199 | int err; |
210 | 200 | ||
@@ -276,7 +266,7 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f | |||
276 | static int build_id_cache__update_file(const char *filename) | 266 | static int build_id_cache__update_file(const char *filename) |
277 | { | 267 | { |
278 | u8 build_id[BUILD_ID_SIZE]; | 268 | u8 build_id[BUILD_ID_SIZE]; |
279 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 269 | char sbuild_id[SBUILD_ID_SIZE]; |
280 | 270 | ||
281 | int err = 0; | 271 | int err = 0; |
282 | 272 | ||
@@ -363,7 +353,7 @@ int cmd_buildid_cache(int argc, const char **argv, | |||
363 | setup_pager(); | 353 | setup_pager(); |
364 | 354 | ||
365 | if (add_name_list_str) { | 355 | if (add_name_list_str) { |
366 | list = strlist__new(true, add_name_list_str); | 356 | list = strlist__new(add_name_list_str, NULL); |
367 | if (list) { | 357 | if (list) { |
368 | strlist__for_each(pos, list) | 358 | strlist__for_each(pos, list) |
369 | if (build_id_cache__add_file(pos->s)) { | 359 | if (build_id_cache__add_file(pos->s)) { |
@@ -381,7 +371,7 @@ int cmd_buildid_cache(int argc, const char **argv, | |||
381 | } | 371 | } |
382 | 372 | ||
383 | if (remove_name_list_str) { | 373 | if (remove_name_list_str) { |
384 | list = strlist__new(true, remove_name_list_str); | 374 | list = strlist__new(remove_name_list_str, NULL); |
385 | if (list) { | 375 | if (list) { |
386 | strlist__for_each(pos, list) | 376 | strlist__for_each(pos, list) |
387 | if (build_id_cache__remove_file(pos->s)) { | 377 | if (build_id_cache__remove_file(pos->s)) { |
@@ -399,7 +389,7 @@ int cmd_buildid_cache(int argc, const char **argv, | |||
399 | } | 389 | } |
400 | 390 | ||
401 | if (purge_name_list_str) { | 391 | if (purge_name_list_str) { |
402 | list = strlist__new(true, purge_name_list_str); | 392 | list = strlist__new(purge_name_list_str, NULL); |
403 | if (list) { | 393 | if (list) { |
404 | strlist__for_each(pos, list) | 394 | strlist__for_each(pos, list) |
405 | if (build_id_cache__purge_path(pos->s)) { | 395 | if (build_id_cache__purge_path(pos->s)) { |
@@ -420,7 +410,7 @@ int cmd_buildid_cache(int argc, const char **argv, | |||
420 | ret = build_id_cache__fprintf_missing(session, stdout); | 410 | ret = build_id_cache__fprintf_missing(session, stdout); |
421 | 411 | ||
422 | if (update_name_list_str) { | 412 | if (update_name_list_str) { |
423 | list = strlist__new(true, update_name_list_str); | 413 | list = strlist__new(update_name_list_str, NULL); |
424 | if (list) { | 414 | if (list) { |
425 | strlist__for_each(pos, list) | 415 | strlist__for_each(pos, list) |
426 | if (build_id_cache__update_file(pos->s)) { | 416 | if (build_id_cache__update_file(pos->s)) { |
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index 9fe93c8d4fcf..918b4de29de4 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c | |||
@@ -19,29 +19,25 @@ | |||
19 | 19 | ||
20 | static int sysfs__fprintf_build_id(FILE *fp) | 20 | static int sysfs__fprintf_build_id(FILE *fp) |
21 | { | 21 | { |
22 | u8 kallsyms_build_id[BUILD_ID_SIZE]; | 22 | char sbuild_id[SBUILD_ID_SIZE]; |
23 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 23 | int ret; |
24 | 24 | ||
25 | if (sysfs__read_build_id("/sys/kernel/notes", kallsyms_build_id, | 25 | ret = sysfs__sprintf_build_id("/", sbuild_id); |
26 | sizeof(kallsyms_build_id)) != 0) | 26 | if (ret != sizeof(sbuild_id)) |
27 | return -1; | 27 | return ret < 0 ? ret : -EINVAL; |
28 | 28 | ||
29 | build_id__sprintf(kallsyms_build_id, sizeof(kallsyms_build_id), | 29 | return fprintf(fp, "%s\n", sbuild_id); |
30 | sbuild_id); | ||
31 | fprintf(fp, "%s\n", sbuild_id); | ||
32 | return 0; | ||
33 | } | 30 | } |
34 | 31 | ||
35 | static int filename__fprintf_build_id(const char *name, FILE *fp) | 32 | static int filename__fprintf_build_id(const char *name, FILE *fp) |
36 | { | 33 | { |
37 | u8 build_id[BUILD_ID_SIZE]; | 34 | char sbuild_id[SBUILD_ID_SIZE]; |
38 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 35 | int ret; |
39 | 36 | ||
40 | if (filename__read_build_id(name, build_id, | 37 | ret = filename__sprintf_build_id(name, sbuild_id); |
41 | sizeof(build_id)) != sizeof(build_id)) | 38 | if (ret != sizeof(sbuild_id)) |
42 | return 0; | 39 | return ret < 0 ? ret : -EINVAL; |
43 | 40 | ||
44 | build_id__sprintf(build_id, sizeof(build_id), sbuild_id); | ||
45 | return fprintf(fp, "%s\n", sbuild_id); | 41 | return fprintf(fp, "%s\n", sbuild_id); |
46 | } | 42 | } |
47 | 43 | ||
@@ -63,7 +59,7 @@ static int perf_session__list_build_ids(bool force, bool with_hits) | |||
63 | /* | 59 | /* |
64 | * See if this is an ELF file first: | 60 | * See if this is an ELF file first: |
65 | */ | 61 | */ |
66 | if (filename__fprintf_build_id(input_name, stdout)) | 62 | if (filename__fprintf_build_id(input_name, stdout) > 0) |
67 | goto out; | 63 | goto out; |
68 | 64 | ||
69 | session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); | 65 | session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); |
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index daaa7dca9c3b..0b180a885ba3 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c | |||
@@ -722,6 +722,9 @@ static void data_process(void) | |||
722 | if (verbose || data__files_cnt > 2) | 722 | if (verbose || data__files_cnt > 2) |
723 | data__fprintf(); | 723 | data__fprintf(); |
724 | 724 | ||
725 | /* Don't sort callchain for perf diff */ | ||
726 | perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN); | ||
727 | |||
725 | hists__process(hists_base); | 728 | hists__process(hists_base); |
726 | } | 729 | } |
727 | } | 730 | } |
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 01b06492bd6a..f62c49b35be0 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c | |||
@@ -561,6 +561,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) | |||
561 | .lost = perf_event__repipe, | 561 | .lost = perf_event__repipe, |
562 | .aux = perf_event__repipe, | 562 | .aux = perf_event__repipe, |
563 | .itrace_start = perf_event__repipe, | 563 | .itrace_start = perf_event__repipe, |
564 | .context_switch = perf_event__repipe, | ||
564 | .read = perf_event__repipe_sample, | 565 | .read = perf_event__repipe_sample, |
565 | .throttle = perf_event__repipe, | 566 | .throttle = perf_event__repipe, |
566 | .unthrottle = perf_event__repipe, | 567 | .unthrottle = perf_event__repipe, |
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 1272559fa22d..b81cec33b4b2 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c | |||
@@ -297,8 +297,7 @@ static void cleanup_params(void) | |||
297 | clear_perf_probe_event(params.events + i); | 297 | clear_perf_probe_event(params.events + i); |
298 | line_range__clear(¶ms.line_range); | 298 | line_range__clear(¶ms.line_range); |
299 | free(params.target); | 299 | free(params.target); |
300 | if (params.filter) | 300 | strfilter__delete(params.filter); |
301 | strfilter__delete(params.filter); | ||
302 | memset(¶ms, 0, sizeof(params)); | 301 | memset(¶ms, 0, sizeof(params)); |
303 | } | 302 | } |
304 | 303 | ||
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index de165a1b9240..142eeb341b29 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c | |||
@@ -27,8 +27,10 @@ | |||
27 | #include "util/cpumap.h" | 27 | #include "util/cpumap.h" |
28 | #include "util/thread_map.h" | 28 | #include "util/thread_map.h" |
29 | #include "util/data.h" | 29 | #include "util/data.h" |
30 | #include "util/perf_regs.h" | ||
30 | #include "util/auxtrace.h" | 31 | #include "util/auxtrace.h" |
31 | #include "util/parse-branch-options.h" | 32 | #include "util/parse-branch-options.h" |
33 | #include "util/parse-regs-options.h" | ||
32 | 34 | ||
33 | #include <unistd.h> | 35 | #include <unistd.h> |
34 | #include <sched.h> | 36 | #include <sched.h> |
@@ -279,7 +281,7 @@ static int record__open(struct record *rec) | |||
279 | 281 | ||
280 | evlist__for_each(evlist, pos) { | 282 | evlist__for_each(evlist, pos) { |
281 | try_again: | 283 | try_again: |
282 | if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) { | 284 | if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) { |
283 | if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { | 285 | if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) { |
284 | if (verbose) | 286 | if (verbose) |
285 | ui__warning("%s\n", msg); | 287 | ui__warning("%s\n", msg); |
@@ -521,6 +523,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) | |||
521 | goto out_child; | 523 | goto out_child; |
522 | } | 524 | } |
523 | 525 | ||
526 | /* | ||
527 | * Normally perf_session__new would do this, but it doesn't have the | ||
528 | * evlist. | ||
529 | */ | ||
530 | if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) { | ||
531 | pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n"); | ||
532 | rec->tool.ordered_events = false; | ||
533 | } | ||
534 | |||
524 | if (!rec->evlist->nr_groups) | 535 | if (!rec->evlist->nr_groups) |
525 | perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); | 536 | perf_header__clear_feat(&session->header, HEADER_GROUP_DESC); |
526 | 537 | ||
@@ -762,12 +773,14 @@ static void callchain_debug(void) | |||
762 | callchain_param.dump_size); | 773 | callchain_param.dump_size); |
763 | } | 774 | } |
764 | 775 | ||
765 | int record_parse_callchain_opt(const struct option *opt __maybe_unused, | 776 | int record_parse_callchain_opt(const struct option *opt, |
766 | const char *arg, | 777 | const char *arg, |
767 | int unset) | 778 | int unset) |
768 | { | 779 | { |
769 | int ret; | 780 | int ret; |
781 | struct record_opts *record = (struct record_opts *)opt->value; | ||
770 | 782 | ||
783 | record->callgraph_set = true; | ||
771 | callchain_param.enabled = !unset; | 784 | callchain_param.enabled = !unset; |
772 | 785 | ||
773 | /* --no-call-graph */ | 786 | /* --no-call-graph */ |
@@ -777,17 +790,20 @@ int record_parse_callchain_opt(const struct option *opt __maybe_unused, | |||
777 | return 0; | 790 | return 0; |
778 | } | 791 | } |
779 | 792 | ||
780 | ret = parse_callchain_record_opt(arg); | 793 | ret = parse_callchain_record_opt(arg, &callchain_param); |
781 | if (!ret) | 794 | if (!ret) |
782 | callchain_debug(); | 795 | callchain_debug(); |
783 | 796 | ||
784 | return ret; | 797 | return ret; |
785 | } | 798 | } |
786 | 799 | ||
787 | int record_callchain_opt(const struct option *opt __maybe_unused, | 800 | int record_callchain_opt(const struct option *opt, |
788 | const char *arg __maybe_unused, | 801 | const char *arg __maybe_unused, |
789 | int unset __maybe_unused) | 802 | int unset __maybe_unused) |
790 | { | 803 | { |
804 | struct record_opts *record = (struct record_opts *)opt->value; | ||
805 | |||
806 | record->callgraph_set = true; | ||
791 | callchain_param.enabled = true; | 807 | callchain_param.enabled = true; |
792 | 808 | ||
793 | if (callchain_param.record_mode == CALLCHAIN_NONE) | 809 | if (callchain_param.record_mode == CALLCHAIN_NONE) |
@@ -965,9 +981,11 @@ static struct record record = { | |||
965 | .tool = { | 981 | .tool = { |
966 | .sample = process_sample_event, | 982 | .sample = process_sample_event, |
967 | .fork = perf_event__process_fork, | 983 | .fork = perf_event__process_fork, |
984 | .exit = perf_event__process_exit, | ||
968 | .comm = perf_event__process_comm, | 985 | .comm = perf_event__process_comm, |
969 | .mmap = perf_event__process_mmap, | 986 | .mmap = perf_event__process_mmap, |
970 | .mmap2 = perf_event__process_mmap2, | 987 | .mmap2 = perf_event__process_mmap2, |
988 | .ordered_events = true, | ||
971 | }, | 989 | }, |
972 | }; | 990 | }; |
973 | 991 | ||
@@ -992,6 +1010,9 @@ struct option __record_options[] = { | |||
992 | parse_events_option), | 1010 | parse_events_option), |
993 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", | 1011 | OPT_CALLBACK(0, "filter", &record.evlist, "filter", |
994 | "event filter", parse_filter), | 1012 | "event filter", parse_filter), |
1013 | OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist, | ||
1014 | NULL, "don't record events from perf itself", | ||
1015 | exclude_perf), | ||
995 | OPT_STRING('p', "pid", &record.opts.target.pid, "pid", | 1016 | OPT_STRING('p', "pid", &record.opts.target.pid, "pid", |
996 | "record events on existing process id"), | 1017 | "record events on existing process id"), |
997 | OPT_STRING('t', "tid", &record.opts.target.tid, "tid", | 1018 | OPT_STRING('t', "tid", &record.opts.target.tid, "tid", |
@@ -1030,7 +1051,9 @@ struct option __record_options[] = { | |||
1030 | OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, | 1051 | OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, |
1031 | "per thread counts"), | 1052 | "per thread counts"), |
1032 | OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), | 1053 | OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), |
1033 | OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"), | 1054 | OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time, |
1055 | &record.opts.sample_time_set, | ||
1056 | "Record the sample timestamps"), | ||
1034 | OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), | 1057 | OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), |
1035 | OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, | 1058 | OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, |
1036 | "don't sample"), | 1059 | "don't sample"), |
@@ -1059,8 +1082,9 @@ struct option __record_options[] = { | |||
1059 | "sample transaction flags (special events only)"), | 1082 | "sample transaction flags (special events only)"), |
1060 | OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, | 1083 | OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread, |
1061 | "use per-thread mmaps"), | 1084 | "use per-thread mmaps"), |
1062 | OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, | 1085 | OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register", |
1063 | "Sample machine registers on interrupt"), | 1086 | "sample selected machine registers on interrupt," |
1087 | " use -I ? to list register names", parse_regs), | ||
1064 | OPT_BOOLEAN(0, "running-time", &record.opts.running_time, | 1088 | OPT_BOOLEAN(0, "running-time", &record.opts.running_time, |
1065 | "Record running/enabled time of read (:S) events"), | 1089 | "Record running/enabled time of read (:S) events"), |
1066 | OPT_CALLBACK('k', "clockid", &record.opts, | 1090 | OPT_CALLBACK('k', "clockid", &record.opts, |
@@ -1070,6 +1094,8 @@ struct option __record_options[] = { | |||
1070 | "opts", "AUX area tracing Snapshot Mode", ""), | 1094 | "opts", "AUX area tracing Snapshot Mode", ""), |
1071 | OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, | 1095 | OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, |
1072 | "per thread proc mmap processing timeout in ms"), | 1096 | "per thread proc mmap processing timeout in ms"), |
1097 | OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events, | ||
1098 | "Record context switch events"), | ||
1073 | OPT_END() | 1099 | OPT_END() |
1074 | }; | 1100 | }; |
1075 | 1101 | ||
@@ -1097,6 +1123,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1097 | " system-wide mode\n"); | 1123 | " system-wide mode\n"); |
1098 | usage_with_options(record_usage, record_options); | 1124 | usage_with_options(record_usage, record_options); |
1099 | } | 1125 | } |
1126 | if (rec->opts.record_switch_events && | ||
1127 | !perf_can_record_switch_events()) { | ||
1128 | ui__error("kernel does not support recording context switch events (--switch-events option)\n"); | ||
1129 | usage_with_options(record_usage, record_options); | ||
1130 | } | ||
1100 | 1131 | ||
1101 | if (!rec->itr) { | 1132 | if (!rec->itr) { |
1102 | rec->itr = auxtrace_record__init(rec->evlist, &err); | 1133 | rec->itr = auxtrace_record__init(rec->evlist, &err); |
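Editor's note: summarising the record options added in this hunk (illustrative command lines; register names as listed by -I ?):

    perf record -I ?                           list registers that can be sampled on interrupt
    perf record -I AX,SP -- <workload>         sample selected registers (PERF_SAMPLE_REGS_INTR)
    perf record --switch-events -- <workload>  also record context-switch events
    perf record --exclude-perf -e ... -- <workload>  don't record events generated by perf itself

--switch-events is rejected with an error when the running kernel cannot record context switches, per the perf_can_record_switch_events() check above.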
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 95a47719aec3..62b285e32aa5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -53,6 +53,7 @@ struct report { | |||
53 | bool mem_mode; | 53 | bool mem_mode; |
54 | bool header; | 54 | bool header; |
55 | bool header_only; | 55 | bool header_only; |
56 | bool nonany_branch_mode; | ||
56 | int max_stack; | 57 | int max_stack; |
57 | struct perf_read_values show_threads_values; | 58 | struct perf_read_values show_threads_values; |
58 | const char *pretty_printing_style; | 59 | const char *pretty_printing_style; |
@@ -102,6 +103,9 @@ static int hist_iter__report_callback(struct hist_entry_iter *iter, | |||
102 | if (!ui__has_annotation()) | 103 | if (!ui__has_annotation()) |
103 | return 0; | 104 | return 0; |
104 | 105 | ||
106 | hist__account_cycles(iter->sample->branch_stack, al, iter->sample, | ||
107 | rep->nonany_branch_mode); | ||
108 | |||
105 | if (sort__mode == SORT_MODE__BRANCH) { | 109 | if (sort__mode == SORT_MODE__BRANCH) { |
106 | bi = he->branch_info; | 110 | bi = he->branch_info; |
107 | err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); | 111 | err = addr_map_symbol__inc_samples(&bi->from, evsel->idx); |
@@ -258,6 +262,12 @@ static int report__setup_sample_type(struct report *rep) | |||
258 | else | 262 | else |
259 | callchain_param.record_mode = CALLCHAIN_FP; | 263 | callchain_param.record_mode = CALLCHAIN_FP; |
260 | } | 264 | } |
265 | |||
266 | /* ??? handle more cases than just ANY? */ | ||
267 | if (!(perf_evlist__combined_branch_type(session->evlist) & | ||
268 | PERF_SAMPLE_BRANCH_ANY)) | ||
269 | rep->nonany_branch_mode = true; | ||
270 | |||
261 | return 0; | 271 | return 0; |
262 | } | 272 | } |
263 | 273 | ||
@@ -306,6 +316,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report | |||
306 | if (evname != NULL) | 316 | if (evname != NULL) |
307 | ret += fprintf(fp, " of event '%s'", evname); | 317 | ret += fprintf(fp, " of event '%s'", evname); |
308 | 318 | ||
319 | if (symbol_conf.show_ref_callgraph && | ||
320 | strstr(evname, "call-graph=no")) { | ||
321 | ret += fprintf(fp, ", show reference callgraph"); | ||
322 | } | ||
323 | |||
309 | if (rep->mem_mode) { | 324 | if (rep->mem_mode) { |
310 | ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); | 325 | ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); |
311 | ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); | 326 | ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); |
@@ -728,6 +743,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) | |||
728 | OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", | 743 | OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", |
729 | "Instruction Tracing options", | 744 | "Instruction Tracing options", |
730 | itrace_parse_synth_opts), | 745 | itrace_parse_synth_opts), |
746 | OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, | ||
747 | "Show full source file name path for source lines"), | ||
748 | OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph, | ||
749 | "Show callgraph from reference event"), | ||
731 | OPT_END() | 750 | OPT_END() |
732 | }; | 751 | }; |
733 | struct perf_data_file file = { | 752 | struct perf_data_file file = { |
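Editor's note: perf report gains two options in the hunk above: --full-source-path prints the full path of source files in srcline output, and --show-ref-call-graph displays call graphs taken from a reference event when the event of interest was recorded with call-graph=no (hence the ", show reference callgraph" annotation added to the header output). Illustrative usage:

    perf report --full-source-path
    perf report --show-ref-call-graph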
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 24809787369f..284a76e04628 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include "util/exec_cmd.h" | 6 | #include "util/exec_cmd.h" |
7 | #include "util/header.h" | 7 | #include "util/header.h" |
8 | #include "util/parse-options.h" | 8 | #include "util/parse-options.h" |
9 | #include "util/perf_regs.h" | ||
9 | #include "util/session.h" | 10 | #include "util/session.h" |
10 | #include "util/tool.h" | 11 | #include "util/tool.h" |
11 | #include "util/symbol.h" | 12 | #include "util/symbol.h" |
@@ -46,6 +47,7 @@ enum perf_output_field { | |||
46 | PERF_OUTPUT_SYMOFFSET = 1U << 11, | 47 | PERF_OUTPUT_SYMOFFSET = 1U << 11, |
47 | PERF_OUTPUT_SRCLINE = 1U << 12, | 48 | PERF_OUTPUT_SRCLINE = 1U << 12, |
48 | PERF_OUTPUT_PERIOD = 1U << 13, | 49 | PERF_OUTPUT_PERIOD = 1U << 13, |
50 | PERF_OUTPUT_IREGS = 1U << 14, | ||
49 | }; | 51 | }; |
50 | 52 | ||
51 | struct output_option { | 53 | struct output_option { |
@@ -66,6 +68,7 @@ struct output_option { | |||
66 | {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, | 68 | {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET}, |
67 | {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, | 69 | {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, |
68 | {.str = "period", .field = PERF_OUTPUT_PERIOD}, | 70 | {.str = "period", .field = PERF_OUTPUT_PERIOD}, |
71 | {.str = "iregs", .field = PERF_OUTPUT_IREGS}, | ||
69 | }; | 72 | }; |
70 | 73 | ||
71 | /* default set to maintain compatibility with current format */ | 74 | /* default set to maintain compatibility with current format */ |
@@ -255,6 +258,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, | |||
255 | PERF_OUTPUT_PERIOD)) | 258 | PERF_OUTPUT_PERIOD)) |
256 | return -EINVAL; | 259 | return -EINVAL; |
257 | 260 | ||
261 | if (PRINT_FIELD(IREGS) && | ||
262 | perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", | ||
263 | PERF_OUTPUT_IREGS)) | ||
264 | return -EINVAL; | ||
265 | |||
258 | return 0; | 266 | return 0; |
259 | } | 267 | } |
260 | 268 | ||
@@ -352,6 +360,24 @@ out: | |||
352 | return 0; | 360 | return 0; |
353 | } | 361 | } |
354 | 362 | ||
363 | static void print_sample_iregs(union perf_event *event __maybe_unused, | ||
364 | struct perf_sample *sample, | ||
365 | struct thread *thread __maybe_unused, | ||
366 | struct perf_event_attr *attr) | ||
367 | { | ||
368 | struct regs_dump *regs = &sample->intr_regs; | ||
369 | uint64_t mask = attr->sample_regs_intr; | ||
370 | unsigned i = 0, r; | ||
371 | |||
372 | if (!regs) | ||
373 | return; | ||
374 | |||
375 | for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) { | ||
376 | u64 val = regs->regs[i++]; | ||
377 | printf("%5s:0x%"PRIx64" ", perf_reg_name(r), val); | ||
378 | } | ||
379 | } | ||
380 | |||
355 | static void print_sample_start(struct perf_sample *sample, | 381 | static void print_sample_start(struct perf_sample *sample, |
356 | struct thread *thread, | 382 | struct thread *thread, |
357 | struct perf_evsel *evsel) | 383 | struct perf_evsel *evsel) |
@@ -525,6 +551,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample, | |||
525 | PERF_MAX_STACK_DEPTH); | 551 | PERF_MAX_STACK_DEPTH); |
526 | } | 552 | } |
527 | 553 | ||
554 | if (PRINT_FIELD(IREGS)) | ||
555 | print_sample_iregs(event, sample, thread, attr); | ||
556 | |||
528 | printf("\n"); | 557 | printf("\n"); |
529 | } | 558 | } |
530 | 559 | ||
@@ -623,6 +652,7 @@ struct perf_script { | |||
623 | struct perf_session *session; | 652 | struct perf_session *session; |
624 | bool show_task_events; | 653 | bool show_task_events; |
625 | bool show_mmap_events; | 654 | bool show_mmap_events; |
655 | bool show_switch_events; | ||
626 | }; | 656 | }; |
627 | 657 | ||
628 | static int process_attr(struct perf_tool *tool, union perf_event *event, | 658 | static int process_attr(struct perf_tool *tool, union perf_event *event, |
@@ -661,7 +691,7 @@ static int process_comm_event(struct perf_tool *tool, | |||
661 | struct thread *thread; | 691 | struct thread *thread; |
662 | struct perf_script *script = container_of(tool, struct perf_script, tool); | 692 | struct perf_script *script = container_of(tool, struct perf_script, tool); |
663 | struct perf_session *session = script->session; | 693 | struct perf_session *session = script->session; |
664 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | 694 | struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); |
665 | int ret = -1; | 695 | int ret = -1; |
666 | 696 | ||
667 | thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); | 697 | thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid); |
@@ -695,7 +725,7 @@ static int process_fork_event(struct perf_tool *tool, | |||
695 | struct thread *thread; | 725 | struct thread *thread; |
696 | struct perf_script *script = container_of(tool, struct perf_script, tool); | 726 | struct perf_script *script = container_of(tool, struct perf_script, tool); |
697 | struct perf_session *session = script->session; | 727 | struct perf_session *session = script->session; |
698 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | 728 | struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); |
699 | 729 | ||
700 | if (perf_event__process_fork(tool, event, sample, machine) < 0) | 730 | if (perf_event__process_fork(tool, event, sample, machine) < 0) |
701 | return -1; | 731 | return -1; |
@@ -727,7 +757,7 @@ static int process_exit_event(struct perf_tool *tool, | |||
727 | struct thread *thread; | 757 | struct thread *thread; |
728 | struct perf_script *script = container_of(tool, struct perf_script, tool); | 758 | struct perf_script *script = container_of(tool, struct perf_script, tool); |
729 | struct perf_session *session = script->session; | 759 | struct perf_session *session = script->session; |
730 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | 760 | struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); |
731 | 761 | ||
732 | thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); | 762 | thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); |
733 | if (thread == NULL) { | 763 | if (thread == NULL) { |
@@ -738,8 +768,8 @@ static int process_exit_event(struct perf_tool *tool, | |||
738 | if (!evsel->attr.sample_id_all) { | 768 | if (!evsel->attr.sample_id_all) { |
739 | sample->cpu = 0; | 769 | sample->cpu = 0; |
740 | sample->time = 0; | 770 | sample->time = 0; |
741 | sample->tid = event->comm.tid; | 771 | sample->tid = event->fork.tid; |
742 | sample->pid = event->comm.pid; | 772 | sample->pid = event->fork.pid; |
743 | } | 773 | } |
744 | print_sample_start(sample, thread, evsel); | 774 | print_sample_start(sample, thread, evsel); |
745 | perf_event__fprintf(event, stdout); | 775 | perf_event__fprintf(event, stdout); |
@@ -759,7 +789,7 @@ static int process_mmap_event(struct perf_tool *tool, | |||
759 | struct thread *thread; | 789 | struct thread *thread; |
760 | struct perf_script *script = container_of(tool, struct perf_script, tool); | 790 | struct perf_script *script = container_of(tool, struct perf_script, tool); |
761 | struct perf_session *session = script->session; | 791 | struct perf_session *session = script->session; |
762 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | 792 | struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); |
763 | 793 | ||
764 | if (perf_event__process_mmap(tool, event, sample, machine) < 0) | 794 | if (perf_event__process_mmap(tool, event, sample, machine) < 0) |
765 | return -1; | 795 | return -1; |
@@ -790,7 +820,7 @@ static int process_mmap2_event(struct perf_tool *tool, | |||
790 | struct thread *thread; | 820 | struct thread *thread; |
791 | struct perf_script *script = container_of(tool, struct perf_script, tool); | 821 | struct perf_script *script = container_of(tool, struct perf_script, tool); |
792 | struct perf_session *session = script->session; | 822 | struct perf_session *session = script->session; |
793 | struct perf_evsel *evsel = perf_evlist__first(session->evlist); | 823 | struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); |
794 | 824 | ||
795 | if (perf_event__process_mmap2(tool, event, sample, machine) < 0) | 825 | if (perf_event__process_mmap2(tool, event, sample, machine) < 0) |
796 | return -1; | 826 | return -1; |
@@ -813,6 +843,32 @@ static int process_mmap2_event(struct perf_tool *tool, | |||
813 | return 0; | 843 | return 0; |
814 | } | 844 | } |
815 | 845 | ||
846 | static int process_switch_event(struct perf_tool *tool, | ||
847 | union perf_event *event, | ||
848 | struct perf_sample *sample, | ||
849 | struct machine *machine) | ||
850 | { | ||
851 | struct thread *thread; | ||
852 | struct perf_script *script = container_of(tool, struct perf_script, tool); | ||
853 | struct perf_session *session = script->session; | ||
854 | struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); | ||
855 | |||
856 | if (perf_event__process_switch(tool, event, sample, machine) < 0) | ||
857 | return -1; | ||
858 | |||
859 | thread = machine__findnew_thread(machine, sample->pid, | ||
860 | sample->tid); | ||
861 | if (thread == NULL) { | ||
862 | pr_debug("problem processing SWITCH event, skipping it.\n"); | ||
863 | return -1; | ||
864 | } | ||
865 | |||
866 | print_sample_start(sample, thread, evsel); | ||
867 | perf_event__fprintf(event, stdout); | ||
868 | thread__put(thread); | ||
869 | return 0; | ||
870 | } | ||
871 | |||
816 | static void sig_handler(int sig __maybe_unused) | 872 | static void sig_handler(int sig __maybe_unused) |
817 | { | 873 | { |
818 | session_done = 1; | 874 | session_done = 1; |
@@ -834,6 +890,8 @@ static int __cmd_script(struct perf_script *script) | |||
834 | script->tool.mmap = process_mmap_event; | 890 | script->tool.mmap = process_mmap_event; |
835 | script->tool.mmap2 = process_mmap2_event; | 891 | script->tool.mmap2 = process_mmap2_event; |
836 | } | 892 | } |
893 | if (script->show_switch_events) | ||
894 | script->tool.context_switch = process_switch_event; | ||
837 | 895 | ||
838 | ret = perf_session__process_events(script->session); | 896 | ret = perf_session__process_events(script->session); |
839 | 897 | ||
@@ -1532,6 +1590,22 @@ static int have_cmd(int argc, const char **argv) | |||
1532 | return 0; | 1590 | return 0; |
1533 | } | 1591 | } |
1534 | 1592 | ||
1593 | static void script__setup_sample_type(struct perf_script *script) | ||
1594 | { | ||
1595 | struct perf_session *session = script->session; | ||
1596 | u64 sample_type = perf_evlist__combined_sample_type(session->evlist); | ||
1597 | |||
1598 | if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { | ||
1599 | if ((sample_type & PERF_SAMPLE_REGS_USER) && | ||
1600 | (sample_type & PERF_SAMPLE_STACK_USER)) | ||
1601 | callchain_param.record_mode = CALLCHAIN_DWARF; | ||
1602 | else if (sample_type & PERF_SAMPLE_BRANCH_STACK) | ||
1603 | callchain_param.record_mode = CALLCHAIN_LBR; | ||
1604 | else | ||
1605 | callchain_param.record_mode = CALLCHAIN_FP; | ||
1606 | } | ||
1607 | } | ||
1608 | |||
1535 | int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | 1609 | int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) |
1536 | { | 1610 | { |
1537 | bool show_full_info = false; | 1611 | bool show_full_info = false; |
@@ -1598,7 +1672,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1598 | "comma separated output fields prepend with 'type:'. " | 1672 | "comma separated output fields prepend with 'type:'. " |
1599 | "Valid types: hw,sw,trace,raw. " | 1673 | "Valid types: hw,sw,trace,raw. " |
1600 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," | 1674 | "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," |
1601 | "addr,symoff,period,flags", parse_output_fields), | 1675 | "addr,symoff,period,iregs,flags", parse_output_fields), |
1602 | OPT_BOOLEAN('a', "all-cpus", &system_wide, | 1676 | OPT_BOOLEAN('a', "all-cpus", &system_wide, |
1603 | "system-wide collection from all CPUs"), | 1677 | "system-wide collection from all CPUs"), |
1604 | OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", | 1678 | OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", |
@@ -1618,10 +1692,19 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1618 | "Show the fork/comm/exit events"), | 1692 | "Show the fork/comm/exit events"), |
1619 | OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, | 1693 | OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, |
1620 | "Show the mmap events"), | 1694 | "Show the mmap events"), |
1695 | OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, | ||
1696 | "Show context switch events (if recorded)"), | ||
1621 | OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), | 1697 | OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), |
1622 | OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", | 1698 | OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", |
1623 | "Instruction Tracing options", | 1699 | "Instruction Tracing options", |
1624 | itrace_parse_synth_opts), | 1700 | itrace_parse_synth_opts), |
1701 | OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename, | ||
1702 | "Show full source file name path for source lines"), | ||
1703 | OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, | ||
1704 | "Enable symbol demangling"), | ||
1705 | OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, | ||
1706 | "Enable kernel symbol demangling"), | ||
1707 | |||
1625 | OPT_END() | 1708 | OPT_END() |
1626 | }; | 1709 | }; |
1627 | const char * const script_subcommands[] = { "record", "report", NULL }; | 1710 | const char * const script_subcommands[] = { "record", "report", NULL }; |
@@ -1816,6 +1899,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1816 | goto out_delete; | 1899 | goto out_delete; |
1817 | 1900 | ||
1818 | script.session = session; | 1901 | script.session = session; |
1902 | script__setup_sample_type(&script); | ||
1819 | 1903 | ||
1820 | session->itrace_synth_opts = &itrace_synth_opts; | 1904 | session->itrace_synth_opts = &itrace_synth_opts; |
1821 | 1905 | ||
@@ -1830,6 +1914,14 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1830 | else | 1914 | else |
1831 | symbol_conf.use_callchain = false; | 1915 | symbol_conf.use_callchain = false; |
1832 | 1916 | ||
1917 | if (session->tevent.pevent && | ||
1918 | pevent_set_function_resolver(session->tevent.pevent, | ||
1919 | machine__resolve_kernel_addr, | ||
1920 | &session->machines.host) < 0) { | ||
1921 | pr_err("%s: failed to set libtraceevent function resolver\n", __func__); | ||
1922 | return -1; | ||
1923 | } | ||
1924 | |||
1833 | if (generate_script_lang) { | 1925 | if (generate_script_lang) { |
1834 | struct stat perf_stat; | 1926 | struct stat perf_stat; |
1835 | int input; | 1927 | int input; |
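
Note on the builtin-script.c changes above: the event handlers now resolve the evsel from the sample id via perf_evlist__id2evsel() instead of assuming the first event, a process_switch_event() handler is wired up behind the new --show-switch-events option, and script__setup_sample_type() derives the callchain record mode from the session's combined sample type. A minimal, self-contained sketch of that last decision follows; the SAMPLE_* names and bit values are illustrative stand-ins, not the real PERF_SAMPLE_* ABI constants.

    #include <stdio.h>

    /* Illustrative stand-ins for the PERF_SAMPLE_* bits tested in
     * script__setup_sample_type(); the real values come from
     * include/uapi/linux/perf_event.h. */
    #define SAMPLE_REGS_USER    (1u << 0)
    #define SAMPLE_STACK_USER   (1u << 1)
    #define SAMPLE_BRANCH_STACK (1u << 2)

    enum record_mode { MODE_FP, MODE_DWARF, MODE_LBR };

    /* Same precedence as the patch: DWARF needs both user registers and
     * a user stack dump, LBR needs branch stacks, frame pointers are the
     * fallback. */
    static enum record_mode pick_callchain_mode(unsigned int sample_type)
    {
            if ((sample_type & SAMPLE_REGS_USER) &&
                (sample_type & SAMPLE_STACK_USER))
                    return MODE_DWARF;
            if (sample_type & SAMPLE_BRANCH_STACK)
                    return MODE_LBR;
            return MODE_FP;
    }

    int main(void)
    {
            /* Prints 2 (MODE_LBR): branch stacks present, no user regs/stack. */
            printf("%d\n", pick_callchain_mode(SAMPLE_BRANCH_STACK));
            return 0;
    }
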
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d99d850e1444..d46dbb1bc65d 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include "util/cpumap.h" | 58 | #include "util/cpumap.h" |
59 | #include "util/thread.h" | 59 | #include "util/thread.h" |
60 | #include "util/thread_map.h" | 60 | #include "util/thread_map.h" |
61 | #include "util/counts.h" | ||
61 | 62 | ||
62 | #include <stdlib.h> | 63 | #include <stdlib.h> |
63 | #include <sys/prctl.h> | 64 | #include <sys/prctl.h> |
@@ -101,8 +102,6 @@ static struct target target = { | |||
101 | 102 | ||
102 | static int run_count = 1; | 103 | static int run_count = 1; |
103 | static bool no_inherit = false; | 104 | static bool no_inherit = false; |
104 | static bool scale = true; | ||
105 | static enum aggr_mode aggr_mode = AGGR_GLOBAL; | ||
106 | static volatile pid_t child_pid = -1; | 105 | static volatile pid_t child_pid = -1; |
107 | static bool null_run = false; | 106 | static bool null_run = false; |
108 | static int detailed_run = 0; | 107 | static int detailed_run = 0; |
@@ -112,11 +111,9 @@ static int big_num_opt = -1; | |||
112 | static const char *csv_sep = NULL; | 111 | static const char *csv_sep = NULL; |
113 | static bool csv_output = false; | 112 | static bool csv_output = false; |
114 | static bool group = false; | 113 | static bool group = false; |
115 | static FILE *output = NULL; | ||
116 | static const char *pre_cmd = NULL; | 114 | static const char *pre_cmd = NULL; |
117 | static const char *post_cmd = NULL; | 115 | static const char *post_cmd = NULL; |
118 | static bool sync_run = false; | 116 | static bool sync_run = false; |
119 | static unsigned int interval = 0; | ||
120 | static unsigned int initial_delay = 0; | 117 | static unsigned int initial_delay = 0; |
121 | static unsigned int unit_width = 4; /* strlen("unit") */ | 118 | static unsigned int unit_width = 4; /* strlen("unit") */ |
122 | static bool forever = false; | 119 | static bool forever = false; |
@@ -126,6 +123,11 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); | |||
126 | 123 | ||
127 | static volatile int done = 0; | 124 | static volatile int done = 0; |
128 | 125 | ||
126 | static struct perf_stat_config stat_config = { | ||
127 | .aggr_mode = AGGR_GLOBAL, | ||
128 | .scale = true, | ||
129 | }; | ||
130 | |||
129 | static inline void diff_timespec(struct timespec *r, struct timespec *a, | 131 | static inline void diff_timespec(struct timespec *r, struct timespec *a, |
130 | struct timespec *b) | 132 | struct timespec *b) |
131 | { | 133 | { |
@@ -148,7 +150,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) | |||
148 | { | 150 | { |
149 | struct perf_event_attr *attr = &evsel->attr; | 151 | struct perf_event_attr *attr = &evsel->attr; |
150 | 152 | ||
151 | if (scale) | 153 | if (stat_config.scale) |
152 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | | 154 | attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | |
153 | PERF_FORMAT_TOTAL_TIME_RUNNING; | 155 | PERF_FORMAT_TOTAL_TIME_RUNNING; |
154 | 156 | ||
@@ -178,142 +180,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) | |||
178 | return 0; | 180 | return 0; |
179 | } | 181 | } |
180 | 182 | ||
181 | static void zero_per_pkg(struct perf_evsel *counter) | ||
182 | { | ||
183 | if (counter->per_pkg_mask) | ||
184 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | ||
185 | } | ||
186 | |||
187 | static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) | ||
188 | { | ||
189 | unsigned long *mask = counter->per_pkg_mask; | ||
190 | struct cpu_map *cpus = perf_evsel__cpus(counter); | ||
191 | int s; | ||
192 | |||
193 | *skip = false; | ||
194 | |||
195 | if (!counter->per_pkg) | ||
196 | return 0; | ||
197 | |||
198 | if (cpu_map__empty(cpus)) | ||
199 | return 0; | ||
200 | |||
201 | if (!mask) { | ||
202 | mask = zalloc(MAX_NR_CPUS); | ||
203 | if (!mask) | ||
204 | return -ENOMEM; | ||
205 | |||
206 | counter->per_pkg_mask = mask; | ||
207 | } | ||
208 | |||
209 | s = cpu_map__get_socket(cpus, cpu); | ||
210 | if (s < 0) | ||
211 | return -1; | ||
212 | |||
213 | *skip = test_and_set_bit(s, mask) == 1; | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | static int | ||
218 | process_counter_values(struct perf_evsel *evsel, int cpu, int thread, | ||
219 | struct perf_counts_values *count) | ||
220 | { | ||
221 | struct perf_counts_values *aggr = &evsel->counts->aggr; | ||
222 | static struct perf_counts_values zero; | ||
223 | bool skip = false; | ||
224 | |||
225 | if (check_per_pkg(evsel, cpu, &skip)) { | ||
226 | pr_err("failed to read per-pkg counter\n"); | ||
227 | return -1; | ||
228 | } | ||
229 | |||
230 | if (skip) | ||
231 | count = &zero; | ||
232 | |||
233 | switch (aggr_mode) { | ||
234 | case AGGR_THREAD: | ||
235 | case AGGR_CORE: | ||
236 | case AGGR_SOCKET: | ||
237 | case AGGR_NONE: | ||
238 | if (!evsel->snapshot) | ||
239 | perf_evsel__compute_deltas(evsel, cpu, thread, count); | ||
240 | perf_counts_values__scale(count, scale, NULL); | ||
241 | if (aggr_mode == AGGR_NONE) | ||
242 | perf_stat__update_shadow_stats(evsel, count->values, cpu); | ||
243 | break; | ||
244 | case AGGR_GLOBAL: | ||
245 | aggr->val += count->val; | ||
246 | if (scale) { | ||
247 | aggr->ena += count->ena; | ||
248 | aggr->run += count->run; | ||
249 | } | ||
250 | default: | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | return 0; | ||
255 | } | ||
256 | |||
257 | static int process_counter_maps(struct perf_evsel *counter) | ||
258 | { | ||
259 | int nthreads = thread_map__nr(counter->threads); | ||
260 | int ncpus = perf_evsel__nr_cpus(counter); | ||
261 | int cpu, thread; | ||
262 | |||
263 | if (counter->system_wide) | ||
264 | nthreads = 1; | ||
265 | |||
266 | for (thread = 0; thread < nthreads; thread++) { | ||
267 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
268 | if (process_counter_values(counter, cpu, thread, | ||
269 | perf_counts(counter->counts, cpu, thread))) | ||
270 | return -1; | ||
271 | } | ||
272 | } | ||
273 | |||
274 | return 0; | ||
275 | } | ||
276 | |||
277 | static int process_counter(struct perf_evsel *counter) | ||
278 | { | ||
279 | struct perf_counts_values *aggr = &counter->counts->aggr; | ||
280 | struct perf_stat *ps = counter->priv; | ||
281 | u64 *count = counter->counts->aggr.values; | ||
282 | int i, ret; | ||
283 | |||
284 | aggr->val = aggr->ena = aggr->run = 0; | ||
285 | init_stats(ps->res_stats); | ||
286 | |||
287 | if (counter->per_pkg) | ||
288 | zero_per_pkg(counter); | ||
289 | |||
290 | ret = process_counter_maps(counter); | ||
291 | if (ret) | ||
292 | return ret; | ||
293 | |||
294 | if (aggr_mode != AGGR_GLOBAL) | ||
295 | return 0; | ||
296 | |||
297 | if (!counter->snapshot) | ||
298 | perf_evsel__compute_deltas(counter, -1, -1, aggr); | ||
299 | perf_counts_values__scale(aggr, scale, &counter->counts->scaled); | ||
300 | |||
301 | for (i = 0; i < 3; i++) | ||
302 | update_stats(&ps->res_stats[i], count[i]); | ||
303 | |||
304 | if (verbose) { | ||
305 | fprintf(output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", | ||
306 | perf_evsel__name(counter), count[0], count[1], count[2]); | ||
307 | } | ||
308 | |||
309 | /* | ||
310 | * Save the full runtime - to allow normalization during printout: | ||
311 | */ | ||
312 | perf_stat__update_shadow_stats(counter, count, 0); | ||
313 | |||
314 | return 0; | ||
315 | } | ||
316 | |||
317 | /* | 183 | /* |
318 | * Read out the results of a single counter: | 184 | * Read out the results of a single counter: |
319 | * do not aggregate counts across CPUs in system-wide mode | 185 | * do not aggregate counts across CPUs in system-wide mode |
@@ -351,7 +217,7 @@ static void read_counters(bool close_counters) | |||
351 | if (read_counter(counter)) | 217 | if (read_counter(counter)) |
352 | pr_warning("failed to read counter %s\n", counter->name); | 218 | pr_warning("failed to read counter %s\n", counter->name); |
353 | 219 | ||
354 | if (process_counter(counter)) | 220 | if (perf_stat_process_counter(&stat_config, counter)) |
355 | pr_warning("failed to process counter %s\n", counter->name); | 221 | pr_warning("failed to process counter %s\n", counter->name); |
356 | 222 | ||
357 | if (close_counters) { | 223 | if (close_counters) { |
@@ -402,6 +268,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf | |||
402 | 268 | ||
403 | static int __run_perf_stat(int argc, const char **argv) | 269 | static int __run_perf_stat(int argc, const char **argv) |
404 | { | 270 | { |
271 | int interval = stat_config.interval; | ||
405 | char msg[512]; | 272 | char msg[512]; |
406 | unsigned long long t0, t1; | 273 | unsigned long long t0, t1; |
407 | struct perf_evsel *counter; | 274 | struct perf_evsel *counter; |
@@ -545,13 +412,13 @@ static int run_perf_stat(int argc, const char **argv) | |||
545 | static void print_running(u64 run, u64 ena) | 412 | static void print_running(u64 run, u64 ena) |
546 | { | 413 | { |
547 | if (csv_output) { | 414 | if (csv_output) { |
548 | fprintf(output, "%s%" PRIu64 "%s%.2f", | 415 | fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f", |
549 | csv_sep, | 416 | csv_sep, |
550 | run, | 417 | run, |
551 | csv_sep, | 418 | csv_sep, |
552 | ena ? 100.0 * run / ena : 100.0); | 419 | ena ? 100.0 * run / ena : 100.0); |
553 | } else if (run != ena) { | 420 | } else if (run != ena) { |
554 | fprintf(output, " (%.2f%%)", 100.0 * run / ena); | 421 | fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena); |
555 | } | 422 | } |
556 | } | 423 | } |
557 | 424 | ||
@@ -560,9 +427,9 @@ static void print_noise_pct(double total, double avg) | |||
560 | double pct = rel_stddev_stats(total, avg); | 427 | double pct = rel_stddev_stats(total, avg); |
561 | 428 | ||
562 | if (csv_output) | 429 | if (csv_output) |
563 | fprintf(output, "%s%.2f%%", csv_sep, pct); | 430 | fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct); |
564 | else if (pct) | 431 | else if (pct) |
565 | fprintf(output, " ( +-%6.2f%% )", pct); | 432 | fprintf(stat_config.output, " ( +-%6.2f%% )", pct); |
566 | } | 433 | } |
567 | 434 | ||
568 | static void print_noise(struct perf_evsel *evsel, double avg) | 435 | static void print_noise(struct perf_evsel *evsel, double avg) |
@@ -578,9 +445,9 @@ static void print_noise(struct perf_evsel *evsel, double avg) | |||
578 | 445 | ||
579 | static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | 446 | static void aggr_printout(struct perf_evsel *evsel, int id, int nr) |
580 | { | 447 | { |
581 | switch (aggr_mode) { | 448 | switch (stat_config.aggr_mode) { |
582 | case AGGR_CORE: | 449 | case AGGR_CORE: |
583 | fprintf(output, "S%d-C%*d%s%*d%s", | 450 | fprintf(stat_config.output, "S%d-C%*d%s%*d%s", |
584 | cpu_map__id_to_socket(id), | 451 | cpu_map__id_to_socket(id), |
585 | csv_output ? 0 : -8, | 452 | csv_output ? 0 : -8, |
586 | cpu_map__id_to_cpu(id), | 453 | cpu_map__id_to_cpu(id), |
@@ -590,7 +457,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
590 | csv_sep); | 457 | csv_sep); |
591 | break; | 458 | break; |
592 | case AGGR_SOCKET: | 459 | case AGGR_SOCKET: |
593 | fprintf(output, "S%*d%s%*d%s", | 460 | fprintf(stat_config.output, "S%*d%s%*d%s", |
594 | csv_output ? 0 : -5, | 461 | csv_output ? 0 : -5, |
595 | id, | 462 | id, |
596 | csv_sep, | 463 | csv_sep, |
@@ -599,12 +466,12 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
599 | csv_sep); | 466 | csv_sep); |
600 | break; | 467 | break; |
601 | case AGGR_NONE: | 468 | case AGGR_NONE: |
602 | fprintf(output, "CPU%*d%s", | 469 | fprintf(stat_config.output, "CPU%*d%s", |
603 | csv_output ? 0 : -4, | 470 | csv_output ? 0 : -4, |
604 | perf_evsel__cpus(evsel)->map[id], csv_sep); | 471 | perf_evsel__cpus(evsel)->map[id], csv_sep); |
605 | break; | 472 | break; |
606 | case AGGR_THREAD: | 473 | case AGGR_THREAD: |
607 | fprintf(output, "%*s-%*d%s", | 474 | fprintf(stat_config.output, "%*s-%*d%s", |
608 | csv_output ? 0 : 16, | 475 | csv_output ? 0 : 16, |
609 | thread_map__comm(evsel->threads, id), | 476 | thread_map__comm(evsel->threads, id), |
610 | csv_output ? 0 : -8, | 477 | csv_output ? 0 : -8, |
@@ -619,6 +486,7 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) | |||
619 | 486 | ||
620 | static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | 487 | static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) |
621 | { | 488 | { |
489 | FILE *output = stat_config.output; | ||
622 | double msecs = avg / 1e6; | 490 | double msecs = avg / 1e6; |
623 | const char *fmt_v, *fmt_n; | 491 | const char *fmt_v, *fmt_n; |
624 | char name[25]; | 492 | char name[25]; |
@@ -643,7 +511,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
643 | if (evsel->cgrp) | 511 | if (evsel->cgrp) |
644 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 512 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
645 | 513 | ||
646 | if (csv_output || interval) | 514 | if (csv_output || stat_config.interval) |
647 | return; | 515 | return; |
648 | 516 | ||
649 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) | 517 | if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK)) |
@@ -655,6 +523,7 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
655 | 523 | ||
656 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | 524 | static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) |
657 | { | 525 | { |
526 | FILE *output = stat_config.output; | ||
658 | double sc = evsel->scale; | 527 | double sc = evsel->scale; |
659 | const char *fmt; | 528 | const char *fmt; |
660 | int cpu = cpu_map__id_to_cpu(id); | 529 | int cpu = cpu_map__id_to_cpu(id); |
@@ -670,7 +539,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
670 | 539 | ||
671 | aggr_printout(evsel, id, nr); | 540 | aggr_printout(evsel, id, nr); |
672 | 541 | ||
673 | if (aggr_mode == AGGR_GLOBAL) | 542 | if (stat_config.aggr_mode == AGGR_GLOBAL) |
674 | cpu = 0; | 543 | cpu = 0; |
675 | 544 | ||
676 | fprintf(output, fmt, avg, csv_sep); | 545 | fprintf(output, fmt, avg, csv_sep); |
@@ -685,16 +554,18 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) | |||
685 | if (evsel->cgrp) | 554 | if (evsel->cgrp) |
686 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); | 555 | fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); |
687 | 556 | ||
688 | if (csv_output || interval) | 557 | if (csv_output || stat_config.interval) |
689 | return; | 558 | return; |
690 | 559 | ||
691 | perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); | 560 | perf_stat__print_shadow_stats(output, evsel, avg, cpu, |
561 | stat_config.aggr_mode); | ||
692 | } | 562 | } |
693 | 563 | ||
694 | static void print_aggr(char *prefix) | 564 | static void print_aggr(char *prefix) |
695 | { | 565 | { |
566 | FILE *output = stat_config.output; | ||
696 | struct perf_evsel *counter; | 567 | struct perf_evsel *counter; |
697 | int cpu, cpu2, s, s2, id, nr; | 568 | int cpu, s, s2, id, nr; |
698 | double uval; | 569 | double uval; |
699 | u64 ena, run, val; | 570 | u64 ena, run, val; |
700 | 571 | ||
@@ -707,8 +578,7 @@ static void print_aggr(char *prefix) | |||
707 | val = ena = run = 0; | 578 | val = ena = run = 0; |
708 | nr = 0; | 579 | nr = 0; |
709 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { | 580 | for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { |
710 | cpu2 = perf_evsel__cpus(counter)->map[cpu]; | 581 | s2 = aggr_get_id(perf_evsel__cpus(counter), cpu); |
711 | s2 = aggr_get_id(evsel_list->cpus, cpu2); | ||
712 | if (s2 != id) | 582 | if (s2 != id) |
713 | continue; | 583 | continue; |
714 | val += perf_counts(counter->counts, cpu, 0)->val; | 584 | val += perf_counts(counter->counts, cpu, 0)->val; |
@@ -761,6 +631,7 @@ static void print_aggr(char *prefix) | |||
761 | 631 | ||
762 | static void print_aggr_thread(struct perf_evsel *counter, char *prefix) | 632 | static void print_aggr_thread(struct perf_evsel *counter, char *prefix) |
763 | { | 633 | { |
634 | FILE *output = stat_config.output; | ||
764 | int nthreads = thread_map__nr(counter->threads); | 635 | int nthreads = thread_map__nr(counter->threads); |
765 | int ncpus = cpu_map__nr(counter->cpus); | 636 | int ncpus = cpu_map__nr(counter->cpus); |
766 | int cpu, thread; | 637 | int cpu, thread; |
@@ -799,6 +670,7 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) | |||
799 | */ | 670 | */ |
800 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix) | 671 | static void print_counter_aggr(struct perf_evsel *counter, char *prefix) |
801 | { | 672 | { |
673 | FILE *output = stat_config.output; | ||
802 | struct perf_stat *ps = counter->priv; | 674 | struct perf_stat *ps = counter->priv; |
803 | double avg = avg_stats(&ps->res_stats[0]); | 675 | double avg = avg_stats(&ps->res_stats[0]); |
804 | int scaled = counter->counts->scaled; | 676 | int scaled = counter->counts->scaled; |
@@ -850,6 +722,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) | |||
850 | */ | 722 | */ |
851 | static void print_counter(struct perf_evsel *counter, char *prefix) | 723 | static void print_counter(struct perf_evsel *counter, char *prefix) |
852 | { | 724 | { |
725 | FILE *output = stat_config.output; | ||
853 | u64 ena, run, val; | 726 | u64 ena, run, val; |
854 | double uval; | 727 | double uval; |
855 | int cpu; | 728 | int cpu; |
@@ -904,12 +777,13 @@ static void print_counter(struct perf_evsel *counter, char *prefix) | |||
904 | 777 | ||
905 | static void print_interval(char *prefix, struct timespec *ts) | 778 | static void print_interval(char *prefix, struct timespec *ts) |
906 | { | 779 | { |
780 | FILE *output = stat_config.output; | ||
907 | static int num_print_interval; | 781 | static int num_print_interval; |
908 | 782 | ||
909 | sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); | 783 | sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); |
910 | 784 | ||
911 | if (num_print_interval == 0 && !csv_output) { | 785 | if (num_print_interval == 0 && !csv_output) { |
912 | switch (aggr_mode) { | 786 | switch (stat_config.aggr_mode) { |
913 | case AGGR_SOCKET: | 787 | case AGGR_SOCKET: |
914 | fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); | 788 | fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); |
915 | break; | 789 | break; |
@@ -934,6 +808,7 @@ static void print_interval(char *prefix, struct timespec *ts) | |||
934 | 808 | ||
935 | static void print_header(int argc, const char **argv) | 809 | static void print_header(int argc, const char **argv) |
936 | { | 810 | { |
811 | FILE *output = stat_config.output; | ||
937 | int i; | 812 | int i; |
938 | 813 | ||
939 | fflush(stdout); | 814 | fflush(stdout); |
@@ -963,6 +838,8 @@ static void print_header(int argc, const char **argv) | |||
963 | 838 | ||
964 | static void print_footer(void) | 839 | static void print_footer(void) |
965 | { | 840 | { |
841 | FILE *output = stat_config.output; | ||
842 | |||
966 | if (!null_run) | 843 | if (!null_run) |
967 | fprintf(output, "\n"); | 844 | fprintf(output, "\n"); |
968 | fprintf(output, " %17.9f seconds time elapsed", | 845 | fprintf(output, " %17.9f seconds time elapsed", |
@@ -977,6 +854,7 @@ static void print_footer(void) | |||
977 | 854 | ||
978 | static void print_counters(struct timespec *ts, int argc, const char **argv) | 855 | static void print_counters(struct timespec *ts, int argc, const char **argv) |
979 | { | 856 | { |
857 | int interval = stat_config.interval; | ||
980 | struct perf_evsel *counter; | 858 | struct perf_evsel *counter; |
981 | char buf[64], *prefix = NULL; | 859 | char buf[64], *prefix = NULL; |
982 | 860 | ||
@@ -985,7 +863,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
985 | else | 863 | else |
986 | print_header(argc, argv); | 864 | print_header(argc, argv); |
987 | 865 | ||
988 | switch (aggr_mode) { | 866 | switch (stat_config.aggr_mode) { |
989 | case AGGR_CORE: | 867 | case AGGR_CORE: |
990 | case AGGR_SOCKET: | 868 | case AGGR_SOCKET: |
991 | print_aggr(prefix); | 869 | print_aggr(prefix); |
@@ -1009,14 +887,14 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) | |||
1009 | if (!interval && !csv_output) | 887 | if (!interval && !csv_output) |
1010 | print_footer(); | 888 | print_footer(); |
1011 | 889 | ||
1012 | fflush(output); | 890 | fflush(stat_config.output); |
1013 | } | 891 | } |
1014 | 892 | ||
1015 | static volatile int signr = -1; | 893 | static volatile int signr = -1; |
1016 | 894 | ||
1017 | static void skip_signal(int signo) | 895 | static void skip_signal(int signo) |
1018 | { | 896 | { |
1019 | if ((child_pid == -1) || interval) | 897 | if ((child_pid == -1) || stat_config.interval) |
1020 | done = 1; | 898 | done = 1; |
1021 | 899 | ||
1022 | signr = signo; | 900 | signr = signo; |
@@ -1064,7 +942,7 @@ static int stat__set_big_num(const struct option *opt __maybe_unused, | |||
1064 | 942 | ||
1065 | static int perf_stat_init_aggr_mode(void) | 943 | static int perf_stat_init_aggr_mode(void) |
1066 | { | 944 | { |
1067 | switch (aggr_mode) { | 945 | switch (stat_config.aggr_mode) { |
1068 | case AGGR_SOCKET: | 946 | case AGGR_SOCKET: |
1069 | if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { | 947 | if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) { |
1070 | perror("cannot build socket map"); | 948 | perror("cannot build socket map"); |
@@ -1270,7 +1148,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1270 | "system-wide collection from all CPUs"), | 1148 | "system-wide collection from all CPUs"), |
1271 | OPT_BOOLEAN('g', "group", &group, | 1149 | OPT_BOOLEAN('g', "group", &group, |
1272 | "put the counters into a counter group"), | 1150 | "put the counters into a counter group"), |
1273 | OPT_BOOLEAN('c', "scale", &scale, "scale/normalize counters"), | 1151 | OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), |
1274 | OPT_INCR('v', "verbose", &verbose, | 1152 | OPT_INCR('v', "verbose", &verbose, |
1275 | "be more verbose (show counter open errors, etc)"), | 1153 | "be more verbose (show counter open errors, etc)"), |
1276 | OPT_INTEGER('r', "repeat", &run_count, | 1154 | OPT_INTEGER('r', "repeat", &run_count, |
@@ -1286,7 +1164,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1286 | stat__set_big_num), | 1164 | stat__set_big_num), |
1287 | OPT_STRING('C', "cpu", &target.cpu_list, "cpu", | 1165 | OPT_STRING('C', "cpu", &target.cpu_list, "cpu", |
1288 | "list of cpus to monitor in system-wide"), | 1166 | "list of cpus to monitor in system-wide"), |
1289 | OPT_SET_UINT('A', "no-aggr", &aggr_mode, | 1167 | OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode, |
1290 | "disable CPU count aggregation", AGGR_NONE), | 1168 | "disable CPU count aggregation", AGGR_NONE), |
1291 | OPT_STRING('x', "field-separator", &csv_sep, "separator", | 1169 | OPT_STRING('x', "field-separator", &csv_sep, "separator", |
1292 | "print counts with custom separator"), | 1170 | "print counts with custom separator"), |
@@ -1300,13 +1178,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1300 | "command to run prior to the measured command"), | 1178 | "command to run prior to the measured command"), |
1301 | OPT_STRING(0, "post", &post_cmd, "command", | 1179 | OPT_STRING(0, "post", &post_cmd, "command", |
1302 | "command to run after to the measured command"), | 1180 | "command to run after to the measured command"), |
1303 | OPT_UINTEGER('I', "interval-print", &interval, | 1181 | OPT_UINTEGER('I', "interval-print", &stat_config.interval, |
1304 | "print counts at regular interval in ms (>= 100)"), | 1182 | "print counts at regular interval in ms (>= 100)"), |
1305 | OPT_SET_UINT(0, "per-socket", &aggr_mode, | 1183 | OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode, |
1306 | "aggregate counts per processor socket", AGGR_SOCKET), | 1184 | "aggregate counts per processor socket", AGGR_SOCKET), |
1307 | OPT_SET_UINT(0, "per-core", &aggr_mode, | 1185 | OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode, |
1308 | "aggregate counts per physical processor core", AGGR_CORE), | 1186 | "aggregate counts per physical processor core", AGGR_CORE), |
1309 | OPT_SET_UINT(0, "per-thread", &aggr_mode, | 1187 | OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode, |
1310 | "aggregate counts per thread", AGGR_THREAD), | 1188 | "aggregate counts per thread", AGGR_THREAD), |
1311 | OPT_UINTEGER('D', "delay", &initial_delay, | 1189 | OPT_UINTEGER('D', "delay", &initial_delay, |
1312 | "ms to wait before starting measurement after program start"), | 1190 | "ms to wait before starting measurement after program start"), |
@@ -1318,6 +1196,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1318 | }; | 1196 | }; |
1319 | int status = -EINVAL, run_idx; | 1197 | int status = -EINVAL, run_idx; |
1320 | const char *mode; | 1198 | const char *mode; |
1199 | FILE *output = stderr; | ||
1200 | unsigned int interval; | ||
1321 | 1201 | ||
1322 | setlocale(LC_ALL, ""); | 1202 | setlocale(LC_ALL, ""); |
1323 | 1203 | ||
@@ -1328,7 +1208,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1328 | argc = parse_options(argc, argv, options, stat_usage, | 1208 | argc = parse_options(argc, argv, options, stat_usage, |
1329 | PARSE_OPT_STOP_AT_NON_OPTION); | 1209 | PARSE_OPT_STOP_AT_NON_OPTION); |
1330 | 1210 | ||
1331 | output = stderr; | 1211 | interval = stat_config.interval; |
1212 | |||
1332 | if (output_name && strcmp(output_name, "-")) | 1213 | if (output_name && strcmp(output_name, "-")) |
1333 | output = NULL; | 1214 | output = NULL; |
1334 | 1215 | ||
@@ -1365,6 +1246,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1365 | } | 1246 | } |
1366 | } | 1247 | } |
1367 | 1248 | ||
1249 | stat_config.output = output; | ||
1250 | |||
1368 | if (csv_sep) { | 1251 | if (csv_sep) { |
1369 | csv_output = true; | 1252 | csv_output = true; |
1370 | if (!strcmp(csv_sep, "\\t")) | 1253 | if (!strcmp(csv_sep, "\\t")) |
@@ -1399,7 +1282,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1399 | run_count = 1; | 1282 | run_count = 1; |
1400 | } | 1283 | } |
1401 | 1284 | ||
1402 | if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { | 1285 | if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { |
1403 | fprintf(stderr, "The --per-thread option is only available " | 1286 | fprintf(stderr, "The --per-thread option is only available " |
1404 | "when monitoring via -p -t options.\n"); | 1287 | "when monitoring via -p -t options.\n"); |
1405 | parse_options_usage(NULL, options, "p", 1); | 1288 | parse_options_usage(NULL, options, "p", 1); |
@@ -1411,7 +1294,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1411 | * no_aggr, cgroup are for system-wide only | 1294 | * no_aggr, cgroup are for system-wide only |
1412 | * --per-thread is aggregated per thread, we dont mix it with cpu mode | 1295 | * --per-thread is aggregated per thread, we dont mix it with cpu mode |
1413 | */ | 1296 | */ |
1414 | if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && | 1297 | if (((stat_config.aggr_mode != AGGR_GLOBAL && |
1298 | stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) && | ||
1415 | !target__has_cpu(&target)) { | 1299 | !target__has_cpu(&target)) { |
1416 | fprintf(stderr, "both cgroup and no-aggregation " | 1300 | fprintf(stderr, "both cgroup and no-aggregation " |
1417 | "modes only available in system-wide mode\n"); | 1301 | "modes only available in system-wide mode\n"); |
@@ -1444,7 +1328,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1444 | * Initialize thread_map with comm names, | 1328 | * Initialize thread_map with comm names, |
1445 | * so we could print it out on output. | 1329 | * so we could print it out on output. |
1446 | */ | 1330 | */ |
1447 | if (aggr_mode == AGGR_THREAD) | 1331 | if (stat_config.aggr_mode == AGGR_THREAD) |
1448 | thread_map__read_comms(evsel_list->threads); | 1332 | thread_map__read_comms(evsel_list->threads); |
1449 | 1333 | ||
1450 | if (interval && interval < 100) { | 1334 | if (interval && interval < 100) { |
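
Note on the builtin-stat.c changes above: the scattered file-scope globals (scale, aggr_mode, interval, output) move into struct perf_stat_config, the open-coded per-counter aggregation is replaced by a call to perf_stat_process_counter(), and the printing helpers read the output stream from stat_config. A compact stand-alone sketch of that consolidation pattern; the field names mirror the diff, everything else is simplified.

    #include <stdio.h>
    #include <stdbool.h>

    /* Miniature of the perf_stat_config consolidation: former globals
     * become struct members, defaults are set with a designated
     * initializer, and helpers read the shared instance instead of
     * separate variables. */
    struct stat_cfg {
            int  aggr_mode;         /* stands in for enum aggr_mode */
            bool scale;
            unsigned int interval;  /* ms, 0 = no interval printing */
            FILE *output;
    };

    static struct stat_cfg cfg = {
            .aggr_mode = 0,         /* AGGR_GLOBAL in the real code */
            .scale     = true,
    };

    static void print_running(unsigned long run, unsigned long ena)
    {
            if (run != ena)
                    fprintf(cfg.output, " (%.2f%%)", 100.0 * run / ena);
    }

    int main(void)
    {
            cfg.output = stderr;    /* set late, as cmd_stat() does */
            print_running(50, 100);
            fputc('\n', cfg.output);
            return 0;
    }
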
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index ecf319728f25..8c465c83aabf 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "util/xyarray.h" | 40 | #include "util/xyarray.h" |
41 | #include "util/sort.h" | 41 | #include "util/sort.h" |
42 | #include "util/intlist.h" | 42 | #include "util/intlist.h" |
43 | #include "util/parse-branch-options.h" | ||
43 | #include "arch/common.h" | 44 | #include "arch/common.h" |
44 | 45 | ||
45 | #include "util/debug.h" | 46 | #include "util/debug.h" |
@@ -601,8 +602,8 @@ static void display_sig(int sig __maybe_unused) | |||
601 | 602 | ||
602 | static void display_setup_sig(void) | 603 | static void display_setup_sig(void) |
603 | { | 604 | { |
604 | signal(SIGSEGV, display_sig); | 605 | signal(SIGSEGV, sighandler_dump_stack); |
605 | signal(SIGFPE, display_sig); | 606 | signal(SIGFPE, sighandler_dump_stack); |
606 | signal(SIGINT, display_sig); | 607 | signal(SIGINT, display_sig); |
607 | signal(SIGQUIT, display_sig); | 608 | signal(SIGQUIT, display_sig); |
608 | signal(SIGTERM, display_sig); | 609 | signal(SIGTERM, display_sig); |
@@ -695,6 +696,8 @@ static int hist_iter__top_callback(struct hist_entry_iter *iter, | |||
695 | perf_top__record_precise_ip(top, he, evsel->idx, ip); | 696 | perf_top__record_precise_ip(top, he, evsel->idx, ip); |
696 | } | 697 | } |
697 | 698 | ||
699 | hist__account_cycles(iter->sample->branch_stack, al, iter->sample, | ||
700 | !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY)); | ||
698 | return 0; | 701 | return 0; |
699 | } | 702 | } |
700 | 703 | ||
@@ -1171,6 +1174,12 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) | |||
1171 | "don't try to adjust column width, use these fixed values"), | 1174 | "don't try to adjust column width, use these fixed values"), |
1172 | OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, | 1175 | OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, |
1173 | "per thread proc mmap processing timeout in ms"), | 1176 | "per thread proc mmap processing timeout in ms"), |
1177 | OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack, | ||
1178 | "branch any", "sample any taken branches", | ||
1179 | parse_branch_stack), | ||
1180 | OPT_CALLBACK('j', "branch-filter", &opts->branch_stack, | ||
1181 | "branch filter mask", "branch stack filter modes", | ||
1182 | parse_branch_stack), | ||
1174 | OPT_END() | 1183 | OPT_END() |
1175 | }; | 1184 | }; |
1176 | const char * const top_usage[] = { | 1185 | const char * const top_usage[] = { |
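
Note on the builtin-top.c changes above: 'perf top' gains -b/--branch-any and -j/--branch-filter handled by parse_branch_stack(), and hist__account_cycles() attributes cycles from sampled branch stacks. A rough sketch of the string-to-bitmask parsing such an option callback performs; the mode names are a small subset of the real table and the bit values are illustrative, not the PERF_SAMPLE_BRANCH_* ABI values.

    #include <stdio.h>
    #include <string.h>

    /* Illustrative name -> bit table, in the spirit of the branch_modes
     * table used by parse_branch_stack(). */
    static const struct { const char *name; unsigned int bit; } modes[] = {
            { "any",      1u << 0 },
            { "any_call", 1u << 1 },
            { "any_ret",  1u << 2 },
            { "u",        1u << 3 },
            { "k",        1u << 4 },
    };

    static int parse_branch_filter(const char *str, unsigned int *mask)
    {
            char buf[256], *tok;
            size_t i;

            snprintf(buf, sizeof(buf), "%s", str);
            for (tok = strtok(buf, ","); tok; tok = strtok(NULL, ",")) {
                    for (i = 0; i < sizeof(modes) / sizeof(modes[0]); i++)
                            if (!strcmp(tok, modes[i].name))
                                    break;
                    if (i == sizeof(modes) / sizeof(modes[0]))
                            return -1;      /* unknown mode name */
                    *mask |= modes[i].bit;
            }
            return 0;
    }

    int main(void)
    {
            unsigned int mask = 0;

            if (parse_branch_filter("any_call,u", &mask) == 0)
                    printf("mask=0x%x\n", mask);     /* mask=0xa */
            return 0;
    }
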
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 39ad4d0ca884..4e3abba03062 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -1,8 +1,27 @@ | |||
1 | /* | ||
2 | * builtin-trace.c | ||
3 | * | ||
4 | * Builtin 'trace' command: | ||
5 | * | ||
6 | * Display a continuously updated trace of any workload, CPU, specific PID, | ||
7 | * system wide, etc. Default format is loosely strace like, but any other | ||
8 | * event may be specified using --event. | ||
9 | * | ||
10 | * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com> | ||
11 | * | ||
12 | * Initially based on the 'trace' prototype by Thomas Gleixner: | ||
13 | * | ||
14 | * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'") | ||
15 | * | ||
16 | * Released under the GPL v2. (and only v2, not any later version) | ||
17 | */ | ||
18 | |||
1 | #include <traceevent/event-parse.h> | 19 | #include <traceevent/event-parse.h> |
2 | #include "builtin.h" | 20 | #include "builtin.h" |
3 | #include "util/color.h" | 21 | #include "util/color.h" |
4 | #include "util/debug.h" | 22 | #include "util/debug.h" |
5 | #include "util/evlist.h" | 23 | #include "util/evlist.h" |
24 | #include "util/exec_cmd.h" | ||
6 | #include "util/machine.h" | 25 | #include "util/machine.h" |
7 | #include "util/session.h" | 26 | #include "util/session.h" |
8 | #include "util/thread.h" | 27 | #include "util/thread.h" |
@@ -26,6 +45,7 @@ | |||
26 | 45 | ||
27 | #ifndef MADV_HWPOISON | 46 | #ifndef MADV_HWPOISON |
28 | # define MADV_HWPOISON 100 | 47 | # define MADV_HWPOISON 100 |
48 | |||
29 | #endif | 49 | #endif |
30 | 50 | ||
31 | #ifndef MADV_MERGEABLE | 51 | #ifndef MADV_MERGEABLE |
@@ -247,42 +267,6 @@ out_delete: | |||
247 | ({ struct syscall_tp *fields = evsel->priv; \ | 267 | ({ struct syscall_tp *fields = evsel->priv; \ |
248 | fields->name.pointer(&fields->name, sample); }) | 268 | fields->name.pointer(&fields->name, sample); }) |
249 | 269 | ||
250 | static int perf_evlist__add_syscall_newtp(struct perf_evlist *evlist, | ||
251 | void *sys_enter_handler, | ||
252 | void *sys_exit_handler) | ||
253 | { | ||
254 | int ret = -1; | ||
255 | struct perf_evsel *sys_enter, *sys_exit; | ||
256 | |||
257 | sys_enter = perf_evsel__syscall_newtp("sys_enter", sys_enter_handler); | ||
258 | if (sys_enter == NULL) | ||
259 | goto out; | ||
260 | |||
261 | if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) | ||
262 | goto out_delete_sys_enter; | ||
263 | |||
264 | sys_exit = perf_evsel__syscall_newtp("sys_exit", sys_exit_handler); | ||
265 | if (sys_exit == NULL) | ||
266 | goto out_delete_sys_enter; | ||
267 | |||
268 | if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) | ||
269 | goto out_delete_sys_exit; | ||
270 | |||
271 | perf_evlist__add(evlist, sys_enter); | ||
272 | perf_evlist__add(evlist, sys_exit); | ||
273 | |||
274 | ret = 0; | ||
275 | out: | ||
276 | return ret; | ||
277 | |||
278 | out_delete_sys_exit: | ||
279 | perf_evsel__delete_priv(sys_exit); | ||
280 | out_delete_sys_enter: | ||
281 | perf_evsel__delete_priv(sys_enter); | ||
282 | goto out; | ||
283 | } | ||
284 | |||
285 | |||
286 | struct syscall_arg { | 270 | struct syscall_arg { |
287 | unsigned long val; | 271 | unsigned long val; |
288 | struct thread *thread; | 272 | struct thread *thread; |
@@ -604,6 +588,15 @@ static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1); | |||
604 | static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; | 588 | static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", }; |
605 | static DEFINE_STRARRAY(itimers); | 589 | static DEFINE_STRARRAY(itimers); |
606 | 590 | ||
591 | static const char *keyctl_options[] = { | ||
592 | "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN", | ||
593 | "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ", | ||
594 | "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT", | ||
595 | "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT", | ||
596 | "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT", | ||
597 | }; | ||
598 | static DEFINE_STRARRAY(keyctl_options); | ||
599 | |||
607 | static const char *whences[] = { "SET", "CUR", "END", | 600 | static const char *whences[] = { "SET", "CUR", "END", |
608 | #ifdef SEEK_DATA | 601 | #ifdef SEEK_DATA |
609 | "DATA", | 602 | "DATA", |
@@ -634,7 +627,8 @@ static DEFINE_STRARRAY(sighow); | |||
634 | 627 | ||
635 | static const char *clockid[] = { | 628 | static const char *clockid[] = { |
636 | "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", | 629 | "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID", |
637 | "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", | 630 | "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME", |
631 | "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI" | ||
638 | }; | 632 | }; |
639 | static DEFINE_STRARRAY(clockid); | 633 | static DEFINE_STRARRAY(clockid); |
640 | 634 | ||
@@ -779,6 +773,11 @@ static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size, | |||
779 | 773 | ||
780 | #define SCA_ACCMODE syscall_arg__scnprintf_access_mode | 774 | #define SCA_ACCMODE syscall_arg__scnprintf_access_mode |
781 | 775 | ||
776 | static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, | ||
777 | struct syscall_arg *arg); | ||
778 | |||
779 | #define SCA_FILENAME syscall_arg__scnprintf_filename | ||
780 | |||
782 | static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, | 781 | static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, |
783 | struct syscall_arg *arg) | 782 | struct syscall_arg *arg) |
784 | { | 783 | { |
@@ -1006,14 +1005,23 @@ static struct syscall_fmt { | |||
1006 | bool hexret; | 1005 | bool hexret; |
1007 | } syscall_fmts[] = { | 1006 | } syscall_fmts[] = { |
1008 | { .name = "access", .errmsg = true, | 1007 | { .name = "access", .errmsg = true, |
1009 | .arg_scnprintf = { [1] = SCA_ACCMODE, /* mode */ }, }, | 1008 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ |
1009 | [1] = SCA_ACCMODE, /* mode */ }, }, | ||
1010 | { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, | 1010 | { .name = "arch_prctl", .errmsg = true, .alias = "prctl", }, |
1011 | { .name = "brk", .hexret = true, | 1011 | { .name = "brk", .hexret = true, |
1012 | .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, | 1012 | .arg_scnprintf = { [0] = SCA_HEX, /* brk */ }, }, |
1013 | { .name = "chdir", .errmsg = true, | ||
1014 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1015 | { .name = "chmod", .errmsg = true, | ||
1016 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1017 | { .name = "chroot", .errmsg = true, | ||
1018 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1013 | { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, | 1019 | { .name = "clock_gettime", .errmsg = true, STRARRAY(0, clk_id, clockid), }, |
1014 | { .name = "close", .errmsg = true, | 1020 | { .name = "close", .errmsg = true, |
1015 | .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, | 1021 | .arg_scnprintf = { [0] = SCA_CLOSE_FD, /* fd */ }, }, |
1016 | { .name = "connect", .errmsg = true, }, | 1022 | { .name = "connect", .errmsg = true, }, |
1023 | { .name = "creat", .errmsg = true, | ||
1024 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1017 | { .name = "dup", .errmsg = true, | 1025 | { .name = "dup", .errmsg = true, |
1018 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1026 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1019 | { .name = "dup2", .errmsg = true, | 1027 | { .name = "dup2", .errmsg = true, |
@@ -1024,7 +1032,8 @@ static struct syscall_fmt { | |||
1024 | { .name = "eventfd2", .errmsg = true, | 1032 | { .name = "eventfd2", .errmsg = true, |
1025 | .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, | 1033 | .arg_scnprintf = { [1] = SCA_EFD_FLAGS, /* flags */ }, }, |
1026 | { .name = "faccessat", .errmsg = true, | 1034 | { .name = "faccessat", .errmsg = true, |
1027 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1035 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1036 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1028 | { .name = "fadvise64", .errmsg = true, | 1037 | { .name = "fadvise64", .errmsg = true, |
1029 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1038 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1030 | { .name = "fallocate", .errmsg = true, | 1039 | { .name = "fallocate", .errmsg = true, |
@@ -1034,11 +1043,13 @@ static struct syscall_fmt { | |||
1034 | { .name = "fchmod", .errmsg = true, | 1043 | { .name = "fchmod", .errmsg = true, |
1035 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1044 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1036 | { .name = "fchmodat", .errmsg = true, | 1045 | { .name = "fchmodat", .errmsg = true, |
1037 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, | 1046 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ |
1047 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1038 | { .name = "fchown", .errmsg = true, | 1048 | { .name = "fchown", .errmsg = true, |
1039 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1049 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1040 | { .name = "fchownat", .errmsg = true, | 1050 | { .name = "fchownat", .errmsg = true, |
1041 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, | 1051 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ |
1052 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1042 | { .name = "fcntl", .errmsg = true, | 1053 | { .name = "fcntl", .errmsg = true, |
1043 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ | 1054 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1044 | [1] = SCA_STRARRAY, /* cmd */ }, | 1055 | [1] = SCA_STRARRAY, /* cmd */ }, |
@@ -1053,7 +1064,8 @@ static struct syscall_fmt { | |||
1053 | { .name = "fstat", .errmsg = true, .alias = "newfstat", | 1064 | { .name = "fstat", .errmsg = true, .alias = "newfstat", |
1054 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1065 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1055 | { .name = "fstatat", .errmsg = true, .alias = "newfstatat", | 1066 | { .name = "fstatat", .errmsg = true, .alias = "newfstatat", |
1056 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1067 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1068 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1057 | { .name = "fstatfs", .errmsg = true, | 1069 | { .name = "fstatfs", .errmsg = true, |
1058 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1070 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1059 | { .name = "fsync", .errmsg = true, | 1071 | { .name = "fsync", .errmsg = true, |
@@ -1063,13 +1075,18 @@ static struct syscall_fmt { | |||
1063 | { .name = "futex", .errmsg = true, | 1075 | { .name = "futex", .errmsg = true, |
1064 | .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, | 1076 | .arg_scnprintf = { [1] = SCA_FUTEX_OP, /* op */ }, }, |
1065 | { .name = "futimesat", .errmsg = true, | 1077 | { .name = "futimesat", .errmsg = true, |
1066 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, | 1078 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ |
1079 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1067 | { .name = "getdents", .errmsg = true, | 1080 | { .name = "getdents", .errmsg = true, |
1068 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1081 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1069 | { .name = "getdents64", .errmsg = true, | 1082 | { .name = "getdents64", .errmsg = true, |
1070 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1083 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1071 | { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, | 1084 | { .name = "getitimer", .errmsg = true, STRARRAY(0, which, itimers), }, |
1072 | { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, | 1085 | { .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, |
1086 | { .name = "getxattr", .errmsg = true, | ||
1087 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1088 | { .name = "inotify_add_watch", .errmsg = true, | ||
1089 | .arg_scnprintf = { [1] = SCA_FILENAME, /* pathname */ }, }, | ||
1073 | { .name = "ioctl", .errmsg = true, | 1090 | { .name = "ioctl", .errmsg = true, |
1074 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ | 1091 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1075 | #if defined(__i386__) || defined(__x86_64__) | 1092 | #if defined(__i386__) || defined(__x86_64__) |
@@ -1082,22 +1099,44 @@ static struct syscall_fmt { | |||
1082 | #else | 1099 | #else |
1083 | [2] = SCA_HEX, /* arg */ }, }, | 1100 | [2] = SCA_HEX, /* arg */ }, }, |
1084 | #endif | 1101 | #endif |
1102 | { .name = "keyctl", .errmsg = true, STRARRAY(0, option, keyctl_options), }, | ||
1085 | { .name = "kill", .errmsg = true, | 1103 | { .name = "kill", .errmsg = true, |
1086 | .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, | 1104 | .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, |
1105 | { .name = "lchown", .errmsg = true, | ||
1106 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1107 | { .name = "lgetxattr", .errmsg = true, | ||
1108 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1087 | { .name = "linkat", .errmsg = true, | 1109 | { .name = "linkat", .errmsg = true, |
1088 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, | 1110 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, |
1111 | { .name = "listxattr", .errmsg = true, | ||
1112 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1113 | { .name = "llistxattr", .errmsg = true, | ||
1114 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1115 | { .name = "lremovexattr", .errmsg = true, | ||
1116 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1089 | { .name = "lseek", .errmsg = true, | 1117 | { .name = "lseek", .errmsg = true, |
1090 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ | 1118 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1091 | [2] = SCA_STRARRAY, /* whence */ }, | 1119 | [2] = SCA_STRARRAY, /* whence */ }, |
1092 | .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, | 1120 | .arg_parm = { [2] = &strarray__whences, /* whence */ }, }, |
1093 | { .name = "lstat", .errmsg = true, .alias = "newlstat", }, | 1121 | { .name = "lsetxattr", .errmsg = true, |
1122 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1123 | { .name = "lstat", .errmsg = true, .alias = "newlstat", | ||
1124 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1125 | { .name = "lsxattr", .errmsg = true, | ||
1126 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1094 | { .name = "madvise", .errmsg = true, | 1127 | { .name = "madvise", .errmsg = true, |
1095 | .arg_scnprintf = { [0] = SCA_HEX, /* start */ | 1128 | .arg_scnprintf = { [0] = SCA_HEX, /* start */ |
1096 | [2] = SCA_MADV_BHV, /* behavior */ }, }, | 1129 | [2] = SCA_MADV_BHV, /* behavior */ }, }, |
1130 | { .name = "mkdir", .errmsg = true, | ||
1131 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1097 | { .name = "mkdirat", .errmsg = true, | 1132 | { .name = "mkdirat", .errmsg = true, |
1098 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, | 1133 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ |
1134 | [1] = SCA_FILENAME, /* pathname */ }, }, | ||
1135 | { .name = "mknod", .errmsg = true, | ||
1136 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1099 | { .name = "mknodat", .errmsg = true, | 1137 | { .name = "mknodat", .errmsg = true, |
1100 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ }, }, | 1138 | .arg_scnprintf = { [0] = SCA_FDAT, /* fd */ |
1139 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1101 | { .name = "mlock", .errmsg = true, | 1140 | { .name = "mlock", .errmsg = true, |
1102 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, | 1141 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ }, }, |
1103 | { .name = "mlockall", .errmsg = true, | 1142 | { .name = "mlockall", .errmsg = true, |
@@ -1110,6 +1149,8 @@ static struct syscall_fmt { | |||
1110 | { .name = "mprotect", .errmsg = true, | 1149 | { .name = "mprotect", .errmsg = true, |
1111 | .arg_scnprintf = { [0] = SCA_HEX, /* start */ | 1150 | .arg_scnprintf = { [0] = SCA_HEX, /* start */ |
1112 | [2] = SCA_MMAP_PROT, /* prot */ }, }, | 1151 | [2] = SCA_MMAP_PROT, /* prot */ }, }, |
1152 | { .name = "mq_unlink", .errmsg = true, | ||
1153 | .arg_scnprintf = { [0] = SCA_FILENAME, /* u_name */ }, }, | ||
1113 | { .name = "mremap", .hexret = true, | 1154 | { .name = "mremap", .hexret = true, |
1114 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ | 1155 | .arg_scnprintf = { [0] = SCA_HEX, /* addr */ |
1115 | [3] = SCA_MREMAP_FLAGS, /* flags */ | 1156 | [3] = SCA_MREMAP_FLAGS, /* flags */ |
@@ -1121,14 +1162,17 @@ static struct syscall_fmt { | |||
1121 | { .name = "name_to_handle_at", .errmsg = true, | 1162 | { .name = "name_to_handle_at", .errmsg = true, |
1122 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1163 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, |
1123 | { .name = "newfstatat", .errmsg = true, | 1164 | { .name = "newfstatat", .errmsg = true, |
1124 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1165 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1166 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1125 | { .name = "open", .errmsg = true, | 1167 | { .name = "open", .errmsg = true, |
1126 | .arg_scnprintf = { [1] = SCA_OPEN_FLAGS, /* flags */ }, }, | 1168 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ |
1169 | [1] = SCA_OPEN_FLAGS, /* flags */ }, }, | ||
1127 | { .name = "open_by_handle_at", .errmsg = true, | 1170 | { .name = "open_by_handle_at", .errmsg = true, |
1128 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ | 1171 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1129 | [2] = SCA_OPEN_FLAGS, /* flags */ }, }, | 1172 | [2] = SCA_OPEN_FLAGS, /* flags */ }, }, |
1130 | { .name = "openat", .errmsg = true, | 1173 | { .name = "openat", .errmsg = true, |
1131 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ | 1174 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1175 | [1] = SCA_FILENAME, /* filename */ | ||
1132 | [2] = SCA_OPEN_FLAGS, /* flags */ }, }, | 1176 | [2] = SCA_OPEN_FLAGS, /* flags */ }, }, |
1133 | { .name = "perf_event_open", .errmsg = true, | 1177 | { .name = "perf_event_open", .errmsg = true, |
1134 | .arg_scnprintf = { [1] = SCA_INT, /* pid */ | 1178 | .arg_scnprintf = { [1] = SCA_INT, /* pid */ |
@@ -1150,18 +1194,28 @@ static struct syscall_fmt { | |||
1150 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1194 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1151 | { .name = "read", .errmsg = true, | 1195 | { .name = "read", .errmsg = true, |
1152 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1196 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1197 | { .name = "readlink", .errmsg = true, | ||
1198 | .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, | ||
1153 | { .name = "readlinkat", .errmsg = true, | 1199 | { .name = "readlinkat", .errmsg = true, |
1154 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1200 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1201 | [1] = SCA_FILENAME, /* pathname */ }, }, | ||
1155 | { .name = "readv", .errmsg = true, | 1202 | { .name = "readv", .errmsg = true, |
1156 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1203 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1157 | { .name = "recvfrom", .errmsg = true, | 1204 | { .name = "recvfrom", .errmsg = true, |
1158 | .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, | 1205 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1206 | [3] = SCA_MSG_FLAGS, /* flags */ }, }, | ||
1159 | { .name = "recvmmsg", .errmsg = true, | 1207 | { .name = "recvmmsg", .errmsg = true, |
1160 | .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, | 1208 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1209 | [3] = SCA_MSG_FLAGS, /* flags */ }, }, | ||
1161 | { .name = "recvmsg", .errmsg = true, | 1210 | { .name = "recvmsg", .errmsg = true, |
1162 | .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, | 1211 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1212 | [2] = SCA_MSG_FLAGS, /* flags */ }, }, | ||
1213 | { .name = "removexattr", .errmsg = true, | ||
1214 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1163 | { .name = "renameat", .errmsg = true, | 1215 | { .name = "renameat", .errmsg = true, |
1164 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1216 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, |
1217 | { .name = "rmdir", .errmsg = true, | ||
1218 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1165 | { .name = "rt_sigaction", .errmsg = true, | 1219 | { .name = "rt_sigaction", .errmsg = true, |
1166 | .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, | 1220 | .arg_scnprintf = { [0] = SCA_SIGNUM, /* sig */ }, }, |
1167 | { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, | 1221 | { .name = "rt_sigprocmask", .errmsg = true, STRARRAY(0, how, sighow), }, |
@@ -1171,13 +1225,18 @@ static struct syscall_fmt { | |||
1171 | .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, | 1225 | .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, |
1172 | { .name = "select", .errmsg = true, .timeout = true, }, | 1226 | { .name = "select", .errmsg = true, .timeout = true, }, |
1173 | { .name = "sendmmsg", .errmsg = true, | 1227 | { .name = "sendmmsg", .errmsg = true, |
1174 | .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, | 1228 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1229 | [3] = SCA_MSG_FLAGS, /* flags */ }, }, | ||
1175 | { .name = "sendmsg", .errmsg = true, | 1230 | { .name = "sendmsg", .errmsg = true, |
1176 | .arg_scnprintf = { [2] = SCA_MSG_FLAGS, /* flags */ }, }, | 1231 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1232 | [2] = SCA_MSG_FLAGS, /* flags */ }, }, | ||
1177 | { .name = "sendto", .errmsg = true, | 1233 | { .name = "sendto", .errmsg = true, |
1178 | .arg_scnprintf = { [3] = SCA_MSG_FLAGS, /* flags */ }, }, | 1234 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ |
1235 | [3] = SCA_MSG_FLAGS, /* flags */ }, }, | ||
1179 | { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, | 1236 | { .name = "setitimer", .errmsg = true, STRARRAY(0, which, itimers), }, |
1180 | { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, | 1237 | { .name = "setrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), }, |
1238 | { .name = "setxattr", .errmsg = true, | ||
1239 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1181 | { .name = "shutdown", .errmsg = true, | 1240 | { .name = "shutdown", .errmsg = true, |
1182 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1241 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1183 | { .name = "socket", .errmsg = true, | 1242 | { .name = "socket", .errmsg = true, |
@@ -1188,18 +1247,35 @@ static struct syscall_fmt { | |||
1188 | .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ | 1247 | .arg_scnprintf = { [0] = SCA_STRARRAY, /* family */ |
1189 | [1] = SCA_SK_TYPE, /* type */ }, | 1248 | [1] = SCA_SK_TYPE, /* type */ }, |
1190 | .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, | 1249 | .arg_parm = { [0] = &strarray__socket_families, /* family */ }, }, |
1191 | { .name = "stat", .errmsg = true, .alias = "newstat", }, | 1250 | { .name = "stat", .errmsg = true, .alias = "newstat", |
1251 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1252 | { .name = "statfs", .errmsg = true, | ||
1253 | .arg_scnprintf = { [0] = SCA_FILENAME, /* pathname */ }, }, | ||
1254 | { .name = "swapoff", .errmsg = true, | ||
1255 | .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, | ||
1256 | { .name = "swapon", .errmsg = true, | ||
1257 | .arg_scnprintf = { [0] = SCA_FILENAME, /* specialfile */ }, }, | ||
1192 | { .name = "symlinkat", .errmsg = true, | 1258 | { .name = "symlinkat", .errmsg = true, |
1193 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1259 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, |
1194 | { .name = "tgkill", .errmsg = true, | 1260 | { .name = "tgkill", .errmsg = true, |
1195 | .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, | 1261 | .arg_scnprintf = { [2] = SCA_SIGNUM, /* sig */ }, }, |
1196 | { .name = "tkill", .errmsg = true, | 1262 | { .name = "tkill", .errmsg = true, |
1197 | .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, | 1263 | .arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, }, |
1264 | { .name = "truncate", .errmsg = true, | ||
1265 | .arg_scnprintf = { [0] = SCA_FILENAME, /* path */ }, }, | ||
1198 | { .name = "uname", .errmsg = true, .alias = "newuname", }, | 1266 | { .name = "uname", .errmsg = true, .alias = "newuname", }, |
1199 | { .name = "unlinkat", .errmsg = true, | 1267 | { .name = "unlinkat", .errmsg = true, |
1200 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ }, }, | 1268 | .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ |
1269 | [1] = SCA_FILENAME, /* pathname */ }, }, | ||
1270 | { .name = "utime", .errmsg = true, | ||
1271 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1201 | { .name = "utimensat", .errmsg = true, | 1272 | { .name = "utimensat", .errmsg = true, |
1202 | .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ }, }, | 1273 | .arg_scnprintf = { [0] = SCA_FDAT, /* dirfd */ |
1274 | [1] = SCA_FILENAME, /* filename */ }, }, | ||
1275 | { .name = "utimes", .errmsg = true, | ||
1276 | .arg_scnprintf = { [0] = SCA_FILENAME, /* filename */ }, }, | ||
1277 | { .name = "vmsplice", .errmsg = true, | ||
1278 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | ||
1203 | { .name = "write", .errmsg = true, | 1279 | { .name = "write", .errmsg = true, |
1204 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, | 1280 | .arg_scnprintf = { [0] = SCA_FD, /* fd */ }, }, |
1205 | { .name = "writev", .errmsg = true, | 1281 | { .name = "writev", .errmsg = true, |
@@ -1223,7 +1299,6 @@ struct syscall { | |||
1223 | int nr_args; | 1299 | int nr_args; |
1224 | struct format_field *args; | 1300 | struct format_field *args; |
1225 | const char *name; | 1301 | const char *name; |
1226 | bool filtered; | ||
1227 | bool is_exit; | 1302 | bool is_exit; |
1228 | struct syscall_fmt *fmt; | 1303 | struct syscall_fmt *fmt; |
1229 | size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); | 1304 | size_t (**arg_scnprintf)(char *bf, size_t size, struct syscall_arg *arg); |
@@ -1244,6 +1319,11 @@ static size_t fprintf_duration(unsigned long t, FILE *fp) | |||
1244 | return printed + fprintf(fp, "): "); | 1319 | return printed + fprintf(fp, "): "); |
1245 | } | 1320 | } |
1246 | 1321 | ||
1322 | /** | ||
1323 | * filename.ptr: The filename char pointer that will be vfs_getname'd | ||
1324 | * filename.entry_str_pos: Where to insert the string translated from | ||
1325 | * filename.ptr by the vfs_getname tracepoint/kprobe. | ||
1326 | */ | ||
1247 | struct thread_trace { | 1327 | struct thread_trace { |
1248 | u64 entry_time; | 1328 | u64 entry_time; |
1249 | u64 exit_time; | 1329 | u64 exit_time; |
@@ -1252,6 +1332,13 @@ struct thread_trace { | |||
1252 | unsigned long pfmaj, pfmin; | 1332 | unsigned long pfmaj, pfmin; |
1253 | char *entry_str; | 1333 | char *entry_str; |
1254 | double runtime_ms; | 1334 | double runtime_ms; |
1335 | struct { | ||
1336 | unsigned long ptr; | ||
1337 | short int entry_str_pos; | ||
1338 | bool pending_open; | ||
1339 | unsigned int namelen; | ||
1340 | char *name; | ||
1341 | } filename; | ||
1255 | struct { | 1342 | struct { |
1256 | int max; | 1343 | int max; |
1257 | char **table; | 1344 | char **table; |
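The new per-thread filename state remembers two things between samples: the raw userspace pointer seen at sys_enter (filename.ptr) and the offset inside entry_str where the resolved path must later be spliced in (entry_str_pos, computed as bf - entry_str by the formatter). A minimal stand-alone illustration of that offset bookkeeping, with made-up names rather than the perf structs:

    #include <stdio.h>

    /* Toy stand-in for ttrace->entry_str plus the filename bookkeeping. */
    struct toy_filename_state {
        unsigned long ptr;      /* userspace pointer still to be resolved */
        short entry_str_pos;    /* where the resolved name will be inserted */
    };

    int main(void)
    {
        char entry_str[64];
        struct toy_filename_state st = { 0 };

        /* sys_enter: print "open(" and note where the filename should go. */
        int printed = snprintf(entry_str, sizeof(entry_str), "open(");
        st.ptr = 0x7ffdc0de;        /* raw pointer from the tracepoint */
        st.entry_str_pos = printed; /* == bf - entry_str in the real code */
        printed += snprintf(entry_str + printed, sizeof(entry_str) - printed,
                            ", O_RDONLY)");

        printf("before vfs_getname: %s (name goes at offset %d)\n",
               entry_str, st.entry_str_pos);
        return 0;
    }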
@@ -1298,6 +1385,8 @@ fail: | |||
1298 | #define TRACE_PFMAJ (1 << 0) | 1385 | #define TRACE_PFMAJ (1 << 0) |
1299 | #define TRACE_PFMIN (1 << 1) | 1386 | #define TRACE_PFMIN (1 << 1) |
1300 | 1387 | ||
1388 | static const size_t trace__entry_str_size = 2048; | ||
1389 | |||
1301 | struct trace { | 1390 | struct trace { |
1302 | struct perf_tool tool; | 1391 | struct perf_tool tool; |
1303 | struct { | 1392 | struct { |
@@ -1307,6 +1396,10 @@ struct trace { | |||
1307 | struct { | 1396 | struct { |
1308 | int max; | 1397 | int max; |
1309 | struct syscall *table; | 1398 | struct syscall *table; |
1399 | struct { | ||
1400 | struct perf_evsel *sys_enter, | ||
1401 | *sys_exit; | ||
1402 | } events; | ||
1310 | } syscalls; | 1403 | } syscalls; |
1311 | struct record_opts opts; | 1404 | struct record_opts opts; |
1312 | struct perf_evlist *evlist; | 1405 | struct perf_evlist *evlist; |
@@ -1316,7 +1409,10 @@ struct trace { | |||
1316 | FILE *output; | 1409 | FILE *output; |
1317 | unsigned long nr_events; | 1410 | unsigned long nr_events; |
1318 | struct strlist *ev_qualifier; | 1411 | struct strlist *ev_qualifier; |
1319 | const char *last_vfs_getname; | 1412 | struct { |
1413 | size_t nr; | ||
1414 | int *entries; | ||
1415 | } ev_qualifier_ids; | ||
1320 | struct intlist *tid_list; | 1416 | struct intlist *tid_list; |
1321 | struct intlist *pid_list; | 1417 | struct intlist *pid_list; |
1322 | struct { | 1418 | struct { |
@@ -1340,6 +1436,7 @@ struct trace { | |||
1340 | bool show_tool_stats; | 1436 | bool show_tool_stats; |
1341 | bool trace_syscalls; | 1437 | bool trace_syscalls; |
1342 | bool force; | 1438 | bool force; |
1439 | bool vfs_getname; | ||
1343 | int trace_pgfaults; | 1440 | int trace_pgfaults; |
1344 | }; | 1441 | }; |
1345 | 1442 | ||
@@ -1443,6 +1540,27 @@ static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size, | |||
1443 | return printed; | 1540 | return printed; |
1444 | } | 1541 | } |
1445 | 1542 | ||
1543 | static void thread__set_filename_pos(struct thread *thread, const char *bf, | ||
1544 | unsigned long ptr) | ||
1545 | { | ||
1546 | struct thread_trace *ttrace = thread__priv(thread); | ||
1547 | |||
1548 | ttrace->filename.ptr = ptr; | ||
1549 | ttrace->filename.entry_str_pos = bf - ttrace->entry_str; | ||
1550 | } | ||
1551 | |||
1552 | static size_t syscall_arg__scnprintf_filename(char *bf, size_t size, | ||
1553 | struct syscall_arg *arg) | ||
1554 | { | ||
1555 | unsigned long ptr = arg->val; | ||
1556 | |||
1557 | if (!arg->trace->vfs_getname) | ||
1558 | return scnprintf(bf, size, "%#x", ptr); | ||
1559 | |||
1560 | thread__set_filename_pos(arg->thread, bf, ptr); | ||
1561 | return 0; | ||
1562 | } | ||
1563 | |||
1446 | static bool trace__filter_duration(struct trace *trace, double t) | 1564 | static bool trace__filter_duration(struct trace *trace, double t) |
1447 | { | 1565 | { |
1448 | return t < (trace->duration_filter * NSEC_PER_MSEC); | 1566 | return t < (trace->duration_filter * NSEC_PER_MSEC); |
@@ -1517,6 +1635,9 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) | |||
1517 | if (trace->host == NULL) | 1635 | if (trace->host == NULL) |
1518 | return -ENOMEM; | 1636 | return -ENOMEM; |
1519 | 1637 | ||
1638 | if (trace_event__register_resolver(trace->host, machine__resolve_kernel_addr) < 0) | ||
1639 | return -errno; | ||
1640 | |||
1520 | err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, | 1641 | err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, |
1521 | evlist->threads, trace__tool_process, false, | 1642 | evlist->threads, trace__tool_process, false, |
1522 | trace->opts.proc_map_timeout); | 1643 | trace->opts.proc_map_timeout); |
@@ -1578,19 +1699,6 @@ static int trace__read_syscall_info(struct trace *trace, int id) | |||
1578 | sc = trace->syscalls.table + id; | 1699 | sc = trace->syscalls.table + id; |
1579 | sc->name = name; | 1700 | sc->name = name; |
1580 | 1701 | ||
1581 | if (trace->ev_qualifier) { | ||
1582 | bool in = strlist__find(trace->ev_qualifier, name) != NULL; | ||
1583 | |||
1584 | if (!(in ^ trace->not_ev_qualifier)) { | ||
1585 | sc->filtered = true; | ||
1586 | /* | ||
1587 | * No need to do read tracepoint information since this will be | ||
1588 | * filtered out. | ||
1589 | */ | ||
1590 | return 0; | ||
1591 | } | ||
1592 | } | ||
1593 | |||
1594 | sc->fmt = syscall_fmt__find(sc->name); | 1702 | sc->fmt = syscall_fmt__find(sc->name); |
1595 | 1703 | ||
1596 | snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); | 1704 | snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name); |
@@ -1619,13 +1727,27 @@ static int trace__read_syscall_info(struct trace *trace, int id) | |||
1619 | 1727 | ||
1620 | static int trace__validate_ev_qualifier(struct trace *trace) | 1728 | static int trace__validate_ev_qualifier(struct trace *trace) |
1621 | { | 1729 | { |
1622 | int err = 0; | 1730 | int err = 0, i; |
1623 | struct str_node *pos; | 1731 | struct str_node *pos; |
1624 | 1732 | ||
1733 | trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier); | ||
1734 | trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr * | ||
1735 | sizeof(trace->ev_qualifier_ids.entries[0])); | ||
1736 | |||
1737 | if (trace->ev_qualifier_ids.entries == NULL) { | ||
1738 | fputs("Error:\tNot enough memory for allocating events qualifier ids\n", | ||
1739 | trace->output); | ||
1740 | err = -EINVAL; | ||
1741 | goto out; | ||
1742 | } | ||
1743 | |||
1744 | i = 0; | ||
1745 | |||
1625 | strlist__for_each(pos, trace->ev_qualifier) { | 1746 | strlist__for_each(pos, trace->ev_qualifier) { |
1626 | const char *sc = pos->s; | 1747 | const char *sc = pos->s; |
1748 | int id = audit_name_to_syscall(sc, trace->audit.machine); | ||
1627 | 1749 | ||
1628 | if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { | 1750 | if (id < 0) { |
1629 | if (err == 0) { | 1751 | if (err == 0) { |
1630 | fputs("Error:\tInvalid syscall ", trace->output); | 1752 | fputs("Error:\tInvalid syscall ", trace->output); |
1631 | err = -EINVAL; | 1753 | err = -EINVAL; |
@@ -1635,13 +1757,17 @@ static int trace__validate_ev_qualifier(struct trace *trace) | |||
1635 | 1757 | ||
1636 | fputs(sc, trace->output); | 1758 | fputs(sc, trace->output); |
1637 | } | 1759 | } |
1760 | |||
1761 | trace->ev_qualifier_ids.entries[i++] = id; | ||
1638 | } | 1762 | } |
1639 | 1763 | ||
1640 | if (err < 0) { | 1764 | if (err < 0) { |
1641 | fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" | 1765 | fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" |
1642 | "\nHint:\tand: 'man syscalls'\n", trace->output); | 1766 | "\nHint:\tand: 'man syscalls'\n", trace->output); |
1767 | zfree(&trace->ev_qualifier_ids.entries); | ||
1768 | trace->ev_qualifier_ids.nr = 0; | ||
1643 | } | 1769 | } |
1644 | 1770 | out: | |
1645 | return err; | 1771 | return err; |
1646 | } | 1772 | } |
1647 | 1773 | ||
@@ -1833,9 +1959,6 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |||
1833 | if (sc == NULL) | 1959 | if (sc == NULL) |
1834 | return -1; | 1960 | return -1; |
1835 | 1961 | ||
1836 | if (sc->filtered) | ||
1837 | return 0; | ||
1838 | |||
1839 | thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); | 1962 | thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); |
1840 | ttrace = thread__trace(thread, trace->output); | 1963 | ttrace = thread__trace(thread, trace->output); |
1841 | if (ttrace == NULL) | 1964 | if (ttrace == NULL) |
@@ -1844,7 +1967,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |||
1844 | args = perf_evsel__sc_tp_ptr(evsel, args, sample); | 1967 | args = perf_evsel__sc_tp_ptr(evsel, args, sample); |
1845 | 1968 | ||
1846 | if (ttrace->entry_str == NULL) { | 1969 | if (ttrace->entry_str == NULL) { |
1847 | ttrace->entry_str = malloc(1024); | 1970 | ttrace->entry_str = malloc(trace__entry_str_size); |
1848 | if (!ttrace->entry_str) | 1971 | if (!ttrace->entry_str) |
1849 | goto out_put; | 1972 | goto out_put; |
1850 | } | 1973 | } |
@@ -1854,9 +1977,9 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |||
1854 | 1977 | ||
1855 | ttrace->entry_time = sample->time; | 1978 | ttrace->entry_time = sample->time; |
1856 | msg = ttrace->entry_str; | 1979 | msg = ttrace->entry_str; |
1857 | printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); | 1980 | printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name); |
1858 | 1981 | ||
1859 | printed += syscall__scnprintf_args(sc, msg + printed, 1024 - printed, | 1982 | printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed, |
1860 | args, trace, thread); | 1983 | args, trace, thread); |
1861 | 1984 | ||
1862 | if (sc->is_exit) { | 1985 | if (sc->is_exit) { |
@@ -1864,8 +1987,11 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, | |||
1864 | trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); | 1987 | trace__fprintf_entry_head(trace, thread, 1, sample->time, trace->output); |
1865 | fprintf(trace->output, "%-70s\n", ttrace->entry_str); | 1988 | fprintf(trace->output, "%-70s\n", ttrace->entry_str); |
1866 | } | 1989 | } |
1867 | } else | 1990 | } else { |
1868 | ttrace->entry_pending = true; | 1991 | ttrace->entry_pending = true; |
1992 | /* See trace__vfs_getname & trace__sys_exit */ | ||
1993 | ttrace->filename.pending_open = false; | ||
1994 | } | ||
1869 | 1995 | ||
1870 | if (trace->current != thread) { | 1996 | if (trace->current != thread) { |
1871 | thread__put(trace->current); | 1997 | thread__put(trace->current); |
@@ -1891,9 +2017,6 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, | |||
1891 | if (sc == NULL) | 2017 | if (sc == NULL) |
1892 | return -1; | 2018 | return -1; |
1893 | 2019 | ||
1894 | if (sc->filtered) | ||
1895 | return 0; | ||
1896 | |||
1897 | thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); | 2020 | thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); |
1898 | ttrace = thread__trace(thread, trace->output); | 2021 | ttrace = thread__trace(thread, trace->output); |
1899 | if (ttrace == NULL) | 2022 | if (ttrace == NULL) |
@@ -1904,9 +2027,9 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, | |||
1904 | 2027 | ||
1905 | ret = perf_evsel__sc_tp_uint(evsel, ret, sample); | 2028 | ret = perf_evsel__sc_tp_uint(evsel, ret, sample); |
1906 | 2029 | ||
1907 | if (id == trace->audit.open_id && ret >= 0 && trace->last_vfs_getname) { | 2030 | if (id == trace->audit.open_id && ret >= 0 && ttrace->filename.pending_open) { |
1908 | trace__set_fd_pathname(thread, ret, trace->last_vfs_getname); | 2031 | trace__set_fd_pathname(thread, ret, ttrace->filename.name); |
1909 | trace->last_vfs_getname = NULL; | 2032 | ttrace->filename.pending_open = false; |
1910 | ++trace->stats.vfs_getname; | 2033 | ++trace->stats.vfs_getname; |
1911 | } | 2034 | } |
1912 | 2035 | ||
@@ -1961,7 +2084,56 @@ static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, | |||
1961 | union perf_event *event __maybe_unused, | 2084 | union perf_event *event __maybe_unused, |
1962 | struct perf_sample *sample) | 2085 | struct perf_sample *sample) |
1963 | { | 2086 | { |
1964 | trace->last_vfs_getname = perf_evsel__rawptr(evsel, sample, "pathname"); | 2087 | struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); |
2088 | struct thread_trace *ttrace; | ||
2089 | size_t filename_len, entry_str_len, to_move; | ||
2090 | ssize_t remaining_space; | ||
2091 | char *pos; | ||
2092 | const char *filename = perf_evsel__rawptr(evsel, sample, "pathname"); | ||
2093 | |||
2094 | if (!thread) | ||
2095 | goto out; | ||
2096 | |||
2097 | ttrace = thread__priv(thread); | ||
2098 | if (!ttrace) | ||
2099 | goto out; | ||
2100 | |||
2101 | filename_len = strlen(filename); | ||
2102 | |||
2103 | if (ttrace->filename.namelen < filename_len) { | ||
2104 | char *f = realloc(ttrace->filename.name, filename_len + 1); | ||
2105 | |||
2106 | if (f == NULL) | ||
2107 | goto out; | ||
2108 | |||
2109 | ttrace->filename.namelen = filename_len; | ||
2110 | ttrace->filename.name = f; | ||
2111 | } | ||
2112 | |||
2113 | strcpy(ttrace->filename.name, filename); | ||
2114 | ttrace->filename.pending_open = true; | ||
2115 | |||
2116 | if (!ttrace->filename.ptr) | ||
2117 | goto out; | ||
2118 | |||
2119 | entry_str_len = strlen(ttrace->entry_str); | ||
2120 | remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */ | ||
2121 | if (remaining_space <= 0) | ||
2122 | goto out; | ||
2123 | |||
2124 | if (filename_len > (size_t)remaining_space) { | ||
2125 | filename += filename_len - remaining_space; | ||
2126 | filename_len = remaining_space; | ||
2127 | } | ||
2128 | |||
2129 | to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */ | ||
2130 | pos = ttrace->entry_str + ttrace->filename.entry_str_pos; | ||
2131 | memmove(pos + filename_len, pos, to_move); | ||
2132 | memcpy(pos, filename, filename_len); | ||
2133 | |||
2134 | ttrace->filename.ptr = 0; | ||
2135 | ttrace->filename.entry_str_pos = 0; | ||
2136 | out: | ||
1965 | return 0; | 2137 | return 0; |
1966 | } | 2138 | } |
1967 | 2139 | ||
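When the vfs_getname sample arrives, the resolved path is copied into the hole recorded earlier: the tail of entry_str (including its terminating NUL) is shifted right with memmove, the name is written in with memcpy, and if it does not fit the name is trimmed from its left so only the tail survives. Here is a minimal, stand-alone sketch of that splice; the helper name and buffer sizes are illustrative, not the perf code itself.

    #include <stdio.h>
    #include <string.h>

    /* Splice "name" into "entry" at offset "pos", keeping the length bounded. */
    static void splice_name(char *entry, size_t entry_size, size_t pos,
                            const char *name)
    {
        size_t entry_len = strlen(entry);
        size_t name_len = strlen(name);
        size_t remaining = entry_size - entry_len - 1;  /* room left, minus NUL */

        if (remaining == 0)
            return;

        if (name_len > remaining) {
            /* keep only the tail of the name, as the perf code does */
            name += name_len - remaining;
            name_len = remaining;
        }

        /* shift the existing tail (and its NUL) right, then drop the name in */
        memmove(entry + pos + name_len, entry + pos, entry_len - pos + 1);
        memcpy(entry + pos, name, name_len);
    }

    int main(void)
    {
        char entry[64] = "open(, O_RDONLY)";

        splice_name(entry, sizeof(entry), 5, "/etc/passwd");
        printf("%s\n", entry);  /* open(/etc/passwd, O_RDONLY) */
        return 0;
    }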
@@ -2214,19 +2386,20 @@ static int trace__record(struct trace *trace, int argc, const char **argv) | |||
2214 | 2386 | ||
2215 | static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); | 2387 | static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); |
2216 | 2388 | ||
2217 | static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist) | 2389 | static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist) |
2218 | { | 2390 | { |
2219 | struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); | 2391 | struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname"); |
2220 | if (evsel == NULL) | 2392 | if (evsel == NULL) |
2221 | return; | 2393 | return false; |
2222 | 2394 | ||
2223 | if (perf_evsel__field(evsel, "pathname") == NULL) { | 2395 | if (perf_evsel__field(evsel, "pathname") == NULL) { |
2224 | perf_evsel__delete(evsel); | 2396 | perf_evsel__delete(evsel); |
2225 | return; | 2397 | return false; |
2226 | } | 2398 | } |
2227 | 2399 | ||
2228 | evsel->handler = trace__vfs_getname; | 2400 | evsel->handler = trace__vfs_getname; |
2229 | perf_evlist__add(evlist, evsel); | 2401 | perf_evlist__add(evlist, evsel); |
2402 | return true; | ||
2230 | } | 2403 | } |
2231 | 2404 | ||
2232 | static int perf_evlist__add_pgfault(struct perf_evlist *evlist, | 2405 | static int perf_evlist__add_pgfault(struct perf_evlist *evlist, |
@@ -2283,9 +2456,68 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st | |||
2283 | } | 2456 | } |
2284 | } | 2457 | } |
2285 | 2458 | ||
2459 | static int trace__add_syscall_newtp(struct trace *trace) | ||
2460 | { | ||
2461 | int ret = -1; | ||
2462 | struct perf_evlist *evlist = trace->evlist; | ||
2463 | struct perf_evsel *sys_enter, *sys_exit; | ||
2464 | |||
2465 | sys_enter = perf_evsel__syscall_newtp("sys_enter", trace__sys_enter); | ||
2466 | if (sys_enter == NULL) | ||
2467 | goto out; | ||
2468 | |||
2469 | if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args)) | ||
2470 | goto out_delete_sys_enter; | ||
2471 | |||
2472 | sys_exit = perf_evsel__syscall_newtp("sys_exit", trace__sys_exit); | ||
2473 | if (sys_exit == NULL) | ||
2474 | goto out_delete_sys_enter; | ||
2475 | |||
2476 | if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) | ||
2477 | goto out_delete_sys_exit; | ||
2478 | |||
2479 | perf_evlist__add(evlist, sys_enter); | ||
2480 | perf_evlist__add(evlist, sys_exit); | ||
2481 | |||
2482 | trace->syscalls.events.sys_enter = sys_enter; | ||
2483 | trace->syscalls.events.sys_exit = sys_exit; | ||
2484 | |||
2485 | ret = 0; | ||
2486 | out: | ||
2487 | return ret; | ||
2488 | |||
2489 | out_delete_sys_exit: | ||
2490 | perf_evsel__delete_priv(sys_exit); | ||
2491 | out_delete_sys_enter: | ||
2492 | perf_evsel__delete_priv(sys_enter); | ||
2493 | goto out; | ||
2494 | } | ||
2495 | |||
2496 | static int trace__set_ev_qualifier_filter(struct trace *trace) | ||
2497 | { | ||
2498 | int err = -1; | ||
2499 | char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier, | ||
2500 | trace->ev_qualifier_ids.nr, | ||
2501 | trace->ev_qualifier_ids.entries); | ||
2502 | |||
2503 | if (filter == NULL) | ||
2504 | goto out_enomem; | ||
2505 | |||
2506 | if (!perf_evsel__append_filter(trace->syscalls.events.sys_enter, "&&", filter)) | ||
2507 | err = perf_evsel__append_filter(trace->syscalls.events.sys_exit, "&&", filter); | ||
2508 | |||
2509 | free(filter); | ||
2510 | out: | ||
2511 | return err; | ||
2512 | out_enomem: | ||
2513 | errno = ENOMEM; | ||
2514 | goto out; | ||
2515 | } | ||
2516 | |||
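Instead of consulting a per-syscall `filtered` flag at sample time, the syscall ids collected by trace__validate_ev_qualifier() are now turned into a tracepoint filter string and appended (with "&&") to both the sys_enter and sys_exit events, so unwanted syscalls are discarded in the kernel before ever reaching the tool. The exact expression asprintf_expr_inout_ints() emits may differ; the sketch below only shows the idea of building such an id filter, with hypothetical helper names.

    #include <stdio.h>

    /* Build an "id == a || id == b ..." (or negated) expression over ids. */
    static int build_id_filter(char *buf, size_t size, int in,
                               const int *ids, size_t nr)
    {
        size_t printed = 0;

        for (size_t i = 0; i < nr; i++) {
            printed += snprintf(buf + printed, size - printed, "%s%s%d",
                                i ? (in ? " || " : " && ") : "",
                                in ? "id == " : "id != ", ids[i]);
            if (printed >= size)
                return -1;  /* truncated */
        }
        return 0;
    }

    int main(void)
    {
        int ids[] = { 0, 2, 257 };  /* e.g. read, open, openat on x86_64 */
        char filter[128];

        if (build_id_filter(filter, sizeof(filter), 1, ids, 3) == 0)
            printf("tracepoint filter: %s\n", filter);
        /* prints: tracepoint filter: id == 0 || id == 2 || id == 257 */
        return 0;
    }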
2286 | static int trace__run(struct trace *trace, int argc, const char **argv) | 2517 | static int trace__run(struct trace *trace, int argc, const char **argv) |
2287 | { | 2518 | { |
2288 | struct perf_evlist *evlist = trace->evlist; | 2519 | struct perf_evlist *evlist = trace->evlist; |
2520 | struct perf_evsel *evsel; | ||
2289 | int err = -1, i; | 2521 | int err = -1, i; |
2290 | unsigned long before; | 2522 | unsigned long before; |
2291 | const bool forks = argc > 0; | 2523 | const bool forks = argc > 0; |
@@ -2293,13 +2525,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
2293 | 2525 | ||
2294 | trace->live = true; | 2526 | trace->live = true; |
2295 | 2527 | ||
2296 | if (trace->trace_syscalls && | 2528 | if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) |
2297 | perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, | ||
2298 | trace__sys_exit)) | ||
2299 | goto out_error_raw_syscalls; | 2529 | goto out_error_raw_syscalls; |
2300 | 2530 | ||
2301 | if (trace->trace_syscalls) | 2531 | if (trace->trace_syscalls) |
2302 | perf_evlist__add_vfs_getname(evlist); | 2532 | trace->vfs_getname = perf_evlist__add_vfs_getname(evlist); |
2303 | 2533 | ||
2304 | if ((trace->trace_pgfaults & TRACE_PFMAJ) && | 2534 | if ((trace->trace_pgfaults & TRACE_PFMAJ) && |
2305 | perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { | 2535 | perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ)) { |
@@ -2356,11 +2586,22 @@ static int trace__run(struct trace *trace, int argc, const char **argv) | |||
2356 | else if (thread_map__pid(evlist->threads, 0) == -1) | 2586 | else if (thread_map__pid(evlist->threads, 0) == -1) |
2357 | err = perf_evlist__set_filter_pid(evlist, getpid()); | 2587 | err = perf_evlist__set_filter_pid(evlist, getpid()); |
2358 | 2588 | ||
2359 | if (err < 0) { | 2589 | if (err < 0) |
2360 | printf("err=%d,%s\n", -err, strerror(-err)); | 2590 | goto out_error_mem; |
2361 | exit(1); | 2591 | |
2592 | if (trace->ev_qualifier_ids.nr > 0) { | ||
2593 | err = trace__set_ev_qualifier_filter(trace); | ||
2594 | if (err < 0) | ||
2595 | goto out_errno; | ||
2596 | |||
2597 | pr_debug("event qualifier tracepoint filter: %s\n", | ||
2598 | trace->syscalls.events.sys_exit->filter); | ||
2362 | } | 2599 | } |
2363 | 2600 | ||
2601 | err = perf_evlist__apply_filters(evlist, &evsel); | ||
2602 | if (err < 0) | ||
2603 | goto out_error_apply_filters; | ||
2604 | |||
2364 | err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); | 2605 | err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); |
2365 | if (err < 0) | 2606 | if (err < 0) |
2366 | goto out_error_mmap; | 2607 | goto out_error_mmap; |
@@ -2462,10 +2703,21 @@ out_error_open: | |||
2462 | out_error: | 2703 | out_error: |
2463 | fprintf(trace->output, "%s\n", errbuf); | 2704 | fprintf(trace->output, "%s\n", errbuf); |
2464 | goto out_delete_evlist; | 2705 | goto out_delete_evlist; |
2706 | |||
2707 | out_error_apply_filters: | ||
2708 | fprintf(trace->output, | ||
2709 | "Failed to set filter \"%s\" on event %s with %d (%s)\n", | ||
2710 | evsel->filter, perf_evsel__name(evsel), errno, | ||
2711 | strerror_r(errno, errbuf, sizeof(errbuf))); | ||
2712 | goto out_delete_evlist; | ||
2465 | } | 2713 | } |
2466 | out_error_mem: | 2714 | out_error_mem: |
2467 | fprintf(trace->output, "Not enough memory to run!\n"); | 2715 | fprintf(trace->output, "Not enough memory to run!\n"); |
2468 | goto out_delete_evlist; | 2716 | goto out_delete_evlist; |
2717 | |||
2718 | out_errno: | ||
2719 | fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno)); | ||
2720 | goto out_delete_evlist; | ||
2469 | } | 2721 | } |
2470 | 2722 | ||
2471 | static int trace__replay(struct trace *trace) | 2723 | static int trace__replay(struct trace *trace) |
@@ -2586,9 +2838,9 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, | |||
2586 | 2838 | ||
2587 | printed += fprintf(fp, "\n"); | 2839 | printed += fprintf(fp, "\n"); |
2588 | 2840 | ||
2589 | printed += fprintf(fp, " syscall calls min avg max stddev\n"); | 2841 | printed += fprintf(fp, " syscall calls total min avg max stddev\n"); |
2590 | printed += fprintf(fp, " (msec) (msec) (msec) (%%)\n"); | 2842 | printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); |
2591 | printed += fprintf(fp, " --------------- -------- --------- --------- --------- ------\n"); | 2843 | printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n"); |
2592 | 2844 | ||
2593 | /* each int_node is a syscall */ | 2845 | /* each int_node is a syscall */ |
2594 | while (inode) { | 2846 | while (inode) { |
@@ -2605,8 +2857,8 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, | |||
2605 | 2857 | ||
2606 | sc = &trace->syscalls.table[inode->i]; | 2858 | sc = &trace->syscalls.table[inode->i]; |
2607 | printed += fprintf(fp, " %-15s", sc->name); | 2859 | printed += fprintf(fp, " %-15s", sc->name); |
2608 | printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f", | 2860 | printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f", |
2609 | n, min, avg); | 2861 | n, avg * n, min, avg); |
2610 | printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); | 2862 | printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); |
2611 | } | 2863 | } |
2612 | 2864 | ||
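The per-syscall summary gains a "total" column. Since the running statistics only keep the call count and the mean duration, the total time is recovered as avg * n, as in this small check (values are invented for illustration):

    #include <stdio.h>

    int main(void)
    {
        /* the summary keeps only the call count and the mean duration */
        unsigned long n = 4;
        double avg_msec = 0.250;

        printf(" %8lu %9.3f %9.3f\n", n, avg_msec * n, avg_msec);
        /* calls=4, total=1.000 msec, avg=0.250 msec, i.e. total = avg * n */
        return 0;
    }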
@@ -2778,7 +3030,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2778 | .mmap_pages = UINT_MAX, | 3030 | .mmap_pages = UINT_MAX, |
2779 | .proc_map_timeout = 500, | 3031 | .proc_map_timeout = 500, |
2780 | }, | 3032 | }, |
2781 | .output = stdout, | 3033 | .output = stderr, |
2782 | .show_comm = true, | 3034 | .show_comm = true, |
2783 | .trace_syscalls = true, | 3035 | .trace_syscalls = true, |
2784 | }; | 3036 | }; |
@@ -2879,11 +3131,14 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) | |||
2879 | 3131 | ||
2880 | if (ev_qualifier_str != NULL) { | 3132 | if (ev_qualifier_str != NULL) { |
2881 | const char *s = ev_qualifier_str; | 3133 | const char *s = ev_qualifier_str; |
3134 | struct strlist_config slist_config = { | ||
3135 | .dirname = system_path(STRACE_GROUPS_DIR), | ||
3136 | }; | ||
2882 | 3137 | ||
2883 | trace.not_ev_qualifier = *s == '!'; | 3138 | trace.not_ev_qualifier = *s == '!'; |
2884 | if (trace.not_ev_qualifier) | 3139 | if (trace.not_ev_qualifier) |
2885 | ++s; | 3140 | ++s; |
2886 | trace.ev_qualifier = strlist__new(true, s); | 3141 | trace.ev_qualifier = strlist__new(s, &slist_config); |
2887 | if (trace.ev_qualifier == NULL) { | 3142 | if (trace.ev_qualifier == NULL) { |
2888 | fputs("Not enough memory to parse event qualifier", | 3143 | fputs("Not enough memory to parse event qualifier", |
2889 | trace.output); | 3144 | trace.output); |
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 094ddaee104c..38a08539f4bf 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile | |||
@@ -11,7 +11,7 @@ ifneq ($(obj-perf),) | |||
11 | obj-perf := $(abspath $(obj-perf))/ | 11 | obj-perf := $(abspath $(obj-perf))/ |
12 | endif | 12 | endif |
13 | 13 | ||
14 | $(shell echo -n > $(OUTPUT).config-detected) | 14 | $(shell printf "" > $(OUTPUT).config-detected) |
15 | detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) | 15 | detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) |
16 | detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) | 16 | detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) |
17 | 17 | ||
@@ -297,7 +297,11 @@ ifndef NO_LIBELF | |||
297 | else | 297 | else |
298 | CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) | 298 | CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) |
299 | LDFLAGS += $(LIBDW_LDFLAGS) | 299 | LDFLAGS += $(LIBDW_LDFLAGS) |
300 | EXTLIBS += -ldw | 300 | DWARFLIBS := -ldw |
301 | ifeq ($(findstring -static,${LDFLAGS}),-static) | ||
302 | DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 | ||
303 | endif | ||
304 | EXTLIBS += ${DWARFLIBS} | ||
301 | $(call detected,CONFIG_DWARF) | 305 | $(call detected,CONFIG_DWARF) |
302 | endif # PERF_HAVE_DWARF_REGS | 306 | endif # PERF_HAVE_DWARF_REGS |
303 | endif # NO_DWARF | 307 | endif # NO_DWARF |
@@ -569,9 +573,14 @@ ifndef NO_LIBNUMA | |||
569 | msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); | 573 | msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev); |
570 | NO_LIBNUMA := 1 | 574 | NO_LIBNUMA := 1 |
571 | else | 575 | else |
572 | CFLAGS += -DHAVE_LIBNUMA_SUPPORT | 576 | ifeq ($(feature-numa_num_possible_cpus), 0) |
573 | EXTLIBS += -lnuma | 577 | msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8); |
574 | $(call detected,CONFIG_NUMA) | 578 | NO_LIBNUMA := 1 |
579 | else | ||
580 | CFLAGS += -DHAVE_LIBNUMA_SUPPORT | ||
581 | EXTLIBS += -lnuma | ||
582 | $(call detected,CONFIG_NUMA) | ||
583 | endif | ||
575 | endif | 584 | endif |
576 | endif | 585 | endif |
577 | 586 | ||
@@ -617,8 +626,13 @@ ifdef LIBBABELTRACE | |||
617 | endif | 626 | endif |
618 | 627 | ||
619 | ifndef NO_AUXTRACE | 628 | ifndef NO_AUXTRACE |
620 | $(call detected,CONFIG_AUXTRACE) | 629 | ifeq ($(feature-get_cpuid), 0) |
621 | CFLAGS += -DHAVE_AUXTRACE_SUPPORT | 630 | msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc); |
631 | NO_AUXTRACE := 1 | ||
632 | else | ||
633 | $(call detected,CONFIG_AUXTRACE) | ||
634 | CFLAGS += -DHAVE_AUXTRACE_SUPPORT | ||
635 | endif | ||
622 | endif | 636 | endif |
623 | 637 | ||
624 | # Among the variables below, these: | 638 | # Among the variables below, these: |
@@ -638,12 +652,13 @@ ifndef DESTDIR | |||
638 | prefix ?= $(HOME) | 652 | prefix ?= $(HOME) |
639 | endif | 653 | endif |
640 | bindir_relative = bin | 654 | bindir_relative = bin |
641 | bindir = $(prefix)/$(bindir_relative) | 655 | bindir = $(abspath $(prefix)/$(bindir_relative)) |
642 | mandir = share/man | 656 | mandir = share/man |
643 | infodir = share/info | 657 | infodir = share/info |
644 | perfexecdir = libexec/perf-core | 658 | perfexecdir = libexec/perf-core |
645 | sharedir = $(prefix)/share | 659 | sharedir = $(prefix)/share |
646 | template_dir = share/perf-core/templates | 660 | template_dir = share/perf-core/templates |
661 | STRACE_GROUPS_DIR = share/perf-core/strace/groups | ||
647 | htmldir = share/doc/perf-doc | 662 | htmldir = share/doc/perf-doc |
648 | ifeq ($(prefix),/usr) | 663 | ifeq ($(prefix),/usr) |
649 | sysconfdir = /etc | 664 | sysconfdir = /etc |
@@ -663,6 +678,7 @@ libdir = $(prefix)/$(lib) | |||
663 | 678 | ||
664 | # Shell quote (do not use $(call) to accommodate ancient setups); | 679 | # Shell quote (do not use $(call) to accommodate ancient setups); |
665 | ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) | 680 | ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) |
681 | STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR)) | ||
666 | DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) | 682 | DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) |
667 | bindir_SQ = $(subst ','\'',$(bindir)) | 683 | bindir_SQ = $(subst ','\'',$(bindir)) |
668 | mandir_SQ = $(subst ','\'',$(mandir)) | 684 | mandir_SQ = $(subst ','\'',$(mandir)) |
@@ -676,10 +692,13 @@ libdir_SQ = $(subst ','\'',$(libdir)) | |||
676 | 692 | ||
677 | ifneq ($(filter /%,$(firstword $(perfexecdir))),) | 693 | ifneq ($(filter /%,$(firstword $(perfexecdir))),) |
678 | perfexec_instdir = $(perfexecdir) | 694 | perfexec_instdir = $(perfexecdir) |
695 | STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) | ||
679 | else | 696 | else |
680 | perfexec_instdir = $(prefix)/$(perfexecdir) | 697 | perfexec_instdir = $(prefix)/$(perfexecdir) |
698 | STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) | ||
681 | endif | 699 | endif |
682 | perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) | 700 | perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) |
701 | STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR)) | ||
683 | 702 | ||
684 | # If we install to $(HOME) we keep the traceevent default: | 703 | # If we install to $(HOME) we keep the traceevent default: |
685 | # $(HOME)/.traceevent/plugins | 704 | # $(HOME)/.traceevent/plugins |
@@ -713,6 +732,7 @@ $(call detected_var,htmldir_SQ) | |||
713 | $(call detected_var,infodir_SQ) | 732 | $(call detected_var,infodir_SQ) |
714 | $(call detected_var,mandir_SQ) | 733 | $(call detected_var,mandir_SQ) |
715 | $(call detected_var,ETC_PERFCONFIG_SQ) | 734 | $(call detected_var,ETC_PERFCONFIG_SQ) |
735 | $(call detected_var,STRACE_GROUPS_DIR_SQ) | ||
716 | $(call detected_var,prefix_SQ) | 736 | $(call detected_var,prefix_SQ) |
717 | $(call detected_var,perfexecdir_SQ) | 737 | $(call detected_var,perfexecdir_SQ) |
718 | $(call detected_var,LIBDIR) | 738 | $(call detected_var,LIBDIR) |
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh index c7ff90a90e4e..7e47a7cbc195 100644 --- a/tools/perf/perf-with-kcore.sh +++ b/tools/perf/perf-with-kcore.sh | |||
@@ -50,7 +50,7 @@ copy_kcore() | |||
50 | fi | 50 | fi |
51 | 51 | ||
52 | rm -f perf.data.junk | 52 | rm -f perf.data.junk |
53 | ("$PERF" record -o perf.data.junk $PERF_OPTIONS -- sleep 60) >/dev/null 2>/dev/null & | 53 | ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null & |
54 | PERF_PID=$! | 54 | PERF_PID=$! |
55 | 55 | ||
56 | # Need to make sure that perf has started | 56 | # Need to make sure that perf has started |
@@ -160,18 +160,18 @@ record() | |||
160 | echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 | 160 | echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2 |
161 | fi | 161 | fi |
162 | 162 | ||
163 | if echo "$PERF_OPTIONS" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then | 163 | if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then |
164 | echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 | 164 | echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2 |
165 | fi | 165 | fi |
166 | 166 | ||
167 | if echo "$PERF_OPTIONS" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then | 167 | if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then |
168 | if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then | 168 | if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then |
169 | echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 | 169 | echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2 |
170 | fi | 170 | fi |
171 | 171 | ||
172 | if echo "$PERF_OPTIONS" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then | 172 | if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then |
173 | true | 173 | true |
174 | elif echo "$PERF_OPTIONS" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then | 174 | elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then |
175 | true | 175 | true |
176 | elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then | 176 | elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then |
177 | echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 | 177 | echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2 |
@@ -193,8 +193,8 @@ record() | |||
193 | 193 | ||
194 | mkdir "$PERF_DATA_DIR" | 194 | mkdir "$PERF_DATA_DIR" |
195 | 195 | ||
196 | echo "$PERF record -o $PERF_DATA_DIR/perf.data $PERF_OPTIONS -- $*" | 196 | echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@" |
197 | "$PERF" record -o "$PERF_DATA_DIR/perf.data" $PERF_OPTIONS -- $* || true | 197 | "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true |
198 | 198 | ||
199 | if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then | 199 | if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then |
200 | exit 1 | 200 | exit 1 |
@@ -209,8 +209,8 @@ subcommand() | |||
209 | { | 209 | { |
210 | find_perf | 210 | find_perf |
211 | check_buildid_cache_permissions | 211 | check_buildid_cache_permissions |
212 | echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $*" | 212 | echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@" |
213 | "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" $* | 213 | "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@" |
214 | } | 214 | } |
215 | 215 | ||
216 | if [ "$1" = "fix_buildid_cache_permissions" ] ; then | 216 | if [ "$1" = "fix_buildid_cache_permissions" ] ; then |
@@ -234,7 +234,7 @@ fi | |||
234 | case "$PERF_SUB_COMMAND" in | 234 | case "$PERF_SUB_COMMAND" in |
235 | "record") | 235 | "record") |
236 | while [ "$1" != "--" ] ; do | 236 | while [ "$1" != "--" ] ; do |
237 | PERF_OPTIONS+="$1 " | 237 | PERF_OPTIONS+=("$1") |
238 | shift || break | 238 | shift || break |
239 | done | 239 | done |
240 | if [ "$1" != "--" ] ; then | 240 | if [ "$1" != "--" ] ; then |
@@ -242,16 +242,16 @@ case "$PERF_SUB_COMMAND" in | |||
242 | usage | 242 | usage |
243 | fi | 243 | fi |
244 | shift | 244 | shift |
245 | record $* | 245 | record "$@" |
246 | ;; | 246 | ;; |
247 | "script") | 247 | "script") |
248 | subcommand $* | 248 | subcommand "$@" |
249 | ;; | 249 | ;; |
250 | "report") | 250 | "report") |
251 | subcommand $* | 251 | subcommand "$@" |
252 | ;; | 252 | ;; |
253 | "inject") | 253 | "inject") |
254 | subcommand $* | 254 | subcommand "$@" |
255 | ;; | 255 | ;; |
256 | *) | 256 | *) |
257 | usage | 257 | usage |
diff --git a/tools/perf/perf.c b/tools/perf/perf.c index b857fcbd00cf..07dbff5c0e60 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c | |||
@@ -231,7 +231,7 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) | |||
231 | (*argc)--; | 231 | (*argc)--; |
232 | } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { | 232 | } else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) { |
233 | perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); | 233 | perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR)); |
234 | fprintf(stderr, "dir: %s\n", debugfs_mountpoint); | 234 | fprintf(stderr, "dir: %s\n", tracing_path); |
235 | if (envchanged) | 235 | if (envchanged) |
236 | *envchanged = 1; | 236 | *envchanged = 1; |
237 | } else if (!strcmp(cmd, "--list-cmds")) { | 237 | } else if (!strcmp(cmd, "--list-cmds")) { |
diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 4a5827fff799..90129accffbe 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h | |||
@@ -51,16 +51,19 @@ struct record_opts { | |||
51 | bool sample_address; | 51 | bool sample_address; |
52 | bool sample_weight; | 52 | bool sample_weight; |
53 | bool sample_time; | 53 | bool sample_time; |
54 | bool sample_time_set; | ||
55 | bool callgraph_set; | ||
54 | bool period; | 56 | bool period; |
55 | bool sample_intr_regs; | ||
56 | bool running_time; | 57 | bool running_time; |
57 | bool full_auxtrace; | 58 | bool full_auxtrace; |
58 | bool auxtrace_snapshot_mode; | 59 | bool auxtrace_snapshot_mode; |
60 | bool record_switch_events; | ||
59 | unsigned int freq; | 61 | unsigned int freq; |
60 | unsigned int mmap_pages; | 62 | unsigned int mmap_pages; |
61 | unsigned int auxtrace_mmap_pages; | 63 | unsigned int auxtrace_mmap_pages; |
62 | unsigned int user_freq; | 64 | unsigned int user_freq; |
63 | u64 branch_stack; | 65 | u64 branch_stack; |
66 | u64 sample_intr_regs; | ||
64 | u64 default_interval; | 67 | u64 default_interval; |
65 | u64 user_interval; | 68 | u64 user_interval; |
66 | size_t auxtrace_snapshot_size; | 69 | size_t auxtrace_snapshot_size; |
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py index 2225162ee1fc..b9d508336ae6 100755 --- a/tools/perf/python/twatch.py +++ b/tools/perf/python/twatch.py | |||
@@ -18,10 +18,20 @@ import perf | |||
18 | def main(): | 18 | def main(): |
19 | cpus = perf.cpu_map() | 19 | cpus = perf.cpu_map() |
20 | threads = perf.thread_map() | 20 | threads = perf.thread_map() |
21 | evsel = perf.evsel(task = 1, comm = 1, mmap = 0, | 21 | evsel = perf.evsel(type = perf.TYPE_SOFTWARE, |
22 | config = perf.COUNT_SW_DUMMY, | ||
23 | task = 1, comm = 1, mmap = 0, freq = 0, | ||
22 | wakeup_events = 1, watermark = 1, | 24 | wakeup_events = 1, watermark = 1, |
23 | sample_id_all = 1, | 25 | sample_id_all = 1, |
24 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU) | 26 | sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU) |
27 | |||
28 | """What we want are just the PERF_RECORD_ lifetime events for threads, | ||
29 | using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1 | ||
30 | (the default), makes perf reenable irq_vectors:local_timer_entry, when | ||
31 | disabling nohz, not good for some use cases where all we want is to get | ||
32 | threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY, | ||
33 | freq=0) instead.""" | ||
34 | |||
25 | evsel.open(cpus = cpus, threads = threads); | 35 | evsel.open(cpus = cpus, threads = threads); |
26 | evlist = perf.evlist(cpus, threads) | 36 | evlist = perf.evlist(cpus, threads) |
27 | evlist.add(evsel) | 37 | evlist.add(evsel) |
diff --git a/tools/perf/scripts/python/bin/compaction-times-record b/tools/perf/scripts/python/bin/compaction-times-record new file mode 100644 index 000000000000..6edcd40e14e8 --- /dev/null +++ b/tools/perf/scripts/python/bin/compaction-times-record | |||
@@ -0,0 +1,2 @@ | |||
1 | #!/bin/bash | ||
2 | perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages $@ | ||
diff --git a/tools/perf/scripts/python/bin/compaction-times-report b/tools/perf/scripts/python/bin/compaction-times-report new file mode 100644 index 000000000000..3dc13897cfde --- /dev/null +++ b/tools/perf/scripts/python/bin/compaction-times-report | |||
@@ -0,0 +1,4 @@ | |||
1 | #!/bin/bash | ||
2 | #description: display time taken by mm compaction | ||
3 | #args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex] | ||
4 | perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py $@ | ||
diff --git a/tools/perf/scripts/python/call-graph-from-postgresql.py b/tools/perf/scripts/python/call-graph-from-postgresql.py new file mode 100644 index 000000000000..e78fdc2a5a9d --- /dev/null +++ b/tools/perf/scripts/python/call-graph-from-postgresql.py | |||
@@ -0,0 +1,327 @@ | |||
1 | #!/usr/bin/python2 | ||
2 | # call-graph-from-postgresql.py: create call-graph from postgresql database | ||
3 | # Copyright (c) 2014, Intel Corporation. | ||
4 | # | ||
5 | # This program is free software; you can redistribute it and/or modify it | ||
6 | # under the terms and conditions of the GNU General Public License, | ||
7 | # version 2, as published by the Free Software Foundation. | ||
8 | # | ||
9 | # This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | # more details. | ||
13 | |||
14 | # To use this script you will need to have exported data using the | ||
15 | # export-to-postgresql.py script. Refer to that script for details. | ||
16 | # | ||
17 | # Following on from the example in the export-to-postgresql.py script, a | ||
18 | # call-graph can be displayed for the pt_example database like this: | ||
19 | # | ||
20 | # python tools/perf/scripts/python/call-graph-from-postgresql.py pt_example | ||
21 | # | ||
22 | # Note this script supports connecting to remote databases by setting hostname, | ||
23 | # port, username, password, and dbname e.g. | ||
24 | # | ||
25 | # python tools/perf/scripts/python/call-graph-from-postgresql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example" | ||
26 | # | ||
27 | # The result is a GUI window with a tree representing a context-sensitive | ||
28 | # call-graph. Expanding a couple of levels of the tree and adjusting column | ||
29 | # widths to suit will display something like: | ||
30 | # | ||
31 | # Call Graph: pt_example | ||
32 | # Call Path Object Count Time(ns) Time(%) Branch Count Branch Count(%) | ||
33 | # v- ls | ||
34 | # v- 2638:2638 | ||
35 | # v- _start ld-2.19.so 1 10074071 100.0 211135 100.0 | ||
36 | # |- unknown unknown 1 13198 0.1 1 0.0 | ||
37 | # >- _dl_start ld-2.19.so 1 1400980 13.9 19637 9.3 | ||
38 | # >- _d_linit_internal ld-2.19.so 1 448152 4.4 11094 5.3 | ||
39 | # v-__libc_start_main@plt ls 1 8211741 81.5 180397 85.4 | ||
40 | # >- _dl_fixup ld-2.19.so 1 7607 0.1 108 0.1 | ||
41 | # >- __cxa_atexit libc-2.19.so 1 11737 0.1 10 0.0 | ||
42 | # >- __libc_csu_init ls 1 10354 0.1 10 0.0 | ||
43 | # |- _setjmp libc-2.19.so 1 0 0.0 4 0.0 | ||
44 | # v- main ls 1 8182043 99.6 180254 99.9 | ||
45 | # | ||
46 | # Points to note: | ||
47 | # The top level is a command name (comm) | ||
48 | # The next level is a thread (pid:tid) | ||
49 | # Subsequent levels are functions | ||
50 | # 'Count' is the number of calls | ||
51 | # 'Time' is the elapsed time until the function returns | ||
52 | # Percentages are relative to the level above | ||
53 | # 'Branch Count' is the total number of branches for that function and all | ||
54 | # functions that it calls | ||
55 | |||
56 | import sys | ||
57 | from PySide.QtCore import * | ||
58 | from PySide.QtGui import * | ||
59 | from PySide.QtSql import * | ||
60 | from decimal import * | ||
61 | |||
62 | class TreeItem(): | ||
63 | |||
64 | def __init__(self, db, row, parent_item): | ||
65 | self.db = db | ||
66 | self.row = row | ||
67 | self.parent_item = parent_item | ||
68 | self.query_done = False; | ||
69 | self.child_count = 0 | ||
70 | self.child_items = [] | ||
71 | self.data = ["", "", "", "", "", "", ""] | ||
72 | self.comm_id = 0 | ||
73 | self.thread_id = 0 | ||
74 | self.call_path_id = 1 | ||
75 | self.branch_count = 0 | ||
76 | self.time = 0 | ||
77 | if not parent_item: | ||
78 | self.setUpRoot() | ||
79 | |||
80 | def setUpRoot(self): | ||
81 | self.query_done = True | ||
82 | query = QSqlQuery(self.db) | ||
83 | ret = query.exec_('SELECT id, comm FROM comms') | ||
84 | if not ret: | ||
85 | raise Exception("Query failed: " + query.lastError().text()) | ||
86 | while query.next(): | ||
87 | if not query.value(0): | ||
88 | continue | ||
89 | child_item = TreeItem(self.db, self.child_count, self) | ||
90 | self.child_items.append(child_item) | ||
91 | self.child_count += 1 | ||
92 | child_item.setUpLevel1(query.value(0), query.value(1)) | ||
93 | |||
94 | def setUpLevel1(self, comm_id, comm): | ||
95 | self.query_done = True; | ||
96 | self.comm_id = comm_id | ||
97 | self.data[0] = comm | ||
98 | self.child_items = [] | ||
99 | self.child_count = 0 | ||
100 | query = QSqlQuery(self.db) | ||
101 | ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id)) | ||
102 | if not ret: | ||
103 | raise Exception("Query failed: " + query.lastError().text()) | ||
104 | while query.next(): | ||
105 | child_item = TreeItem(self.db, self.child_count, self) | ||
106 | self.child_items.append(child_item) | ||
107 | self.child_count += 1 | ||
108 | child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2)) | ||
109 | |||
110 | def setUpLevel2(self, comm_id, thread_id, pid, tid): | ||
111 | self.comm_id = comm_id | ||
112 | self.thread_id = thread_id | ||
113 | self.data[0] = str(pid) + ":" + str(tid) | ||
114 | |||
115 | def getChildItem(self, row): | ||
116 | return self.child_items[row] | ||
117 | |||
118 | def getParentItem(self): | ||
119 | return self.parent_item | ||
120 | |||
121 | def getRow(self): | ||
122 | return self.row | ||
123 | |||
124 | def timePercent(self, b): | ||
125 | if not self.time: | ||
126 | return "0.0" | ||
127 | x = (b * Decimal(100)) / self.time | ||
128 | return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) | ||
129 | |||
130 | def branchPercent(self, b): | ||
131 | if not self.branch_count: | ||
132 | return "0.0" | ||
133 | x = (b * Decimal(100)) / self.branch_count | ||
134 | return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP)) | ||
135 | |||
136 | def addChild(self, call_path_id, name, dso, count, time, branch_count): | ||
137 | child_item = TreeItem(self.db, self.child_count, self) | ||
138 | child_item.comm_id = self.comm_id | ||
139 | child_item.thread_id = self.thread_id | ||
140 | child_item.call_path_id = call_path_id | ||
141 | child_item.branch_count = branch_count | ||
142 | child_item.time = time | ||
143 | child_item.data[0] = name | ||
144 | if dso == "[kernel.kallsyms]": | ||
145 | dso = "[kernel]" | ||
146 | child_item.data[1] = dso | ||
147 | child_item.data[2] = str(count) | ||
148 | child_item.data[3] = str(time) | ||
149 | child_item.data[4] = self.timePercent(time) | ||
150 | child_item.data[5] = str(branch_count) | ||
151 | child_item.data[6] = self.branchPercent(branch_count) | ||
152 | self.child_items.append(child_item) | ||
153 | self.child_count += 1 | ||
154 | |||
155 | def selectCalls(self): | ||
156 | self.query_done = True; | ||
157 | query = QSqlQuery(self.db) | ||
158 | ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, ' | ||
159 | '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), ' | ||
160 | '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), ' | ||
161 | '( SELECT ip FROM call_paths where id = call_path_id ) ' | ||
162 | 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) + | ||
163 | 'ORDER BY call_path_id') | ||
164 | if not ret: | ||
165 | raise Exception("Query failed: " + query.lastError().text()) | ||
166 | last_call_path_id = 0 | ||
167 | name = "" | ||
168 | dso = "" | ||
169 | count = 0 | ||
170 | branch_count = 0 | ||
171 | total_branch_count = 0 | ||
172 | time = 0 | ||
173 | total_time = 0 | ||
174 | while query.next(): | ||
175 | if query.value(1) == last_call_path_id: | ||
176 | count += 1 | ||
177 | branch_count += query.value(2) | ||
178 | time += query.value(4) - query.value(3) | ||
179 | else: | ||
180 | if count: | ||
181 | self.addChild(last_call_path_id, name, dso, count, time, branch_count) | ||
182 | last_call_path_id = query.value(1) | ||
183 | name = query.value(5) | ||
184 | dso = query.value(6) | ||
185 | count = 1 | ||
186 | total_branch_count += branch_count | ||
187 | total_time += time | ||
188 | branch_count = query.value(2) | ||
189 | time = query.value(4) - query.value(3) | ||
190 | if count: | ||
191 | self.addChild(last_call_path_id, name, dso, count, time, branch_count) | ||
192 | total_branch_count += branch_count | ||
193 | total_time += time | ||
194 | # Top level does not have time or branch count, so fix that here | ||
195 | if total_branch_count > self.branch_count: | ||
196 | self.branch_count = total_branch_count | ||
197 | if self.branch_count: | ||
198 | for child_item in self.child_items: | ||
199 | child_item.data[6] = self.branchPercent(child_item.branch_count) | ||
200 | if total_time > self.time: | ||
201 | self.time = total_time | ||
202 | if self.time: | ||
203 | for child_item in self.child_items: | ||
204 | child_item.data[4] = self.timePercent(child_item.time) | ||
205 | |||
206 | def childCount(self): | ||
207 | if not self.query_done: | ||
208 | self.selectCalls() | ||
209 | return self.child_count | ||
210 | |||
211 | def columnCount(self): | ||
212 | return 7 | ||
213 | |||
214 | def columnHeader(self, column): | ||
215 | headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "] | ||
216 | return headers[column] | ||
217 | |||
218 | def getData(self, column): | ||
219 | return self.data[column] | ||
220 | |||
221 | class TreeModel(QAbstractItemModel): | ||
222 | |||
223 | def __init__(self, db, parent=None): | ||
224 | super(TreeModel, self).__init__(parent) | ||
225 | self.db = db | ||
226 | self.root = TreeItem(db, 0, None) | ||
227 | |||
228 | def columnCount(self, parent): | ||
229 | return self.root.columnCount() | ||
230 | |||
231 | def rowCount(self, parent): | ||
232 | if parent.isValid(): | ||
233 | parent_item = parent.internalPointer() | ||
234 | else: | ||
235 | parent_item = self.root | ||
236 | return parent_item.childCount() | ||
237 | |||
238 | def headerData(self, section, orientation, role): | ||
239 | if role == Qt.TextAlignmentRole: | ||
240 | if section > 1: | ||
241 | return Qt.AlignRight | ||
242 | if role != Qt.DisplayRole: | ||
243 | return None | ||
244 | if orientation != Qt.Horizontal: | ||
245 | return None | ||
246 | return self.root.columnHeader(section) | ||
247 | |||
248 | def parent(self, child): | ||
249 | child_item = child.internalPointer() | ||
250 | if child_item is self.root: | ||
251 | return QModelIndex() | ||
252 | parent_item = child_item.getParentItem() | ||
253 | return self.createIndex(parent_item.getRow(), 0, parent_item) | ||
254 | |||
255 | def index(self, row, column, parent): | ||
256 | if parent.isValid(): | ||
257 | parent_item = parent.internalPointer() | ||
258 | else: | ||
259 | parent_item = self.root | ||
260 | child_item = parent_item.getChildItem(row) | ||
261 | return self.createIndex(row, column, child_item) | ||
262 | |||
263 | def data(self, index, role): | ||
264 | if role == Qt.TextAlignmentRole: | ||
265 | if index.column() > 1: | ||
266 | return Qt.AlignRight | ||
267 | if role != Qt.DisplayRole: | ||
268 | return None | ||
269 | index_item = index.internalPointer() | ||
270 | return index_item.getData(index.column()) | ||
271 | |||
272 | class MainWindow(QMainWindow): | ||
273 | |||
274 | def __init__(self, db, dbname, parent=None): | ||
275 | super(MainWindow, self).__init__(parent) | ||
276 | |||
277 | self.setObjectName("MainWindow") | ||
278 | self.setWindowTitle("Call Graph: " + dbname) | ||
279 | self.move(100, 100) | ||
280 | self.resize(800, 600) | ||
281 | style = self.style() | ||
282 | icon = style.standardIcon(QStyle.SP_MessageBoxInformation) | ||
283 | self.setWindowIcon(icon) | ||
284 | |||
285 | self.model = TreeModel(db) | ||
286 | |||
287 | self.view = QTreeView() | ||
288 | self.view.setModel(self.model) | ||
289 | |||
290 | self.setCentralWidget(self.view) | ||
291 | |||
292 | if __name__ == '__main__': | ||
293 | if (len(sys.argv) < 2): | ||
294 | print >> sys.stderr, "Usage is: call-graph-from-postgresql.py <database name>" | ||
295 | raise Exception("Too few arguments") | ||
296 | |||
297 | dbname = sys.argv[1] | ||
298 | |||
299 | db = QSqlDatabase.addDatabase('QPSQL') | ||
300 | |||
301 | opts = dbname.split() | ||
302 | for opt in opts: | ||
303 | if '=' in opt: | ||
304 | opt = opt.split('=') | ||
305 | if opt[0] == 'hostname': | ||
306 | db.setHostName(opt[1]) | ||
307 | elif opt[0] == 'port': | ||
308 | db.setPort(int(opt[1])) | ||
309 | elif opt[0] == 'username': | ||
310 | db.setUserName(opt[1]) | ||
311 | elif opt[0] == 'password': | ||
312 | db.setPassword(opt[1]) | ||
313 | elif opt[0] == 'dbname': | ||
314 | dbname = opt[1] | ||
315 | else: | ||
316 | dbname = opt | ||
317 | |||
318 | db.setDatabaseName(dbname) | ||
319 | if not db.open(): | ||
320 | raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text()) | ||
321 | |||
322 | app = QApplication(sys.argv) | ||
323 | window = MainWindow(db, dbname) | ||
324 | window.show() | ||
325 | err = app.exec_() | ||
326 | db.close() | ||
327 | sys.exit(err) | ||
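The Time (%) and Branch Count (%) columns above come from timePercent() and branchPercent(), which use Decimal quantization rather than float formatting; a minimal standalone sketch of the same rounding, with made-up sample counts, is:

from decimal import Decimal, ROUND_HALF_UP

def percent(part, total):
    # Mirrors TreeItem.timePercent()/branchPercent(): guard against a zero
    # total, then round half-up to one decimal place.
    if not total:
        return "0.0"
    x = (Decimal(part) * Decimal(100)) / Decimal(total)
    return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))

print(percent(1250, 40000))   # -> 3.1
print(percent(0, 40000))      # -> 0.0
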
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py new file mode 100644 index 000000000000..239cb0568ec3 --- /dev/null +++ b/tools/perf/scripts/python/compaction-times.py | |||
@@ -0,0 +1,311 @@ | |||
1 | # report time spent in compaction | ||
2 | # Licensed under the terms of the GNU GPL License version 2 | ||
3 | |||
4 | # testing: | ||
5 | # 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones | ||
6 | |||
7 | import os | ||
8 | import sys | ||
9 | import re | ||
10 | |||
11 | import signal | ||
12 | signal.signal(signal.SIGPIPE, signal.SIG_DFL) | ||
13 | |||
14 | usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n" | ||
15 | |||
16 | class popt: | ||
17 | DISP_DFL = 0 | ||
18 | DISP_PROC = 1 | ||
19 | DISP_PROC_VERBOSE=2 | ||
20 | |||
21 | class topt: | ||
22 | DISP_TIME = 0 | ||
23 | DISP_MIG = 1 | ||
24 | DISP_ISOLFREE = 2 | ||
25 | DISP_ISOLMIG = 4 | ||
26 | DISP_ALL = 7 | ||
27 | |||
28 | class comm_filter: | ||
29 | def __init__(self, re): | ||
30 | self.re = re | ||
31 | |||
32 | def filter(self, pid, comm): | ||
33 | m = self.re.search(comm) | ||
34 | return m == None or m.group() == "" | ||
35 | |||
36 | class pid_filter: | ||
37 | def __init__(self, low, high): | ||
38 | self.low = (0 if low == "" else int(low)) | ||
39 | self.high = (0 if high == "" else int(high)) | ||
40 | |||
41 | def filter(self, pid, comm): | ||
42 | return not (pid >= self.low and (self.high == 0 or pid <= self.high)) | ||
43 | |||
44 | def set_type(t): | ||
45 | global opt_disp | ||
46 | opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t) | ||
47 | |||
48 | def ns(sec, nsec): | ||
49 | return (sec * 1000000000) + nsec | ||
50 | |||
51 | def time(ns): | ||
52 | return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000) | ||
53 | |||
54 | class pair: | ||
55 | def __init__(self, aval, bval, alabel = None, blabel = None): | ||
56 | self.alabel = alabel | ||
57 | self.blabel = blabel | ||
58 | self.aval = aval | ||
59 | self.bval = bval | ||
60 | |||
61 | def __add__(self, rhs): | ||
62 | self.aval += rhs.aval | ||
63 | self.bval += rhs.bval | ||
64 | return self | ||
65 | |||
66 | def __str__(self): | ||
67 | return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval) | ||
68 | |||
69 | class cnode: | ||
70 | def __init__(self, ns): | ||
71 | self.ns = ns | ||
72 | self.migrated = pair(0, 0, "moved", "failed") | ||
73 | self.fscan = pair(0,0, "scanned", "isolated") | ||
74 | self.mscan = pair(0,0, "scanned", "isolated") | ||
75 | |||
76 | def __add__(self, rhs): | ||
77 | self.ns += rhs.ns | ||
78 | self.migrated += rhs.migrated | ||
79 | self.fscan += rhs.fscan | ||
80 | self.mscan += rhs.mscan | ||
81 | return self | ||
82 | |||
83 | def __str__(self): | ||
84 | prev = 0 | ||
85 | s = "%s " % time(self.ns) | ||
86 | if (opt_disp & topt.DISP_MIG): | ||
87 | s += "migration: %s" % self.migrated | ||
88 | prev = 1 | ||
89 | if (opt_disp & topt.DISP_ISOLFREE): | ||
90 | s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan) | ||
91 | prev = 1 | ||
92 | if (opt_disp & topt.DISP_ISOLMIG): | ||
93 | s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan) | ||
94 | return s | ||
95 | |||
96 | def complete(self, secs, nsecs): | ||
97 | self.ns = ns(secs, nsecs) - self.ns | ||
98 | |||
99 | def increment(self, migrated, fscan, mscan): | ||
100 | if (migrated != None): | ||
101 | self.migrated += migrated | ||
102 | if (fscan != None): | ||
103 | self.fscan += fscan | ||
104 | if (mscan != None): | ||
105 | self.mscan += mscan | ||
106 | |||
107 | |||
108 | class chead: | ||
109 | heads = {} | ||
110 | val = cnode(0) | ||
111 | fobj = None | ||
112 | |||
113 | @classmethod | ||
114 | def add_filter(cls, filter): | ||
115 | cls.fobj = filter | ||
116 | |||
117 | @classmethod | ||
118 | def create_pending(cls, pid, comm, start_secs, start_nsecs): | ||
119 | filtered = 0 | ||
120 | try: | ||
121 | head = cls.heads[pid] | ||
122 | filtered = head.is_filtered() | ||
123 | except KeyError: | ||
124 | if cls.fobj != None: | ||
125 | filtered = cls.fobj.filter(pid, comm) | ||
126 | head = cls.heads[pid] = chead(comm, pid, filtered) | ||
127 | |||
128 | if not filtered: | ||
129 | head.mark_pending(start_secs, start_nsecs) | ||
130 | |||
131 | @classmethod | ||
132 | def increment_pending(cls, pid, migrated, fscan, mscan): | ||
133 | head = cls.heads[pid] | ||
134 | if not head.is_filtered(): | ||
135 | if head.is_pending(): | ||
136 | head.do_increment(migrated, fscan, mscan) | ||
137 | else: | ||
138 | sys.stderr.write("missing start compaction event for pid %d\n" % pid) | ||
139 | |||
140 | @classmethod | ||
141 | def complete_pending(cls, pid, secs, nsecs): | ||
142 | head = cls.heads[pid] | ||
143 | if not head.is_filtered(): | ||
144 | if head.is_pending(): | ||
145 | head.make_complete(secs, nsecs) | ||
146 | else: | ||
147 | sys.stderr.write("missing start compaction event for pid %d\n" % pid) | ||
148 | |||
149 | @classmethod | ||
150 | def gen(cls): | ||
151 | if opt_proc != popt.DISP_DFL: | ||
152 | for i in cls.heads: | ||
153 | yield cls.heads[i] | ||
154 | |||
155 | @classmethod | ||
156 | def str(cls): | ||
157 | return cls.val | ||
158 | |||
159 | def __init__(self, comm, pid, filtered): | ||
160 | self.comm = comm | ||
161 | self.pid = pid | ||
162 | self.val = cnode(0) | ||
163 | self.pending = None | ||
164 | self.filtered = filtered | ||
165 | self.list = [] | ||
166 | |||
167 | def __add__(self, rhs): | ||
168 | self.ns += rhs.ns | ||
169 | self.val += rhs.val | ||
170 | return self | ||
171 | |||
172 | def mark_pending(self, secs, nsecs): | ||
173 | self.pending = cnode(ns(secs, nsecs)) | ||
174 | |||
175 | def do_increment(self, migrated, fscan, mscan): | ||
176 | self.pending.increment(migrated, fscan, mscan) | ||
177 | |||
178 | def make_complete(self, secs, nsecs): | ||
179 | self.pending.complete(secs, nsecs) | ||
180 | chead.val += self.pending | ||
181 | |||
182 | if opt_proc != popt.DISP_DFL: | ||
183 | self.val += self.pending | ||
184 | |||
185 | if opt_proc == popt.DISP_PROC_VERBOSE: | ||
186 | self.list.append(self.pending) | ||
187 | self.pending = None | ||
188 | |||
189 | def enumerate(self): | ||
190 | if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered(): | ||
191 | for i, pelem in enumerate(self.list): | ||
192 | sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem)) | ||
193 | |||
194 | def is_pending(self): | ||
195 | return self.pending != None | ||
196 | |||
197 | def is_filtered(self): | ||
198 | return self.filtered | ||
199 | |||
200 | def display(self): | ||
201 | if not self.is_filtered(): | ||
202 | sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val)) | ||
203 | |||
204 | |||
205 | def trace_end(): | ||
206 | sys.stdout.write("total: %s\n" % chead.str()) | ||
207 | for i in chead.gen(): | ||
208 | i.display() | ||
209 | i.enumerate() | ||
210 | |||
211 | def compaction__mm_compaction_migratepages(event_name, context, common_cpu, | ||
212 | common_secs, common_nsecs, common_pid, common_comm, | ||
213 | common_callchain, nr_migrated, nr_failed): | ||
214 | |||
215 | chead.increment_pending(common_pid, | ||
216 | pair(nr_migrated, nr_failed), None, None) | ||
217 | |||
218 | def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu, | ||
219 | common_secs, common_nsecs, common_pid, common_comm, | ||
220 | common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken): | ||
221 | |||
222 | chead.increment_pending(common_pid, | ||
223 | None, pair(nr_scanned, nr_taken), None) | ||
224 | |||
225 | def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu, | ||
226 | common_secs, common_nsecs, common_pid, common_comm, | ||
227 | common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken): | ||
228 | |||
229 | chead.increment_pending(common_pid, | ||
230 | None, None, pair(nr_scanned, nr_taken)) | ||
231 | |||
232 | def compaction__mm_compaction_end(event_name, context, common_cpu, | ||
233 | common_secs, common_nsecs, common_pid, common_comm, | ||
234 | common_callchain, zone_start, migrate_start, free_start, zone_end, | ||
235 | sync, status): | ||
236 | |||
237 | chead.complete_pending(common_pid, common_secs, common_nsecs) | ||
238 | |||
239 | def compaction__mm_compaction_begin(event_name, context, common_cpu, | ||
240 | common_secs, common_nsecs, common_pid, common_comm, | ||
241 | common_callchain, zone_start, migrate_start, free_start, zone_end, | ||
242 | sync): | ||
243 | |||
244 | chead.create_pending(common_pid, common_comm, common_secs, common_nsecs) | ||
245 | |||
246 | def pr_help(): | ||
247 | global usage | ||
248 | |||
249 | sys.stdout.write(usage) | ||
250 | sys.stdout.write("\n") | ||
251 | sys.stdout.write("-h display this help\n") | ||
252 | sys.stdout.write("-p display by process\n") | ||
253 | sys.stdout.write("-pv display by process (verbose)\n") | ||
254 | sys.stdout.write("-t display stall times only\n") | ||
255 | sys.stdout.write("-m display stats for migration\n") | ||
256 | sys.stdout.write("-fs display stats for free scanner\n") | ||
257 | sys.stdout.write("-ms display stats for migration scanner\n") | ||
258 | sys.stdout.write("-u display results in microseconds (default nanoseconds)\n") | ||
259 | |||
260 | |||
261 | comm_re = None | ||
262 | pid_re = None | ||
263 | pid_regex = "^(\d*)-(\d*)$|^(\d*)$" | ||
264 | |||
265 | opt_proc = popt.DISP_DFL | ||
266 | opt_disp = topt.DISP_ALL | ||
267 | |||
268 | opt_ns = True | ||
269 | |||
270 | argc = len(sys.argv) - 1 | ||
271 | if argc >= 1: | ||
272 | pid_re = re.compile(pid_regex) | ||
273 | |||
274 | for i, opt in enumerate(sys.argv[1:]): | ||
275 | if opt[0] == "-": | ||
276 | if opt == "-h": | ||
277 | pr_help() | ||
278 | exit(0) | ||
279 | elif opt == "-p": | ||
280 | opt_proc = popt.DISP_PROC | ||
281 | elif opt == "-pv": | ||
282 | opt_proc = popt.DISP_PROC_VERBOSE | ||
283 | elif opt == '-u': | ||
284 | opt_ns = False | ||
285 | elif opt == "-t": | ||
286 | set_type(topt.DISP_TIME) | ||
287 | elif opt == "-m": | ||
288 | set_type(topt.DISP_MIG) | ||
289 | elif opt == "-fs": | ||
290 | set_type(topt.DISP_ISOLFREE) | ||
291 | elif opt == "-ms": | ||
292 | set_type(topt.DISP_ISOLMIG) | ||
293 | else: | ||
294 | sys.exit(usage) | ||
295 | |||
296 | elif i == argc - 1: | ||
297 | m = pid_re.search(opt) | ||
298 | if m != None and m.group() != "": | ||
299 | if m.group(3) != None: | ||
300 | f = pid_filter(m.group(3), m.group(3)) | ||
301 | else: | ||
302 | f = pid_filter(m.group(1), m.group(2)) | ||
303 | else: | ||
304 | try: | ||
305 | comm_re=re.compile(opt) | ||
306 | except: | ||
307 | sys.stderr.write("invalid regex '%s'\n" % opt) | ||
308 | sys.exit(usage) | ||
309 | f = comm_filter(comm_re) | ||
310 | |||
311 | chead.add_filter(f) | ||
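The trailing command-line argument of compaction-times.py is interpreted as a pid, a pid range, or a comm regex, using the pid_regex pattern above. A small self-contained sketch with made-up example arguments shows which filter each form selects:

import re

# Same pattern the script compiles for its last argument.
pid_re = re.compile(r"^(\d*)-(\d*)$|^(\d*)$")

for arg in ("1234", "1000-2000", "kswapd.*"):   # made-up example arguments
    m = pid_re.search(arg)
    if m is not None and m.group() != "":
        if m.group(3) is not None:
            print("%-10s -> pid_filter(%s, %s)" % (arg, m.group(3), m.group(3)))
        else:
            print("%-10s -> pid_filter(%s, %s)" % (arg, m.group(1), m.group(2)))
    else:
        print("%-10s -> comm_filter(%r)" % (arg, arg))
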
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 4cdafd880074..84a32037a80f 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py | |||
@@ -15,6 +15,53 @@ import sys | |||
15 | import struct | 15 | import struct |
16 | import datetime | 16 | import datetime |
17 | 17 | ||
18 | # To use this script you will need to have the python-pyside package installed; it | ||
19 | # provides LGPL-licensed Python bindings for Qt. You will also need the package | ||
20 | # libqt4-sql-psql for Qt postgresql support. | ||
21 | # | ||
22 | # The script assumes postgresql is running on the local machine and that the | ||
23 | # user has postgresql permissions to create databases. Examples of installing | ||
24 | # postgresql and adding such a user are: | ||
25 | # | ||
26 | # fedora: | ||
27 | # | ||
28 | # $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql | ||
29 | # $ sudo su - postgres -c initdb | ||
30 | # $ sudo service postgresql start | ||
31 | # $ sudo su - postgres | ||
32 | # $ createuser <your user id here> | ||
33 | # Shall the new role be a superuser? (y/n) y | ||
34 | # | ||
35 | # ubuntu: | ||
36 | # | ||
37 | # $ sudo apt-get install postgresql | ||
38 | # $ sudo su - postgres | ||
39 | # $ createuser <your user id here> | ||
40 | # Shall the new role be a superuser? (y/n) y | ||
41 | # | ||
42 | # An example of using this script with Intel PT: | ||
43 | # | ||
44 | # $ perf record -e intel_pt//u ls | ||
45 | # $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls | ||
46 | # 2015-05-29 12:49:23.464364 Creating database... | ||
47 | # 2015-05-29 12:49:26.281717 Writing to intermediate files... | ||
48 | # 2015-05-29 12:49:27.190383 Copying to database... | ||
49 | # 2015-05-29 12:49:28.140451 Removing intermediate files... | ||
50 | # 2015-05-29 12:49:28.147451 Adding primary keys | ||
51 | # 2015-05-29 12:49:28.655683 Adding foreign keys | ||
52 | # 2015-05-29 12:49:29.365350 Done | ||
53 | # | ||
54 | # To browse the database, psql can be used e.g. | ||
55 | # | ||
56 | # $ psql pt_example | ||
57 | # pt_example=# select * from samples_view where id < 100; | ||
58 | # pt_example=# \d+ | ||
59 | # pt_example=# \d+ samples_view | ||
60 | # pt_example=# \q | ||
61 | # | ||
62 | # An example of using the database is provided by the script | ||
63 | # call-graph-from-postgresql.py. Refer to that script for details. | ||
64 | |||
18 | from PySide.QtSql import * | 65 | from PySide.QtSql import * |
19 | 66 | ||
20 | # Need to access PostgreSQL C library directly to use COPY FROM STDIN | 67 | # Need to access PostgreSQL C library directly to use COPY FROM STDIN |
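To make the usage notes above concrete: once the exporter has created a database, the same PySide Qt SQL bindings it imports can be used to query it from Python as well as from psql. A minimal sketch, assuming a local database named pt_example as in the example session above and relying only on the samples_view id column mentioned there:

from PySide.QtSql import QSqlDatabase, QSqlQuery

db = QSqlDatabase.addDatabase('QPSQL')
db.setDatabaseName('pt_example')        # database created by the exporter
if not db.open():
    raise Exception("Failed to open database: " + db.lastError().text())

query = QSqlQuery(db)
if not query.exec_('SELECT * FROM samples_view WHERE id < 100'):
    raise Exception("Query failed: " + query.lastError().text())
while query.next():
    print(query.value(0))               # first column of each row

db.close()
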
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index d20d6e6ab65b..c1518bdd0f1b 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build | |||
@@ -32,6 +32,7 @@ perf-y += sample-parsing.o | |||
32 | perf-y += parse-no-sample-id-all.o | 32 | perf-y += parse-no-sample-id-all.o |
33 | perf-y += kmod-path.o | 33 | perf-y += kmod-path.o |
34 | perf-y += thread-map.o | 34 | perf-y += thread-map.o |
35 | perf-y += llvm.o | ||
35 | 36 | ||
36 | perf-$(CONFIG_X86) += perf-time-to-tsc.o | 37 | perf-$(CONFIG_X86) += perf-time-to-tsc.o |
37 | 38 | ||
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index c1dde733c3a6..136cd934be66 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c | |||
@@ -175,6 +175,10 @@ static struct test { | |||
175 | .func = test__thread_map, | 175 | .func = test__thread_map, |
176 | }, | 176 | }, |
177 | { | 177 | { |
178 | .desc = "Test LLVM searching and compiling", | ||
179 | .func = test__llvm, | ||
180 | }, | ||
181 | { | ||
178 | .func = NULL, | 182 | .func = NULL, |
179 | }, | 183 | }, |
180 | }; | 184 | }; |
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 7d82c8be5e36..7ed737019de7 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c | |||
@@ -279,6 +279,7 @@ static int test1(struct perf_evsel *evsel, struct machine *machine) | |||
279 | 279 | ||
280 | symbol_conf.use_callchain = false; | 280 | symbol_conf.use_callchain = false; |
281 | symbol_conf.cumulate_callchain = false; | 281 | symbol_conf.cumulate_callchain = false; |
282 | perf_evsel__reset_sample_bit(evsel, CALLCHAIN); | ||
282 | 283 | ||
283 | setup_sorting(); | 284 | setup_sorting(); |
284 | callchain_register_param(&callchain_param); | 285 | callchain_register_param(&callchain_param); |
@@ -425,6 +426,7 @@ static int test2(struct perf_evsel *evsel, struct machine *machine) | |||
425 | 426 | ||
426 | symbol_conf.use_callchain = true; | 427 | symbol_conf.use_callchain = true; |
427 | symbol_conf.cumulate_callchain = false; | 428 | symbol_conf.cumulate_callchain = false; |
429 | perf_evsel__set_sample_bit(evsel, CALLCHAIN); | ||
428 | 430 | ||
429 | setup_sorting(); | 431 | setup_sorting(); |
430 | callchain_register_param(&callchain_param); | 432 | callchain_register_param(&callchain_param); |
@@ -482,6 +484,7 @@ static int test3(struct perf_evsel *evsel, struct machine *machine) | |||
482 | 484 | ||
483 | symbol_conf.use_callchain = false; | 485 | symbol_conf.use_callchain = false; |
484 | symbol_conf.cumulate_callchain = true; | 486 | symbol_conf.cumulate_callchain = true; |
487 | perf_evsel__reset_sample_bit(evsel, CALLCHAIN); | ||
485 | 488 | ||
486 | setup_sorting(); | 489 | setup_sorting(); |
487 | callchain_register_param(&callchain_param); | 490 | callchain_register_param(&callchain_param); |
@@ -665,6 +668,7 @@ static int test4(struct perf_evsel *evsel, struct machine *machine) | |||
665 | 668 | ||
666 | symbol_conf.use_callchain = true; | 669 | symbol_conf.use_callchain = true; |
667 | symbol_conf.cumulate_callchain = true; | 670 | symbol_conf.cumulate_callchain = true; |
671 | perf_evsel__set_sample_bit(evsel, CALLCHAIN); | ||
668 | 672 | ||
669 | setup_sorting(); | 673 | setup_sorting(); |
670 | callchain_register_param(&callchain_param); | 674 | callchain_register_param(&callchain_param); |
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c new file mode 100644 index 000000000000..52d55971f66f --- /dev/null +++ b/tools/perf/tests/llvm.c | |||
@@ -0,0 +1,98 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <bpf/libbpf.h> | ||
3 | #include <util/llvm-utils.h> | ||
4 | #include <util/cache.h> | ||
5 | #include "tests.h" | ||
6 | #include "debug.h" | ||
7 | |||
8 | static int perf_config_cb(const char *var, const char *val, | ||
9 | void *arg __maybe_unused) | ||
10 | { | ||
11 | return perf_default_config(var, val, arg); | ||
12 | } | ||
13 | |||
14 | /* | ||
15 | * Give it an arbitrary "version" section since we don't really load it | ||
16 | * into the kernel | ||
17 | */ | ||
18 | static const char test_bpf_prog[] = | ||
19 | "__attribute__((section(\"do_fork\"), used)) " | ||
20 | "int fork(void *ctx) {return 0;} " | ||
21 | "char _license[] __attribute__((section(\"license\"), used)) = \"GPL\";" | ||
22 | "int _version __attribute__((section(\"version\"), used)) = 0x40100;"; | ||
23 | |||
24 | #ifdef HAVE_LIBBPF_SUPPORT | ||
25 | static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz) | ||
26 | { | ||
27 | struct bpf_object *obj; | ||
28 | |||
29 | obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL); | ||
30 | if (!obj) | ||
31 | return -1; | ||
32 | bpf_object__close(obj); | ||
33 | return 0; | ||
34 | } | ||
35 | #else | ||
36 | static int test__bpf_parsing(void *obj_buf __maybe_unused, | ||
37 | size_t obj_buf_sz __maybe_unused) | ||
38 | { | ||
39 | fprintf(stderr, " (skip bpf parsing)"); | ||
40 | return 0; | ||
41 | } | ||
42 | #endif | ||
43 | |||
44 | int test__llvm(void) | ||
45 | { | ||
46 | char *tmpl_new, *clang_opt_new; | ||
47 | void *obj_buf; | ||
48 | size_t obj_buf_sz; | ||
49 | int err, old_verbose; | ||
50 | |||
51 | perf_config(perf_config_cb, NULL); | ||
52 | |||
53 | /* | ||
54 | * Skip this test if the user's .perfconfig doesn't set an [llvm] section, | ||
55 | * clang is not found in $PATH, and this is not 'perf test -v' | ||
56 | */ | ||
57 | if (verbose == 0 && !llvm_param.user_set_param && llvm__search_clang()) { | ||
58 | fprintf(stderr, " (no clang, try 'perf test -v LLVM')"); | ||
59 | return TEST_SKIP; | ||
60 | } | ||
61 | |||
62 | old_verbose = verbose; | ||
63 | /* | ||
64 | * llvm is verbose on error. Suppress all error output if | ||
65 | * not 'perf test -v'. | ||
66 | */ | ||
67 | if (verbose == 0) | ||
68 | verbose = -1; | ||
69 | |||
70 | if (!llvm_param.clang_bpf_cmd_template) | ||
71 | return -1; | ||
72 | |||
73 | if (!llvm_param.clang_opt) | ||
74 | llvm_param.clang_opt = strdup(""); | ||
75 | |||
76 | err = asprintf(&tmpl_new, "echo '%s' | %s", test_bpf_prog, | ||
77 | llvm_param.clang_bpf_cmd_template); | ||
78 | if (err < 0) | ||
79 | return -1; | ||
80 | err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt); | ||
81 | if (err < 0) | ||
82 | return -1; | ||
83 | |||
84 | llvm_param.clang_bpf_cmd_template = tmpl_new; | ||
85 | llvm_param.clang_opt = clang_opt_new; | ||
86 | err = llvm__compile_bpf("-", &obj_buf, &obj_buf_sz); | ||
87 | |||
88 | verbose = old_verbose; | ||
89 | if (err) { | ||
90 | if (!verbose) | ||
91 | fprintf(stderr, " (use -v to see error message)"); | ||
92 | return -1; | ||
93 | } | ||
94 | |||
95 | err = test__bpf_parsing(obj_buf, obj_buf_sz); | ||
96 | free(obj_buf); | ||
97 | return err; | ||
98 | } | ||
diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 729112f4cfaa..ba31c4bd441d 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make | |||
@@ -58,7 +58,8 @@ make_install_man := install-man | |||
58 | make_install_html := install-html | 58 | make_install_html := install-html |
59 | make_install_info := install-info | 59 | make_install_info := install-info |
60 | make_install_pdf := install-pdf | 60 | make_install_pdf := install-pdf |
61 | make_install_prefix := install prefix=/tmp/krava | 61 | make_install_prefix := install prefix=/tmp/krava |
62 | make_install_prefix_slash := install prefix=/tmp/krava/ | ||
62 | make_static := LDFLAGS=-static | 63 | make_static := LDFLAGS=-static |
63 | 64 | ||
64 | # all the NO_* variable combined | 65 | # all the NO_* variable combined |
@@ -101,6 +102,7 @@ run += make_util_pmu_bison_o | |||
101 | run += make_install | 102 | run += make_install |
102 | run += make_install_bin | 103 | run += make_install_bin |
103 | run += make_install_prefix | 104 | run += make_install_prefix |
105 | run += make_install_prefix_slash | ||
104 | # FIXME 'install-*' commented out till they're fixed | 106 | # FIXME 'install-*' commented out till they're fixed |
105 | # run += make_install_doc | 107 | # run += make_install_doc |
106 | # run += make_install_man | 108 | # run += make_install_man |
@@ -175,11 +177,14 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all)) | |||
175 | test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) | 177 | test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) |
176 | test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) | 178 | test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) |
177 | 179 | ||
178 | # We prefix all installed files for make_install_prefix | 180 | # We prefix all installed files for make_install_prefix(_slash) |
179 | # with '/tmp/krava' to match installed/prefix-ed files. | 181 | # with '/tmp/krava' to match installed/prefix-ed files. |
180 | installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) | 182 | installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) |
181 | test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) | 183 | test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) |
182 | test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) | 184 | test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) |
185 | |||
186 | test_make_install_prefix_slash := $(test_make_install_prefix) | ||
187 | test_make_install_prefix_slash_O := $(test_make_install_prefix_O) | ||
183 | 188 | ||
184 | # FIXME nothing gets installed | 189 | # FIXME nothing gets installed |
185 | test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 | 190 | test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 |
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index d76963f7ad3d..9b6b2b6324a1 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c | |||
@@ -82,8 +82,12 @@ static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist) | |||
82 | TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); | 82 | TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type); |
83 | TEST_ASSERT_VAL("wrong config", | 83 | TEST_ASSERT_VAL("wrong config", |
84 | PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); | 84 | PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config); |
85 | /* | ||
86 | * The period value gets configured within perf_evlist__config, | ||
87 | * while this test exercises only the parse-events method. | ||
88 | */ | ||
85 | TEST_ASSERT_VAL("wrong period", | 89 | TEST_ASSERT_VAL("wrong period", |
86 | 100000 == evsel->attr.sample_period); | 90 | 0 == evsel->attr.sample_period); |
87 | TEST_ASSERT_VAL("wrong config1", | 91 | TEST_ASSERT_VAL("wrong config1", |
88 | 0 == evsel->attr.config1); | 92 | 0 == evsel->attr.config1); |
89 | TEST_ASSERT_VAL("wrong config2", | 93 | TEST_ASSERT_VAL("wrong config2", |
@@ -406,7 +410,11 @@ static int test__checkevent_pmu(struct perf_evlist *evlist) | |||
406 | TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config); | 410 | TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config); |
407 | TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1); | 411 | TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1); |
408 | TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2); | 412 | TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2); |
409 | TEST_ASSERT_VAL("wrong period", 1000 == evsel->attr.sample_period); | 413 | /* |
414 | * The period value gets configured within perf_evlist__config, | ||
415 | * while this test exercises only the parse-events method. | ||
416 | */ | ||
417 | TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); | ||
410 | 418 | ||
411 | return 0; | 419 | return 0; |
412 | } | 420 | } |
@@ -471,6 +479,39 @@ static int test__checkevent_pmu_name(struct perf_evlist *evlist) | |||
471 | return 0; | 479 | return 0; |
472 | } | 480 | } |
473 | 481 | ||
482 | static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist) | ||
483 | { | ||
484 | struct perf_evsel *evsel = perf_evlist__first(evlist); | ||
485 | |||
486 | /* cpu/config=1,call-graph=fp,time,period=100000/ */ | ||
487 | TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries); | ||
488 | TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); | ||
489 | TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config); | ||
490 | /* | ||
491 | * The period, time and callgraph value gets configured | ||
492 | * within perf_evlist__config, | ||
493 | * while this test executes only parse events method. | ||
494 | */ | ||
495 | TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); | ||
496 | TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); | ||
497 | TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); | ||
498 | |||
499 | /* cpu/config=2,call-graph=no,time=0,period=2000/ */ | ||
500 | evsel = perf_evsel__next(evsel); | ||
501 | TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type); | ||
502 | TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config); | ||
503 | /* | ||
504 | * The period, time and callgraph values get configured | ||
505 | * within perf_evlist__config, | ||
506 | * while this test exercises only the parse-events method. | ||
507 | */ | ||
508 | TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period); | ||
509 | TEST_ASSERT_VAL("wrong callgraph", !(PERF_SAMPLE_CALLCHAIN & evsel->attr.sample_type)); | ||
510 | TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type)); | ||
511 | |||
512 | return 0; | ||
513 | } | ||
514 | |||
474 | static int test__checkevent_pmu_events(struct perf_evlist *evlist) | 515 | static int test__checkevent_pmu_events(struct perf_evlist *evlist) |
475 | { | 516 | { |
476 | struct perf_evsel *evsel = perf_evlist__first(evlist); | 517 | struct perf_evsel *evsel = perf_evlist__first(evlist); |
@@ -1547,6 +1588,11 @@ static struct evlist_test test__events_pmu[] = { | |||
1547 | .check = test__checkevent_pmu_name, | 1588 | .check = test__checkevent_pmu_name, |
1548 | .id = 1, | 1589 | .id = 1, |
1549 | }, | 1590 | }, |
1591 | { | ||
1592 | .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/", | ||
1593 | .check = test__checkevent_pmu_partial_time_callgraph, | ||
1594 | .id = 2, | ||
1595 | }, | ||
1550 | }; | 1596 | }; |
1551 | 1597 | ||
1552 | struct terms_test { | 1598 | struct terms_test { |
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 1aa21c90731b..5b83f56a3b6f 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c | |||
@@ -34,6 +34,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) | |||
34 | .disabled = 1, | 34 | .disabled = 1, |
35 | .freq = 1, | 35 | .freq = 1, |
36 | }; | 36 | }; |
37 | struct cpu_map *cpus; | ||
38 | struct thread_map *threads; | ||
37 | 39 | ||
38 | attr.sample_freq = 500; | 40 | attr.sample_freq = 500; |
39 | 41 | ||
@@ -50,14 +52,19 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) | |||
50 | } | 52 | } |
51 | perf_evlist__add(evlist, evsel); | 53 | perf_evlist__add(evlist, evsel); |
52 | 54 | ||
53 | evlist->cpus = cpu_map__dummy_new(); | 55 | cpus = cpu_map__dummy_new(); |
54 | evlist->threads = thread_map__new_by_tid(getpid()); | 56 | threads = thread_map__new_by_tid(getpid()); |
55 | if (!evlist->cpus || !evlist->threads) { | 57 | if (!cpus || !threads) { |
56 | err = -ENOMEM; | 58 | err = -ENOMEM; |
57 | pr_debug("Not enough memory to create thread/cpu maps\n"); | 59 | pr_debug("Not enough memory to create thread/cpu maps\n"); |
58 | goto out_delete_evlist; | 60 | goto out_free_maps; |
59 | } | 61 | } |
60 | 62 | ||
63 | perf_evlist__set_maps(evlist, cpus, threads); | ||
64 | |||
65 | cpus = NULL; | ||
66 | threads = NULL; | ||
67 | |||
61 | if (perf_evlist__open(evlist)) { | 68 | if (perf_evlist__open(evlist)) { |
62 | const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; | 69 | const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; |
63 | 70 | ||
@@ -107,6 +114,9 @@ next_event: | |||
107 | err = -1; | 114 | err = -1; |
108 | } | 115 | } |
109 | 116 | ||
117 | out_free_maps: | ||
118 | cpu_map__put(cpus); | ||
119 | thread_map__put(threads); | ||
110 | out_delete_evlist: | 120 | out_delete_evlist: |
111 | perf_evlist__delete(evlist); | 121 | perf_evlist__delete(evlist); |
112 | return err; | 122 | return err; |
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 3a8fedef83bc..add16385f13e 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c | |||
@@ -43,6 +43,8 @@ int test__task_exit(void) | |||
43 | }; | 43 | }; |
44 | const char *argv[] = { "true", NULL }; | 44 | const char *argv[] = { "true", NULL }; |
45 | char sbuf[STRERR_BUFSIZE]; | 45 | char sbuf[STRERR_BUFSIZE]; |
46 | struct cpu_map *cpus; | ||
47 | struct thread_map *threads; | ||
46 | 48 | ||
47 | signal(SIGCHLD, sig_handler); | 49 | signal(SIGCHLD, sig_handler); |
48 | 50 | ||
@@ -58,14 +60,19 @@ int test__task_exit(void) | |||
58 | * perf_evlist__prepare_workload we'll fill in the only thread | 60 | * perf_evlist__prepare_workload we'll fill in the only thread |
59 | * we're monitoring, the one forked there. | 61 | * we're monitoring, the one forked there. |
60 | */ | 62 | */ |
61 | evlist->cpus = cpu_map__dummy_new(); | 63 | cpus = cpu_map__dummy_new(); |
62 | evlist->threads = thread_map__new_by_tid(-1); | 64 | threads = thread_map__new_by_tid(-1); |
63 | if (!evlist->cpus || !evlist->threads) { | 65 | if (!cpus || !threads) { |
64 | err = -ENOMEM; | 66 | err = -ENOMEM; |
65 | pr_debug("Not enough memory to create thread/cpu maps\n"); | 67 | pr_debug("Not enough memory to create thread/cpu maps\n"); |
66 | goto out_delete_evlist; | 68 | goto out_free_maps; |
67 | } | 69 | } |
68 | 70 | ||
71 | perf_evlist__set_maps(evlist, cpus, threads); | ||
72 | |||
73 | cpus = NULL; | ||
74 | threads = NULL; | ||
75 | |||
69 | err = perf_evlist__prepare_workload(evlist, &target, argv, false, | 76 | err = perf_evlist__prepare_workload(evlist, &target, argv, false, |
70 | workload_exec_failed_signal); | 77 | workload_exec_failed_signal); |
71 | if (err < 0) { | 78 | if (err < 0) { |
@@ -114,6 +121,9 @@ retry: | |||
114 | err = -1; | 121 | err = -1; |
115 | } | 122 | } |
116 | 123 | ||
124 | out_free_maps: | ||
125 | cpu_map__put(cpus); | ||
126 | thread_map__put(threads); | ||
117 | out_delete_evlist: | 127 | out_delete_evlist: |
118 | perf_evlist__delete(evlist); | 128 | perf_evlist__delete(evlist); |
119 | return err; | 129 | return err; |
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index ebb47d96bc0b..bf113a247987 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h | |||
@@ -62,6 +62,7 @@ int test__fdarray__filter(void); | |||
62 | int test__fdarray__add(void); | 62 | int test__fdarray__add(void); |
63 | int test__kmod_path__parse(void); | 63 | int test__kmod_path__parse(void); |
64 | int test__thread_map(void); | 64 | int test__thread_map(void); |
65 | int test__llvm(void); | ||
65 | 66 | ||
66 | #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) | 67 | #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) |
67 | #ifdef HAVE_DWARF_UNWIND_SUPPORT | 68 | #ifdef HAVE_DWARF_UNWIND_SUPPORT |
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 5acf000939ea..138a0e3431fa 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c | |||
@@ -20,6 +20,8 @@ int test__thread_map(void) | |||
20 | TEST_ASSERT_VAL("wrong comm", | 20 | TEST_ASSERT_VAL("wrong comm", |
21 | thread_map__comm(map, 0) && | 21 | thread_map__comm(map, 0) && |
22 | !strcmp(thread_map__comm(map, 0), "perf")); | 22 | !strcmp(thread_map__comm(map, 0), "perf")); |
23 | TEST_ASSERT_VAL("wrong refcnt", | ||
24 | atomic_read(&map->refcnt) == 1); | ||
23 | thread_map__put(map); | 25 | thread_map__put(map); |
24 | 26 | ||
25 | /* test dummy pid */ | 27 | /* test dummy pid */ |
@@ -33,6 +35,8 @@ int test__thread_map(void) | |||
33 | TEST_ASSERT_VAL("wrong comm", | 35 | TEST_ASSERT_VAL("wrong comm", |
34 | thread_map__comm(map, 0) && | 36 | thread_map__comm(map, 0) && |
35 | !strcmp(thread_map__comm(map, 0), "dummy")); | 37 | !strcmp(thread_map__comm(map, 0), "dummy")); |
38 | TEST_ASSERT_VAL("wrong refcnt", | ||
39 | atomic_read(&map->refcnt) == 1); | ||
36 | thread_map__put(map); | 40 | thread_map__put(map); |
37 | return 0; | 41 | return 0; |
38 | } | 42 | } |
diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file new file mode 100644 index 000000000000..62378a899d79 --- /dev/null +++ b/tools/perf/trace/strace/groups/file | |||
@@ -0,0 +1,18 @@ | |||
1 | access | ||
2 | chmod | ||
3 | creat | ||
4 | execve | ||
5 | faccessat | ||
6 | getcwd | ||
7 | lstat | ||
8 | mkdir | ||
9 | open | ||
10 | openat | ||
11 | quotactl | ||
12 | readlink | ||
13 | rename | ||
14 | rmdir | ||
15 | stat | ||
16 | statfs | ||
17 | symlink | ||
18 | unlink | ||
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 6680fa5cb9dd..c6c7e5189214 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c | |||
@@ -46,6 +46,21 @@ void ui_browser__gotorc(struct ui_browser *browser, int y, int x) | |||
46 | SLsmg_gotorc(browser->y + y, browser->x + x); | 46 | SLsmg_gotorc(browser->y + y, browser->x + x); |
47 | } | 47 | } |
48 | 48 | ||
49 | void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg, | ||
50 | unsigned int width) | ||
51 | { | ||
52 | slsmg_write_nstring(msg, width); | ||
53 | } | ||
54 | |||
55 | void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...) | ||
56 | { | ||
57 | va_list args; | ||
58 | |||
59 | va_start(args, fmt); | ||
60 | slsmg_vprintf(fmt, args); | ||
61 | va_end(args); | ||
62 | } | ||
63 | |||
49 | static struct list_head * | 64 | static struct list_head * |
50 | ui_browser__list_head_filter_entries(struct ui_browser *browser, | 65 | ui_browser__list_head_filter_entries(struct ui_browser *browser, |
51 | struct list_head *pos) | 66 | struct list_head *pos) |
@@ -234,7 +249,7 @@ void __ui_browser__show_title(struct ui_browser *browser, const char *title) | |||
234 | { | 249 | { |
235 | SLsmg_gotorc(0, 0); | 250 | SLsmg_gotorc(0, 0); |
236 | ui_browser__set_color(browser, HE_COLORSET_ROOT); | 251 | ui_browser__set_color(browser, HE_COLORSET_ROOT); |
237 | slsmg_write_nstring(title, browser->width + 1); | 252 | ui_browser__write_nstring(browser, title, browser->width + 1); |
238 | } | 253 | } |
239 | 254 | ||
240 | void ui_browser__show_title(struct ui_browser *browser, const char *title) | 255 | void ui_browser__show_title(struct ui_browser *browser, const char *title) |
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 92ae72113965..f3cef564de02 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h | |||
@@ -37,6 +37,9 @@ void ui_browser__refresh_dimensions(struct ui_browser *browser); | |||
37 | void ui_browser__reset_index(struct ui_browser *browser); | 37 | void ui_browser__reset_index(struct ui_browser *browser); |
38 | 38 | ||
39 | void ui_browser__gotorc(struct ui_browser *browser, int y, int x); | 39 | void ui_browser__gotorc(struct ui_browser *browser, int y, int x); |
40 | void ui_browser__write_nstring(struct ui_browser *browser, const char *msg, | ||
41 | unsigned int width); | ||
42 | void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...); | ||
40 | void ui_browser__write_graph(struct ui_browser *browser, int graph); | 43 | void ui_browser__write_graph(struct ui_browser *browser, int graph); |
41 | void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, | 44 | void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, |
42 | u64 start, u64 end); | 45 | u64 start, u64 end); |
@@ -58,8 +61,8 @@ int ui_browser__help_window(struct ui_browser *browser, const char *text); | |||
58 | bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); | 61 | bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text); |
59 | int ui_browser__input_window(const char *title, const char *text, char *input, | 62 | int ui_browser__input_window(const char *title, const char *text, char *input, |
60 | const char *exit_msg, int delay_sec); | 63 | const char *exit_msg, int delay_sec); |
61 | struct perf_session_env; | 64 | struct perf_env; |
62 | int tui__header_window(struct perf_session_env *env); | 65 | int tui__header_window(struct perf_env *env); |
63 | 66 | ||
64 | void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); | 67 | void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence); |
65 | unsigned int ui_browser__argv_refresh(struct ui_browser *browser); | 68 | unsigned int ui_browser__argv_refresh(struct ui_browser *browser); |
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5995a8bd7c69..29739b347599 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c | |||
@@ -1,7 +1,6 @@ | |||
1 | #include "../../util/util.h" | 1 | #include "../../util/util.h" |
2 | #include "../browser.h" | 2 | #include "../browser.h" |
3 | #include "../helpline.h" | 3 | #include "../helpline.h" |
4 | #include "../libslang.h" | ||
5 | #include "../ui.h" | 4 | #include "../ui.h" |
6 | #include "../util.h" | 5 | #include "../util.h" |
7 | #include "../../util/annotate.h" | 6 | #include "../../util/annotate.h" |
@@ -16,6 +15,9 @@ struct disasm_line_samples { | |||
16 | u64 nr; | 15 | u64 nr; |
17 | }; | 16 | }; |
18 | 17 | ||
18 | #define IPC_WIDTH 6 | ||
19 | #define CYCLES_WIDTH 6 | ||
20 | |||
19 | struct browser_disasm_line { | 21 | struct browser_disasm_line { |
20 | struct rb_node rb_node; | 22 | struct rb_node rb_node; |
21 | u32 idx; | 23 | u32 idx; |
@@ -53,6 +55,7 @@ struct annotate_browser { | |||
53 | int max_jump_sources; | 55 | int max_jump_sources; |
54 | int nr_jumps; | 56 | int nr_jumps; |
55 | bool searching_backwards; | 57 | bool searching_backwards; |
58 | bool have_cycles; | ||
56 | u8 addr_width; | 59 | u8 addr_width; |
57 | u8 jumps_width; | 60 | u8 jumps_width; |
58 | u8 target_width; | 61 | u8 target_width; |
@@ -96,6 +99,15 @@ static int annotate_browser__set_jumps_percent_color(struct annotate_browser *br | |||
96 | return ui_browser__set_color(&browser->b, color); | 99 | return ui_browser__set_color(&browser->b, color); |
97 | } | 100 | } |
98 | 101 | ||
102 | static int annotate_browser__pcnt_width(struct annotate_browser *ab) | ||
103 | { | ||
104 | int w = 7 * ab->nr_events; | ||
105 | |||
106 | if (ab->have_cycles) | ||
107 | w += IPC_WIDTH + CYCLES_WIDTH; | ||
108 | return w; | ||
109 | } | ||
110 | |||
99 | static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) | 111 | static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) |
100 | { | 112 | { |
101 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); | 113 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); |
@@ -106,7 +118,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
106 | (!current_entry || (browser->use_navkeypressed && | 118 | (!current_entry || (browser->use_navkeypressed && |
107 | !browser->navkeypressed))); | 119 | !browser->navkeypressed))); |
108 | int width = browser->width, printed; | 120 | int width = browser->width, printed; |
109 | int i, pcnt_width = 7 * ab->nr_events; | 121 | int i, pcnt_width = annotate_browser__pcnt_width(ab); |
110 | double percent_max = 0.0; | 122 | double percent_max = 0.0; |
111 | char bf[256]; | 123 | char bf[256]; |
112 | 124 | ||
@@ -116,19 +128,36 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
116 | } | 128 | } |
117 | 129 | ||
118 | if (dl->offset != -1 && percent_max != 0.0) { | 130 | if (dl->offset != -1 && percent_max != 0.0) { |
119 | for (i = 0; i < ab->nr_events; i++) { | 131 | if (percent_max != 0.0) { |
120 | ui_browser__set_percent_color(browser, | 132 | for (i = 0; i < ab->nr_events; i++) { |
121 | bdl->samples[i].percent, | 133 | ui_browser__set_percent_color(browser, |
122 | current_entry); | 134 | bdl->samples[i].percent, |
123 | if (annotate_browser__opts.show_total_period) | 135 | current_entry); |
124 | slsmg_printf("%6" PRIu64 " ", | 136 | if (annotate_browser__opts.show_total_period) { |
125 | bdl->samples[i].nr); | 137 | ui_browser__printf(browser, "%6" PRIu64 " ", |
126 | else | 138 | bdl->samples[i].nr); |
127 | slsmg_printf("%6.2f ", bdl->samples[i].percent); | 139 | } else { |
140 | ui_browser__printf(browser, "%6.2f ", | ||
141 | bdl->samples[i].percent); | ||
142 | } | ||
143 | } | ||
144 | } else { | ||
145 | ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); | ||
128 | } | 146 | } |
129 | } else { | 147 | } else { |
130 | ui_browser__set_percent_color(browser, 0, current_entry); | 148 | ui_browser__set_percent_color(browser, 0, current_entry); |
131 | slsmg_write_nstring(" ", pcnt_width); | 149 | ui_browser__write_nstring(browser, " ", 7 * ab->nr_events); |
150 | } | ||
151 | if (ab->have_cycles) { | ||
152 | if (dl->ipc) | ||
153 | ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); | ||
154 | else | ||
155 | ui_browser__write_nstring(browser, " ", IPC_WIDTH); | ||
156 | if (dl->cycles) | ||
157 | ui_browser__printf(browser, "%*" PRIu64 " ", | ||
158 | CYCLES_WIDTH - 1, dl->cycles); | ||
159 | else | ||
160 | ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); | ||
132 | } | 161 | } |
133 | 162 | ||
134 | SLsmg_write_char(' '); | 163 | SLsmg_write_char(' '); |
@@ -138,7 +167,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
138 | width += 1; | 167 | width += 1; |
139 | 168 | ||
140 | if (!*dl->line) | 169 | if (!*dl->line) |
141 | slsmg_write_nstring(" ", width - pcnt_width); | 170 | ui_browser__write_nstring(browser, " ", width - pcnt_width); |
142 | else if (dl->offset == -1) { | 171 | else if (dl->offset == -1) { |
143 | if (dl->line_nr && annotate_browser__opts.show_linenr) | 172 | if (dl->line_nr && annotate_browser__opts.show_linenr) |
144 | printed = scnprintf(bf, sizeof(bf), "%-*d ", | 173 | printed = scnprintf(bf, sizeof(bf), "%-*d ", |
@@ -146,8 +175,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
146 | else | 175 | else |
147 | printed = scnprintf(bf, sizeof(bf), "%*s ", | 176 | printed = scnprintf(bf, sizeof(bf), "%*s ", |
148 | ab->addr_width, " "); | 177 | ab->addr_width, " "); |
149 | slsmg_write_nstring(bf, printed); | 178 | ui_browser__write_nstring(browser, bf, printed); |
150 | slsmg_write_nstring(dl->line, width - printed - pcnt_width + 1); | 179 | ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width + 1); |
151 | } else { | 180 | } else { |
152 | u64 addr = dl->offset; | 181 | u64 addr = dl->offset; |
153 | int color = -1; | 182 | int color = -1; |
@@ -166,7 +195,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
166 | bdl->jump_sources); | 195 | bdl->jump_sources); |
167 | prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, | 196 | prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, |
168 | current_entry); | 197 | current_entry); |
169 | slsmg_write_nstring(bf, printed); | 198 | ui_browser__write_nstring(browser, bf, printed); |
170 | ui_browser__set_color(browser, prev); | 199 | ui_browser__set_color(browser, prev); |
171 | } | 200 | } |
172 | 201 | ||
@@ -180,7 +209,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
180 | 209 | ||
181 | if (change_color) | 210 | if (change_color) |
182 | color = ui_browser__set_color(browser, HE_COLORSET_ADDR); | 211 | color = ui_browser__set_color(browser, HE_COLORSET_ADDR); |
183 | slsmg_write_nstring(bf, printed); | 212 | ui_browser__write_nstring(browser, bf, printed); |
184 | if (change_color) | 213 | if (change_color) |
185 | ui_browser__set_color(browser, color); | 214 | ui_browser__set_color(browser, color); |
186 | if (dl->ins && dl->ins->ops->scnprintf) { | 215 | if (dl->ins && dl->ins->ops->scnprintf) { |
@@ -194,11 +223,11 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
194 | ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); | 223 | ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); |
195 | SLsmg_write_char(' '); | 224 | SLsmg_write_char(' '); |
196 | } else { | 225 | } else { |
197 | slsmg_write_nstring(" ", 2); | 226 | ui_browser__write_nstring(browser, " ", 2); |
198 | } | 227 | } |
199 | } else { | 228 | } else { |
200 | if (strcmp(dl->name, "retq")) { | 229 | if (strcmp(dl->name, "retq")) { |
201 | slsmg_write_nstring(" ", 2); | 230 | ui_browser__write_nstring(browser, " ", 2); |
202 | } else { | 231 | } else { |
203 | ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); | 232 | ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); |
204 | SLsmg_write_char(' '); | 233 | SLsmg_write_char(' '); |
@@ -206,7 +235,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int | |||
206 | } | 235 | } |
207 | 236 | ||
208 | disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); | 237 | disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); |
209 | slsmg_write_nstring(bf, width - pcnt_width - 3 - printed); | 238 | ui_browser__write_nstring(browser, bf, width - pcnt_width - 3 - printed); |
210 | } | 239 | } |
211 | 240 | ||
212 | if (current_entry) | 241 | if (current_entry) |
@@ -231,7 +260,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) | |||
231 | unsigned int from, to; | 260 | unsigned int from, to; |
232 | struct map_symbol *ms = ab->b.priv; | 261 | struct map_symbol *ms = ab->b.priv; |
233 | struct symbol *sym = ms->sym; | 262 | struct symbol *sym = ms->sym; |
234 | u8 pcnt_width = 7; | 263 | u8 pcnt_width = annotate_browser__pcnt_width(ab); |
235 | 264 | ||
236 | /* PLT symbols contain external offsets */ | 265 | /* PLT symbols contain external offsets */ |
237 | if (strstr(sym->name, "@plt")) | 266 | if (strstr(sym->name, "@plt")) |
@@ -255,8 +284,6 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) | |||
255 | to = (u64)btarget->idx; | 284 | to = (u64)btarget->idx; |
256 | } | 285 | } |
257 | 286 | ||
258 | pcnt_width *= ab->nr_events; | ||
259 | |||
260 | ui_browser__set_color(browser, HE_COLORSET_CODE); | 287 | ui_browser__set_color(browser, HE_COLORSET_CODE); |
261 | __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, | 288 | __ui_browser__line_arrow(browser, pcnt_width + 2 + ab->addr_width, |
262 | from, to); | 289 | from, to); |
@@ -266,9 +293,7 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) | |||
266 | { | 293 | { |
267 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); | 294 | struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); |
268 | int ret = ui_browser__list_head_refresh(browser); | 295 | int ret = ui_browser__list_head_refresh(browser); |
269 | int pcnt_width; | 296 | int pcnt_width = annotate_browser__pcnt_width(ab); |
270 | |||
271 | pcnt_width = 7 * ab->nr_events; | ||
272 | 297 | ||
273 | if (annotate_browser__opts.jump_arrows) | 298 | if (annotate_browser__opts.jump_arrows) |
274 | annotate_browser__draw_current_jump(browser); | 299 | annotate_browser__draw_current_jump(browser); |
@@ -390,7 +415,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, | |||
390 | max_percent = bpos->samples[i].percent; | 415 | max_percent = bpos->samples[i].percent; |
391 | } | 416 | } |
392 | 417 | ||
393 | if (max_percent < 0.01) { | 418 | if (max_percent < 0.01 && pos->ipc == 0) { |
394 | RB_CLEAR_NODE(&bpos->rb_node); | 419 | RB_CLEAR_NODE(&bpos->rb_node); |
395 | continue; | 420 | continue; |
396 | } | 421 | } |
@@ -869,6 +894,75 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, | |||
869 | return map_symbol__tui_annotate(&he->ms, evsel, hbt); | 894 | return map_symbol__tui_annotate(&he->ms, evsel, hbt); |
870 | } | 895 | } |
871 | 896 | ||
897 | |||
898 | static unsigned count_insn(struct annotate_browser *browser, u64 start, u64 end) | ||
899 | { | ||
900 | unsigned n_insn = 0; | ||
901 | u64 offset; | ||
902 | |||
903 | for (offset = start; offset <= end; offset++) { | ||
904 | if (browser->offsets[offset]) | ||
905 | n_insn++; | ||
906 | } | ||
907 | return n_insn; | ||
908 | } | ||
909 | |||
910 | static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, | ||
911 | struct cyc_hist *ch) | ||
912 | { | ||
913 | unsigned n_insn; | ||
914 | u64 offset; | ||
915 | |||
916 | n_insn = count_insn(browser, start, end); | ||
917 | if (n_insn && ch->num && ch->cycles) { | ||
918 | float ipc = n_insn / ((double)ch->cycles / (double)ch->num); | ||
919 | |||
920 | /* Hide data when there are too many overlaps. */ | ||
921 | if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) | ||
922 | return; | ||
923 | |||
924 | for (offset = start; offset <= end; offset++) { | ||
925 | struct disasm_line *dl = browser->offsets[offset]; | ||
926 | |||
927 | if (dl) | ||
928 | dl->ipc = ipc; | ||
929 | } | ||
930 | } | ||
931 | } | ||
932 | |||
933 | /* | ||
934 | * This should probably be in util/annotate.c to share with the tty | ||
935 | * annotate, but right now we need the per-byte offsets arrays, | ||
936 | * which are only here. | ||
937 | */ | ||
938 | static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, | ||
939 | struct symbol *sym) | ||
940 | { | ||
941 | u64 offset; | ||
942 | struct annotation *notes = symbol__annotation(sym); | ||
943 | |||
944 | if (!notes->src || !notes->src->cycles_hist) | ||
945 | return; | ||
946 | |||
947 | pthread_mutex_lock(¬es->lock); | ||
948 | for (offset = 0; offset < size; ++offset) { | ||
949 | struct cyc_hist *ch; | ||
950 | |||
951 | ch = ¬es->src->cycles_hist[offset]; | ||
952 | if (ch && ch->cycles) { | ||
953 | struct disasm_line *dl; | ||
954 | |||
955 | if (ch->have_start) | ||
956 | count_and_fill(browser, ch->start, offset, ch); | ||
957 | dl = browser->offsets[offset]; | ||
958 | if (dl && ch->num_aggr) | ||
959 | dl->cycles = ch->cycles_aggr / ch->num_aggr; | ||
960 | browser->have_cycles = true; | ||
961 | } | ||
962 | } | ||
963 | pthread_mutex_unlock(¬es->lock); | ||
964 | } | ||
965 | |||
872 | static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, | 966 | static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, |
873 | size_t size) | 967 | size_t size) |
874 | { | 968 | { |
@@ -991,6 +1085,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, | |||
991 | } | 1085 | } |
992 | 1086 | ||
993 | annotate_browser__mark_jump_targets(&browser, size); | 1087 | annotate_browser__mark_jump_targets(&browser, size); |
1088 | annotate__compute_ipc(&browser, size, sym); | ||
994 | 1089 | ||
995 | browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); | 1090 | browser.addr_width = browser.target_width = browser.min_addr_width = hex_width(size); |
996 | browser.max_addr_width = hex_width(sym->end); | 1091 | browser.max_addr_width = hex_width(sym->end); |
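The new count_and_fill() in the annotate browser derives a per-instruction IPC for a basic block as the number of instructions between start and end divided by the block's average cycle count (cycles / num), and writes that value into every covered disasm_line. A minimal standalone sketch of that arithmetic, using hypothetical sample numbers rather than real trace data:

#include <stdio.h>

/* Standalone sketch of the IPC arithmetic used by count_and_fill();
 * the values below are hypothetical, not taken from a real trace. */
int main(void)
{
        unsigned n_insn = 12;              /* instructions found between start..end */
        unsigned long long cycles = 900;   /* summed cycles for the block */
        unsigned num = 30;                 /* number of samples hitting the block */

        /* average cycles per execution of the block */
        double avg_cycles = (double)cycles / (double)num;
        /* instructions per cycle, as filled into each disasm_line->ipc */
        float ipc = n_insn / avg_cycles;

        printf("avg cycles = %.1f, IPC = %.2f\n", avg_cycles, ipc);
        return 0;
}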
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c index e8278c558d4a..edbeaaf31ace 100644 --- a/tools/perf/ui/browsers/header.c +++ b/tools/perf/ui/browsers/header.c | |||
@@ -25,7 +25,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, | |||
25 | ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : | 25 | ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : |
26 | HE_COLORSET_NORMAL); | 26 | HE_COLORSET_NORMAL); |
27 | 27 | ||
28 | slsmg_write_nstring(str, browser->width); | 28 | ui_browser__write_nstring(browser, str, browser->width); |
29 | } | 29 | } |
30 | 30 | ||
31 | static int list_menu__run(struct ui_browser *menu) | 31 | static int list_menu__run(struct ui_browser *menu) |
@@ -91,7 +91,7 @@ static int ui__list_menu(int argc, char * const argv[]) | |||
91 | return list_menu__run(&menu); | 91 | return list_menu__run(&menu); |
92 | } | 92 | } |
93 | 93 | ||
94 | int tui__header_window(struct perf_session_env *env) | 94 | int tui__header_window(struct perf_env *env) |
95 | { | 95 | { |
96 | int i, argc = 0; | 96 | int i, argc = 0; |
97 | char **argv; | 97 | char **argv; |
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fa67613976a8..c04c60d4863c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <stdio.h> | 1 | #include <stdio.h> |
2 | #include "../libslang.h" | ||
3 | #include <stdlib.h> | 2 | #include <stdlib.h> |
4 | #include <string.h> | 3 | #include <string.h> |
5 | #include <linux/rbtree.h> | 4 | #include <linux/rbtree.h> |
@@ -27,7 +26,7 @@ struct hist_browser { | |||
27 | struct map_symbol *selection; | 26 | struct map_symbol *selection; |
28 | struct hist_browser_timer *hbt; | 27 | struct hist_browser_timer *hbt; |
29 | struct pstack *pstack; | 28 | struct pstack *pstack; |
30 | struct perf_session_env *env; | 29 | struct perf_env *env; |
31 | int print_seq; | 30 | int print_seq; |
32 | bool show_dso; | 31 | bool show_dso; |
33 | bool show_headers; | 32 | bool show_headers; |
@@ -540,10 +539,10 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, | |||
540 | 539 | ||
541 | ui_browser__set_color(&browser->b, color); | 540 | ui_browser__set_color(&browser->b, color); |
542 | hist_browser__gotorc(browser, row, 0); | 541 | hist_browser__gotorc(browser, row, 0); |
543 | slsmg_write_nstring(" ", offset); | 542 | ui_browser__write_nstring(&browser->b, " ", offset); |
544 | slsmg_printf("%c", folded_sign); | 543 | ui_browser__printf(&browser->b, "%c", folded_sign); |
545 | ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); | 544 | ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' '); |
546 | slsmg_write_nstring(str, width); | 545 | ui_browser__write_nstring(&browser->b, str, width); |
547 | } | 546 | } |
548 | 547 | ||
549 | static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused, | 548 | static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused, |
@@ -680,7 +679,7 @@ static int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...) | |||
680 | ui_browser__set_percent_color(arg->b, percent, arg->current_entry); | 679 | ui_browser__set_percent_color(arg->b, percent, arg->current_entry); |
681 | 680 | ||
682 | ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent); | 681 | ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent); |
683 | slsmg_printf("%s", hpp->buf); | 682 | ui_browser__printf(arg->b, "%s", hpp->buf); |
684 | 683 | ||
685 | advance_hpp(hpp, ret); | 684 | advance_hpp(hpp, ret); |
686 | return ret; | 685 | return ret; |
@@ -713,10 +712,11 @@ hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \ | |||
713 | struct hist_entry *he) \ | 712 | struct hist_entry *he) \ |
714 | { \ | 713 | { \ |
715 | if (!symbol_conf.cumulate_callchain) { \ | 714 | if (!symbol_conf.cumulate_callchain) { \ |
715 | struct hpp_arg *arg = hpp->ptr; \ | ||
716 | int len = fmt->user_len ?: fmt->len; \ | 716 | int len = fmt->user_len ?: fmt->len; \ |
717 | int ret = scnprintf(hpp->buf, hpp->size, \ | 717 | int ret = scnprintf(hpp->buf, hpp->size, \ |
718 | "%*s", len, "N/A"); \ | 718 | "%*s", len, "N/A"); \ |
719 | slsmg_printf("%s", hpp->buf); \ | 719 | ui_browser__printf(arg->b, "%s", hpp->buf); \ |
720 | \ | 720 | \ |
721 | return ret; \ | 721 | return ret; \ |
722 | } \ | 722 | } \ |
@@ -801,12 +801,12 @@ static int hist_browser__show_entry(struct hist_browser *browser, | |||
801 | 801 | ||
802 | if (first) { | 802 | if (first) { |
803 | if (symbol_conf.use_callchain) { | 803 | if (symbol_conf.use_callchain) { |
804 | slsmg_printf("%c ", folded_sign); | 804 | ui_browser__printf(&browser->b, "%c ", folded_sign); |
805 | width -= 2; | 805 | width -= 2; |
806 | } | 806 | } |
807 | first = false; | 807 | first = false; |
808 | } else { | 808 | } else { |
809 | slsmg_printf(" "); | 809 | ui_browser__printf(&browser->b, " "); |
810 | width -= 2; | 810 | width -= 2; |
811 | } | 811 | } |
812 | 812 | ||
@@ -814,7 +814,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, | |||
814 | width -= fmt->color(fmt, &hpp, entry); | 814 | width -= fmt->color(fmt, &hpp, entry); |
815 | } else { | 815 | } else { |
816 | width -= fmt->entry(fmt, &hpp, entry); | 816 | width -= fmt->entry(fmt, &hpp, entry); |
817 | slsmg_printf("%s", s); | 817 | ui_browser__printf(&browser->b, "%s", s); |
818 | } | 818 | } |
819 | } | 819 | } |
820 | 820 | ||
@@ -822,7 +822,7 @@ static int hist_browser__show_entry(struct hist_browser *browser, | |||
822 | if (!browser->b.navkeypressed) | 822 | if (!browser->b.navkeypressed) |
823 | width += 1; | 823 | width += 1; |
824 | 824 | ||
825 | slsmg_write_nstring("", width); | 825 | ui_browser__write_nstring(&browser->b, "", width); |
826 | 826 | ||
827 | ++row; | 827 | ++row; |
828 | ++printed; | 828 | ++printed; |
@@ -899,7 +899,7 @@ static void hist_browser__show_headers(struct hist_browser *browser) | |||
899 | hists__scnprintf_headers(headers, sizeof(headers), browser->hists); | 899 | hists__scnprintf_headers(headers, sizeof(headers), browser->hists); |
900 | ui_browser__gotorc(&browser->b, 0, 0); | 900 | ui_browser__gotorc(&browser->b, 0, 0); |
901 | ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); | 901 | ui_browser__set_color(&browser->b, HE_COLORSET_ROOT); |
902 | slsmg_write_nstring(headers, browser->b.width + 1); | 902 | ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1); |
903 | } | 903 | } |
904 | 904 | ||
905 | static void ui_browser__hists_init_top(struct ui_browser *browser) | 905 | static void ui_browser__hists_init_top(struct ui_browser *browser) |
@@ -1214,7 +1214,7 @@ static int hist_browser__dump(struct hist_browser *browser) | |||
1214 | 1214 | ||
1215 | static struct hist_browser *hist_browser__new(struct hists *hists, | 1215 | static struct hist_browser *hist_browser__new(struct hists *hists, |
1216 | struct hist_browser_timer *hbt, | 1216 | struct hist_browser_timer *hbt, |
1217 | struct perf_session_env *env) | 1217 | struct perf_env *env) |
1218 | { | 1218 | { |
1219 | struct hist_browser *browser = zalloc(sizeof(*browser)); | 1219 | struct hist_browser *browser = zalloc(sizeof(*browser)); |
1220 | 1220 | ||
@@ -1267,6 +1267,8 @@ static int hists__browser_title(struct hists *hists, | |||
1267 | const char *ev_name = perf_evsel__name(evsel); | 1267 | const char *ev_name = perf_evsel__name(evsel); |
1268 | char buf[512]; | 1268 | char buf[512]; |
1269 | size_t buflen = sizeof(buf); | 1269 | size_t buflen = sizeof(buf); |
1270 | char ref[30] = " show reference callgraph, "; | ||
1271 | bool enable_ref = false; | ||
1270 | 1272 | ||
1271 | if (symbol_conf.filter_relative) { | 1273 | if (symbol_conf.filter_relative) { |
1272 | nr_samples = hists->stats.nr_non_filtered_samples; | 1274 | nr_samples = hists->stats.nr_non_filtered_samples; |
@@ -1292,10 +1294,13 @@ static int hists__browser_title(struct hists *hists, | |||
1292 | } | 1294 | } |
1293 | } | 1295 | } |
1294 | 1296 | ||
1297 | if (symbol_conf.show_ref_callgraph && | ||
1298 | strstr(ev_name, "call-graph=no")) | ||
1299 | enable_ref = true; | ||
1295 | nr_samples = convert_unit(nr_samples, &unit); | 1300 | nr_samples = convert_unit(nr_samples, &unit); |
1296 | printed = scnprintf(bf, size, | 1301 | printed = scnprintf(bf, size, |
1297 | "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64, | 1302 | "Samples: %lu%c of event '%s',%sEvent count (approx.): %" PRIu64, |
1298 | nr_samples, unit, ev_name, nr_events); | 1303 | nr_samples, unit, ev_name, enable_ref ? ref : " ", nr_events); |
1299 | 1304 | ||
1300 | 1305 | ||
1301 | if (hists->uid_filter_str) | 1306 | if (hists->uid_filter_str) |
@@ -1690,7 +1695,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
1690 | bool left_exits, | 1695 | bool left_exits, |
1691 | struct hist_browser_timer *hbt, | 1696 | struct hist_browser_timer *hbt, |
1692 | float min_pcnt, | 1697 | float min_pcnt, |
1693 | struct perf_session_env *env) | 1698 | struct perf_env *env) |
1694 | { | 1699 | { |
1695 | struct hists *hists = evsel__hists(evsel); | 1700 | struct hists *hists = evsel__hists(evsel); |
1696 | struct hist_browser *browser = hist_browser__new(hists, hbt, env); | 1701 | struct hist_browser *browser = hist_browser__new(hists, hbt, env); |
@@ -1868,6 +1873,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
1868 | case K_RIGHT: | 1873 | case K_RIGHT: |
1869 | /* menu */ | 1874 | /* menu */ |
1870 | break; | 1875 | break; |
1876 | case K_ESC: | ||
1871 | case K_LEFT: { | 1877 | case K_LEFT: { |
1872 | const void *top; | 1878 | const void *top; |
1873 | 1879 | ||
@@ -1877,6 +1883,12 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
1877 | */ | 1883 | */ |
1878 | if (left_exits) | 1884 | if (left_exits) |
1879 | goto out_free_stack; | 1885 | goto out_free_stack; |
1886 | |||
1887 | if (key == K_ESC && | ||
1888 | ui_browser__dialog_yesno(&browser->b, | ||
1889 | "Do you really want to exit?")) | ||
1890 | goto out_free_stack; | ||
1891 | |||
1880 | continue; | 1892 | continue; |
1881 | } | 1893 | } |
1882 | top = pstack__peek(browser->pstack); | 1894 | top = pstack__peek(browser->pstack); |
@@ -1892,12 +1904,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, | |||
1892 | do_zoom_thread(browser, actions); | 1904 | do_zoom_thread(browser, actions); |
1893 | continue; | 1905 | continue; |
1894 | } | 1906 | } |
1895 | case K_ESC: | ||
1896 | if (!left_exits && | ||
1897 | !ui_browser__dialog_yesno(&browser->b, | ||
1898 | "Do you really want to exit?")) | ||
1899 | continue; | ||
1900 | /* Fall thru */ | ||
1901 | case 'q': | 1907 | case 'q': |
1902 | case CTRL('c'): | 1908 | case CTRL('c'): |
1903 | goto out_free_stack; | 1909 | goto out_free_stack; |
@@ -1962,7 +1968,8 @@ skip_annotation: | |||
1962 | &options[nr_options], dso); | 1968 | &options[nr_options], dso); |
1963 | nr_options += add_map_opt(browser, &actions[nr_options], | 1969 | nr_options += add_map_opt(browser, &actions[nr_options], |
1964 | &options[nr_options], | 1970 | &options[nr_options], |
1965 | browser->selection->map); | 1971 | browser->selection ? |
1972 | browser->selection->map : NULL); | ||
1966 | 1973 | ||
1967 | /* perf script support */ | 1974 | /* perf script support */ |
1968 | if (browser->he_selection) { | 1975 | if (browser->he_selection) { |
@@ -1970,6 +1977,15 @@ skip_annotation: | |||
1970 | &actions[nr_options], | 1977 | &actions[nr_options], |
1971 | &options[nr_options], | 1978 | &options[nr_options], |
1972 | thread, NULL); | 1979 | thread, NULL); |
1980 | /* | ||
1981 | * Note that browser->selection != NULL | ||
1982 | * when browser->he_selection is not NULL, | ||
1983 | * so we don't need to check browser->selection | ||
1984 | * before fetching browser->selection->sym, as | ||
1985 | * we do before fetching browser->selection->map. | ||
1986 | * | ||
1987 | * See hist_browser__show_entry. | ||
1988 | */ | ||
1973 | nr_options += add_script_opt(browser, | 1989 | nr_options += add_script_opt(browser, |
1974 | &actions[nr_options], | 1990 | &actions[nr_options], |
1975 | &options[nr_options], | 1991 | &options[nr_options], |
@@ -2010,7 +2026,7 @@ struct perf_evsel_menu { | |||
2010 | struct perf_evsel *selection; | 2026 | struct perf_evsel *selection; |
2011 | bool lost_events, lost_events_warned; | 2027 | bool lost_events, lost_events_warned; |
2012 | float min_pcnt; | 2028 | float min_pcnt; |
2013 | struct perf_session_env *env; | 2029 | struct perf_env *env; |
2014 | }; | 2030 | }; |
2015 | 2031 | ||
2016 | static void perf_evsel_menu__write(struct ui_browser *browser, | 2032 | static void perf_evsel_menu__write(struct ui_browser *browser, |
@@ -2044,7 +2060,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, | |||
2044 | nr_events = convert_unit(nr_events, &unit); | 2060 | nr_events = convert_unit(nr_events, &unit); |
2045 | printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, | 2061 | printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events, |
2046 | unit, unit == ' ' ? "" : " ", ev_name); | 2062 | unit, unit == ' ' ? "" : " ", ev_name); |
2047 | slsmg_printf("%s", bf); | 2063 | ui_browser__printf(browser, "%s", bf); |
2048 | 2064 | ||
2049 | nr_events = hists->stats.nr_events[PERF_RECORD_LOST]; | 2065 | nr_events = hists->stats.nr_events[PERF_RECORD_LOST]; |
2050 | if (nr_events != 0) { | 2066 | if (nr_events != 0) { |
@@ -2057,7 +2073,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser, | |||
2057 | warn = bf; | 2073 | warn = bf; |
2058 | } | 2074 | } |
2059 | 2075 | ||
2060 | slsmg_write_nstring(warn, browser->width - printed); | 2076 | ui_browser__write_nstring(browser, warn, browser->width - printed); |
2061 | 2077 | ||
2062 | if (current_entry) | 2078 | if (current_entry) |
2063 | menu->selection = evsel; | 2079 | menu->selection = evsel; |
@@ -2120,15 +2136,11 @@ browse_hists: | |||
2120 | else | 2136 | else |
2121 | pos = perf_evsel__prev(pos); | 2137 | pos = perf_evsel__prev(pos); |
2122 | goto browse_hists; | 2138 | goto browse_hists; |
2123 | case K_ESC: | ||
2124 | if (!ui_browser__dialog_yesno(&menu->b, | ||
2125 | "Do you really want to exit?")) | ||
2126 | continue; | ||
2127 | /* Fall thru */ | ||
2128 | case K_SWITCH_INPUT_DATA: | 2139 | case K_SWITCH_INPUT_DATA: |
2129 | case 'q': | 2140 | case 'q': |
2130 | case CTRL('c'): | 2141 | case CTRL('c'): |
2131 | goto out; | 2142 | goto out; |
2143 | case K_ESC: | ||
2132 | default: | 2144 | default: |
2133 | continue; | 2145 | continue; |
2134 | } | 2146 | } |
@@ -2167,7 +2179,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, | |||
2167 | int nr_entries, const char *help, | 2179 | int nr_entries, const char *help, |
2168 | struct hist_browser_timer *hbt, | 2180 | struct hist_browser_timer *hbt, |
2169 | float min_pcnt, | 2181 | float min_pcnt, |
2170 | struct perf_session_env *env) | 2182 | struct perf_env *env) |
2171 | { | 2183 | { |
2172 | struct perf_evsel *pos; | 2184 | struct perf_evsel *pos; |
2173 | struct perf_evsel_menu menu = { | 2185 | struct perf_evsel_menu menu = { |
@@ -2200,7 +2212,7 @@ static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist, | |||
2200 | int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, | 2212 | int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, |
2201 | struct hist_browser_timer *hbt, | 2213 | struct hist_browser_timer *hbt, |
2202 | float min_pcnt, | 2214 | float min_pcnt, |
2203 | struct perf_session_env *env) | 2215 | struct perf_env *env) |
2204 | { | 2216 | { |
2205 | int nr_entries = evlist->nr_entries; | 2217 | int nr_entries = evlist->nr_entries; |
2206 | 2218 | ||
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c index b11639f33682..8c154c7d4669 100644 --- a/tools/perf/ui/browsers/map.c +++ b/tools/perf/ui/browsers/map.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include "../libslang.h" | ||
2 | #include <elf.h> | 1 | #include <elf.h> |
3 | #include <inttypes.h> | 2 | #include <inttypes.h> |
4 | #include <sys/ttydefaults.h> | 3 | #include <sys/ttydefaults.h> |
@@ -26,13 +25,13 @@ static void map_browser__write(struct ui_browser *browser, void *nd, int row) | |||
26 | int width; | 25 | int width; |
27 | 26 | ||
28 | ui_browser__set_percent_color(browser, 0, current_entry); | 27 | ui_browser__set_percent_color(browser, 0, current_entry); |
29 | slsmg_printf("%*" PRIx64 " %*" PRIx64 " %c ", | 28 | ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ", |
30 | mb->addrlen, sym->start, mb->addrlen, sym->end, | 29 | mb->addrlen, sym->start, mb->addrlen, sym->end, |
31 | sym->binding == STB_GLOBAL ? 'g' : | 30 | sym->binding == STB_GLOBAL ? 'g' : |
32 | sym->binding == STB_LOCAL ? 'l' : 'w'); | 31 | sym->binding == STB_LOCAL ? 'l' : 'w'); |
33 | width = browser->width - ((mb->addrlen * 2) + 4); | 32 | width = browser->width - ((mb->addrlen * 2) + 4); |
34 | if (width > 0) | 33 | if (width > 0) |
35 | slsmg_write_nstring(sym->name, width); | 34 | ui_browser__write_nstring(browser, sym->name, width); |
36 | } | 35 | } |
37 | 36 | ||
38 | /* FIXME uber-kludgy, see comment on cmd_report... */ | 37 | /* FIXME uber-kludgy, see comment on cmd_report... */ |
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 402d2bd30b09..e13b48d705ef 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c | |||
@@ -81,7 +81,7 @@ static void script_browser__write(struct ui_browser *browser, | |||
81 | ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : | 81 | ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : |
82 | HE_COLORSET_NORMAL); | 82 | HE_COLORSET_NORMAL); |
83 | 83 | ||
84 | slsmg_write_nstring(sline->line, browser->width); | 84 | ui_browser__write_nstring(browser, sline->line, browser->width); |
85 | } | 85 | } |
86 | 86 | ||
87 | static int script_browser__run(struct perf_script_browser *browser) | 87 | static int script_browser__run(struct perf_script_browser *browser) |
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h index 4d54b6450f5b..db816695ad97 100644 --- a/tools/perf/ui/libslang.h +++ b/tools/perf/ui/libslang.h | |||
@@ -14,12 +14,15 @@ | |||
14 | #if SLANG_VERSION < 20104 | 14 | #if SLANG_VERSION < 20104 |
15 | #define slsmg_printf(msg, args...) \ | 15 | #define slsmg_printf(msg, args...) \ |
16 | SLsmg_printf((char *)(msg), ##args) | 16 | SLsmg_printf((char *)(msg), ##args) |
17 | #define slsmg_vprintf(msg, vargs) \ | ||
18 | SLsmg_vprintf((char *)(msg), vargs) | ||
17 | #define slsmg_write_nstring(msg, len) \ | 19 | #define slsmg_write_nstring(msg, len) \ |
18 | SLsmg_write_nstring((char *)(msg), len) | 20 | SLsmg_write_nstring((char *)(msg), len) |
19 | #define sltt_set_color(obj, name, fg, bg) \ | 21 | #define sltt_set_color(obj, name, fg, bg) \ |
20 | SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg)) | 22 | SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg)) |
21 | #else | 23 | #else |
22 | #define slsmg_printf SLsmg_printf | 24 | #define slsmg_printf SLsmg_printf |
25 | #define slsmg_vprintf SLsmg_vprintf | ||
23 | #define slsmg_write_nstring SLsmg_write_nstring | 26 | #define slsmg_write_nstring SLsmg_write_nstring |
24 | #define sltt_set_color SLtt_set_color | 27 | #define sltt_set_color SLtt_set_color |
25 | #endif | 28 | #endif |
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c index c61d14b101e0..c4b99008e2c9 100644 --- a/tools/perf/ui/tui/progress.c +++ b/tools/perf/ui/tui/progress.c | |||
@@ -33,9 +33,26 @@ static void tui_progress__update(struct ui_progress *p) | |||
33 | pthread_mutex_unlock(&ui__lock); | 33 | pthread_mutex_unlock(&ui__lock); |
34 | } | 34 | } |
35 | 35 | ||
36 | static void tui_progress__finish(void) | ||
37 | { | ||
38 | int y; | ||
39 | |||
40 | if (use_browser <= 0) | ||
41 | return; | ||
42 | |||
43 | ui__refresh_dimensions(false); | ||
44 | pthread_mutex_lock(&ui__lock); | ||
45 | y = SLtt_Screen_Rows / 2 - 2; | ||
46 | SLsmg_set_color(0); | ||
47 | SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' '); | ||
48 | SLsmg_refresh(); | ||
49 | pthread_mutex_unlock(&ui__lock); | ||
50 | } | ||
51 | |||
36 | static struct ui_progress_ops tui_progress__ops = | 52 | static struct ui_progress_ops tui_progress__ops = |
37 | { | 53 | { |
38 | .update = tui_progress__update, | 54 | .update = tui_progress__update, |
55 | .finish = tui_progress__finish, | ||
39 | }; | 56 | }; |
40 | 57 | ||
41 | void tui_progress__init(void) | 58 | void tui_progress__init(void) |
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c index bf890f72fe80..d96ad7c8325d 100644 --- a/tools/perf/ui/tui/util.c +++ b/tools/perf/ui/tui/util.c | |||
@@ -21,7 +21,7 @@ static void ui_browser__argv_write(struct ui_browser *browser, | |||
21 | 21 | ||
22 | ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : | 22 | ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : |
23 | HE_COLORSET_NORMAL); | 23 | HE_COLORSET_NORMAL); |
24 | slsmg_write_nstring(*arg, browser->width); | 24 | ui_browser__write_nstring(browser, *arg, browser->width); |
25 | } | 25 | } |
26 | 26 | ||
27 | static int popup_menu__run(struct ui_browser *menu) | 27 | static int popup_menu__run(struct ui_browser *menu) |
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index d2d318c59b37..e5f18a288b74 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build | |||
@@ -14,8 +14,10 @@ libperf-y += find_next_bit.o | |||
14 | libperf-y += help.o | 14 | libperf-y += help.o |
15 | libperf-y += kallsyms.o | 15 | libperf-y += kallsyms.o |
16 | libperf-y += levenshtein.o | 16 | libperf-y += levenshtein.o |
17 | libperf-y += llvm-utils.o | ||
17 | libperf-y += parse-options.o | 18 | libperf-y += parse-options.o |
18 | libperf-y += parse-events.o | 19 | libperf-y += parse-events.o |
20 | libperf-y += perf_regs.o | ||
19 | libperf-y += path.o | 21 | libperf-y += path.o |
20 | libperf-y += rbtree.o | 22 | libperf-y += rbtree.o |
21 | libperf-y += bitmap.o | 23 | libperf-y += bitmap.o |
@@ -67,18 +69,25 @@ libperf-y += target.o | |||
67 | libperf-y += rblist.o | 69 | libperf-y += rblist.o |
68 | libperf-y += intlist.o | 70 | libperf-y += intlist.o |
69 | libperf-y += vdso.o | 71 | libperf-y += vdso.o |
72 | libperf-y += counts.o | ||
70 | libperf-y += stat.o | 73 | libperf-y += stat.o |
71 | libperf-y += stat-shadow.o | 74 | libperf-y += stat-shadow.o |
72 | libperf-y += record.o | 75 | libperf-y += record.o |
73 | libperf-y += srcline.o | 76 | libperf-y += srcline.o |
74 | libperf-y += data.o | 77 | libperf-y += data.o |
75 | libperf-$(CONFIG_X86) += tsc.o | 78 | libperf-$(CONFIG_X86) += tsc.o |
79 | libperf-$(CONFIG_AUXTRACE) += tsc.o | ||
76 | libperf-y += cloexec.o | 80 | libperf-y += cloexec.o |
77 | libperf-y += thread-stack.o | 81 | libperf-y += thread-stack.o |
78 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o | 82 | libperf-$(CONFIG_AUXTRACE) += auxtrace.o |
83 | libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ | ||
84 | libperf-$(CONFIG_AUXTRACE) += intel-pt.o | ||
85 | libperf-$(CONFIG_AUXTRACE) += intel-bts.o | ||
79 | libperf-y += parse-branch-options.o | 86 | libperf-y += parse-branch-options.o |
87 | libperf-y += parse-regs-options.o | ||
80 | 88 | ||
81 | libperf-$(CONFIG_LIBELF) += symbol-elf.o | 89 | libperf-$(CONFIG_LIBELF) += symbol-elf.o |
90 | libperf-$(CONFIG_LIBELF) += probe-file.o | ||
82 | libperf-$(CONFIG_LIBELF) += probe-event.o | 91 | libperf-$(CONFIG_LIBELF) += probe-event.o |
83 | 92 | ||
84 | ifndef CONFIG_LIBELF | 93 | ifndef CONFIG_LIBELF |
@@ -95,7 +104,6 @@ libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o | |||
95 | 104 | ||
96 | libperf-y += scripting-engines/ | 105 | libperf-y += scripting-engines/ |
97 | 106 | ||
98 | libperf-$(CONFIG_PERF_REGS) += perf_regs.o | ||
99 | libperf-$(CONFIG_ZLIB) += zlib.o | 107 | libperf-$(CONFIG_ZLIB) += zlib.o |
100 | libperf-$(CONFIG_LZMA) += lzma.o | 108 | libperf-$(CONFIG_LZMA) += lzma.o |
101 | 109 | ||
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 03b7bc70eb66..d1eece70b84d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
@@ -473,17 +473,73 @@ int symbol__alloc_hist(struct symbol *sym) | |||
473 | return 0; | 473 | return 0; |
474 | } | 474 | } |
475 | 475 | ||
476 | /* The cycles histogram is lazily allocated. */ | ||
477 | static int symbol__alloc_hist_cycles(struct symbol *sym) | ||
478 | { | ||
479 | struct annotation *notes = symbol__annotation(sym); | ||
480 | const size_t size = symbol__size(sym); | ||
481 | |||
482 | notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist)); | ||
483 | if (notes->src->cycles_hist == NULL) | ||
484 | return -1; | ||
485 | return 0; | ||
486 | } | ||
487 | |||
476 | void symbol__annotate_zero_histograms(struct symbol *sym) | 488 | void symbol__annotate_zero_histograms(struct symbol *sym) |
477 | { | 489 | { |
478 | struct annotation *notes = symbol__annotation(sym); | 490 | struct annotation *notes = symbol__annotation(sym); |
479 | 491 | ||
480 | pthread_mutex_lock(¬es->lock); | 492 | pthread_mutex_lock(¬es->lock); |
481 | if (notes->src != NULL) | 493 | if (notes->src != NULL) { |
482 | memset(notes->src->histograms, 0, | 494 | memset(notes->src->histograms, 0, |
483 | notes->src->nr_histograms * notes->src->sizeof_sym_hist); | 495 | notes->src->nr_histograms * notes->src->sizeof_sym_hist); |
496 | if (notes->src->cycles_hist) | ||
497 | memset(notes->src->cycles_hist, 0, | ||
498 | symbol__size(sym) * sizeof(struct cyc_hist)); | ||
499 | } | ||
484 | pthread_mutex_unlock(¬es->lock); | 500 | pthread_mutex_unlock(¬es->lock); |
485 | } | 501 | } |
486 | 502 | ||
503 | static int __symbol__account_cycles(struct annotation *notes, | ||
504 | u64 start, | ||
505 | unsigned offset, unsigned cycles, | ||
506 | unsigned have_start) | ||
507 | { | ||
508 | struct cyc_hist *ch; | ||
509 | |||
510 | ch = notes->src->cycles_hist; | ||
511 | /* | ||
512 | * For now we can only account one basic block per | ||
513 | * final jump. But multiple could be overlapping. | ||
514 | * Always account the longest one. So when | ||
515 | * a shorter one has already been seen, throw it away. | ||

516 | * | ||
517 | * We separately always account the full cycles. | ||
518 | */ | ||
519 | ch[offset].num_aggr++; | ||
520 | ch[offset].cycles_aggr += cycles; | ||
521 | |||
522 | if (!have_start && ch[offset].have_start) | ||
523 | return 0; | ||
524 | if (ch[offset].num) { | ||
525 | if (have_start && (!ch[offset].have_start || | ||
526 | ch[offset].start > start)) { | ||
527 | ch[offset].have_start = 0; | ||
528 | ch[offset].cycles = 0; | ||
529 | ch[offset].num = 0; | ||
530 | if (ch[offset].reset < 0xffff) | ||
531 | ch[offset].reset++; | ||
532 | } else if (have_start && | ||
533 | ch[offset].start < start) | ||
534 | return 0; | ||
535 | } | ||
536 | ch[offset].have_start = have_start; | ||
537 | ch[offset].start = start; | ||
538 | ch[offset].cycles += cycles; | ||
539 | ch[offset].num++; | ||
540 | return 0; | ||
541 | } | ||
542 | |||
487 | static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, | 543 | static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, |
488 | struct annotation *notes, int evidx, u64 addr) | 544 | struct annotation *notes, int evidx, u64 addr) |
489 | { | 545 | { |
@@ -506,7 +562,7 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, | |||
506 | return 0; | 562 | return 0; |
507 | } | 563 | } |
508 | 564 | ||
509 | static struct annotation *symbol__get_annotation(struct symbol *sym) | 565 | static struct annotation *symbol__get_annotation(struct symbol *sym, bool cycles) |
510 | { | 566 | { |
511 | struct annotation *notes = symbol__annotation(sym); | 567 | struct annotation *notes = symbol__annotation(sym); |
512 | 568 | ||
@@ -514,6 +570,10 @@ static struct annotation *symbol__get_annotation(struct symbol *sym) | |||
514 | if (symbol__alloc_hist(sym) < 0) | 570 | if (symbol__alloc_hist(sym) < 0) |
515 | return NULL; | 571 | return NULL; |
516 | } | 572 | } |
573 | if (!notes->src->cycles_hist && cycles) { | ||
574 | if (symbol__alloc_hist_cycles(sym) < 0) | ||
575 | return NULL; | ||
576 | } | ||
517 | return notes; | 577 | return notes; |
518 | } | 578 | } |
519 | 579 | ||
@@ -524,12 +584,73 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, | |||
524 | 584 | ||
525 | if (sym == NULL) | 585 | if (sym == NULL) |
526 | return 0; | 586 | return 0; |
527 | notes = symbol__get_annotation(sym); | 587 | notes = symbol__get_annotation(sym, false); |
528 | if (notes == NULL) | 588 | if (notes == NULL) |
529 | return -ENOMEM; | 589 | return -ENOMEM; |
530 | return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); | 590 | return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); |
531 | } | 591 | } |
532 | 592 | ||
593 | static int symbol__account_cycles(u64 addr, u64 start, | ||
594 | struct symbol *sym, unsigned cycles) | ||
595 | { | ||
596 | struct annotation *notes; | ||
597 | unsigned offset; | ||
598 | |||
599 | if (sym == NULL) | ||
600 | return 0; | ||
601 | notes = symbol__get_annotation(sym, true); | ||
602 | if (notes == NULL) | ||
603 | return -ENOMEM; | ||
604 | if (addr < sym->start || addr >= sym->end) | ||
605 | return -ERANGE; | ||
606 | |||
607 | if (start) { | ||
608 | if (start < sym->start || start >= sym->end) | ||
609 | return -ERANGE; | ||
610 | if (start >= addr) | ||
611 | start = 0; | ||
612 | } | ||
613 | offset = addr - sym->start; | ||
614 | return __symbol__account_cycles(notes, | ||
615 | start ? start - sym->start : 0, | ||
616 | offset, cycles, | ||
617 | !!start); | ||
618 | } | ||
619 | |||
620 | int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, | ||
621 | struct addr_map_symbol *start, | ||
622 | unsigned cycles) | ||
623 | { | ||
624 | u64 saddr = 0; | ||
625 | int err; | ||
626 | |||
627 | if (!cycles) | ||
628 | return 0; | ||
629 | |||
630 | /* | ||
631 | * Only set start when IPC can be computed. We can only | ||
632 | * compute it when the basic block is completely in a single | ||
633 | * function. | ||
634 | * Special-case the situation where the jump is elsewhere, but | ||
635 | * it starts at the function start. | ||
636 | */ | ||
637 | if (start && | ||
638 | (start->sym == ams->sym || | ||
639 | (ams->sym && | ||
640 | start->addr == ams->sym->start + ams->map->start))) | ||
641 | saddr = start->al_addr; | ||
642 | if (saddr == 0) | ||
643 | pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n", | ||
644 | ams->addr, | ||
645 | start ? start->addr : 0, | ||
646 | ams->sym ? ams->sym->start + ams->map->start : 0, | ||
647 | saddr); | ||
648 | err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles); | ||
649 | if (err) | ||
650 | pr_debug2("account_cycles failed %d\n", err); | ||
651 | return err; | ||
652 | } | ||
653 | |||
533 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) | 654 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx) |
534 | { | 655 | { |
535 | return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); | 656 | return symbol__inc_addr_samples(ams->sym, ams->map, evidx, ams->al_addr); |
@@ -1005,6 +1126,7 @@ fallback: | |||
1005 | dso->annotate_warned = 1; | 1126 | dso->annotate_warned = 1; |
1006 | pr_err("Can't annotate %s:\n\n" | 1127 | pr_err("Can't annotate %s:\n\n" |
1007 | "No vmlinux file%s\nwas found in the path.\n\n" | 1128 | "No vmlinux file%s\nwas found in the path.\n\n" |
1129 | "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" | ||
1008 | "Please use:\n\n" | 1130 | "Please use:\n\n" |
1009 | " perf buildid-cache -vu vmlinux\n\n" | 1131 | " perf buildid-cache -vu vmlinux\n\n" |
1010 | "or:\n\n" | 1132 | "or:\n\n" |
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 7e78e6c27078..e9996092a093 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h | |||
@@ -59,6 +59,8 @@ struct disasm_line { | |||
59 | char *name; | 59 | char *name; |
60 | struct ins *ins; | 60 | struct ins *ins; |
61 | int line_nr; | 61 | int line_nr; |
62 | float ipc; | ||
63 | u64 cycles; | ||
62 | struct ins_operands ops; | 64 | struct ins_operands ops; |
63 | }; | 65 | }; |
64 | 66 | ||
@@ -79,6 +81,17 @@ struct sym_hist { | |||
79 | u64 addr[0]; | 81 | u64 addr[0]; |
80 | }; | 82 | }; |
81 | 83 | ||
84 | struct cyc_hist { | ||
85 | u64 start; | ||
86 | u64 cycles; | ||
87 | u64 cycles_aggr; | ||
88 | u32 num; | ||
89 | u32 num_aggr; | ||
90 | u8 have_start; | ||
91 | /* 1 byte padding */ | ||
92 | u16 reset; | ||
93 | }; | ||
94 | |||
82 | struct source_line_samples { | 95 | struct source_line_samples { |
83 | double percent; | 96 | double percent; |
84 | double percent_sum; | 97 | double percent_sum; |
@@ -97,6 +110,7 @@ struct source_line { | |||
97 | * @histogram: Array of addr hit histograms per event being monitored | 110 | * @histogram: Array of addr hit histograms per event being monitored |
98 | * @lines: If 'print_lines' is specified, per source code line percentages | 111 | * @lines: If 'print_lines' is specified, per source code line percentages |
99 | * @source: source parsed from a disassembler like objdump -dS | 112 | * @source: source parsed from a disassembler like objdump -dS |
113 | * @cycles_hist: Average cycles per basic block | ||
100 | * | 114 | * |
101 | * lines is allocated, percentages calculated and all sorted by percentage | 115 | * lines is allocated, percentages calculated and all sorted by percentage |
102 | * when the annotation is about to be presented, so the percentages are for | 116 | * when the annotation is about to be presented, so the percentages are for |
@@ -109,6 +123,7 @@ struct annotated_source { | |||
109 | struct source_line *lines; | 123 | struct source_line *lines; |
110 | int nr_histograms; | 124 | int nr_histograms; |
111 | int sizeof_sym_hist; | 125 | int sizeof_sym_hist; |
126 | struct cyc_hist *cycles_hist; | ||
112 | struct sym_hist histograms[0]; | 127 | struct sym_hist histograms[0]; |
113 | }; | 128 | }; |
114 | 129 | ||
@@ -130,6 +145,10 @@ static inline struct annotation *symbol__annotation(struct symbol *sym) | |||
130 | 145 | ||
131 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); | 146 | int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, int evidx); |
132 | 147 | ||
148 | int addr_map_symbol__account_cycles(struct addr_map_symbol *ams, | ||
149 | struct addr_map_symbol *start, | ||
150 | unsigned cycles); | ||
151 | |||
133 | int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); | 152 | int hist_entry__inc_addr_samples(struct hist_entry *he, int evidx, u64 addr); |
134 | 153 | ||
135 | int symbol__alloc_hist(struct symbol *sym); | 154 | int symbol__alloc_hist(struct symbol *sym); |
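Each cyc_hist entry carries two sets of counters: start/cycles/num describe the current best (longest) basic block ending at that offset and feed the IPC value, while cycles_aggr/num_aggr are accumulated unconditionally and give the plain cycle average shown next to a line; reset counts how often overlapping blocks forced the per-block counters to be thrown away. A small numeric illustration with hypothetical values, not patch code:

#include <stdio.h>

int main(void)
{
        /* hypothetical contents of one cyc_hist entry */
        unsigned long long cycles_aggr = 1500;  /* all samples, summed */
        unsigned num_aggr = 6;                  /* number of samples */
        unsigned num = 4, reset = 1;            /* per-block statistics */

        /* "cycles" value shown by the annotate browser */
        printf("avg cycles = %llu\n", cycles_aggr / num_aggr);

        /* IPC data is hidden when too many overlaps were discarded */
        if (reset >= 0x7fff || reset >= num / 2)
                printf("IPC hidden: too many overlapping blocks\n");
        return 0;
}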
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 83d9dd96fe08..a980e7c50ee0 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c | |||
@@ -47,6 +47,9 @@ | |||
47 | #include "debug.h" | 47 | #include "debug.h" |
48 | #include "parse-options.h" | 48 | #include "parse-options.h" |
49 | 49 | ||
50 | #include "intel-pt.h" | ||
51 | #include "intel-bts.h" | ||
52 | |||
50 | int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, | 53 | int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, |
51 | struct auxtrace_mmap_params *mp, | 54 | struct auxtrace_mmap_params *mp, |
52 | void *userpg, int fd) | 55 | void *userpg, int fd) |
@@ -876,7 +879,7 @@ static bool auxtrace__dont_decode(struct perf_session *session) | |||
876 | 879 | ||
877 | int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, | 880 | int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, |
878 | union perf_event *event, | 881 | union perf_event *event, |
879 | struct perf_session *session __maybe_unused) | 882 | struct perf_session *session) |
880 | { | 883 | { |
881 | enum auxtrace_type type = event->auxtrace_info.type; | 884 | enum auxtrace_type type = event->auxtrace_info.type; |
882 | 885 | ||
@@ -884,6 +887,10 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, | |||
884 | fprintf(stdout, " type: %u\n", type); | 887 | fprintf(stdout, " type: %u\n", type); |
885 | 888 | ||
886 | switch (type) { | 889 | switch (type) { |
890 | case PERF_AUXTRACE_INTEL_PT: | ||
891 | return intel_pt_process_auxtrace_info(event, session); | ||
892 | case PERF_AUXTRACE_INTEL_BTS: | ||
893 | return intel_bts_process_auxtrace_info(event, session); | ||
887 | case PERF_AUXTRACE_UNKNOWN: | 894 | case PERF_AUXTRACE_UNKNOWN: |
888 | default: | 895 | default: |
889 | return -EINVAL; | 896 | return -EINVAL; |
@@ -942,6 +949,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
942 | struct itrace_synth_opts *synth_opts = opt->value; | 949 | struct itrace_synth_opts *synth_opts = opt->value; |
943 | const char *p; | 950 | const char *p; |
944 | char *endptr; | 951 | char *endptr; |
952 | bool period_type_set = false; | ||
945 | 953 | ||
946 | synth_opts->set = true; | 954 | synth_opts->set = true; |
947 | 955 | ||
@@ -970,10 +978,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
970 | case 'i': | 978 | case 'i': |
971 | synth_opts->period_type = | 979 | synth_opts->period_type = |
972 | PERF_ITRACE_PERIOD_INSTRUCTIONS; | 980 | PERF_ITRACE_PERIOD_INSTRUCTIONS; |
981 | period_type_set = true; | ||
973 | break; | 982 | break; |
974 | case 't': | 983 | case 't': |
975 | synth_opts->period_type = | 984 | synth_opts->period_type = |
976 | PERF_ITRACE_PERIOD_TICKS; | 985 | PERF_ITRACE_PERIOD_TICKS; |
986 | period_type_set = true; | ||
977 | break; | 987 | break; |
978 | case 'm': | 988 | case 'm': |
979 | synth_opts->period *= 1000; | 989 | synth_opts->period *= 1000; |
@@ -986,6 +996,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
986 | goto out_err; | 996 | goto out_err; |
987 | synth_opts->period_type = | 997 | synth_opts->period_type = |
988 | PERF_ITRACE_PERIOD_NANOSECS; | 998 | PERF_ITRACE_PERIOD_NANOSECS; |
999 | period_type_set = true; | ||
989 | break; | 1000 | break; |
990 | case '\0': | 1001 | case '\0': |
991 | goto out; | 1002 | goto out; |
@@ -1039,7 +1050,7 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str, | |||
1039 | } | 1050 | } |
1040 | out: | 1051 | out: |
1041 | if (synth_opts->instructions) { | 1052 | if (synth_opts->instructions) { |
1042 | if (!synth_opts->period_type) | 1053 | if (!period_type_set) |
1043 | synth_opts->period_type = | 1054 | synth_opts->period_type = |
1044 | PERF_ITRACE_DEFAULT_PERIOD_TYPE; | 1055 | PERF_ITRACE_DEFAULT_PERIOD_TYPE; |
1045 | if (!synth_opts->period) | 1056 | if (!synth_opts->period) |
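The period_type_set flag exists because testing the value alone cannot tell "the user asked for instruction periods" apart from "nothing was set", presumably since the instructions period type is the first enumerator and so has the value 0. An illustrative sketch of that pitfall, with made-up enumerator names mirroring the shape of the problem:

#include <stdio.h>
#include <stdbool.h>

/* Illustrative only: the first enumerator is 0, so it cannot double
 * as a "nothing was chosen" marker. */
enum period_type { PERIOD_INSTRUCTIONS, PERIOD_TICKS, PERIOD_NANOSECS };

int main(void)
{
        enum period_type type = PERIOD_INSTRUCTIONS;    /* user passed 'i' */
        bool type_set = true;                           /* recorded explicitly */

        if (!type)
                printf("value test: looks unset even though it was chosen\n");
        if (type_set)
                printf("flag test: the explicit choice is preserved\n");
        return 0;
}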
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 471aecbc4d68..bf72b77a588a 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h | |||
@@ -39,6 +39,8 @@ struct events_stats; | |||
39 | 39 | ||
40 | enum auxtrace_type { | 40 | enum auxtrace_type { |
41 | PERF_AUXTRACE_UNKNOWN, | 41 | PERF_AUXTRACE_UNKNOWN, |
42 | PERF_AUXTRACE_INTEL_PT, | ||
43 | PERF_AUXTRACE_INTEL_BTS, | ||
42 | }; | 44 | }; |
43 | 45 | ||
44 | enum itrace_period_type { | 46 | enum itrace_period_type { |
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 1f6fc2323ef9..d909459fb54c 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c | |||
@@ -93,6 +93,38 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) | |||
93 | return raw - build_id; | 93 | return raw - build_id; |
94 | } | 94 | } |
95 | 95 | ||
96 | int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id) | ||
97 | { | ||
98 | char notes[PATH_MAX]; | ||
99 | u8 build_id[BUILD_ID_SIZE]; | ||
100 | int ret; | ||
101 | |||
102 | if (!root_dir) | ||
103 | root_dir = ""; | ||
104 | |||
105 | scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir); | ||
106 | |||
107 | ret = sysfs__read_build_id(notes, build_id, sizeof(build_id)); | ||
108 | if (ret < 0) | ||
109 | return ret; | ||
110 | |||
111 | return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); | ||
112 | } | ||
113 | |||
114 | int filename__sprintf_build_id(const char *pathname, char *sbuild_id) | ||
115 | { | ||
116 | u8 build_id[BUILD_ID_SIZE]; | ||
117 | int ret; | ||
118 | |||
119 | ret = filename__read_build_id(pathname, build_id, sizeof(build_id)); | ||
120 | if (ret < 0) | ||
121 | return ret; | ||
122 | else if (ret != sizeof(build_id)) | ||
123 | return -EINVAL; | ||
124 | |||
125 | return build_id__sprintf(build_id, sizeof(build_id), sbuild_id); | ||
126 | } | ||
127 | |||
96 | /* asnprintf consolidates asprintf and snprintf */ | 128 | /* asnprintf consolidates asprintf and snprintf */ |
97 | static int asnprintf(char **strp, size_t size, const char *fmt, ...) | 129 | static int asnprintf(char **strp, size_t size, const char *fmt, ...) |
98 | { | 130 | { |
@@ -124,7 +156,7 @@ static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) | |||
124 | 156 | ||
125 | char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) | 157 | char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) |
126 | { | 158 | { |
127 | char build_id_hex[BUILD_ID_SIZE * 2 + 1]; | 159 | char build_id_hex[SBUILD_ID_SIZE]; |
128 | 160 | ||
129 | if (!dso->has_build_id) | 161 | if (!dso->has_build_id) |
130 | return NULL; | 162 | return NULL; |
@@ -291,7 +323,7 @@ int build_id_cache__list_build_ids(const char *pathname, | |||
291 | struct dirent *d; | 323 | struct dirent *d; |
292 | int ret = 0; | 324 | int ret = 0; |
293 | 325 | ||
294 | list = strlist__new(true, NULL); | 326 | list = strlist__new(NULL, NULL); |
295 | dir_name = build_id_cache__dirname_from_path(pathname, false, false); | 327 | dir_name = build_id_cache__dirname_from_path(pathname, false, false); |
296 | if (!list || !dir_name) { | 328 | if (!list || !dir_name) { |
297 | ret = -ENOMEM; | 329 | ret = -ENOMEM; |
@@ -384,7 +416,7 @@ static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, | |||
384 | const char *name, bool is_kallsyms, | 416 | const char *name, bool is_kallsyms, |
385 | bool is_vdso) | 417 | bool is_vdso) |
386 | { | 418 | { |
387 | char sbuild_id[BUILD_ID_SIZE * 2 + 1]; | 419 | char sbuild_id[SBUILD_ID_SIZE]; |
388 | 420 | ||
389 | build_id__sprintf(build_id, build_id_size, sbuild_id); | 421 | build_id__sprintf(build_id, build_id_size, sbuild_id); |
390 | 422 | ||
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 85011222cc14..27a14a8a945b 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h | |||
@@ -1,7 +1,8 @@ | |||
1 | #ifndef PERF_BUILD_ID_H_ | 1 | #ifndef PERF_BUILD_ID_H_ |
2 | #define PERF_BUILD_ID_H_ 1 | 2 | #define PERF_BUILD_ID_H_ 1 |
3 | 3 | ||
4 | #define BUILD_ID_SIZE 20 | 4 | #define BUILD_ID_SIZE 20 |
5 | #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1) | ||
5 | 6 | ||
6 | #include "tool.h" | 7 | #include "tool.h" |
7 | #include "strlist.h" | 8 | #include "strlist.h" |
@@ -11,6 +12,9 @@ extern struct perf_tool build_id__mark_dso_hit_ops; | |||
11 | struct dso; | 12 | struct dso; |
12 | 13 | ||
13 | int build_id__sprintf(const u8 *build_id, int len, char *bf); | 14 | int build_id__sprintf(const u8 *build_id, int len, char *bf); |
15 | int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id); | ||
16 | int filename__sprintf_build_id(const char *pathname, char *sbuild_id); | ||
17 | |||
14 | char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); | 18 | char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size); |
15 | 19 | ||
16 | int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, | 20 | int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event, |
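The two new helpers read a build-id (from the sysfs kernel notes or from a file) and format it as a hex string, with SBUILD_ID_SIZE spelling out the buffer size callers need: two hex characters per byte plus the terminating NUL. A hedged usage sketch; it assumes the perf-internal headers and only compiles inside the perf tree:

/* Usage sketch, not a standalone program. */
#include "util/build-id.h"
#include <stdio.h>

static void print_kernel_build_id(void)
{
        char sbuild_id[SBUILD_ID_SIZE];         /* BUILD_ID_SIZE * 2 + 1 bytes */

        /* a NULL root_dir means the running system's /sys/kernel/notes */
        if (sysfs__sprintf_build_id(NULL, sbuild_id) > 0)
                printf("kernel build-id: %s\n", sbuild_id);
}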
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 9f643ee77001..773fe13ce627 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c | |||
@@ -25,96 +25,9 @@ | |||
25 | 25 | ||
26 | __thread struct callchain_cursor callchain_cursor; | 26 | __thread struct callchain_cursor callchain_cursor; |
27 | 27 | ||
28 | #ifdef HAVE_DWARF_UNWIND_SUPPORT | 28 | int parse_callchain_record_opt(const char *arg, struct callchain_param *param) |
29 | static int get_stack_size(const char *str, unsigned long *_size) | ||
30 | { | ||
31 | char *endptr; | ||
32 | unsigned long size; | ||
33 | unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); | ||
34 | |||
35 | size = strtoul(str, &endptr, 0); | ||
36 | |||
37 | do { | ||
38 | if (*endptr) | ||
39 | break; | ||
40 | |||
41 | size = round_up(size, sizeof(u64)); | ||
42 | if (!size || size > max_size) | ||
43 | break; | ||
44 | |||
45 | *_size = size; | ||
46 | return 0; | ||
47 | |||
48 | } while (0); | ||
49 | |||
50 | pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", | ||
51 | max_size, str); | ||
52 | return -1; | ||
53 | } | ||
54 | #endif /* HAVE_DWARF_UNWIND_SUPPORT */ | ||
55 | |||
56 | int parse_callchain_record_opt(const char *arg) | ||
57 | { | 29 | { |
58 | char *tok, *name, *saveptr = NULL; | 30 | return parse_callchain_record(arg, param); |
59 | char *buf; | ||
60 | int ret = -1; | ||
61 | |||
62 | /* We need buffer that we know we can write to. */ | ||
63 | buf = malloc(strlen(arg) + 1); | ||
64 | if (!buf) | ||
65 | return -ENOMEM; | ||
66 | |||
67 | strcpy(buf, arg); | ||
68 | |||
69 | tok = strtok_r((char *)buf, ",", &saveptr); | ||
70 | name = tok ? : (char *)buf; | ||
71 | |||
72 | do { | ||
73 | /* Framepointer style */ | ||
74 | if (!strncmp(name, "fp", sizeof("fp"))) { | ||
75 | if (!strtok_r(NULL, ",", &saveptr)) { | ||
76 | callchain_param.record_mode = CALLCHAIN_FP; | ||
77 | ret = 0; | ||
78 | } else | ||
79 | pr_err("callchain: No more arguments " | ||
80 | "needed for --call-graph fp\n"); | ||
81 | break; | ||
82 | |||
83 | #ifdef HAVE_DWARF_UNWIND_SUPPORT | ||
84 | /* Dwarf style */ | ||
85 | } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { | ||
86 | const unsigned long default_stack_dump_size = 8192; | ||
87 | |||
88 | ret = 0; | ||
89 | callchain_param.record_mode = CALLCHAIN_DWARF; | ||
90 | callchain_param.dump_size = default_stack_dump_size; | ||
91 | |||
92 | tok = strtok_r(NULL, ",", &saveptr); | ||
93 | if (tok) { | ||
94 | unsigned long size = 0; | ||
95 | |||
96 | ret = get_stack_size(tok, &size); | ||
97 | callchain_param.dump_size = size; | ||
98 | } | ||
99 | #endif /* HAVE_DWARF_UNWIND_SUPPORT */ | ||
100 | } else if (!strncmp(name, "lbr", sizeof("lbr"))) { | ||
101 | if (!strtok_r(NULL, ",", &saveptr)) { | ||
102 | callchain_param.record_mode = CALLCHAIN_LBR; | ||
103 | ret = 0; | ||
104 | } else | ||
105 | pr_err("callchain: No more arguments " | ||
106 | "needed for --call-graph lbr\n"); | ||
107 | break; | ||
108 | } else { | ||
109 | pr_err("callchain: Unknown --call-graph option " | ||
110 | "value: %s\n", arg); | ||
111 | break; | ||
112 | } | ||
113 | |||
114 | } while (0); | ||
115 | |||
116 | free(buf); | ||
117 | return ret; | ||
118 | } | 31 | } |
119 | 32 | ||
120 | static int parse_callchain_mode(const char *value) | 33 | static int parse_callchain_mode(const char *value) |
@@ -219,7 +132,7 @@ int perf_callchain_config(const char *var, const char *value) | |||
219 | var += sizeof("call-graph.") - 1; | 132 | var += sizeof("call-graph.") - 1; |
220 | 133 | ||
221 | if (!strcmp(var, "record-mode")) | 134 | if (!strcmp(var, "record-mode")) |
222 | return parse_callchain_record_opt(value); | 135 | return parse_callchain_record_opt(value, &callchain_param); |
223 | #ifdef HAVE_DWARF_UNWIND_SUPPORT | 136 | #ifdef HAVE_DWARF_UNWIND_SUPPORT |
224 | if (!strcmp(var, "dump-size")) { | 137 | if (!strcmp(var, "dump-size")) { |
225 | unsigned long size = 0; | 138 | unsigned long size = 0; |
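The record-mode parsing removed here (accepting "fp", "dwarf" with an optional stack dump size, and "lbr") moves behind the shared parse_callchain_record(), leaving parse_callchain_record_opt() as a thin wrapper that passes the target callchain_param explicitly. A hedged sketch of a caller after this change, assuming the perf-internal callchain.h and its callchain_param declaration:

/* Sketch of the option callback path, perf-internal headers assumed. */
#include "util/callchain.h"

static int set_record_mode(const char *arg)
{
        /* "fp", "dwarf[,stack-size]" and "lbr" are handled by the
         * shared parse_callchain_record(), via this thin wrapper */
        return parse_callchain_record_opt(arg, &callchain_param);
}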
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index 679c2c6d8ade..acee2b3cd801 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h | |||
@@ -177,7 +177,8 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node * | |||
177 | bool hide_unresolved); | 177 | bool hide_unresolved); |
178 | 178 | ||
179 | extern const char record_callchain_help[]; | 179 | extern const char record_callchain_help[]; |
180 | int parse_callchain_record_opt(const char *arg); | 180 | extern int parse_callchain_record(const char *arg, struct callchain_param *param); |
181 | int parse_callchain_record_opt(const char *arg, struct callchain_param *param); | ||
181 | int parse_callchain_report_opt(const char *arg); | 182 | int parse_callchain_report_opt(const char *arg); |
182 | int perf_callchain_config(const char *var, const char *value); | 183 | int perf_callchain_config(const char *var, const char *value); |
183 | 184 | ||
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 68888c29b04a..3bee6773ddb0 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h | |||
@@ -4,7 +4,7 @@ | |||
4 | unsigned long perf_event_open_cloexec_flag(void); | 4 | unsigned long perf_event_open_cloexec_flag(void); |
5 | 5 | ||
6 | #ifdef __GLIBC_PREREQ | 6 | #ifdef __GLIBC_PREREQ |
7 | #if !__GLIBC_PREREQ(2, 6) | 7 | #if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__) |
8 | extern int sched_getcpu(void) __THROW; | 8 | extern int sched_getcpu(void) __THROW; |
9 | #endif | 9 | #endif |
10 | #endif | 10 | #endif |
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index 55355b3d4f85..9b9565416f90 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c | |||
@@ -67,8 +67,9 @@ static int __color_vsnprintf(char *bf, size_t size, const char *color, | |||
67 | return r; | 67 | return r; |
68 | } | 68 | } |
69 | 69 | ||
70 | /* Colors are not included in return value */ | ||
70 | static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, | 71 | static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, |
71 | va_list args, const char *trail) | 72 | va_list args) |
72 | { | 73 | { |
73 | int r = 0; | 74 | int r = 0; |
74 | 75 | ||
@@ -83,12 +84,10 @@ static int __color_vfprintf(FILE *fp, const char *color, const char *fmt, | |||
83 | } | 84 | } |
84 | 85 | ||
85 | if (perf_use_color_default && *color) | 86 | if (perf_use_color_default && *color) |
86 | r += fprintf(fp, "%s", color); | 87 | fprintf(fp, "%s", color); |
87 | r += vfprintf(fp, fmt, args); | 88 | r += vfprintf(fp, fmt, args); |
88 | if (perf_use_color_default && *color) | 89 | if (perf_use_color_default && *color) |
89 | r += fprintf(fp, "%s", PERF_COLOR_RESET); | 90 | fprintf(fp, "%s", PERF_COLOR_RESET); |
90 | if (trail) | ||
91 | r += fprintf(fp, "%s", trail); | ||
92 | return r; | 91 | return r; |
93 | } | 92 | } |
94 | 93 | ||
@@ -100,7 +99,7 @@ int color_vsnprintf(char *bf, size_t size, const char *color, | |||
100 | 99 | ||
101 | int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) | 100 | int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args) |
102 | { | 101 | { |
103 | return __color_vfprintf(fp, color, fmt, args, NULL); | 102 | return __color_vfprintf(fp, color, fmt, args); |
104 | } | 103 | } |
105 | 104 | ||
106 | int color_snprintf(char *bf, size_t size, const char *color, | 105 | int color_snprintf(char *bf, size_t size, const char *color, |
@@ -126,16 +125,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) | |||
126 | return r; | 125 | return r; |
127 | } | 126 | } |
128 | 127 | ||
129 | int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) | ||
130 | { | ||
131 | va_list args; | ||
132 | int r; | ||
133 | va_start(args, fmt); | ||
134 | r = __color_vfprintf(fp, color, fmt, args, "\n"); | ||
135 | va_end(args); | ||
136 | return r; | ||
137 | } | ||
138 | |||
139 | /* | 128 | /* |
140 | * This function splits the buffer by newlines and colors the lines individually. | 129 | * This function splits the buffer by newlines and colors the lines individually. |
141 | * | 130 | * |
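__color_vfprintf() no longer counts the color escape sequences (or a trailing string) in its return value, so a caller that uses the return value to pad out a column gets the same layout whether colors are enabled or not. A small self-contained illustration of the alignment problem this avoids, not perf code:

#include <stdio.h>
#include <string.h>

#define RED   "\033[31m"
#define RESET "\033[0m"

int main(void)
{
        const char *text = "12.3%";
        /* visible width is 5, but the colored string is much longer */
        int visible = (int)strlen(text);
        int with_escapes = (int)(strlen(RED) + strlen(text) + strlen(RESET));

        printf(RED "%s" RESET "%*s|\n", text, 20 - visible, "");
        printf("returning %d instead of %d keeps the padding correct\n",
               visible, with_escapes);
        return 0;
}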
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 38146f922c54..a93997f16dec 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h | |||
@@ -35,7 +35,6 @@ int color_vsnprintf(char *bf, size_t size, const char *color, | |||
35 | int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); | 35 | int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); |
36 | int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); | 36 | int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); |
37 | int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); | 37 | int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); |
38 | int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); | ||
39 | int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); | 38 | int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); |
40 | int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); | 39 | int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); |
41 | int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); | 40 | int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); |
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index e18f653cd7db..2e452ac1353d 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include "cache.h" | 12 | #include "cache.h" |
13 | #include "exec_cmd.h" | 13 | #include "exec_cmd.h" |
14 | #include "util/hist.h" /* perf_hist_config */ | 14 | #include "util/hist.h" /* perf_hist_config */ |
15 | #include "util/llvm-utils.h" /* perf_llvm_config */ | ||
15 | 16 | ||
16 | #define MAXNAME (256) | 17 | #define MAXNAME (256) |
17 | 18 | ||
@@ -408,6 +409,9 @@ int perf_default_config(const char *var, const char *value, | |||
408 | if (!prefixcmp(var, "call-graph.")) | 409 | if (!prefixcmp(var, "call-graph.")) |
409 | return perf_callchain_config(var, value); | 410 | return perf_callchain_config(var, value); |
410 | 411 | ||
412 | if (!prefixcmp(var, "llvm.")) | ||
413 | return perf_llvm_config(var, value); | ||
414 | |||
411 | /* Add other config variables here. */ | 415 | /* Add other config variables here. */ |
412 | return 0; | 416 | return 0; |
413 | } | 417 | } |
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c new file mode 100644 index 000000000000..e3fde313deb2 --- /dev/null +++ b/tools/perf/util/counts.c | |||
@@ -0,0 +1,52 @@ | |||
1 | #include <stdlib.h> | ||
2 | #include "evsel.h" | ||
3 | #include "counts.h" | ||
4 | |||
5 | struct perf_counts *perf_counts__new(int ncpus, int nthreads) | ||
6 | { | ||
7 | struct perf_counts *counts = zalloc(sizeof(*counts)); | ||
8 | |||
9 | if (counts) { | ||
10 | struct xyarray *values; | ||
11 | |||
12 | values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); | ||
13 | if (!values) { | ||
14 | free(counts); | ||
15 | return NULL; | ||
16 | } | ||
17 | |||
18 | counts->values = values; | ||
19 | } | ||
20 | |||
21 | return counts; | ||
22 | } | ||
23 | |||
24 | void perf_counts__delete(struct perf_counts *counts) | ||
25 | { | ||
26 | if (counts) { | ||
27 | xyarray__delete(counts->values); | ||
28 | free(counts); | ||
29 | } | ||
30 | } | ||
31 | |||
32 | static void perf_counts__reset(struct perf_counts *counts) | ||
33 | { | ||
34 | xyarray__reset(counts->values); | ||
35 | } | ||
36 | |||
37 | void perf_evsel__reset_counts(struct perf_evsel *evsel) | ||
38 | { | ||
39 | perf_counts__reset(evsel->counts); | ||
40 | } | ||
41 | |||
42 | int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) | ||
43 | { | ||
44 | evsel->counts = perf_counts__new(ncpus, nthreads); | ||
45 | return evsel->counts != NULL ? 0 : -ENOMEM; | ||
46 | } | ||
47 | |||
48 | void perf_evsel__free_counts(struct perf_evsel *evsel) | ||
49 | { | ||
50 | perf_counts__delete(evsel->counts); | ||
51 | evsel->counts = NULL; | ||
52 | } | ||
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h new file mode 100644 index 000000000000..34d8baaf558a --- /dev/null +++ b/tools/perf/util/counts.h | |||
@@ -0,0 +1,37 @@ | |||
1 | #ifndef __PERF_COUNTS_H | ||
2 | #define __PERF_COUNTS_H | ||
3 | |||
4 | #include "xyarray.h" | ||
5 | |||
6 | struct perf_counts_values { | ||
7 | union { | ||
8 | struct { | ||
9 | u64 val; | ||
10 | u64 ena; | ||
11 | u64 run; | ||
12 | }; | ||
13 | u64 values[3]; | ||
14 | }; | ||
15 | }; | ||
16 | |||
17 | struct perf_counts { | ||
18 | s8 scaled; | ||
19 | struct perf_counts_values aggr; | ||
20 | struct xyarray *values; | ||
21 | }; | ||
22 | |||
23 | |||
24 | static inline struct perf_counts_values* | ||
25 | perf_counts(struct perf_counts *counts, int cpu, int thread) | ||
26 | { | ||
27 | return xyarray__entry(counts->values, cpu, thread); | ||
28 | } | ||
29 | |||
30 | struct perf_counts *perf_counts__new(int ncpus, int nthreads); | ||
31 | void perf_counts__delete(struct perf_counts *counts); | ||
32 | |||
33 | void perf_evsel__reset_counts(struct perf_evsel *evsel); | ||
34 | int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); | ||
35 | void perf_evsel__free_counts(struct perf_evsel *evsel); | ||
36 | |||
37 | #endif /* __PERF_COUNTS_H */ | ||
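Editor's note: the new counts.c/counts.h files keep one perf_counts_values slot per (cpu, thread) pair inside an xyarray. The sketch below reproduces the same idea in plain C (a contiguous, zero-initialised grid plus a row-major accessor) so the indexing is easy to follow; struct grid and its helpers are hypothetical names, not the perf API.

        #include <stdio.h>
        #include <stdlib.h>
        #include <stdint.h>

        struct counts_values { uint64_t val, ena, run; };

        /* Contiguous ncpus x nthreads grid, like xyarray__new() in the patch. */
        struct grid {
                int ncpus, nthreads;
                struct counts_values *slots;
        };

        static struct grid *grid_new(int ncpus, int nthreads)
        {
                struct grid *g = calloc(1, sizeof(*g));

                if (!g)
                        return NULL;
                g->slots = calloc((size_t)ncpus * nthreads, sizeof(*g->slots));
                if (!g->slots) {
                        free(g);
                        return NULL;
                }
                g->ncpus = ncpus;
                g->nthreads = nthreads;
                return g;
        }

        /* Row-major accessor, the analogue of perf_counts(counts, cpu, thread). */
        static struct counts_values *grid_entry(struct grid *g, int cpu, int thread)
        {
                return &g->slots[cpu * g->nthreads + thread];
        }

        static void grid_delete(struct grid *g)
        {
                if (g) {
                        free(g->slots);
                        free(g);
                }
        }

        int main(void)
        {
                struct grid *g = grid_new(4, 2);

                if (!g)
                        return 1;
                grid_entry(g, 3, 1)->val = 42;
                printf("cpu 3, thread 1: %llu\n",
                       (unsigned long long)grid_entry(g, 3, 1)->val);
                grid_delete(g);
                return 0;
        }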
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index 2da5581ec74d..86d9c7302598 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c | |||
@@ -36,6 +36,11 @@ static int _eprintf(int level, int var, const char *fmt, va_list args) | |||
36 | return ret; | 36 | return ret; |
37 | } | 37 | } |
38 | 38 | ||
39 | int veprintf(int level, int var, const char *fmt, va_list args) | ||
40 | { | ||
41 | return _eprintf(level, var, fmt, args); | ||
42 | } | ||
43 | |||
39 | int eprintf(int level, int var, const char *fmt, ...) | 44 | int eprintf(int level, int var, const char *fmt, ...) |
40 | { | 45 | { |
41 | va_list args; | 46 | va_list args; |
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index caac2fdc6105..8b9a088c32ab 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h | |||
@@ -50,6 +50,7 @@ void pr_stat(const char *fmt, ...); | |||
50 | 50 | ||
51 | int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); | 51 | int eprintf(int level, int var, const char *fmt, ...) __attribute__((format(printf, 3, 4))); |
52 | int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); | 52 | int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __attribute__((format(printf, 4, 5))); |
53 | int veprintf(int level, int var, const char *fmt, va_list args); | ||
53 | 54 | ||
54 | int perf_debug_option(const char *str); | 55 | int perf_debug_option(const char *str); |
55 | 56 | ||
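Editor's note: the debug.c/debug.h change above exports veprintf(), a va_list twin of eprintf(), so that other variadic helpers can forward their argument list instead of formatting twice. A minimal standalone sketch of that layering follows; my_eprintf/my_veprintf and the fixed verbosity check are hypothetical stand-ins for perf's level/var handling, not the real API.

        #include <stdarg.h>
        #include <stdio.h>

        static int verbose = 1;

        /* va_list core: everything else funnels through here. */
        static int my_veprintf(int level, const char *fmt, va_list args)
        {
                if (level > verbose)
                        return 0;
                return vfprintf(stderr, fmt, args);
        }

        /* Variadic convenience wrapper, mirroring eprintf() on top of veprintf(). */
        static int my_eprintf(int level, const char *fmt, ...)
        {
                va_list args;
                int ret;

                va_start(args, fmt);
                ret = my_veprintf(level, fmt, args);
                va_end(args);
                return ret;
        }

        int main(void)
        {
                my_eprintf(1, "printed: verbose=%d\n", verbose);
                my_eprintf(2, "suppressed at this verbosity\n");
                return 0;
        }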
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index 2fe98bb0e95b..fc8db9c764ac 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h | |||
@@ -137,6 +137,10 @@ struct dso { | |||
137 | struct rb_node rb_node; /* rbtree node sorted by long name */ | 137 | struct rb_node rb_node; /* rbtree node sorted by long name */ |
138 | struct rb_root symbols[MAP__NR_TYPES]; | 138 | struct rb_root symbols[MAP__NR_TYPES]; |
139 | struct rb_root symbol_names[MAP__NR_TYPES]; | 139 | struct rb_root symbol_names[MAP__NR_TYPES]; |
140 | struct { | ||
141 | u64 addr; | ||
142 | struct symbol *symbol; | ||
143 | } last_find_result[MAP__NR_TYPES]; | ||
140 | void *a2l; | 144 | void *a2l; |
141 | char *symsrc_filename; | 145 | char *symsrc_filename; |
142 | unsigned int a2l_fails; | 146 | unsigned int a2l_fails; |
@@ -320,6 +324,8 @@ struct dso *__dsos__findnew(struct dsos *dsos, const char *name); | |||
320 | struct dso *dsos__findnew(struct dsos *dsos, const char *name); | 324 | struct dso *dsos__findnew(struct dsos *dsos, const char *name); |
321 | bool __dsos__read_build_ids(struct list_head *head, bool with_hits); | 325 | bool __dsos__read_build_ids(struct list_head *head, bool with_hits); |
322 | 326 | ||
327 | void dso__reset_find_symbol_cache(struct dso *dso); | ||
328 | |||
323 | size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, | 329 | size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, |
324 | bool (skip)(struct dso *dso, int parm), int parm); | 330 | bool (skip)(struct dso *dso, int parm), int parm); |
325 | size_t __dsos__fprintf(struct list_head *head, FILE *fp); | 331 | size_t __dsos__fprintf(struct list_head *head, FILE *fp); |
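Editor's note: the dso.h hunk only adds the storage for a per-map-type last_find_result slot (address plus symbol) and declares dso__reset_find_symbol_cache(); the lookup side is not shown in this hunk. Purely as an assumed illustration of how such a single-entry memo cache usually wraps a slower search, here is a self-contained sketch; sym_cache, slow_lookup and cached_lookup are invented names.

        #include <inttypes.h>
        #include <stdint.h>
        #include <stdio.h>

        struct sym { const char *name; };

        /* Single-entry cache: remember the last queried address and its result. */
        struct sym_cache {
                uint64_t addr;
                const struct sym *symbol;
        };

        /* Stand-in for the expensive rbtree walk a real symbol lookup does. */
        static const struct sym *slow_lookup(uint64_t addr)
        {
                static const struct sym s = { "some_function" };

                printf("slow lookup for %#" PRIx64 "\n", addr);
                return addr ? &s : NULL;
        }

        static const struct sym *cached_lookup(struct sym_cache *c, uint64_t addr)
        {
                if (c->addr != addr || !c->symbol) {
                        c->symbol = slow_lookup(addr);
                        c->addr = addr;
                }
                return c->symbol;
        }

        /* Analogue of dso__reset_find_symbol_cache(): drop the memoized entry. */
        static void cache_reset(struct sym_cache *c)
        {
                c->addr = 0;
                c->symbol = NULL;
        }

        int main(void)
        {
                struct sym_cache c = { 0, NULL };

                cached_lookup(&c, 0x1234);      /* misses, does the slow walk */
                cached_lookup(&c, 0x1234);      /* hits the cache, no walk */
                cache_reset(&c);
                cached_lookup(&c, 0x1234);      /* misses again after reset */
                return 0;
        }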
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 57f3ef41c2bc..a509aa8433a1 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c | |||
@@ -734,15 +734,18 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) | |||
734 | Dwarf_Lines *lines; | 734 | Dwarf_Lines *lines; |
735 | Dwarf_Line *line; | 735 | Dwarf_Line *line; |
736 | Dwarf_Addr addr; | 736 | Dwarf_Addr addr; |
737 | const char *fname; | 737 | const char *fname, *decf = NULL; |
738 | int lineno, ret = 0; | 738 | int lineno, ret = 0; |
739 | int decl = 0, inl; | ||
739 | Dwarf_Die die_mem, *cu_die; | 740 | Dwarf_Die die_mem, *cu_die; |
740 | size_t nlines, i; | 741 | size_t nlines, i; |
741 | 742 | ||
742 | /* Get the CU die */ | 743 | /* Get the CU die */ |
743 | if (dwarf_tag(rt_die) != DW_TAG_compile_unit) | 744 | if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { |
744 | cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); | 745 | cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); |
745 | else | 746 | dwarf_decl_line(rt_die, &decl); |
747 | decf = dwarf_decl_file(rt_die); | ||
748 | } else | ||
746 | cu_die = rt_die; | 749 | cu_die = rt_die; |
747 | if (!cu_die) { | 750 | if (!cu_die) { |
748 | pr_debug2("Failed to get CU from given DIE.\n"); | 751 | pr_debug2("Failed to get CU from given DIE.\n"); |
@@ -767,15 +770,21 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) | |||
767 | continue; | 770 | continue; |
768 | } | 771 | } |
769 | /* Filter lines based on address */ | 772 | /* Filter lines based on address */ |
770 | if (rt_die != cu_die) | 773 | if (rt_die != cu_die) { |
771 | /* | 774 | /* |
772 | * Address filtering | 775 | * Address filtering |
773 | * The line is included in given function, and | 776 | * The line is included in given function, and |
774 | * no inline block includes it. | 777 | * no inline block includes it. |
775 | */ | 778 | */ |
776 | if (!dwarf_haspc(rt_die, addr) || | 779 | if (!dwarf_haspc(rt_die, addr)) |
777 | die_find_inlinefunc(rt_die, addr, &die_mem)) | ||
778 | continue; | 780 | continue; |
781 | if (die_find_inlinefunc(rt_die, addr, &die_mem)) { | ||
782 | dwarf_decl_line(&die_mem, &inl); | ||
783 | if (inl != decl || | ||
784 | decf != dwarf_decl_file(&die_mem)) | ||
785 | continue; | ||
786 | } | ||
787 | } | ||
779 | /* Get source line */ | 788 | /* Get source line */ |
780 | fname = dwarf_linesrc(line, NULL, NULL); | 789 | fname = dwarf_linesrc(line, NULL, NULL); |
781 | 790 | ||
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 67a977e5d0ab..7ff61274ed57 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c | |||
@@ -26,6 +26,8 @@ static const char *perf_event__names[] = { | |||
26 | [PERF_RECORD_AUX] = "AUX", | 26 | [PERF_RECORD_AUX] = "AUX", |
27 | [PERF_RECORD_ITRACE_START] = "ITRACE_START", | 27 | [PERF_RECORD_ITRACE_START] = "ITRACE_START", |
28 | [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", | 28 | [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", |
29 | [PERF_RECORD_SWITCH] = "SWITCH", | ||
30 | [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE", | ||
29 | [PERF_RECORD_HEADER_ATTR] = "ATTR", | 31 | [PERF_RECORD_HEADER_ATTR] = "ATTR", |
30 | [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", | 32 | [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", |
31 | [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", | 33 | [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", |
@@ -749,6 +751,14 @@ int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, | |||
749 | return machine__process_lost_samples_event(machine, event, sample); | 751 | return machine__process_lost_samples_event(machine, event, sample); |
750 | } | 752 | } |
751 | 753 | ||
754 | int perf_event__process_switch(struct perf_tool *tool __maybe_unused, | ||
755 | union perf_event *event, | ||
756 | struct perf_sample *sample __maybe_unused, | ||
757 | struct machine *machine) | ||
758 | { | ||
759 | return machine__process_switch_event(machine, event); | ||
760 | } | ||
761 | |||
752 | size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) | 762 | size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) |
753 | { | 763 | { |
754 | return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", | 764 | return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", |
@@ -827,6 +837,20 @@ size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) | |||
827 | event->itrace_start.pid, event->itrace_start.tid); | 837 | event->itrace_start.pid, event->itrace_start.tid); |
828 | } | 838 | } |
829 | 839 | ||
840 | size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) | ||
841 | { | ||
842 | bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT; | ||
843 | const char *in_out = out ? "OUT" : "IN "; | ||
844 | |||
845 | if (event->header.type == PERF_RECORD_SWITCH) | ||
846 | return fprintf(fp, " %s\n", in_out); | ||
847 | |||
848 | return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n", | ||
849 | in_out, out ? "next" : "prev", | ||
850 | event->context_switch.next_prev_pid, | ||
851 | event->context_switch.next_prev_tid); | ||
852 | } | ||
853 | |||
830 | size_t perf_event__fprintf(union perf_event *event, FILE *fp) | 854 | size_t perf_event__fprintf(union perf_event *event, FILE *fp) |
831 | { | 855 | { |
832 | size_t ret = fprintf(fp, "PERF_RECORD_%s", | 856 | size_t ret = fprintf(fp, "PERF_RECORD_%s", |
@@ -852,6 +876,10 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) | |||
852 | case PERF_RECORD_ITRACE_START: | 876 | case PERF_RECORD_ITRACE_START: |
853 | ret += perf_event__fprintf_itrace_start(event, fp); | 877 | ret += perf_event__fprintf_itrace_start(event, fp); |
854 | break; | 878 | break; |
879 | case PERF_RECORD_SWITCH: | ||
880 | case PERF_RECORD_SWITCH_CPU_WIDE: | ||
881 | ret += perf_event__fprintf_switch(event, fp); | ||
882 | break; | ||
855 | default: | 883 | default: |
856 | ret += fprintf(fp, "\n"); | 884 | ret += fprintf(fp, "\n"); |
857 | } | 885 | } |
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c53f36384b64..f729df5e25e6 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h | |||
@@ -134,7 +134,8 @@ struct branch_flags { | |||
134 | u64 predicted:1; | 134 | u64 predicted:1; |
135 | u64 in_tx:1; | 135 | u64 in_tx:1; |
136 | u64 abort:1; | 136 | u64 abort:1; |
137 | u64 reserved:60; | 137 | u64 cycles:16; |
138 | u64 reserved:44; | ||
138 | }; | 139 | }; |
139 | 140 | ||
140 | struct branch_entry { | 141 | struct branch_entry { |
@@ -348,6 +349,12 @@ struct itrace_start_event { | |||
348 | u32 pid, tid; | 349 | u32 pid, tid; |
349 | }; | 350 | }; |
350 | 351 | ||
352 | struct context_switch_event { | ||
353 | struct perf_event_header header; | ||
354 | u32 next_prev_pid; | ||
355 | u32 next_prev_tid; | ||
356 | }; | ||
357 | |||
351 | union perf_event { | 358 | union perf_event { |
352 | struct perf_event_header header; | 359 | struct perf_event_header header; |
353 | struct mmap_event mmap; | 360 | struct mmap_event mmap; |
@@ -369,6 +376,7 @@ union perf_event { | |||
369 | struct auxtrace_error_event auxtrace_error; | 376 | struct auxtrace_error_event auxtrace_error; |
370 | struct aux_event aux; | 377 | struct aux_event aux; |
371 | struct itrace_start_event itrace_start; | 378 | struct itrace_start_event itrace_start; |
379 | struct context_switch_event context_switch; | ||
372 | }; | 380 | }; |
373 | 381 | ||
374 | void perf_event__print_totals(void); | 382 | void perf_event__print_totals(void); |
@@ -418,6 +426,10 @@ int perf_event__process_itrace_start(struct perf_tool *tool, | |||
418 | union perf_event *event, | 426 | union perf_event *event, |
419 | struct perf_sample *sample, | 427 | struct perf_sample *sample, |
420 | struct machine *machine); | 428 | struct machine *machine); |
429 | int perf_event__process_switch(struct perf_tool *tool, | ||
430 | union perf_event *event, | ||
431 | struct perf_sample *sample, | ||
432 | struct machine *machine); | ||
421 | int perf_event__process_mmap(struct perf_tool *tool, | 433 | int perf_event__process_mmap(struct perf_tool *tool, |
422 | union perf_event *event, | 434 | union perf_event *event, |
423 | struct perf_sample *sample, | 435 | struct perf_sample *sample, |
@@ -480,6 +492,7 @@ size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); | |||
480 | size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); | 492 | size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); |
481 | size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); | 493 | size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); |
482 | size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); | 494 | size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); |
495 | size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp); | ||
483 | size_t perf_event__fprintf(union perf_event *event, FILE *fp); | 496 | size_t perf_event__fprintf(union perf_event *event, FILE *fp); |
484 | 497 | ||
485 | u64 kallsyms__get_function_start(const char *kallsyms_filename, | 498 | u64 kallsyms__get_function_start(const char *kallsyms_filename, |
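Editor's note: the event.c/event.h hunks add decoding for PERF_RECORD_SWITCH and PERF_RECORD_SWITCH_CPU_WIDE. The direction lives in header.misc (the SWITCH_OUT bit) and only the CPU-wide variant carries next_prev_pid/tid for the other task. Below is a standalone decoder that mirrors perf_event__fprintf_switch() using local type definitions instead of the perf headers; the numeric constants are taken from the perf_event UAPI this series targets.

        #include <stdint.h>
        #include <stdio.h>
        #include <stdbool.h>

        /* Local stand-ins; values match the perf_event UAPI of this series. */
        enum { REC_SWITCH = 14, REC_SWITCH_CPU_WIDE = 15 };
        #define MISC_SWITCH_OUT (1 << 13)       /* PERF_RECORD_MISC_SWITCH_OUT */

        struct event_header { uint32_t type; uint16_t misc, size; };

        struct context_switch_event {
                struct event_header header;
                uint32_t next_prev_pid;
                uint32_t next_prev_tid;
        };

        /* Same decision tree as perf_event__fprintf_switch() in the hunk above. */
        static void print_switch(const struct context_switch_event *ev)
        {
                bool out = ev->header.misc & MISC_SWITCH_OUT;
                const char *in_out = out ? "OUT" : "IN ";

                if (ev->header.type == REC_SWITCH) {
                        printf(" %s\n", in_out);
                        return;
                }
                printf(" %s %s pid/tid: %5u/%-5u\n",
                       in_out, out ? "next" : "prev",
                       ev->next_prev_pid, ev->next_prev_tid);
        }

        int main(void)
        {
                struct context_switch_event ev = {
                        .header = { .type = REC_SWITCH_CPU_WIDE,
                                    .misc = MISC_SWITCH_OUT },
                        .next_prev_pid = 1234,
                        .next_prev_tid = 1234,
                };

                print_switch(&ev);
                return 0;
        }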
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6cfdee68e763..c8fc8a258f42 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c | |||
@@ -98,6 +98,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) | |||
98 | 98 | ||
99 | evlist__for_each_safe(evlist, n, pos) { | 99 | evlist__for_each_safe(evlist, n, pos) { |
100 | list_del_init(&pos->node); | 100 | list_del_init(&pos->node); |
101 | pos->evlist = NULL; | ||
101 | perf_evsel__delete(pos); | 102 | perf_evsel__delete(pos); |
102 | } | 103 | } |
103 | 104 | ||
@@ -123,26 +124,55 @@ void perf_evlist__delete(struct perf_evlist *evlist) | |||
123 | free(evlist); | 124 | free(evlist); |
124 | } | 125 | } |
125 | 126 | ||
127 | static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, | ||
128 | struct perf_evsel *evsel) | ||
129 | { | ||
130 | /* | ||
131 | * We already have cpus for evsel (via PMU sysfs) so | ||
132 | * keep it, if there's no target cpu list defined. | ||
133 | */ | ||
134 | if (!evsel->own_cpus || evlist->has_user_cpus) { | ||
135 | cpu_map__put(evsel->cpus); | ||
136 | evsel->cpus = cpu_map__get(evlist->cpus); | ||
137 | } else if (evsel->cpus != evsel->own_cpus) { | ||
138 | cpu_map__put(evsel->cpus); | ||
139 | evsel->cpus = cpu_map__get(evsel->own_cpus); | ||
140 | } | ||
141 | |||
142 | thread_map__put(evsel->threads); | ||
143 | evsel->threads = thread_map__get(evlist->threads); | ||
144 | } | ||
145 | |||
146 | static void perf_evlist__propagate_maps(struct perf_evlist *evlist) | ||
147 | { | ||
148 | struct perf_evsel *evsel; | ||
149 | |||
150 | evlist__for_each(evlist, evsel) | ||
151 | __perf_evlist__propagate_maps(evlist, evsel); | ||
152 | } | ||
153 | |||
126 | void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) | 154 | void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry) |
127 | { | 155 | { |
156 | entry->evlist = evlist; | ||
128 | list_add_tail(&entry->node, &evlist->entries); | 157 | list_add_tail(&entry->node, &evlist->entries); |
129 | entry->idx = evlist->nr_entries; | 158 | entry->idx = evlist->nr_entries; |
130 | entry->tracking = !entry->idx; | 159 | entry->tracking = !entry->idx; |
131 | 160 | ||
132 | if (!evlist->nr_entries++) | 161 | if (!evlist->nr_entries++) |
133 | perf_evlist__set_id_pos(evlist); | 162 | perf_evlist__set_id_pos(evlist); |
163 | |||
164 | __perf_evlist__propagate_maps(evlist, entry); | ||
134 | } | 165 | } |
135 | 166 | ||
136 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | 167 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, |
137 | struct list_head *list, | 168 | struct list_head *list) |
138 | int nr_entries) | ||
139 | { | 169 | { |
140 | bool set_id_pos = !evlist->nr_entries; | 170 | struct perf_evsel *evsel, *temp; |
141 | 171 | ||
142 | list_splice_tail(list, &evlist->entries); | 172 | __evlist__for_each_safe(list, temp, evsel) { |
143 | evlist->nr_entries += nr_entries; | 173 | list_del_init(&evsel->node); |
144 | if (set_id_pos) | 174 | perf_evlist__add(evlist, evsel); |
145 | perf_evlist__set_id_pos(evlist); | 175 | } |
146 | } | 176 | } |
147 | 177 | ||
148 | void __perf_evlist__set_leader(struct list_head *list) | 178 | void __perf_evlist__set_leader(struct list_head *list) |
@@ -208,7 +238,7 @@ static int perf_evlist__add_attrs(struct perf_evlist *evlist, | |||
208 | list_add_tail(&evsel->node, &head); | 238 | list_add_tail(&evsel->node, &head); |
209 | } | 239 | } |
210 | 240 | ||
211 | perf_evlist__splice_list_tail(evlist, &head, nr_attrs); | 241 | perf_evlist__splice_list_tail(evlist, &head); |
212 | 242 | ||
213 | return 0; | 243 | return 0; |
214 | 244 | ||
@@ -573,7 +603,7 @@ struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id) | |||
573 | { | 603 | { |
574 | struct perf_sample_id *sid; | 604 | struct perf_sample_id *sid; |
575 | 605 | ||
576 | if (evlist->nr_entries == 1) | 606 | if (evlist->nr_entries == 1 || !id) |
577 | return perf_evlist__first(evlist); | 607 | return perf_evlist__first(evlist); |
578 | 608 | ||
579 | sid = perf_evlist__id2sid(evlist, id); | 609 | sid = perf_evlist__id2sid(evlist, id); |
@@ -1101,55 +1131,58 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, | |||
1101 | return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); | 1131 | return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); |
1102 | } | 1132 | } |
1103 | 1133 | ||
1104 | static int perf_evlist__propagate_maps(struct perf_evlist *evlist, | ||
1105 | struct target *target) | ||
1106 | { | ||
1107 | struct perf_evsel *evsel; | ||
1108 | |||
1109 | evlist__for_each(evlist, evsel) { | ||
1110 | /* | ||
1111 | * We already have cpus for evsel (via PMU sysfs) so | ||
1112 | * keep it, if there's no target cpu list defined. | ||
1113 | */ | ||
1114 | if (evsel->cpus && target->cpu_list) | ||
1115 | cpu_map__put(evsel->cpus); | ||
1116 | |||
1117 | if (!evsel->cpus || target->cpu_list) | ||
1118 | evsel->cpus = cpu_map__get(evlist->cpus); | ||
1119 | |||
1120 | evsel->threads = thread_map__get(evlist->threads); | ||
1121 | |||
1122 | if (!evsel->cpus || !evsel->threads) | ||
1123 | return -ENOMEM; | ||
1124 | } | ||
1125 | |||
1126 | return 0; | ||
1127 | } | ||
1128 | |||
1129 | int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) | 1134 | int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) |
1130 | { | 1135 | { |
1131 | evlist->threads = thread_map__new_str(target->pid, target->tid, | 1136 | struct cpu_map *cpus; |
1132 | target->uid); | 1137 | struct thread_map *threads; |
1138 | |||
1139 | threads = thread_map__new_str(target->pid, target->tid, target->uid); | ||
1133 | 1140 | ||
1134 | if (evlist->threads == NULL) | 1141 | if (!threads) |
1135 | return -1; | 1142 | return -1; |
1136 | 1143 | ||
1137 | if (target__uses_dummy_map(target)) | 1144 | if (target__uses_dummy_map(target)) |
1138 | evlist->cpus = cpu_map__dummy_new(); | 1145 | cpus = cpu_map__dummy_new(); |
1139 | else | 1146 | else |
1140 | evlist->cpus = cpu_map__new(target->cpu_list); | 1147 | cpus = cpu_map__new(target->cpu_list); |
1141 | 1148 | ||
1142 | if (evlist->cpus == NULL) | 1149 | if (!cpus) |
1143 | goto out_delete_threads; | 1150 | goto out_delete_threads; |
1144 | 1151 | ||
1145 | return perf_evlist__propagate_maps(evlist, target); | 1152 | evlist->has_user_cpus = !!target->cpu_list; |
1153 | |||
1154 | perf_evlist__set_maps(evlist, cpus, threads); | ||
1155 | |||
1156 | return 0; | ||
1146 | 1157 | ||
1147 | out_delete_threads: | 1158 | out_delete_threads: |
1148 | thread_map__put(evlist->threads); | 1159 | thread_map__put(threads); |
1149 | evlist->threads = NULL; | ||
1150 | return -1; | 1160 | return -1; |
1151 | } | 1161 | } |
1152 | 1162 | ||
1163 | void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, | ||
1164 | struct thread_map *threads) | ||
1165 | { | ||
1166 | /* | ||
1167 | * Allow for the possibility that one or another of the maps isn't being | ||
1168 | * changed i.e. don't put it. Note we are assuming the maps that are | ||
1169 | * being applied are brand new and evlist is taking ownership of the | ||
1170 | * original reference count of 1. If that is not the case it is up to | ||
1171 | * the caller to increase the reference count. | ||
1172 | */ | ||
1173 | if (cpus != evlist->cpus) { | ||
1174 | cpu_map__put(evlist->cpus); | ||
1175 | evlist->cpus = cpus; | ||
1176 | } | ||
1177 | |||
1178 | if (threads != evlist->threads) { | ||
1179 | thread_map__put(evlist->threads); | ||
1180 | evlist->threads = threads; | ||
1181 | } | ||
1182 | |||
1183 | perf_evlist__propagate_maps(evlist); | ||
1184 | } | ||
1185 | |||
1153 | int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) | 1186 | int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) |
1154 | { | 1187 | { |
1155 | struct perf_evsel *evsel; | 1188 | struct perf_evsel *evsel; |
@@ -1161,7 +1194,11 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **e | |||
1161 | if (evsel->filter == NULL) | 1194 | if (evsel->filter == NULL) |
1162 | continue; | 1195 | continue; |
1163 | 1196 | ||
1164 | err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); | 1197 | /* |
1198 | * filters only work for tracepoint event, which doesn't have cpu limit. | ||
1199 | * So evlist and evsel should always be same. | ||
1200 | */ | ||
1201 | err = perf_evsel__apply_filter(evsel, ncpus, nthreads, evsel->filter); | ||
1165 | if (err) { | 1202 | if (err) { |
1166 | *err_evsel = evsel; | 1203 | *err_evsel = evsel; |
1167 | break; | 1204 | break; |
@@ -1175,11 +1212,9 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) | |||
1175 | { | 1212 | { |
1176 | struct perf_evsel *evsel; | 1213 | struct perf_evsel *evsel; |
1177 | int err = 0; | 1214 | int err = 0; |
1178 | const int ncpus = cpu_map__nr(evlist->cpus), | ||
1179 | nthreads = thread_map__nr(evlist->threads); | ||
1180 | 1215 | ||
1181 | evlist__for_each(evlist, evsel) { | 1216 | evlist__for_each(evlist, evsel) { |
1182 | err = perf_evsel__set_filter(evsel, ncpus, nthreads, filter); | 1217 | err = perf_evsel__set_filter(evsel, filter); |
1183 | if (err) | 1218 | if (err) |
1184 | break; | 1219 | break; |
1185 | } | 1220 | } |
@@ -1257,6 +1292,16 @@ u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist) | |||
1257 | return __perf_evlist__combined_sample_type(evlist); | 1292 | return __perf_evlist__combined_sample_type(evlist); |
1258 | } | 1293 | } |
1259 | 1294 | ||
1295 | u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist) | ||
1296 | { | ||
1297 | struct perf_evsel *evsel; | ||
1298 | u64 branch_type = 0; | ||
1299 | |||
1300 | evlist__for_each(evlist, evsel) | ||
1301 | branch_type |= evsel->attr.branch_sample_type; | ||
1302 | return branch_type; | ||
1303 | } | ||
1304 | |||
1260 | bool perf_evlist__valid_read_format(struct perf_evlist *evlist) | 1305 | bool perf_evlist__valid_read_format(struct perf_evlist *evlist) |
1261 | { | 1306 | { |
1262 | struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; | 1307 | struct perf_evsel *first = perf_evlist__first(evlist), *pos = first; |
@@ -1355,6 +1400,8 @@ void perf_evlist__close(struct perf_evlist *evlist) | |||
1355 | 1400 | ||
1356 | static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) | 1401 | static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) |
1357 | { | 1402 | { |
1403 | struct cpu_map *cpus; | ||
1404 | struct thread_map *threads; | ||
1358 | int err = -ENOMEM; | 1405 | int err = -ENOMEM; |
1359 | 1406 | ||
1360 | /* | 1407 | /* |
@@ -1366,20 +1413,19 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) | |||
1366 | * error, and we may not want to do that fallback to a | 1413 | * error, and we may not want to do that fallback to a |
1367 | * default cpu identity map :-\ | 1414 | * default cpu identity map :-\ |
1368 | */ | 1415 | */ |
1369 | evlist->cpus = cpu_map__new(NULL); | 1416 | cpus = cpu_map__new(NULL); |
1370 | if (evlist->cpus == NULL) | 1417 | if (!cpus) |
1371 | goto out; | 1418 | goto out; |
1372 | 1419 | ||
1373 | evlist->threads = thread_map__new_dummy(); | 1420 | threads = thread_map__new_dummy(); |
1374 | if (evlist->threads == NULL) | 1421 | if (!threads) |
1375 | goto out_free_cpus; | 1422 | goto out_put; |
1376 | 1423 | ||
1377 | err = 0; | 1424 | perf_evlist__set_maps(evlist, cpus, threads); |
1378 | out: | 1425 | out: |
1379 | return err; | 1426 | return err; |
1380 | out_free_cpus: | 1427 | out_put: |
1381 | cpu_map__put(evlist->cpus); | 1428 | cpu_map__put(cpus); |
1382 | evlist->cpus = NULL; | ||
1383 | goto out; | 1429 | goto out; |
1384 | } | 1430 | } |
1385 | 1431 | ||
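Editor's note: __perf_evlist__propagate_maps() above encodes a single precedence rule. An evsel keeps the cpu map its PMU supplied (own_cpus) unless the user gave an explicit cpu list, in which case the evlist-wide map wins, and thread maps always come from the evlist. The following is a refcount-free paraphrase of just that decision, with hypothetical types, to make the precedence explicit.

        #include <stdio.h>

        struct cpu_map { const char *desc; };

        struct evsel {
                struct cpu_map *cpus;           /* what will actually be opened */
                struct cpu_map *own_cpus;       /* PMU sysfs cpumask, may be NULL */
        };

        struct evlist {
                struct cpu_map *cpus;           /* from the target / -C option */
                int has_user_cpus;              /* user passed an explicit cpu list */
        };

        /* Precedence rule from __perf_evlist__propagate_maps(), minus refcounting. */
        static void propagate(struct evlist *el, struct evsel *ev)
        {
                if (!ev->own_cpus || el->has_user_cpus)
                        ev->cpus = el->cpus;
                else
                        ev->cpus = ev->own_cpus;
        }

        int main(void)
        {
                struct cpu_map all = { "evlist cpus" }, pmu = { "pmu cpumask" };
                struct evlist el = { &all, 0 };
                struct evsel with_pmu = { NULL, &pmu }, plain = { NULL, NULL };

                propagate(&el, &with_pmu);      /* keeps the PMU's own map */
                propagate(&el, &plain);         /* falls back to the evlist map */
                printf("%s / %s\n", with_pmu.cpus->desc, plain.cpus->desc);

                el.has_user_cpus = 1;           /* explicit cpu list overrides the PMU */
                propagate(&el, &with_pmu);
                printf("%s\n", with_pmu.cpus->desc);
                return 0;
        }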
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 037633c1da9d..115d8b53c601 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h | |||
@@ -42,6 +42,7 @@ struct perf_evlist { | |||
42 | int nr_mmaps; | 42 | int nr_mmaps; |
43 | bool overwrite; | 43 | bool overwrite; |
44 | bool enabled; | 44 | bool enabled; |
45 | bool has_user_cpus; | ||
45 | size_t mmap_len; | 46 | size_t mmap_len; |
46 | int id_pos; | 47 | int id_pos; |
47 | int is_pos; | 48 | int is_pos; |
@@ -56,6 +57,7 @@ struct perf_evlist { | |||
56 | struct cpu_map *cpus; | 57 | struct cpu_map *cpus; |
57 | struct perf_evsel *selected; | 58 | struct perf_evsel *selected; |
58 | struct events_stats stats; | 59 | struct events_stats stats; |
60 | struct perf_env *env; | ||
59 | }; | 61 | }; |
60 | 62 | ||
61 | struct perf_evsel_str_handler { | 63 | struct perf_evsel_str_handler { |
@@ -114,6 +116,8 @@ void perf_evlist__close(struct perf_evlist *evlist); | |||
114 | 116 | ||
115 | void perf_evlist__set_id_pos(struct perf_evlist *evlist); | 117 | void perf_evlist__set_id_pos(struct perf_evlist *evlist); |
116 | bool perf_can_sample_identifier(void); | 118 | bool perf_can_sample_identifier(void); |
119 | bool perf_can_record_switch_events(void); | ||
120 | bool perf_can_record_cpu_wide(void); | ||
117 | void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); | 121 | void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts); |
118 | int record_opts__config(struct record_opts *opts); | 122 | int record_opts__config(struct record_opts *opts); |
119 | 123 | ||
@@ -152,14 +156,8 @@ int perf_evlist__enable_event_idx(struct perf_evlist *evlist, | |||
152 | void perf_evlist__set_selected(struct perf_evlist *evlist, | 156 | void perf_evlist__set_selected(struct perf_evlist *evlist, |
153 | struct perf_evsel *evsel); | 157 | struct perf_evsel *evsel); |
154 | 158 | ||
155 | static inline void perf_evlist__set_maps(struct perf_evlist *evlist, | 159 | void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus, |
156 | struct cpu_map *cpus, | 160 | struct thread_map *threads); |
157 | struct thread_map *threads) | ||
158 | { | ||
159 | evlist->cpus = cpus; | ||
160 | evlist->threads = threads; | ||
161 | } | ||
162 | |||
163 | int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); | 161 | int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); |
164 | int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); | 162 | int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); |
165 | 163 | ||
@@ -169,6 +167,7 @@ void perf_evlist__set_leader(struct perf_evlist *evlist); | |||
169 | u64 perf_evlist__read_format(struct perf_evlist *evlist); | 167 | u64 perf_evlist__read_format(struct perf_evlist *evlist); |
170 | u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); | 168 | u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist); |
171 | u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); | 169 | u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist); |
170 | u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist); | ||
172 | bool perf_evlist__sample_id_all(struct perf_evlist *evlist); | 171 | bool perf_evlist__sample_id_all(struct perf_evlist *evlist); |
173 | u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); | 172 | u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); |
174 | 173 | ||
@@ -180,8 +179,7 @@ bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); | |||
180 | bool perf_evlist__valid_read_format(struct perf_evlist *evlist); | 179 | bool perf_evlist__valid_read_format(struct perf_evlist *evlist); |
181 | 180 | ||
182 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, | 181 | void perf_evlist__splice_list_tail(struct perf_evlist *evlist, |
183 | struct list_head *list, | 182 | struct list_head *list); |
184 | int nr_entries); | ||
185 | 183 | ||
186 | static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) | 184 | static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist) |
187 | { | 185 | { |
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 2936b3080722..5410483d5219 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c | |||
@@ -206,10 +206,13 @@ void perf_evsel__init(struct perf_evsel *evsel, | |||
206 | evsel->leader = evsel; | 206 | evsel->leader = evsel; |
207 | evsel->unit = ""; | 207 | evsel->unit = ""; |
208 | evsel->scale = 1.0; | 208 | evsel->scale = 1.0; |
209 | evsel->evlist = NULL; | ||
209 | INIT_LIST_HEAD(&evsel->node); | 210 | INIT_LIST_HEAD(&evsel->node); |
211 | INIT_LIST_HEAD(&evsel->config_terms); | ||
210 | perf_evsel__object.init(evsel); | 212 | perf_evsel__object.init(evsel); |
211 | evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); | 213 | evsel->sample_size = __perf_evsel__sample_size(attr->sample_type); |
212 | perf_evsel__calc_id_pos(evsel); | 214 | perf_evsel__calc_id_pos(evsel); |
215 | evsel->cmdline_group_boundary = false; | ||
213 | } | 216 | } |
214 | 217 | ||
215 | struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) | 218 | struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx) |
@@ -543,14 +546,15 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) | |||
543 | 546 | ||
544 | static void | 547 | static void |
545 | perf_evsel__config_callgraph(struct perf_evsel *evsel, | 548 | perf_evsel__config_callgraph(struct perf_evsel *evsel, |
546 | struct record_opts *opts) | 549 | struct record_opts *opts, |
550 | struct callchain_param *param) | ||
547 | { | 551 | { |
548 | bool function = perf_evsel__is_function_event(evsel); | 552 | bool function = perf_evsel__is_function_event(evsel); |
549 | struct perf_event_attr *attr = &evsel->attr; | 553 | struct perf_event_attr *attr = &evsel->attr; |
550 | 554 | ||
551 | perf_evsel__set_sample_bit(evsel, CALLCHAIN); | 555 | perf_evsel__set_sample_bit(evsel, CALLCHAIN); |
552 | 556 | ||
553 | if (callchain_param.record_mode == CALLCHAIN_LBR) { | 557 | if (param->record_mode == CALLCHAIN_LBR) { |
554 | if (!opts->branch_stack) { | 558 | if (!opts->branch_stack) { |
555 | if (attr->exclude_user) { | 559 | if (attr->exclude_user) { |
556 | pr_warning("LBR callstack option is only available " | 560 | pr_warning("LBR callstack option is only available " |
@@ -566,12 +570,12 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, | |||
566 | "Falling back to framepointers.\n"); | 570 | "Falling back to framepointers.\n"); |
567 | } | 571 | } |
568 | 572 | ||
569 | if (callchain_param.record_mode == CALLCHAIN_DWARF) { | 573 | if (param->record_mode == CALLCHAIN_DWARF) { |
570 | if (!function) { | 574 | if (!function) { |
571 | perf_evsel__set_sample_bit(evsel, REGS_USER); | 575 | perf_evsel__set_sample_bit(evsel, REGS_USER); |
572 | perf_evsel__set_sample_bit(evsel, STACK_USER); | 576 | perf_evsel__set_sample_bit(evsel, STACK_USER); |
573 | attr->sample_regs_user = PERF_REGS_MASK; | 577 | attr->sample_regs_user = PERF_REGS_MASK; |
574 | attr->sample_stack_user = callchain_param.dump_size; | 578 | attr->sample_stack_user = param->dump_size; |
575 | attr->exclude_callchain_user = 1; | 579 | attr->exclude_callchain_user = 1; |
576 | } else { | 580 | } else { |
577 | pr_info("Cannot use DWARF unwind for function trace event," | 581 | pr_info("Cannot use DWARF unwind for function trace event," |
@@ -585,6 +589,97 @@ perf_evsel__config_callgraph(struct perf_evsel *evsel, | |||
585 | } | 589 | } |
586 | } | 590 | } |
587 | 591 | ||
592 | static void | ||
593 | perf_evsel__reset_callgraph(struct perf_evsel *evsel, | ||
594 | struct callchain_param *param) | ||
595 | { | ||
596 | struct perf_event_attr *attr = &evsel->attr; | ||
597 | |||
598 | perf_evsel__reset_sample_bit(evsel, CALLCHAIN); | ||
599 | if (param->record_mode == CALLCHAIN_LBR) { | ||
600 | perf_evsel__reset_sample_bit(evsel, BRANCH_STACK); | ||
601 | attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER | | ||
602 | PERF_SAMPLE_BRANCH_CALL_STACK); | ||
603 | } | ||
604 | if (param->record_mode == CALLCHAIN_DWARF) { | ||
605 | perf_evsel__reset_sample_bit(evsel, REGS_USER); | ||
606 | perf_evsel__reset_sample_bit(evsel, STACK_USER); | ||
607 | } | ||
608 | } | ||
609 | |||
610 | static void apply_config_terms(struct perf_evsel *evsel, | ||
611 | struct record_opts *opts) | ||
612 | { | ||
613 | struct perf_evsel_config_term *term; | ||
614 | struct list_head *config_terms = &evsel->config_terms; | ||
615 | struct perf_event_attr *attr = &evsel->attr; | ||
616 | struct callchain_param param; | ||
617 | u32 dump_size = 0; | ||
618 | char *callgraph_buf = NULL; | ||
619 | |||
620 | /* callgraph default */ | ||
621 | param.record_mode = callchain_param.record_mode; | ||
622 | |||
623 | list_for_each_entry(term, config_terms, list) { | ||
624 | switch (term->type) { | ||
625 | case PERF_EVSEL__CONFIG_TERM_PERIOD: | ||
626 | attr->sample_period = term->val.period; | ||
627 | attr->freq = 0; | ||
628 | break; | ||
629 | case PERF_EVSEL__CONFIG_TERM_FREQ: | ||
630 | attr->sample_freq = term->val.freq; | ||
631 | attr->freq = 1; | ||
632 | break; | ||
633 | case PERF_EVSEL__CONFIG_TERM_TIME: | ||
634 | if (term->val.time) | ||
635 | perf_evsel__set_sample_bit(evsel, TIME); | ||
636 | else | ||
637 | perf_evsel__reset_sample_bit(evsel, TIME); | ||
638 | break; | ||
639 | case PERF_EVSEL__CONFIG_TERM_CALLGRAPH: | ||
640 | callgraph_buf = term->val.callgraph; | ||
641 | break; | ||
642 | case PERF_EVSEL__CONFIG_TERM_STACK_USER: | ||
643 | dump_size = term->val.stack_user; | ||
644 | break; | ||
645 | default: | ||
646 | break; | ||
647 | } | ||
648 | } | ||
649 | |||
650 | /* User explicitly set per-event callgraph, clear the old setting and reset. */ | ||
651 | if ((callgraph_buf != NULL) || (dump_size > 0)) { | ||
652 | |||
653 | /* parse callgraph parameters */ | ||
654 | if (callgraph_buf != NULL) { | ||
655 | if (!strcmp(callgraph_buf, "no")) { | ||
656 | param.enabled = false; | ||
657 | param.record_mode = CALLCHAIN_NONE; | ||
658 | } else { | ||
659 | param.enabled = true; | ||
660 | if (parse_callchain_record(callgraph_buf, ¶m)) { | ||
661 | pr_err("per-event callgraph setting for %s failed. " | ||
662 | "Apply callgraph global setting for it\n", | ||
663 | evsel->name); | ||
664 | return; | ||
665 | } | ||
666 | } | ||
667 | } | ||
668 | if (dump_size > 0) { | ||
669 | dump_size = round_up(dump_size, sizeof(u64)); | ||
670 | param.dump_size = dump_size; | ||
671 | } | ||
672 | |||
673 | /* If global callgraph set, clear it */ | ||
674 | if (callchain_param.enabled) | ||
675 | perf_evsel__reset_callgraph(evsel, &callchain_param); | ||
676 | |||
677 | /* set perf-event callgraph */ | ||
678 | if (param.enabled) | ||
679 | perf_evsel__config_callgraph(evsel, opts, ¶m); | ||
680 | } | ||
681 | } | ||
682 | |||
588 | /* | 683 | /* |
589 | * The enable_on_exec/disabled value strategy: | 684 | * The enable_on_exec/disabled value strategy: |
590 | * | 685 | * |
@@ -689,10 +784,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) | |||
689 | evsel->attr.exclude_callchain_user = 1; | 784 | evsel->attr.exclude_callchain_user = 1; |
690 | 785 | ||
691 | if (callchain_param.enabled && !evsel->no_aux_samples) | 786 | if (callchain_param.enabled && !evsel->no_aux_samples) |
692 | perf_evsel__config_callgraph(evsel, opts); | 787 | perf_evsel__config_callgraph(evsel, opts, &callchain_param); |
693 | 788 | ||
694 | if (opts->sample_intr_regs) { | 789 | if (opts->sample_intr_regs) { |
695 | attr->sample_regs_intr = PERF_REGS_MASK; | 790 | attr->sample_regs_intr = opts->sample_intr_regs; |
696 | perf_evsel__set_sample_bit(evsel, REGS_INTR); | 791 | perf_evsel__set_sample_bit(evsel, REGS_INTR); |
697 | } | 792 | } |
698 | 793 | ||
@@ -707,7 +802,8 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) | |||
707 | */ | 802 | */ |
708 | if (opts->sample_time && | 803 | if (opts->sample_time && |
709 | (!perf_missing_features.sample_id_all && | 804 | (!perf_missing_features.sample_id_all && |
710 | (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu))) | 805 | (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu || |
806 | opts->sample_time_set))) | ||
711 | perf_evsel__set_sample_bit(evsel, TIME); | 807 | perf_evsel__set_sample_bit(evsel, TIME); |
712 | 808 | ||
713 | if (opts->raw_samples && !evsel->no_aux_samples) { | 809 | if (opts->raw_samples && !evsel->no_aux_samples) { |
@@ -736,6 +832,9 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) | |||
736 | attr->mmap2 = track && !perf_missing_features.mmap2; | 832 | attr->mmap2 = track && !perf_missing_features.mmap2; |
737 | attr->comm = track; | 833 | attr->comm = track; |
738 | 834 | ||
835 | if (opts->record_switch_events) | ||
836 | attr->context_switch = track; | ||
837 | |||
739 | if (opts->sample_transaction) | 838 | if (opts->sample_transaction) |
740 | perf_evsel__set_sample_bit(evsel, TRANSACTION); | 839 | perf_evsel__set_sample_bit(evsel, TRANSACTION); |
741 | 840 | ||
@@ -772,6 +871,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) | |||
772 | attr->use_clockid = 1; | 871 | attr->use_clockid = 1; |
773 | attr->clockid = opts->clockid; | 872 | attr->clockid = opts->clockid; |
774 | } | 873 | } |
874 | |||
875 | /* | ||
876 | * Apply event specific term settings, | ||
877 | * it overloads any global configuration. | ||
878 | */ | ||
879 | apply_config_terms(evsel, opts); | ||
775 | } | 880 | } |
776 | 881 | ||
777 | static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) | 882 | static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) |
@@ -815,14 +920,44 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int ncpus, int nthrea | |||
815 | return 0; | 920 | return 0; |
816 | } | 921 | } |
817 | 922 | ||
818 | int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, | 923 | int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, |
819 | const char *filter) | 924 | const char *filter) |
820 | { | 925 | { |
821 | return perf_evsel__run_ioctl(evsel, ncpus, nthreads, | 926 | return perf_evsel__run_ioctl(evsel, ncpus, nthreads, |
822 | PERF_EVENT_IOC_SET_FILTER, | 927 | PERF_EVENT_IOC_SET_FILTER, |
823 | (void *)filter); | 928 | (void *)filter); |
824 | } | 929 | } |
825 | 930 | ||
931 | int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter) | ||
932 | { | ||
933 | char *new_filter = strdup(filter); | ||
934 | |||
935 | if (new_filter != NULL) { | ||
936 | free(evsel->filter); | ||
937 | evsel->filter = new_filter; | ||
938 | return 0; | ||
939 | } | ||
940 | |||
941 | return -1; | ||
942 | } | ||
943 | |||
944 | int perf_evsel__append_filter(struct perf_evsel *evsel, | ||
945 | const char *op, const char *filter) | ||
946 | { | ||
947 | char *new_filter; | ||
948 | |||
949 | if (evsel->filter == NULL) | ||
950 | return perf_evsel__set_filter(evsel, filter); | ||
951 | |||
952 | if (asprintf(&new_filter,"(%s) %s (%s)", evsel->filter, op, filter) > 0) { | ||
953 | free(evsel->filter); | ||
954 | evsel->filter = new_filter; | ||
955 | return 0; | ||
956 | } | ||
957 | |||
958 | return -1; | ||
959 | } | ||
960 | |||
826 | int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) | 961 | int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads) |
827 | { | 962 | { |
828 | return perf_evsel__run_ioctl(evsel, ncpus, nthreads, | 963 | return perf_evsel__run_ioctl(evsel, ncpus, nthreads, |
@@ -865,6 +1000,16 @@ static void perf_evsel__free_id(struct perf_evsel *evsel) | |||
865 | zfree(&evsel->id); | 1000 | zfree(&evsel->id); |
866 | } | 1001 | } |
867 | 1002 | ||
1003 | static void perf_evsel__free_config_terms(struct perf_evsel *evsel) | ||
1004 | { | ||
1005 | struct perf_evsel_config_term *term, *h; | ||
1006 | |||
1007 | list_for_each_entry_safe(term, h, &evsel->config_terms, list) { | ||
1008 | list_del(&term->list); | ||
1009 | free(term); | ||
1010 | } | ||
1011 | } | ||
1012 | |||
868 | void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) | 1013 | void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) |
869 | { | 1014 | { |
870 | int cpu, thread; | 1015 | int cpu, thread; |
@@ -882,10 +1027,13 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) | |||
882 | void perf_evsel__exit(struct perf_evsel *evsel) | 1027 | void perf_evsel__exit(struct perf_evsel *evsel) |
883 | { | 1028 | { |
884 | assert(list_empty(&evsel->node)); | 1029 | assert(list_empty(&evsel->node)); |
1030 | assert(evsel->evlist == NULL); | ||
885 | perf_evsel__free_fd(evsel); | 1031 | perf_evsel__free_fd(evsel); |
886 | perf_evsel__free_id(evsel); | 1032 | perf_evsel__free_id(evsel); |
1033 | perf_evsel__free_config_terms(evsel); | ||
887 | close_cgroup(evsel->cgrp); | 1034 | close_cgroup(evsel->cgrp); |
888 | cpu_map__put(evsel->cpus); | 1035 | cpu_map__put(evsel->cpus); |
1036 | cpu_map__put(evsel->own_cpus); | ||
889 | thread_map__put(evsel->threads); | 1037 | thread_map__put(evsel->threads); |
890 | zfree(&evsel->group_name); | 1038 | zfree(&evsel->group_name); |
891 | zfree(&evsel->name); | 1039 | zfree(&evsel->name); |
@@ -1095,6 +1243,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, | |||
1095 | PRINT_ATTRf(mmap2, p_unsigned); | 1243 | PRINT_ATTRf(mmap2, p_unsigned); |
1096 | PRINT_ATTRf(comm_exec, p_unsigned); | 1244 | PRINT_ATTRf(comm_exec, p_unsigned); |
1097 | PRINT_ATTRf(use_clockid, p_unsigned); | 1245 | PRINT_ATTRf(use_clockid, p_unsigned); |
1246 | PRINT_ATTRf(context_switch, p_unsigned); | ||
1098 | 1247 | ||
1099 | PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); | 1248 | PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); |
1100 | PRINT_ATTRf(bp_type, p_unsigned); | 1249 | PRINT_ATTRf(bp_type, p_unsigned); |
@@ -2075,8 +2224,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, | |||
2075 | printed += perf_event_attr__fprintf(fp, &evsel->attr, | 2224 | printed += perf_event_attr__fprintf(fp, &evsel->attr, |
2076 | __print_attr__fprintf, &first); | 2225 | __print_attr__fprintf, &first); |
2077 | } else if (details->freq) { | 2226 | } else if (details->freq) { |
2078 | printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, | 2227 | const char *term = "sample_freq"; |
2079 | (u64)evsel->attr.sample_freq); | 2228 | |
2229 | if (!evsel->attr.freq) | ||
2230 | term = "sample_period"; | ||
2231 | |||
2232 | printed += comma_fprintf(fp, &first, " %s=%" PRIu64, | ||
2233 | term, (u64)evsel->attr.sample_freq); | ||
2080 | } | 2234 | } |
2081 | out: | 2235 | out: |
2082 | fputc('\n', fp); | 2236 | fputc('\n', fp); |
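Editor's note: perf_evsel__set_filter() and perf_evsel__append_filter() in the evsel.c hunk keep the filter as a heap string: set replaces it via strdup(), append rebuilds it as "(old) op (new)" with asprintf(), and the ioctl itself is deferred to perf_evsel__apply_filter(). A standalone sketch of the same string handling, operating on a plain char * slot rather than an evsel:

        #define _GNU_SOURCE             /* for asprintf() */
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        static int set_filter(char **slot, const char *filter)
        {
                char *new_filter = strdup(filter);

                if (!new_filter)
                        return -1;
                free(*slot);
                *slot = new_filter;
                return 0;
        }

        /* Combine as "(old) op (new)", matching the patch's append semantics. */
        static int append_filter(char **slot, const char *op, const char *filter)
        {
                char *new_filter;

                if (!*slot)
                        return set_filter(slot, filter);
                if (asprintf(&new_filter, "(%s) %s (%s)", *slot, op, filter) < 0)
                        return -1;
                free(*slot);
                *slot = new_filter;
                return 0;
        }

        int main(void)
        {
                char *filter = NULL;

                set_filter(&filter, "common_pid != 0");
                append_filter(&filter, "&&", "id == 42");
                printf("%s\n", filter); /* (common_pid != 0) && (id == 42) */
                free(filter);
                return 0;
        }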
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4a7ed5656cf0..ef8925f7211a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h | |||
@@ -9,7 +9,7 @@ | |||
9 | #include "xyarray.h" | 9 | #include "xyarray.h" |
10 | #include "symbol.h" | 10 | #include "symbol.h" |
11 | #include "cpumap.h" | 11 | #include "cpumap.h" |
12 | #include "stat.h" | 12 | #include "counts.h" |
13 | 13 | ||
14 | struct perf_evsel; | 14 | struct perf_evsel; |
15 | 15 | ||
@@ -31,8 +31,38 @@ struct perf_sample_id { | |||
31 | 31 | ||
32 | struct cgroup_sel; | 32 | struct cgroup_sel; |
33 | 33 | ||
34 | /* | ||
35 | * The 'struct perf_evsel_config_term' is used to pass event | ||
36 | * specific configuration data to perf_evsel__config routine. | ||
37 | * It is allocated within event parsing and attached to | ||
38 | * perf_evsel::config_terms list head. | ||
39 | */ | ||
40 | enum { | ||
41 | PERF_EVSEL__CONFIG_TERM_PERIOD, | ||
42 | PERF_EVSEL__CONFIG_TERM_FREQ, | ||
43 | PERF_EVSEL__CONFIG_TERM_TIME, | ||
44 | PERF_EVSEL__CONFIG_TERM_CALLGRAPH, | ||
45 | PERF_EVSEL__CONFIG_TERM_STACK_USER, | ||
46 | PERF_EVSEL__CONFIG_TERM_MAX, | ||
47 | }; | ||
48 | |||
49 | struct perf_evsel_config_term { | ||
50 | struct list_head list; | ||
51 | int type; | ||
52 | union { | ||
53 | u64 period; | ||
54 | u64 freq; | ||
55 | bool time; | ||
56 | char *callgraph; | ||
57 | u64 stack_user; | ||
58 | } val; | ||
59 | }; | ||
60 | |||
34 | /** struct perf_evsel - event selector | 61 | /** struct perf_evsel - event selector |
35 | * | 62 | * |
63 | * @evlist - evlist this evsel is in, if it is in one. | ||
64 | * @node - To insert it into evlist->entries or in other list_heads, say in | ||
65 | * the event parsing routines. | ||
36 | * @name - Can be set to retain the original event name passed by the user, | 66 | * @name - Can be set to retain the original event name passed by the user, |
37 | * so that when showing results in tools such as 'perf stat', we | 67 | * so that when showing results in tools such as 'perf stat', we |
38 | * show the name used, not some alias. | 68 | * show the name used, not some alias. |
@@ -46,6 +76,7 @@ struct cgroup_sel; | |||
46 | */ | 76 | */ |
47 | struct perf_evsel { | 77 | struct perf_evsel { |
48 | struct list_head node; | 78 | struct list_head node; |
79 | struct perf_evlist *evlist; | ||
49 | struct perf_event_attr attr; | 80 | struct perf_event_attr attr; |
50 | char *filter; | 81 | char *filter; |
51 | struct xyarray *fd; | 82 | struct xyarray *fd; |
@@ -67,6 +98,7 @@ struct perf_evsel { | |||
67 | struct cgroup_sel *cgrp; | 98 | struct cgroup_sel *cgrp; |
68 | void *handler; | 99 | void *handler; |
69 | struct cpu_map *cpus; | 100 | struct cpu_map *cpus; |
101 | struct cpu_map *own_cpus; | ||
70 | struct thread_map *threads; | 102 | struct thread_map *threads; |
71 | unsigned int sample_size; | 103 | unsigned int sample_size; |
72 | int id_pos; | 104 | int id_pos; |
@@ -86,6 +118,8 @@ struct perf_evsel { | |||
86 | unsigned long *per_pkg_mask; | 118 | unsigned long *per_pkg_mask; |
87 | struct perf_evsel *leader; | 119 | struct perf_evsel *leader; |
88 | char *group_name; | 120 | char *group_name; |
121 | bool cmdline_group_boundary; | ||
122 | struct list_head config_terms; | ||
89 | }; | 123 | }; |
90 | 124 | ||
91 | union u64_swap { | 125 | union u64_swap { |
@@ -182,8 +216,11 @@ void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel, | |||
182 | void perf_evsel__set_sample_id(struct perf_evsel *evsel, | 216 | void perf_evsel__set_sample_id(struct perf_evsel *evsel, |
183 | bool use_sample_identifier); | 217 | bool use_sample_identifier); |
184 | 218 | ||
185 | int perf_evsel__set_filter(struct perf_evsel *evsel, int ncpus, int nthreads, | 219 | int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter); |
186 | const char *filter); | 220 | int perf_evsel__append_filter(struct perf_evsel *evsel, |
221 | const char *op, const char *filter); | ||
222 | int perf_evsel__apply_filter(struct perf_evsel *evsel, int ncpus, int nthreads, | ||
223 | const char *filter); | ||
187 | int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); | 224 | int perf_evsel__enable(struct perf_evsel *evsel, int ncpus, int nthreads); |
188 | 225 | ||
189 | int perf_evsel__open_per_cpu(struct perf_evsel *evsel, | 226 | int perf_evsel__open_per_cpu(struct perf_evsel *evsel, |
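Editor's note: the evsel.h hunk introduces perf_evsel_config_term, a tagged value attached to evsel->config_terms so that per-event terms parsed from the command line override the global defaults in apply_config_terms(). Here is a deliberately simplified, self-contained version of that override pass; it walks an array instead of a list_head and only handles the period, freq and time terms.

        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        enum term_type { TERM_PERIOD, TERM_FREQ, TERM_TIME };

        struct config_term {
                enum term_type type;
                union {
                        uint64_t period;
                        uint64_t freq;
                        bool time;
                } val;
        };

        struct attr {
                uint64_t sample_period_or_freq;
                bool freq;                      /* true: value is a frequency */
                bool sample_time;
        };

        /* Later terms win over earlier ones and over the defaults in *attr. */
        static void apply_terms(struct attr *attr,
                                const struct config_term *terms, int n)
        {
                for (int i = 0; i < n; i++) {
                        switch (terms[i].type) {
                        case TERM_PERIOD:
                                attr->sample_period_or_freq = terms[i].val.period;
                                attr->freq = false;
                                break;
                        case TERM_FREQ:
                                attr->sample_period_or_freq = terms[i].val.freq;
                                attr->freq = true;
                                break;
                        case TERM_TIME:
                                attr->sample_time = terms[i].val.time;
                                break;
                        }
                }
        }

        int main(void)
        {
                struct attr attr = { 4000, true, false };       /* global: -F 4000 */
                struct config_term terms[] = {
                        { TERM_PERIOD, { .period = 100000 } },
                        { TERM_TIME,   { .time = true } },
                };

                apply_terms(&attr, terms, 2);
                printf("%s=%llu time=%d\n", attr.freq ? "freq" : "period",
                       (unsigned long long)attr.sample_period_or_freq,
                       attr.sample_time);
                return 0;
        }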
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 03ace57a800c..fce6634aebe2 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -923,17 +923,13 @@ static void print_cmdline(struct perf_header *ph, int fd __maybe_unused, | |||
923 | FILE *fp) | 923 | FILE *fp) |
924 | { | 924 | { |
925 | int nr, i; | 925 | int nr, i; |
926 | char *str; | ||
927 | 926 | ||
928 | nr = ph->env.nr_cmdline; | 927 | nr = ph->env.nr_cmdline; |
929 | str = ph->env.cmdline; | ||
930 | 928 | ||
931 | fprintf(fp, "# cmdline : "); | 929 | fprintf(fp, "# cmdline : "); |
932 | 930 | ||
933 | for (i = 0; i < nr; i++) { | 931 | for (i = 0; i < nr; i++) |
934 | fprintf(fp, "%s ", str); | 932 | fprintf(fp, "%s ", ph->env.cmdline_argv[i]); |
935 | str += strlen(str) + 1; | ||
936 | } | ||
937 | fputc('\n', fp); | 933 | fputc('\n', fp); |
938 | } | 934 | } |
939 | 935 | ||
@@ -1442,7 +1438,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, | |||
1442 | if (ph->needs_swap) | 1438 | if (ph->needs_swap) |
1443 | nr = bswap_32(nr); | 1439 | nr = bswap_32(nr); |
1444 | 1440 | ||
1445 | ph->env.nr_cpus_online = nr; | 1441 | ph->env.nr_cpus_avail = nr; |
1446 | 1442 | ||
1447 | ret = readn(fd, &nr, sizeof(nr)); | 1443 | ret = readn(fd, &nr, sizeof(nr)); |
1448 | if (ret != sizeof(nr)) | 1444 | if (ret != sizeof(nr)) |
@@ -1451,7 +1447,7 @@ static int process_nrcpus(struct perf_file_section *section __maybe_unused, | |||
1451 | if (ph->needs_swap) | 1447 | if (ph->needs_swap) |
1452 | nr = bswap_32(nr); | 1448 | nr = bswap_32(nr); |
1453 | 1449 | ||
1454 | ph->env.nr_cpus_avail = nr; | 1450 | ph->env.nr_cpus_online = nr; |
1455 | return 0; | 1451 | return 0; |
1456 | } | 1452 | } |
1457 | 1453 | ||
@@ -1541,14 +1537,13 @@ process_event_desc(struct perf_file_section *section __maybe_unused, | |||
1541 | return 0; | 1537 | return 0; |
1542 | } | 1538 | } |
1543 | 1539 | ||
1544 | static int process_cmdline(struct perf_file_section *section __maybe_unused, | 1540 | static int process_cmdline(struct perf_file_section *section, |
1545 | struct perf_header *ph, int fd, | 1541 | struct perf_header *ph, int fd, |
1546 | void *data __maybe_unused) | 1542 | void *data __maybe_unused) |
1547 | { | 1543 | { |
1548 | ssize_t ret; | 1544 | ssize_t ret; |
1549 | char *str; | 1545 | char *str, *cmdline = NULL, **argv = NULL; |
1550 | u32 nr, i; | 1546 | u32 nr, i, len = 0; |
1551 | struct strbuf sb; | ||
1552 | 1547 | ||
1553 | ret = readn(fd, &nr, sizeof(nr)); | 1548 | ret = readn(fd, &nr, sizeof(nr)); |
1554 | if (ret != sizeof(nr)) | 1549 | if (ret != sizeof(nr)) |
@@ -1558,22 +1553,32 @@ static int process_cmdline(struct perf_file_section *section __maybe_unused, | |||
1558 | nr = bswap_32(nr); | 1553 | nr = bswap_32(nr); |
1559 | 1554 | ||
1560 | ph->env.nr_cmdline = nr; | 1555 | ph->env.nr_cmdline = nr; |
1561 | strbuf_init(&sb, 128); | 1556 | |
1557 | cmdline = zalloc(section->size + nr + 1); | ||
1558 | if (!cmdline) | ||
1559 | return -1; | ||
1560 | |||
1561 | argv = zalloc(sizeof(char *) * (nr + 1)); | ||
1562 | if (!argv) | ||
1563 | goto error; | ||
1562 | 1564 | ||
1563 | for (i = 0; i < nr; i++) { | 1565 | for (i = 0; i < nr; i++) { |
1564 | str = do_read_string(fd, ph); | 1566 | str = do_read_string(fd, ph); |
1565 | if (!str) | 1567 | if (!str) |
1566 | goto error; | 1568 | goto error; |
1567 | 1569 | ||
1568 | /* include a NULL character at the end */ | 1570 | argv[i] = cmdline + len; |
1569 | strbuf_add(&sb, str, strlen(str) + 1); | 1571 | memcpy(argv[i], str, strlen(str) + 1); |
1572 | len += strlen(str) + 1; | ||
1570 | free(str); | 1573 | free(str); |
1571 | } | 1574 | } |
1572 | ph->env.cmdline = strbuf_detach(&sb, NULL); | 1575 | ph->env.cmdline = cmdline; |
1576 | ph->env.cmdline_argv = (const char **) argv; | ||
1573 | return 0; | 1577 | return 0; |
1574 | 1578 | ||
1575 | error: | 1579 | error: |
1576 | strbuf_release(&sb); | 1580 | free(argv); |
1581 | free(cmdline); | ||
1577 | return -1; | 1582 | return -1; |
1578 | } | 1583 | } |
1579 | 1584 | ||
@@ -2509,6 +2514,7 @@ int perf_session__read_header(struct perf_session *session) | |||
2509 | if (session->evlist == NULL) | 2514 | if (session->evlist == NULL) |
2510 | return -ENOMEM; | 2515 | return -ENOMEM; |
2511 | 2516 | ||
2517 | session->evlist->env = &header->env; | ||
2512 | if (perf_data_file__is_pipe(file)) | 2518 | if (perf_data_file__is_pipe(file)) |
2513 | return perf_header__read_pipe(session); | 2519 | return perf_header__read_pipe(session); |
2514 | 2520 | ||
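Editor's note: process_cmdline() now builds the saved command line as one packed allocation: every argument string is copied NUL-separated into a single cmdline buffer while cmdline_argv[] collects pointers into it, which is what lets print_cmdline() index arguments directly. A standalone sketch of that packing, fed with made-up input instead of the perf.data string section:

        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>

        /*
         * Pack 'nr' strings into one NUL-separated buffer and return an
         * argv-style, NULL-terminated array of pointers into it, mirroring
         * the layout built by process_cmdline() in the patch. 'total' is the
         * summed length of the strings excluding terminators.
         */
        static char **pack_argv(const char *const *src, int nr, size_t total)
        {
                char *cmdline = calloc(total + nr + 1, 1);
                char **argv = calloc(nr + 1, sizeof(char *));
                size_t len = 0;

                if (!cmdline || !argv) {
                        free(cmdline);
                        free(argv);
                        return NULL;
                }
                for (int i = 0; i < nr; i++) {
                        argv[i] = cmdline + len;
                        memcpy(argv[i], src[i], strlen(src[i]) + 1);
                        len += strlen(src[i]) + 1;
                }
                return argv;            /* argv[0] also owns the packed buffer */
        }

        int main(void)
        {
                const char *args[] = { "perf", "record", "-e", "cycles" };
                size_t total = 0;

                for (int i = 0; i < 4; i++)
                        total += strlen(args[i]);

                char **argv = pack_argv(args, 4, total);
                if (!argv)
                        return 1;
                for (int i = 0; argv[i]; i++)
                        printf("%s ", argv[i]);
                putchar('\n');
                free(argv[0]);          /* frees the packed cmdline buffer */
                free(argv);
                return 0;
        }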
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index d4d57962c591..396e4965f0c9 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h | |||
@@ -66,7 +66,7 @@ struct perf_header; | |||
66 | int perf_file_header__read(struct perf_file_header *header, | 66 | int perf_file_header__read(struct perf_file_header *header, |
67 | struct perf_header *ph, int fd); | 67 | struct perf_header *ph, int fd); |
68 | 68 | ||
69 | struct perf_session_env { | 69 | struct perf_env { |
70 | char *hostname; | 70 | char *hostname; |
71 | char *os_release; | 71 | char *os_release; |
72 | char *version; | 72 | char *version; |
@@ -84,6 +84,7 @@ struct perf_session_env { | |||
84 | int nr_pmu_mappings; | 84 | int nr_pmu_mappings; |
85 | int nr_groups; | 85 | int nr_groups; |
86 | char *cmdline; | 86 | char *cmdline; |
87 | const char **cmdline_argv; | ||
87 | char *sibling_cores; | 88 | char *sibling_cores; |
88 | char *sibling_threads; | 89 | char *sibling_threads; |
89 | char *numa_nodes; | 90 | char *numa_nodes; |
@@ -97,7 +98,7 @@ struct perf_header { | |||
97 | u64 data_size; | 98 | u64 data_size; |
98 | u64 feat_offset; | 99 | u64 feat_offset; |
99 | DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); | 100 | DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); |
100 | struct perf_session_env env; | 101 | struct perf_env env; |
101 | }; | 102 | }; |
102 | 103 | ||
103 | struct perf_evlist; | 104 | struct perf_evlist; |
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 6f28d53d4e46..08b6cd945f1e 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -151,6 +151,12 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) | |||
151 | hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); | 151 | hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); |
152 | hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); | 152 | hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); |
153 | 153 | ||
154 | if (h->srcline) | ||
155 | hists__new_col_len(hists, HISTC_SRCLINE, strlen(h->srcline)); | ||
156 | |||
157 | if (h->srcfile) | ||
158 | hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile)); | ||
159 | |||
154 | if (h->transaction) | 160 | if (h->transaction) |
155 | hists__new_col_len(hists, HISTC_TRANSACTION, | 161 | hists__new_col_len(hists, HISTC_TRANSACTION, |
156 | hist_entry__transaction_len()); | 162 | hist_entry__transaction_len()); |
@@ -618,7 +624,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a | |||
618 | * and not events sampled. Thus we use a pseudo period of 1. | 624 | * and not events sampled. Thus we use a pseudo period of 1. |
619 | */ | 625 | */ |
620 | he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, | 626 | he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL, |
621 | 1, 1, 0, true); | 627 | 1, bi->flags.cycles ? bi->flags.cycles : 1, |
628 | 0, true); | ||
622 | if (he == NULL) | 629 | if (he == NULL) |
623 | return -ENOMEM; | 630 | return -ENOMEM; |
624 | 631 | ||
@@ -760,6 +767,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter, | |||
760 | struct hist_entry **he_cache = iter->priv; | 767 | struct hist_entry **he_cache = iter->priv; |
761 | struct hist_entry *he; | 768 | struct hist_entry *he; |
762 | struct hist_entry he_tmp = { | 769 | struct hist_entry he_tmp = { |
770 | .hists = evsel__hists(evsel), | ||
763 | .cpu = al->cpu, | 771 | .cpu = al->cpu, |
764 | .thread = al->thread, | 772 | .thread = al->thread, |
765 | .comm = thread__comm(al->thread), | 773 | .comm = thread__comm(al->thread), |
@@ -944,6 +952,8 @@ void hist_entry__delete(struct hist_entry *he) | |||
944 | 952 | ||
945 | zfree(&he->stat_acc); | 953 | zfree(&he->stat_acc); |
946 | free_srcline(he->srcline); | 954 | free_srcline(he->srcline); |
955 | if (he->srcfile && he->srcfile[0]) | ||
956 | free(he->srcfile); | ||
947 | free_callchain(he->callchain); | 957 | free_callchain(he->callchain); |
948 | free(he); | 958 | free(he); |
949 | } | 959 | } |
@@ -1099,13 +1109,14 @@ void hists__inc_stats(struct hists *hists, struct hist_entry *h) | |||
1099 | 1109 | ||
1100 | static void __hists__insert_output_entry(struct rb_root *entries, | 1110 | static void __hists__insert_output_entry(struct rb_root *entries, |
1101 | struct hist_entry *he, | 1111 | struct hist_entry *he, |
1102 | u64 min_callchain_hits) | 1112 | u64 min_callchain_hits, |
1113 | bool use_callchain) | ||
1103 | { | 1114 | { |
1104 | struct rb_node **p = &entries->rb_node; | 1115 | struct rb_node **p = &entries->rb_node; |
1105 | struct rb_node *parent = NULL; | 1116 | struct rb_node *parent = NULL; |
1106 | struct hist_entry *iter; | 1117 | struct hist_entry *iter; |
1107 | 1118 | ||
1108 | if (symbol_conf.use_callchain) | 1119 | if (use_callchain) |
1109 | callchain_param.sort(&he->sorted_chain, he->callchain, | 1120 | callchain_param.sort(&he->sorted_chain, he->callchain, |
1110 | min_callchain_hits, &callchain_param); | 1121 | min_callchain_hits, &callchain_param); |
1111 | 1122 | ||
@@ -1129,6 +1140,13 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) | |||
1129 | struct rb_node *next; | 1140 | struct rb_node *next; |
1130 | struct hist_entry *n; | 1141 | struct hist_entry *n; |
1131 | u64 min_callchain_hits; | 1142 | u64 min_callchain_hits; |
1143 | struct perf_evsel *evsel = hists_to_evsel(hists); | ||
1144 | bool use_callchain; | ||
1145 | |||
1146 | if (evsel && !symbol_conf.show_ref_callgraph) | ||
1147 | use_callchain = evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN; | ||
1148 | else | ||
1149 | use_callchain = symbol_conf.use_callchain; | ||
1132 | 1150 | ||
1133 | min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); | 1151 | min_callchain_hits = hists->stats.total_period * (callchain_param.min_percent / 100); |
1134 | 1152 | ||
@@ -1147,7 +1165,7 @@ void hists__output_resort(struct hists *hists, struct ui_progress *prog) | |||
1147 | n = rb_entry(next, struct hist_entry, rb_node_in); | 1165 | n = rb_entry(next, struct hist_entry, rb_node_in); |
1148 | next = rb_next(&n->rb_node_in); | 1166 | next = rb_next(&n->rb_node_in); |
1149 | 1167 | ||
1150 | __hists__insert_output_entry(&hists->entries, n, min_callchain_hits); | 1168 | __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain); |
1151 | hists__inc_stats(hists, n); | 1169 | hists__inc_stats(hists, n); |
1152 | 1170 | ||
1153 | if (!n->filtered) | 1171 | if (!n->filtered) |
@@ -1414,6 +1432,39 @@ int hists__link(struct hists *leader, struct hists *other) | |||
1414 | return 0; | 1432 | return 0; |
1415 | } | 1433 | } |
1416 | 1434 | ||
1435 | void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, | ||
1436 | struct perf_sample *sample, bool nonany_branch_mode) | ||
1437 | { | ||
1438 | struct branch_info *bi; | ||
1439 | |||
1440 | /* If we have branch cycles always annotate them. */ | ||
1441 | if (bs && bs->nr && bs->entries[0].flags.cycles) { | ||
1442 | int i; | ||
1443 | |||
1444 | bi = sample__resolve_bstack(sample, al); | ||
1445 | if (bi) { | ||
1446 | struct addr_map_symbol *prev = NULL; | ||
1447 | |||
1448 | /* | ||
1449 | * Ignore errors, still want to process the | ||
1450 | * other entries. | ||
1451 | * | ||
1452 | * For non standard branch modes always | ||
1453 | * force no IPC (prev == NULL) | ||
1454 | * | ||
1455 | * Note that perf stores branches reversed from | ||
1456 | * program order! | ||
1457 | */ | ||
1458 | for (i = bs->nr - 1; i >= 0; i--) { | ||
1459 | addr_map_symbol__account_cycles(&bi[i].from, | ||
1460 | nonany_branch_mode ? NULL : prev, | ||
1461 | bi[i].flags.cycles); | ||
1462 | prev = &bi[i].to; | ||
1463 | } | ||
1464 | free(bi); | ||
1465 | } | ||
1466 | } | ||
1467 | } | ||
1417 | 1468 | ||
1418 | size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) | 1469 | size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp) |
1419 | { | 1470 | { |
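Editor's note: the new hist__account_cycles() iterates the resolved branch stack from the last entry to the first because, as its comment notes, perf stores the entries newest-first; walking from bs->nr - 1 down to 0 visits branches in program order, so prev ends up pointing at the target of the branch taken just before the current one, which bounds the block whose cycle count is attributed. A minimal sketch of that reverse walk, with made-up addresses and none of the addr_map_symbol resolution the real code performs:

#include <stdio.h>

struct br {
	unsigned long long from, to;
	unsigned int cycles;
};

int main(void)
{
	/* Index 0 is the most recent branch, as in a perf branch stack */
	struct br entries[] = {
		{ 0x4030, 0x4100, 7 },	/* newest */
		{ 0x4010, 0x4020, 3 },
		{ 0x4000, 0x4005, 0 },	/* oldest: nothing to attribute yet */
	};
	int nr = sizeof(entries) / sizeof(entries[0]), i;
	const struct br *prev = NULL;

	/* Visit oldest first, mirroring the i = bs->nr - 1 ... 0 loop above */
	for (i = nr - 1; i >= 0; i--) {
		if (prev && entries[i].cycles)
			printf("block %#llx..%#llx ran in %u cycles\n",
			       prev->to, entries[i].from, entries[i].cycles);
		prev = &entries[i];
	}
	return 0;
}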
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 5ed8d9c22981..de6d58e7f0d5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h | |||
@@ -30,6 +30,7 @@ enum hist_column { | |||
30 | HISTC_PARENT, | 30 | HISTC_PARENT, |
31 | HISTC_CPU, | 31 | HISTC_CPU, |
32 | HISTC_SRCLINE, | 32 | HISTC_SRCLINE, |
33 | HISTC_SRCFILE, | ||
33 | HISTC_MISPREDICT, | 34 | HISTC_MISPREDICT, |
34 | HISTC_IN_TX, | 35 | HISTC_IN_TX, |
35 | HISTC_ABORT, | 36 | HISTC_ABORT, |
@@ -47,6 +48,7 @@ enum hist_column { | |||
47 | HISTC_MEM_SNOOP, | 48 | HISTC_MEM_SNOOP, |
48 | HISTC_MEM_DCACHELINE, | 49 | HISTC_MEM_DCACHELINE, |
49 | HISTC_TRANSACTION, | 50 | HISTC_TRANSACTION, |
51 | HISTC_CYCLES, | ||
50 | HISTC_NR_COLS, /* Last entry */ | 52 | HISTC_NR_COLS, /* Last entry */ |
51 | }; | 53 | }; |
52 | 54 | ||
@@ -311,7 +313,7 @@ int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, | |||
311 | int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, | 313 | int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, |
312 | struct hist_browser_timer *hbt, | 314 | struct hist_browser_timer *hbt, |
313 | float min_pcnt, | 315 | float min_pcnt, |
314 | struct perf_session_env *env); | 316 | struct perf_env *env); |
315 | int script_browse(const char *script_opt); | 317 | int script_browse(const char *script_opt); |
316 | #else | 318 | #else |
317 | static inline | 319 | static inline |
@@ -319,7 +321,7 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, | |||
319 | const char *help __maybe_unused, | 321 | const char *help __maybe_unused, |
320 | struct hist_browser_timer *hbt __maybe_unused, | 322 | struct hist_browser_timer *hbt __maybe_unused, |
321 | float min_pcnt __maybe_unused, | 323 | float min_pcnt __maybe_unused, |
322 | struct perf_session_env *env __maybe_unused) | 324 | struct perf_env *env __maybe_unused) |
323 | { | 325 | { |
324 | return 0; | 326 | return 0; |
325 | } | 327 | } |
@@ -349,6 +351,9 @@ static inline int script_browse(const char *script_opt __maybe_unused) | |||
349 | 351 | ||
350 | unsigned int hists__sort_list_width(struct hists *hists); | 352 | unsigned int hists__sort_list_width(struct hists *hists); |
351 | 353 | ||
354 | void hist__account_cycles(struct branch_stack *bs, struct addr_location *al, | ||
355 | struct perf_sample *sample, bool nonany_branch_mode); | ||
356 | |||
352 | struct option; | 357 | struct option; |
353 | int parse_filter_percentage(const struct option *opt __maybe_unused, | 358 | int parse_filter_percentage(const struct option *opt __maybe_unused, |
354 | const char *arg, int unset __maybe_unused); | 359 | const char *arg, int unset __maybe_unused); |
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c new file mode 100644 index 000000000000..eb0e7f8bf515 --- /dev/null +++ b/tools/perf/util/intel-bts.c | |||
@@ -0,0 +1,933 @@ | |||
1 | /* | ||
2 | * intel-bts.c: Intel Branch Trace Store (BTS) support | ||
3 | * Copyright (c) 2013-2015, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <endian.h> | ||
17 | #include <byteswap.h> | ||
18 | #include <linux/kernel.h> | ||
19 | #include <linux/types.h> | ||
20 | #include <linux/bitops.h> | ||
21 | #include <linux/log2.h> | ||
22 | |||
23 | #include "cpumap.h" | ||
24 | #include "color.h" | ||
25 | #include "evsel.h" | ||
26 | #include "evlist.h" | ||
27 | #include "machine.h" | ||
28 | #include "session.h" | ||
29 | #include "util.h" | ||
30 | #include "thread.h" | ||
31 | #include "thread-stack.h" | ||
32 | #include "debug.h" | ||
33 | #include "tsc.h" | ||
34 | #include "auxtrace.h" | ||
35 | #include "intel-pt-decoder/intel-pt-insn-decoder.h" | ||
36 | #include "intel-bts.h" | ||
37 | |||
38 | #define MAX_TIMESTAMP (~0ULL) | ||
39 | |||
40 | #define INTEL_BTS_ERR_NOINSN 5 | ||
41 | #define INTEL_BTS_ERR_LOST 9 | ||
42 | |||
43 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
44 | #define le64_to_cpu bswap_64 | ||
45 | #else | ||
46 | #define le64_to_cpu | ||
47 | #endif | ||
48 | |||
49 | struct intel_bts { | ||
50 | struct auxtrace auxtrace; | ||
51 | struct auxtrace_queues queues; | ||
52 | struct auxtrace_heap heap; | ||
53 | u32 auxtrace_type; | ||
54 | struct perf_session *session; | ||
55 | struct machine *machine; | ||
56 | bool sampling_mode; | ||
57 | bool snapshot_mode; | ||
58 | bool data_queued; | ||
59 | u32 pmu_type; | ||
60 | struct perf_tsc_conversion tc; | ||
61 | bool cap_user_time_zero; | ||
62 | struct itrace_synth_opts synth_opts; | ||
63 | bool sample_branches; | ||
64 | u32 branches_filter; | ||
65 | u64 branches_sample_type; | ||
66 | u64 branches_id; | ||
67 | size_t branches_event_size; | ||
68 | bool synth_needs_swap; | ||
69 | }; | ||
70 | |||
71 | struct intel_bts_queue { | ||
72 | struct intel_bts *bts; | ||
73 | unsigned int queue_nr; | ||
74 | struct auxtrace_buffer *buffer; | ||
75 | bool on_heap; | ||
76 | bool done; | ||
77 | pid_t pid; | ||
78 | pid_t tid; | ||
79 | int cpu; | ||
80 | u64 time; | ||
81 | struct intel_pt_insn intel_pt_insn; | ||
82 | u32 sample_flags; | ||
83 | }; | ||
84 | |||
85 | struct branch { | ||
86 | u64 from; | ||
87 | u64 to; | ||
88 | u64 misc; | ||
89 | }; | ||
90 | |||
91 | static void intel_bts_dump(struct intel_bts *bts __maybe_unused, | ||
92 | unsigned char *buf, size_t len) | ||
93 | { | ||
94 | struct branch *branch; | ||
95 | size_t i, pos = 0, br_sz = sizeof(struct branch), sz; | ||
96 | const char *color = PERF_COLOR_BLUE; | ||
97 | |||
98 | color_fprintf(stdout, color, | ||
99 | ". ... Intel BTS data: size %zu bytes\n", | ||
100 | len); | ||
101 | |||
102 | while (len) { | ||
103 | if (len >= br_sz) | ||
104 | sz = br_sz; | ||
105 | else | ||
106 | sz = len; | ||
107 | printf("."); | ||
108 | color_fprintf(stdout, color, " %08x: ", pos); | ||
109 | for (i = 0; i < sz; i++) | ||
110 | color_fprintf(stdout, color, " %02x", buf[i]); | ||
111 | for (; i < br_sz; i++) | ||
112 | color_fprintf(stdout, color, " "); | ||
113 | if (len >= br_sz) { | ||
114 | branch = (struct branch *)buf; | ||
115 | color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n", | ||
116 | le64_to_cpu(branch->from), | ||
117 | le64_to_cpu(branch->to), | ||
118 | le64_to_cpu(branch->misc) & 0x10 ? | ||
119 | "pred" : "miss"); | ||
120 | } else { | ||
121 | color_fprintf(stdout, color, " Bad record!\n"); | ||
122 | } | ||
123 | pos += sz; | ||
124 | buf += sz; | ||
125 | len -= sz; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf, | ||
130 | size_t len) | ||
131 | { | ||
132 | printf(".\n"); | ||
133 | intel_bts_dump(bts, buf, len); | ||
134 | } | ||
135 | |||
136 | static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample) | ||
137 | { | ||
138 | union perf_event event; | ||
139 | int err; | ||
140 | |||
141 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | ||
142 | INTEL_BTS_ERR_LOST, sample->cpu, sample->pid, | ||
143 | sample->tid, 0, "Lost trace data"); | ||
144 | |||
145 | err = perf_session__deliver_synth_event(bts->session, &event, NULL); | ||
146 | if (err) | ||
147 | pr_err("Intel BTS: failed to deliver error event, error %d\n", | ||
148 | err); | ||
149 | |||
150 | return err; | ||
151 | } | ||
152 | |||
153 | static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts, | ||
154 | unsigned int queue_nr) | ||
155 | { | ||
156 | struct intel_bts_queue *btsq; | ||
157 | |||
158 | btsq = zalloc(sizeof(struct intel_bts_queue)); | ||
159 | if (!btsq) | ||
160 | return NULL; | ||
161 | |||
162 | btsq->bts = bts; | ||
163 | btsq->queue_nr = queue_nr; | ||
164 | btsq->pid = -1; | ||
165 | btsq->tid = -1; | ||
166 | btsq->cpu = -1; | ||
167 | |||
168 | return btsq; | ||
169 | } | ||
170 | |||
171 | static int intel_bts_setup_queue(struct intel_bts *bts, | ||
172 | struct auxtrace_queue *queue, | ||
173 | unsigned int queue_nr) | ||
174 | { | ||
175 | struct intel_bts_queue *btsq = queue->priv; | ||
176 | |||
177 | if (list_empty(&queue->head)) | ||
178 | return 0; | ||
179 | |||
180 | if (!btsq) { | ||
181 | btsq = intel_bts_alloc_queue(bts, queue_nr); | ||
182 | if (!btsq) | ||
183 | return -ENOMEM; | ||
184 | queue->priv = btsq; | ||
185 | |||
186 | if (queue->cpu != -1) | ||
187 | btsq->cpu = queue->cpu; | ||
188 | btsq->tid = queue->tid; | ||
189 | } | ||
190 | |||
191 | if (bts->sampling_mode) | ||
192 | return 0; | ||
193 | |||
194 | if (!btsq->on_heap && !btsq->buffer) { | ||
195 | int ret; | ||
196 | |||
197 | btsq->buffer = auxtrace_buffer__next(queue, NULL); | ||
198 | if (!btsq->buffer) | ||
199 | return 0; | ||
200 | |||
201 | ret = auxtrace_heap__add(&bts->heap, queue_nr, | ||
202 | btsq->buffer->reference); | ||
203 | if (ret) | ||
204 | return ret; | ||
205 | btsq->on_heap = true; | ||
206 | } | ||
207 | |||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | static int intel_bts_setup_queues(struct intel_bts *bts) | ||
212 | { | ||
213 | unsigned int i; | ||
214 | int ret; | ||
215 | |||
216 | for (i = 0; i < bts->queues.nr_queues; i++) { | ||
217 | ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i], | ||
218 | i); | ||
219 | if (ret) | ||
220 | return ret; | ||
221 | } | ||
222 | return 0; | ||
223 | } | ||
224 | |||
225 | static inline int intel_bts_update_queues(struct intel_bts *bts) | ||
226 | { | ||
227 | if (bts->queues.new_data) { | ||
228 | bts->queues.new_data = false; | ||
229 | return intel_bts_setup_queues(bts); | ||
230 | } | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a, | ||
235 | unsigned char *buf_b, size_t len_b) | ||
236 | { | ||
237 | size_t offs, len; | ||
238 | |||
239 | if (len_a > len_b) | ||
240 | offs = len_a - len_b; | ||
241 | else | ||
242 | offs = 0; | ||
243 | |||
244 | for (; offs < len_a; offs += sizeof(struct branch)) { | ||
245 | len = len_a - offs; | ||
246 | if (!memcmp(buf_a + offs, buf_b, len)) | ||
247 | return buf_b + len; | ||
248 | } | ||
249 | |||
250 | return buf_b; | ||
251 | } | ||
252 | |||
253 | static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue, | ||
254 | struct auxtrace_buffer *b) | ||
255 | { | ||
256 | struct auxtrace_buffer *a; | ||
257 | void *start; | ||
258 | |||
259 | if (b->list.prev == &queue->head) | ||
260 | return 0; | ||
261 | a = list_entry(b->list.prev, struct auxtrace_buffer, list); | ||
262 | start = intel_bts_find_overlap(a->data, a->size, b->data, b->size); | ||
263 | if (!start) | ||
264 | return -EINVAL; | ||
265 | b->use_size = b->data + b->size - start; | ||
266 | b->use_data = start; | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, | ||
271 | struct branch *branch) | ||
272 | { | ||
273 | int ret; | ||
274 | struct intel_bts *bts = btsq->bts; | ||
275 | union perf_event event; | ||
276 | struct perf_sample sample = { .ip = 0, }; | ||
277 | |||
278 | event.sample.header.type = PERF_RECORD_SAMPLE; | ||
279 | event.sample.header.misc = PERF_RECORD_MISC_USER; | ||
280 | event.sample.header.size = sizeof(struct perf_event_header); | ||
281 | |||
282 | sample.ip = le64_to_cpu(branch->from); | ||
283 | sample.pid = btsq->pid; | ||
284 | sample.tid = btsq->tid; | ||
285 | sample.addr = le64_to_cpu(branch->to); | ||
286 | sample.id = btsq->bts->branches_id; | ||
287 | sample.stream_id = btsq->bts->branches_id; | ||
288 | sample.period = 1; | ||
289 | sample.cpu = btsq->cpu; | ||
290 | sample.flags = btsq->sample_flags; | ||
291 | sample.insn_len = btsq->intel_pt_insn.length; | ||
292 | |||
293 | if (bts->synth_opts.inject) { | ||
294 | event.sample.header.size = bts->branches_event_size; | ||
295 | ret = perf_event__synthesize_sample(&event, | ||
296 | bts->branches_sample_type, | ||
297 | 0, &sample, | ||
298 | bts->synth_needs_swap); | ||
299 | if (ret) | ||
300 | return ret; | ||
301 | } | ||
302 | |||
303 | ret = perf_session__deliver_synth_event(bts->session, &event, &sample); | ||
304 | if (ret) | ||
305 | pr_err("Intel BTS: failed to deliver branch event, error %d\n", | ||
306 | ret); | ||
307 | |||
308 | return ret; | ||
309 | } | ||
310 | |||
311 | static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip) | ||
312 | { | ||
313 | struct machine *machine = btsq->bts->machine; | ||
314 | struct thread *thread; | ||
315 | struct addr_location al; | ||
316 | unsigned char buf[1024]; | ||
317 | size_t bufsz; | ||
318 | ssize_t len; | ||
319 | int x86_64; | ||
320 | uint8_t cpumode; | ||
321 | int err = -1; | ||
322 | |||
323 | bufsz = intel_pt_insn_max_size(); | ||
324 | |||
325 | if (machine__kernel_ip(machine, ip)) | ||
326 | cpumode = PERF_RECORD_MISC_KERNEL; | ||
327 | else | ||
328 | cpumode = PERF_RECORD_MISC_USER; | ||
329 | |||
330 | thread = machine__find_thread(machine, -1, btsq->tid); | ||
331 | if (!thread) | ||
332 | return -1; | ||
333 | |||
334 | thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al); | ||
335 | if (!al.map || !al.map->dso) | ||
336 | goto out_put; | ||
337 | |||
338 | len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz); | ||
339 | if (len <= 0) | ||
340 | goto out_put; | ||
341 | |||
342 | /* Load maps to ensure dso->is_64_bit has been updated */ | ||
343 | map__load(al.map, machine->symbol_filter); | ||
344 | |||
345 | x86_64 = al.map->dso->is_64_bit; | ||
346 | |||
347 | if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn)) | ||
348 | goto out_put; | ||
349 | |||
350 | err = 0; | ||
351 | out_put: | ||
352 | thread__put(thread); | ||
353 | return err; | ||
354 | } | ||
355 | |||
356 | static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid, | ||
357 | pid_t tid, u64 ip) | ||
358 | { | ||
359 | union perf_event event; | ||
360 | int err; | ||
361 | |||
362 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | ||
363 | INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip, | ||
364 | "Failed to get instruction"); | ||
365 | |||
366 | err = perf_session__deliver_synth_event(bts->session, &event, NULL); | ||
367 | if (err) | ||
368 | pr_err("Intel BTS: failed to deliver error event, error %d\n", | ||
369 | err); | ||
370 | |||
371 | return err; | ||
372 | } | ||
373 | |||
374 | static int intel_bts_get_branch_type(struct intel_bts_queue *btsq, | ||
375 | struct branch *branch) | ||
376 | { | ||
377 | int err; | ||
378 | |||
379 | if (!branch->from) { | ||
380 | if (branch->to) | ||
381 | btsq->sample_flags = PERF_IP_FLAG_BRANCH | | ||
382 | PERF_IP_FLAG_TRACE_BEGIN; | ||
383 | else | ||
384 | btsq->sample_flags = 0; | ||
385 | btsq->intel_pt_insn.length = 0; | ||
386 | } else if (!branch->to) { | ||
387 | btsq->sample_flags = PERF_IP_FLAG_BRANCH | | ||
388 | PERF_IP_FLAG_TRACE_END; | ||
389 | btsq->intel_pt_insn.length = 0; | ||
390 | } else { | ||
391 | err = intel_bts_get_next_insn(btsq, branch->from); | ||
392 | if (err) { | ||
393 | btsq->sample_flags = 0; | ||
394 | btsq->intel_pt_insn.length = 0; | ||
395 | if (!btsq->bts->synth_opts.errors) | ||
396 | return 0; | ||
397 | err = intel_bts_synth_error(btsq->bts, btsq->cpu, | ||
398 | btsq->pid, btsq->tid, | ||
399 | branch->from); | ||
400 | return err; | ||
401 | } | ||
402 | btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op); | ||
403 | /* Check for an async branch into the kernel */ | ||
404 | if (!machine__kernel_ip(btsq->bts->machine, branch->from) && | ||
405 | machine__kernel_ip(btsq->bts->machine, branch->to) && | ||
406 | btsq->sample_flags != (PERF_IP_FLAG_BRANCH | | ||
407 | PERF_IP_FLAG_CALL | | ||
408 | PERF_IP_FLAG_SYSCALLRET)) | ||
409 | btsq->sample_flags = PERF_IP_FLAG_BRANCH | | ||
410 | PERF_IP_FLAG_CALL | | ||
411 | PERF_IP_FLAG_ASYNC | | ||
412 | PERF_IP_FLAG_INTERRUPT; | ||
413 | } | ||
414 | |||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | static int intel_bts_process_buffer(struct intel_bts_queue *btsq, | ||
419 | struct auxtrace_buffer *buffer) | ||
420 | { | ||
421 | struct branch *branch; | ||
422 | size_t sz, bsz = sizeof(struct branch); | ||
423 | u32 filter = btsq->bts->branches_filter; | ||
424 | int err = 0; | ||
425 | |||
426 | if (buffer->use_data) { | ||
427 | sz = buffer->use_size; | ||
428 | branch = buffer->use_data; | ||
429 | } else { | ||
430 | sz = buffer->size; | ||
431 | branch = buffer->data; | ||
432 | } | ||
433 | |||
434 | if (!btsq->bts->sample_branches) | ||
435 | return 0; | ||
436 | |||
437 | for (; sz > bsz; branch += 1, sz -= bsz) { | ||
438 | if (!branch->from && !branch->to) | ||
439 | continue; | ||
440 | intel_bts_get_branch_type(btsq, branch); | ||
441 | if (filter && !(filter & btsq->sample_flags)) | ||
442 | continue; | ||
443 | err = intel_bts_synth_branch_sample(btsq, branch); | ||
444 | if (err) | ||
445 | break; | ||
446 | } | ||
447 | return err; | ||
448 | } | ||
449 | |||
450 | static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp) | ||
451 | { | ||
452 | struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer; | ||
453 | struct auxtrace_queue *queue; | ||
454 | struct thread *thread; | ||
455 | int err; | ||
456 | |||
457 | if (btsq->done) | ||
458 | return 1; | ||
459 | |||
460 | if (btsq->pid == -1) { | ||
461 | thread = machine__find_thread(btsq->bts->machine, -1, | ||
462 | btsq->tid); | ||
463 | if (thread) | ||
464 | btsq->pid = thread->pid_; | ||
465 | } else { | ||
466 | thread = machine__findnew_thread(btsq->bts->machine, btsq->pid, | ||
467 | btsq->tid); | ||
468 | } | ||
469 | |||
470 | queue = &btsq->bts->queues.queue_array[btsq->queue_nr]; | ||
471 | |||
472 | if (!buffer) | ||
473 | buffer = auxtrace_buffer__next(queue, NULL); | ||
474 | |||
475 | if (!buffer) { | ||
476 | if (!btsq->bts->sampling_mode) | ||
477 | btsq->done = 1; | ||
478 | err = 1; | ||
479 | goto out_put; | ||
480 | } | ||
481 | |||
482 | /* Currently there is no support for split buffers */ | ||
483 | if (buffer->consecutive) { | ||
484 | err = -EINVAL; | ||
485 | goto out_put; | ||
486 | } | ||
487 | |||
488 | if (!buffer->data) { | ||
489 | int fd = perf_data_file__fd(btsq->bts->session->file); | ||
490 | |||
491 | buffer->data = auxtrace_buffer__get_data(buffer, fd); | ||
492 | if (!buffer->data) { | ||
493 | err = -ENOMEM; | ||
494 | goto out_put; | ||
495 | } | ||
496 | } | ||
497 | |||
498 | if (btsq->bts->snapshot_mode && !buffer->consecutive && | ||
499 | intel_bts_do_fix_overlap(queue, buffer)) { | ||
500 | err = -ENOMEM; | ||
501 | goto out_put; | ||
502 | } | ||
503 | |||
504 | if (!btsq->bts->synth_opts.callchain && thread && | ||
505 | (!old_buffer || btsq->bts->sampling_mode || | ||
506 | (btsq->bts->snapshot_mode && !buffer->consecutive))) | ||
507 | thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1); | ||
508 | |||
509 | err = intel_bts_process_buffer(btsq, buffer); | ||
510 | |||
511 | auxtrace_buffer__drop_data(buffer); | ||
512 | |||
513 | btsq->buffer = auxtrace_buffer__next(queue, buffer); | ||
514 | if (btsq->buffer) { | ||
515 | if (timestamp) | ||
516 | *timestamp = btsq->buffer->reference; | ||
517 | } else { | ||
518 | if (!btsq->bts->sampling_mode) | ||
519 | btsq->done = 1; | ||
520 | } | ||
521 | out_put: | ||
522 | thread__put(thread); | ||
523 | return err; | ||
524 | } | ||
525 | |||
526 | static int intel_bts_flush_queue(struct intel_bts_queue *btsq) | ||
527 | { | ||
528 | u64 ts = 0; | ||
529 | int ret; | ||
530 | |||
531 | while (1) { | ||
532 | ret = intel_bts_process_queue(btsq, &ts); | ||
533 | if (ret < 0) | ||
534 | return ret; | ||
535 | if (ret) | ||
536 | break; | ||
537 | } | ||
538 | return 0; | ||
539 | } | ||
540 | |||
541 | static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid) | ||
542 | { | ||
543 | struct auxtrace_queues *queues = &bts->queues; | ||
544 | unsigned int i; | ||
545 | |||
546 | for (i = 0; i < queues->nr_queues; i++) { | ||
547 | struct auxtrace_queue *queue = &bts->queues.queue_array[i]; | ||
548 | struct intel_bts_queue *btsq = queue->priv; | ||
549 | |||
550 | if (btsq && btsq->tid == tid) | ||
551 | return intel_bts_flush_queue(btsq); | ||
552 | } | ||
553 | return 0; | ||
554 | } | ||
555 | |||
556 | static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp) | ||
557 | { | ||
558 | while (1) { | ||
559 | unsigned int queue_nr; | ||
560 | struct auxtrace_queue *queue; | ||
561 | struct intel_bts_queue *btsq; | ||
562 | u64 ts = 0; | ||
563 | int ret; | ||
564 | |||
565 | if (!bts->heap.heap_cnt) | ||
566 | return 0; | ||
567 | |||
568 | if (bts->heap.heap_array[0].ordinal > timestamp) | ||
569 | return 0; | ||
570 | |||
571 | queue_nr = bts->heap.heap_array[0].queue_nr; | ||
572 | queue = &bts->queues.queue_array[queue_nr]; | ||
573 | btsq = queue->priv; | ||
574 | |||
575 | auxtrace_heap__pop(&bts->heap); | ||
576 | |||
577 | ret = intel_bts_process_queue(btsq, &ts); | ||
578 | if (ret < 0) { | ||
579 | auxtrace_heap__add(&bts->heap, queue_nr, ts); | ||
580 | return ret; | ||
581 | } | ||
582 | |||
583 | if (!ret) { | ||
584 | ret = auxtrace_heap__add(&bts->heap, queue_nr, ts); | ||
585 | if (ret < 0) | ||
586 | return ret; | ||
587 | } else { | ||
588 | btsq->on_heap = false; | ||
589 | } | ||
590 | } | ||
591 | |||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | static int intel_bts_process_event(struct perf_session *session, | ||
596 | union perf_event *event, | ||
597 | struct perf_sample *sample, | ||
598 | struct perf_tool *tool) | ||
599 | { | ||
600 | struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, | ||
601 | auxtrace); | ||
602 | u64 timestamp; | ||
603 | int err; | ||
604 | |||
605 | if (dump_trace) | ||
606 | return 0; | ||
607 | |||
608 | if (!tool->ordered_events) { | ||
609 | pr_err("Intel BTS requires ordered events\n"); | ||
610 | return -EINVAL; | ||
611 | } | ||
612 | |||
613 | if (sample->time && sample->time != (u64)-1) | ||
614 | timestamp = perf_time_to_tsc(sample->time, &bts->tc); | ||
615 | else | ||
616 | timestamp = 0; | ||
617 | |||
618 | err = intel_bts_update_queues(bts); | ||
619 | if (err) | ||
620 | return err; | ||
621 | |||
622 | err = intel_bts_process_queues(bts, timestamp); | ||
623 | if (err) | ||
624 | return err; | ||
625 | if (event->header.type == PERF_RECORD_EXIT) { | ||
626 | err = intel_bts_process_tid_exit(bts, event->fork.tid); | ||
627 | if (err) | ||
628 | return err; | ||
629 | } | ||
630 | |||
631 | if (event->header.type == PERF_RECORD_AUX && | ||
632 | (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && | ||
633 | bts->synth_opts.errors) | ||
634 | err = intel_bts_lost(bts, sample); | ||
635 | |||
636 | return err; | ||
637 | } | ||
638 | |||
639 | static int intel_bts_process_auxtrace_event(struct perf_session *session, | ||
640 | union perf_event *event, | ||
641 | struct perf_tool *tool __maybe_unused) | ||
642 | { | ||
643 | struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, | ||
644 | auxtrace); | ||
645 | |||
646 | if (bts->sampling_mode) | ||
647 | return 0; | ||
648 | |||
649 | if (!bts->data_queued) { | ||
650 | struct auxtrace_buffer *buffer; | ||
651 | off_t data_offset; | ||
652 | int fd = perf_data_file__fd(session->file); | ||
653 | int err; | ||
654 | |||
655 | if (perf_data_file__is_pipe(session->file)) { | ||
656 | data_offset = 0; | ||
657 | } else { | ||
658 | data_offset = lseek(fd, 0, SEEK_CUR); | ||
659 | if (data_offset == -1) | ||
660 | return -errno; | ||
661 | } | ||
662 | |||
663 | err = auxtrace_queues__add_event(&bts->queues, session, event, | ||
664 | data_offset, &buffer); | ||
665 | if (err) | ||
666 | return err; | ||
667 | |||
668 | /* Dump here, now that we have copied a piped trace out of the pipe */ | ||
669 | if (dump_trace) { | ||
670 | if (auxtrace_buffer__get_data(buffer, fd)) { | ||
671 | intel_bts_dump_event(bts, buffer->data, | ||
672 | buffer->size); | ||
673 | auxtrace_buffer__put_data(buffer); | ||
674 | } | ||
675 | } | ||
676 | } | ||
677 | |||
678 | return 0; | ||
679 | } | ||
680 | |||
681 | static int intel_bts_flush(struct perf_session *session __maybe_unused, | ||
682 | struct perf_tool *tool __maybe_unused) | ||
683 | { | ||
684 | struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, | ||
685 | auxtrace); | ||
686 | int ret; | ||
687 | |||
688 | if (dump_trace || bts->sampling_mode) | ||
689 | return 0; | ||
690 | |||
691 | if (!tool->ordered_events) | ||
692 | return -EINVAL; | ||
693 | |||
694 | ret = intel_bts_update_queues(bts); | ||
695 | if (ret < 0) | ||
696 | return ret; | ||
697 | |||
698 | return intel_bts_process_queues(bts, MAX_TIMESTAMP); | ||
699 | } | ||
700 | |||
701 | static void intel_bts_free_queue(void *priv) | ||
702 | { | ||
703 | struct intel_bts_queue *btsq = priv; | ||
704 | |||
705 | if (!btsq) | ||
706 | return; | ||
707 | free(btsq); | ||
708 | } | ||
709 | |||
710 | static void intel_bts_free_events(struct perf_session *session) | ||
711 | { | ||
712 | struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, | ||
713 | auxtrace); | ||
714 | struct auxtrace_queues *queues = &bts->queues; | ||
715 | unsigned int i; | ||
716 | |||
717 | for (i = 0; i < queues->nr_queues; i++) { | ||
718 | intel_bts_free_queue(queues->queue_array[i].priv); | ||
719 | queues->queue_array[i].priv = NULL; | ||
720 | } | ||
721 | auxtrace_queues__free(queues); | ||
722 | } | ||
723 | |||
724 | static void intel_bts_free(struct perf_session *session) | ||
725 | { | ||
726 | struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts, | ||
727 | auxtrace); | ||
728 | |||
729 | auxtrace_heap__free(&bts->heap); | ||
730 | intel_bts_free_events(session); | ||
731 | session->auxtrace = NULL; | ||
732 | free(bts); | ||
733 | } | ||
734 | |||
735 | struct intel_bts_synth { | ||
736 | struct perf_tool dummy_tool; | ||
737 | struct perf_session *session; | ||
738 | }; | ||
739 | |||
740 | static int intel_bts_event_synth(struct perf_tool *tool, | ||
741 | union perf_event *event, | ||
742 | struct perf_sample *sample __maybe_unused, | ||
743 | struct machine *machine __maybe_unused) | ||
744 | { | ||
745 | struct intel_bts_synth *intel_bts_synth = | ||
746 | container_of(tool, struct intel_bts_synth, dummy_tool); | ||
747 | |||
748 | return perf_session__deliver_synth_event(intel_bts_synth->session, | ||
749 | event, NULL); | ||
750 | } | ||
751 | |||
752 | static int intel_bts_synth_event(struct perf_session *session, | ||
753 | struct perf_event_attr *attr, u64 id) | ||
754 | { | ||
755 | struct intel_bts_synth intel_bts_synth; | ||
756 | |||
757 | memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth)); | ||
758 | intel_bts_synth.session = session; | ||
759 | |||
760 | return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1, | ||
761 | &id, intel_bts_event_synth); | ||
762 | } | ||
763 | |||
764 | static int intel_bts_synth_events(struct intel_bts *bts, | ||
765 | struct perf_session *session) | ||
766 | { | ||
767 | struct perf_evlist *evlist = session->evlist; | ||
768 | struct perf_evsel *evsel; | ||
769 | struct perf_event_attr attr; | ||
770 | bool found = false; | ||
771 | u64 id; | ||
772 | int err; | ||
773 | |||
774 | evlist__for_each(evlist, evsel) { | ||
775 | if (evsel->attr.type == bts->pmu_type && evsel->ids) { | ||
776 | found = true; | ||
777 | break; | ||
778 | } | ||
779 | } | ||
780 | |||
781 | if (!found) { | ||
782 | pr_debug("There are no selected events with Intel BTS data\n"); | ||
783 | return 0; | ||
784 | } | ||
785 | |||
786 | memset(&attr, 0, sizeof(struct perf_event_attr)); | ||
787 | attr.size = sizeof(struct perf_event_attr); | ||
788 | attr.type = PERF_TYPE_HARDWARE; | ||
789 | attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; | ||
790 | attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | | ||
791 | PERF_SAMPLE_PERIOD; | ||
792 | attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; | ||
793 | attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; | ||
794 | attr.exclude_user = evsel->attr.exclude_user; | ||
795 | attr.exclude_kernel = evsel->attr.exclude_kernel; | ||
796 | attr.exclude_hv = evsel->attr.exclude_hv; | ||
797 | attr.exclude_host = evsel->attr.exclude_host; | ||
798 | attr.exclude_guest = evsel->attr.exclude_guest; | ||
799 | attr.sample_id_all = evsel->attr.sample_id_all; | ||
800 | attr.read_format = evsel->attr.read_format; | ||
801 | |||
802 | id = evsel->id[0] + 1000000000; | ||
803 | if (!id) | ||
804 | id = 1; | ||
805 | |||
806 | if (bts->synth_opts.branches) { | ||
807 | attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; | ||
808 | attr.sample_period = 1; | ||
809 | attr.sample_type |= PERF_SAMPLE_ADDR; | ||
810 | pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | ||
811 | id, (u64)attr.sample_type); | ||
812 | err = intel_bts_synth_event(session, &attr, id); | ||
813 | if (err) { | ||
814 | pr_err("%s: failed to synthesize 'branches' event type\n", | ||
815 | __func__); | ||
816 | return err; | ||
817 | } | ||
818 | bts->sample_branches = true; | ||
819 | bts->branches_sample_type = attr.sample_type; | ||
820 | bts->branches_id = id; | ||
821 | /* | ||
822 | * We only use sample types from PERF_SAMPLE_MASK so we can use | ||
823 | * __perf_evsel__sample_size() here. | ||
824 | */ | ||
825 | bts->branches_event_size = sizeof(struct sample_event) + | ||
826 | __perf_evsel__sample_size(attr.sample_type); | ||
827 | } | ||
828 | |||
829 | bts->synth_needs_swap = evsel->needs_swap; | ||
830 | |||
831 | return 0; | ||
832 | } | ||
833 | |||
834 | static const char * const intel_bts_info_fmts[] = { | ||
835 | [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", | ||
836 | [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", | ||
837 | [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n", | ||
838 | [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", | ||
839 | [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", | ||
840 | [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", | ||
841 | }; | ||
842 | |||
843 | static void intel_bts_print_info(u64 *arr, int start, int finish) | ||
844 | { | ||
845 | int i; | ||
846 | |||
847 | if (!dump_trace) | ||
848 | return; | ||
849 | |||
850 | for (i = start; i <= finish; i++) | ||
851 | fprintf(stdout, intel_bts_info_fmts[i], arr[i]); | ||
852 | } | ||
853 | |||
854 | u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE]; | ||
855 | |||
856 | int intel_bts_process_auxtrace_info(union perf_event *event, | ||
857 | struct perf_session *session) | ||
858 | { | ||
859 | struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; | ||
860 | size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE; | ||
861 | struct intel_bts *bts; | ||
862 | int err; | ||
863 | |||
864 | if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + | ||
865 | min_sz) | ||
866 | return -EINVAL; | ||
867 | |||
868 | bts = zalloc(sizeof(struct intel_bts)); | ||
869 | if (!bts) | ||
870 | return -ENOMEM; | ||
871 | |||
872 | err = auxtrace_queues__init(&bts->queues); | ||
873 | if (err) | ||
874 | goto err_free; | ||
875 | |||
876 | bts->session = session; | ||
877 | bts->machine = &session->machines.host; /* No kvm support */ | ||
878 | bts->auxtrace_type = auxtrace_info->type; | ||
879 | bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE]; | ||
880 | bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT]; | ||
881 | bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT]; | ||
882 | bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO]; | ||
883 | bts->cap_user_time_zero = | ||
884 | auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO]; | ||
885 | bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE]; | ||
886 | |||
887 | bts->sampling_mode = false; | ||
888 | |||
889 | bts->auxtrace.process_event = intel_bts_process_event; | ||
890 | bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event; | ||
891 | bts->auxtrace.flush_events = intel_bts_flush; | ||
892 | bts->auxtrace.free_events = intel_bts_free_events; | ||
893 | bts->auxtrace.free = intel_bts_free; | ||
894 | session->auxtrace = &bts->auxtrace; | ||
895 | |||
896 | intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE, | ||
897 | INTEL_BTS_SNAPSHOT_MODE); | ||
898 | |||
899 | if (dump_trace) | ||
900 | return 0; | ||
901 | |||
902 | if (session->itrace_synth_opts && session->itrace_synth_opts->set) | ||
903 | bts->synth_opts = *session->itrace_synth_opts; | ||
904 | else | ||
905 | itrace_synth_opts__set_default(&bts->synth_opts); | ||
906 | |||
907 | if (bts->synth_opts.calls) | ||
908 | bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | | ||
909 | PERF_IP_FLAG_TRACE_END; | ||
910 | if (bts->synth_opts.returns) | ||
911 | bts->branches_filter |= PERF_IP_FLAG_RETURN | | ||
912 | PERF_IP_FLAG_TRACE_BEGIN; | ||
913 | |||
914 | err = intel_bts_synth_events(bts, session); | ||
915 | if (err) | ||
916 | goto err_free_queues; | ||
917 | |||
918 | err = auxtrace_queues__process_index(&bts->queues, session); | ||
919 | if (err) | ||
920 | goto err_free_queues; | ||
921 | |||
922 | if (bts->queues.populated) | ||
923 | bts->data_queued = true; | ||
924 | |||
925 | return 0; | ||
926 | |||
927 | err_free_queues: | ||
928 | auxtrace_queues__free(&bts->queues); | ||
929 | session->auxtrace = NULL; | ||
930 | err_free: | ||
931 | free(bts); | ||
932 | return err; | ||
933 | } | ||
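Editor's note: the raw data intel_bts_process_buffer() works through is simply an array of the 24-byte records defined by struct branch above: a from address, a to address, and a misc word whose 0x10 bit intel_bts_dump() interprets as "predicted". A small standalone sketch of walking such a buffer, using fabricated records and assuming a little-endian host (the code above byte-swaps when built for big-endian):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct branch {			/* 24-byte BTS record, as above */
	uint64_t from;
	uint64_t to;
	uint64_t misc;
};

int main(void)
{
	unsigned char raw[2 * sizeof(struct branch)];
	struct branch b = { 0x401000, 0x401050, 0x10 };	/* predicted */
	size_t off;

	/* Fabricate a two-record buffer */
	memcpy(raw, &b, sizeof(b));
	b.from = 0x401050; b.to = 0x402000; b.misc = 0;	/* mispredicted */
	memcpy(raw + sizeof(b), &b, sizeof(b));

	for (off = 0; off + sizeof(b) <= sizeof(raw); off += sizeof(b)) {
		struct branch rec;

		memcpy(&rec, raw + off, sizeof(rec));
		printf("%#llx -> %#llx %s\n",
		       (unsigned long long)rec.from,
		       (unsigned long long)rec.to,
		       rec.misc & 0x10 ? "pred" : "miss");
	}
	return 0;
}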
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h new file mode 100644 index 000000000000..ca65e21b3e83 --- /dev/null +++ b/tools/perf/util/intel-bts.h | |||
@@ -0,0 +1,43 @@ | |||
1 | /* | ||
2 | * intel-bts.h: Intel Branch Trace Store (BTS) support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef INCLUDE__PERF_INTEL_BTS_H__ | ||
17 | #define INCLUDE__PERF_INTEL_BTS_H__ | ||
18 | |||
19 | #define INTEL_BTS_PMU_NAME "intel_bts" | ||
20 | |||
21 | enum { | ||
22 | INTEL_BTS_PMU_TYPE, | ||
23 | INTEL_BTS_TIME_SHIFT, | ||
24 | INTEL_BTS_TIME_MULT, | ||
25 | INTEL_BTS_TIME_ZERO, | ||
26 | INTEL_BTS_CAP_USER_TIME_ZERO, | ||
27 | INTEL_BTS_SNAPSHOT_MODE, | ||
28 | INTEL_BTS_AUXTRACE_PRIV_MAX, | ||
29 | }; | ||
30 | |||
31 | #define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64)) | ||
32 | |||
33 | struct auxtrace_record; | ||
34 | struct perf_tool; | ||
35 | union perf_event; | ||
36 | struct perf_session; | ||
37 | |||
38 | struct auxtrace_record *intel_bts_recording_init(int *err); | ||
39 | |||
40 | int intel_bts_process_auxtrace_info(union perf_event *event, | ||
41 | struct perf_session *session); | ||
42 | |||
43 | #endif | ||
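Editor's note: the enum above indexes the u64 priv[] slots of the auxtrace_info event: the recording side fills one value per slot, intel_bts_process_auxtrace_info() above reads them back, and INTEL_BTS_AUXTRACE_PRIV_SIZE is just the slot count times sizeof(u64). A tiny sketch with invented values (the real ones come from the PMU and the TSC conversion parameters):

#include <stdint.h>
#include <stdio.h>

enum {				/* mirrors the enum above */
	INTEL_BTS_PMU_TYPE,
	INTEL_BTS_TIME_SHIFT,
	INTEL_BTS_TIME_MULT,
	INTEL_BTS_TIME_ZERO,
	INTEL_BTS_CAP_USER_TIME_ZERO,
	INTEL_BTS_SNAPSHOT_MODE,
	INTEL_BTS_AUXTRACE_PRIV_MAX,
};

int main(void)
{
	uint64_t priv[INTEL_BTS_AUXTRACE_PRIV_MAX] = { 0 };

	/* Invented values; a recording tool would fill these from the PMU */
	priv[INTEL_BTS_PMU_TYPE] = 6;
	priv[INTEL_BTS_TIME_SHIFT] = 10;
	priv[INTEL_BTS_TIME_MULT] = 642;
	priv[INTEL_BTS_SNAPSHOT_MODE] = 1;

	printf("priv area is %zu bytes\n", sizeof(priv));	/* INTEL_BTS_AUXTRACE_PRIV_SIZE */
	printf("pmu type %llu, snapshot %llu\n",
	       (unsigned long long)priv[INTEL_BTS_PMU_TYPE],
	       (unsigned long long)priv[INTEL_BTS_SNAPSHOT_MODE]);
	return 0;
}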
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build new file mode 100644 index 000000000000..2386322ece4f --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/Build | |||
@@ -0,0 +1,12 @@ | |||
1 | libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o | ||
2 | |||
3 | inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk | ||
4 | inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt | ||
5 | |||
6 | $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps) | ||
7 | $(call rule_mkdir) | ||
8 | @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@ | ||
9 | |||
10 | $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c | ||
11 | |||
12 | CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder -Wno-override-init | ||
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk new file mode 100644 index 000000000000..517567347aac --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk | |||
@@ -0,0 +1,386 @@ | |||
1 | #!/bin/awk -f | ||
2 | # gen-insn-attr-x86.awk: Instruction attribute table generator | ||
3 | # Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
4 | # | ||
5 | # Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c | ||
6 | |||
7 | # Awk implementation sanity check | ||
8 | function check_awk_implement() { | ||
9 | if (sprintf("%x", 0) != "0") | ||
10 | return "Your awk has a printf-format problem." | ||
11 | return "" | ||
12 | } | ||
13 | |||
14 | # Clear working vars | ||
15 | function clear_vars() { | ||
16 | delete table | ||
17 | delete lptable2 | ||
18 | delete lptable1 | ||
19 | delete lptable3 | ||
20 | eid = -1 # escape id | ||
21 | gid = -1 # group id | ||
22 | aid = -1 # AVX id | ||
23 | tname = "" | ||
24 | } | ||
25 | |||
26 | BEGIN { | ||
27 | # Implementation error checking | ||
28 | awkchecked = check_awk_implement() | ||
29 | if (awkchecked != "") { | ||
30 | print "Error: " awkchecked > "/dev/stderr" | ||
31 | print "Please try to use gawk." > "/dev/stderr" | ||
32 | exit 1 | ||
33 | } | ||
34 | |||
35 | # Setup generating tables | ||
36 | print "/* x86 opcode map generated from x86-opcode-map.txt */" | ||
37 | print "/* Do not change this code. */\n" | ||
38 | ggid = 1 | ||
39 | geid = 1 | ||
40 | gaid = 0 | ||
41 | delete etable | ||
42 | delete gtable | ||
43 | delete atable | ||
44 | |||
45 | opnd_expr = "^[A-Za-z/]" | ||
46 | ext_expr = "^\\(" | ||
47 | sep_expr = "^\\|$" | ||
48 | group_expr = "^Grp[0-9A-Za-z]+" | ||
49 | |||
50 | imm_expr = "^[IJAOL][a-z]" | ||
51 | imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
52 | imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
53 | imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)" | ||
54 | imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)" | ||
55 | imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)" | ||
56 | imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)" | ||
57 | imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
58 | imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)" | ||
59 | imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)" | ||
60 | imm_flag["Ob"] = "INAT_MOFFSET" | ||
61 | imm_flag["Ov"] = "INAT_MOFFSET" | ||
62 | imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)" | ||
63 | |||
64 | modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" | ||
65 | force64_expr = "\\([df]64\\)" | ||
66 | rex_expr = "^REX(\\.[XRWB]+)*" | ||
67 | fpu_expr = "^ESC" # TODO | ||
68 | |||
69 | lprefix1_expr = "\\((66|!F3)\\)" | ||
70 | lprefix2_expr = "\\(F3\\)" | ||
71 | lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)" | ||
72 | lprefix_expr = "\\((66|F2|F3)\\)" | ||
73 | max_lprefix = 4 | ||
74 | |||
75 | # All opcodes starting with lower-case 'v' or with (v1) superscript | ||
76 | # accept VEX prefix | ||
77 | vexok_opcode_expr = "^v.*" | ||
78 | vexok_expr = "\\(v1\\)" | ||
80 | # All opcodes with (v) superscript support *only* VEX prefix | ||
80 | vexonly_expr = "\\(v\\)" | ||
81 | |||
82 | prefix_expr = "\\(Prefix\\)" | ||
83 | prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ" | ||
84 | prefix_num["REPNE"] = "INAT_PFX_REPNE" | ||
85 | prefix_num["REP/REPE"] = "INAT_PFX_REPE" | ||
86 | prefix_num["XACQUIRE"] = "INAT_PFX_REPNE" | ||
87 | prefix_num["XRELEASE"] = "INAT_PFX_REPE" | ||
88 | prefix_num["LOCK"] = "INAT_PFX_LOCK" | ||
89 | prefix_num["SEG=CS"] = "INAT_PFX_CS" | ||
90 | prefix_num["SEG=DS"] = "INAT_PFX_DS" | ||
91 | prefix_num["SEG=ES"] = "INAT_PFX_ES" | ||
92 | prefix_num["SEG=FS"] = "INAT_PFX_FS" | ||
93 | prefix_num["SEG=GS"] = "INAT_PFX_GS" | ||
94 | prefix_num["SEG=SS"] = "INAT_PFX_SS" | ||
95 | prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ" | ||
96 | prefix_num["VEX+1byte"] = "INAT_PFX_VEX2" | ||
97 | prefix_num["VEX+2byte"] = "INAT_PFX_VEX3" | ||
98 | |||
99 | clear_vars() | ||
100 | } | ||
101 | |||
102 | function semantic_error(msg) { | ||
103 | print "Semantic error at " NR ": " msg > "/dev/stderr" | ||
104 | exit 1 | ||
105 | } | ||
106 | |||
107 | function debug(msg) { | ||
108 | print "DEBUG: " msg | ||
109 | } | ||
110 | |||
111 | function array_size(arr, i,c) { | ||
112 | c = 0 | ||
113 | for (i in arr) | ||
114 | c++ | ||
115 | return c | ||
116 | } | ||
117 | |||
118 | /^Table:/ { | ||
119 | print "/* " $0 " */" | ||
120 | if (tname != "") | ||
121 | semantic_error("Hit Table: before EndTable:."); | ||
122 | } | ||
123 | |||
124 | /^Referrer:/ { | ||
125 | if (NF != 1) { | ||
126 | # escape opcode table | ||
127 | ref = "" | ||
128 | for (i = 2; i <= NF; i++) | ||
129 | ref = ref $i | ||
130 | eid = escape[ref] | ||
131 | tname = sprintf("inat_escape_table_%d", eid) | ||
132 | } | ||
133 | } | ||
134 | |||
135 | /^AVXcode:/ { | ||
136 | if (NF != 1) { | ||
137 | # AVX/escape opcode table | ||
138 | aid = $2 | ||
139 | if (gaid <= aid) | ||
140 | gaid = aid + 1 | ||
141 | if (tname == "") # AVX only opcode table | ||
142 | tname = sprintf("inat_avx_table_%d", $2) | ||
143 | } | ||
144 | if (aid == -1 && eid == -1) # primary opcode table | ||
145 | tname = "inat_primary_table" | ||
146 | } | ||
147 | |||
148 | /^GrpTable:/ { | ||
149 | print "/* " $0 " */" | ||
150 | if (!($2 in group)) | ||
151 | semantic_error("No group: " $2 ) | ||
152 | gid = group[$2] | ||
153 | tname = "inat_group_table_" gid | ||
154 | } | ||
155 | |||
156 | function print_table(tbl,name,fmt,n) | ||
157 | { | ||
158 | print "const insn_attr_t " name " = {" | ||
159 | for (i = 0; i < n; i++) { | ||
160 | id = sprintf(fmt, i) | ||
161 | if (tbl[id]) | ||
162 | print " [" id "] = " tbl[id] "," | ||
163 | } | ||
164 | print "};" | ||
165 | } | ||
166 | |||
167 | /^EndTable/ { | ||
168 | if (gid != -1) { | ||
169 | # print group tables | ||
170 | if (array_size(table) != 0) { | ||
171 | print_table(table, tname "[INAT_GROUP_TABLE_SIZE]", | ||
172 | "0x%x", 8) | ||
173 | gtable[gid,0] = tname | ||
174 | } | ||
175 | if (array_size(lptable1) != 0) { | ||
176 | print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]", | ||
177 | "0x%x", 8) | ||
178 | gtable[gid,1] = tname "_1" | ||
179 | } | ||
180 | if (array_size(lptable2) != 0) { | ||
181 | print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]", | ||
182 | "0x%x", 8) | ||
183 | gtable[gid,2] = tname "_2" | ||
184 | } | ||
185 | if (array_size(lptable3) != 0) { | ||
186 | print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]", | ||
187 | "0x%x", 8) | ||
188 | gtable[gid,3] = tname "_3" | ||
189 | } | ||
190 | } else { | ||
191 | # print primary/escaped tables | ||
192 | if (array_size(table) != 0) { | ||
193 | print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]", | ||
194 | "0x%02x", 256) | ||
195 | etable[eid,0] = tname | ||
196 | if (aid >= 0) | ||
197 | atable[aid,0] = tname | ||
198 | } | ||
199 | if (array_size(lptable1) != 0) { | ||
200 | print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]", | ||
201 | "0x%02x", 256) | ||
202 | etable[eid,1] = tname "_1" | ||
203 | if (aid >= 0) | ||
204 | atable[aid,1] = tname "_1" | ||
205 | } | ||
206 | if (array_size(lptable2) != 0) { | ||
207 | print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]", | ||
208 | "0x%02x", 256) | ||
209 | etable[eid,2] = tname "_2" | ||
210 | if (aid >= 0) | ||
211 | atable[aid,2] = tname "_2" | ||
212 | } | ||
213 | if (array_size(lptable3) != 0) { | ||
214 | print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]", | ||
215 | "0x%02x", 256) | ||
216 | etable[eid,3] = tname "_3" | ||
217 | if (aid >= 0) | ||
218 | atable[aid,3] = tname "_3" | ||
219 | } | ||
220 | } | ||
221 | print "" | ||
222 | clear_vars() | ||
223 | } | ||
224 | |||
225 | function add_flags(old,new) { | ||
226 | if (old && new) | ||
227 | return old " | " new | ||
228 | else if (old) | ||
229 | return old | ||
230 | else | ||
231 | return new | ||
232 | } | ||
233 | |||
234 | # convert operands to flags. | ||
235 | function convert_operands(count,opnd, i,j,imm,mod) | ||
236 | { | ||
237 | imm = null | ||
238 | mod = null | ||
239 | for (j = 1; j <= count; j++) { | ||
240 | i = opnd[j] | ||
241 | if (match(i, imm_expr) == 1) { | ||
242 | if (!imm_flag[i]) | ||
243 | semantic_error("Unknown imm opnd: " i) | ||
244 | if (imm) { | ||
245 | if (i != "Ib") | ||
246 | semantic_error("Second IMM error") | ||
247 | imm = add_flags(imm, "INAT_SCNDIMM") | ||
248 | } else | ||
249 | imm = imm_flag[i] | ||
250 | } else if (match(i, modrm_expr)) | ||
251 | mod = "INAT_MODRM" | ||
252 | } | ||
253 | return add_flags(imm, mod) | ||
254 | } | ||
255 | |||
256 | /^[0-9a-f]+\:/ { | ||
257 | if (NR == 1) | ||
258 | next | ||
259 | # get index | ||
260 | idx = "0x" substr($1, 1, index($1,":") - 1) | ||
261 | if (idx in table) | ||
262 | semantic_error("Redefine " idx " in " tname) | ||
263 | |||
264 | # check if escaped opcode | ||
265 | if ("escape" == $2) { | ||
266 | if ($3 != "#") | ||
267 | semantic_error("No escaped name") | ||
268 | ref = "" | ||
269 | for (i = 4; i <= NF; i++) | ||
270 | ref = ref $i | ||
271 | if (ref in escape) | ||
272 | semantic_error("Redefine escape (" ref ")") | ||
273 | escape[ref] = geid | ||
274 | geid++ | ||
275 | table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")" | ||
276 | next | ||
277 | } | ||
278 | |||
279 | variant = null | ||
280 | # converts | ||
281 | i = 2 | ||
282 | while (i <= NF) { | ||
283 | opcode = $(i++) | ||
284 | delete opnds | ||
285 | ext = null | ||
286 | flags = null | ||
287 | opnd = null | ||
288 | # parse one opcode | ||
289 | if (match($i, opnd_expr)) { | ||
290 | opnd = $i | ||
291 | count = split($(i++), opnds, ",") | ||
292 | flags = convert_operands(count, opnds) | ||
293 | } | ||
294 | if (match($i, ext_expr)) | ||
295 | ext = $(i++) | ||
296 | if (match($i, sep_expr)) | ||
297 | i++ | ||
298 | else if (i < NF) | ||
299 | semantic_error($i " is not a separator") | ||
300 | |||
301 | # check if group opcode | ||
302 | if (match(opcode, group_expr)) { | ||
303 | if (!(opcode in group)) { | ||
304 | group[opcode] = ggid | ||
305 | ggid++ | ||
306 | } | ||
307 | flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")") | ||
308 | } | ||
309 | # check force(or default) 64bit | ||
310 | if (match(ext, force64_expr)) | ||
311 | flags = add_flags(flags, "INAT_FORCE64") | ||
312 | |||
313 | # check REX prefix | ||
314 | if (match(opcode, rex_expr)) | ||
315 | flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)") | ||
316 | |||
317 | # check coprocessor escape : TODO | ||
318 | if (match(opcode, fpu_expr)) | ||
319 | flags = add_flags(flags, "INAT_MODRM") | ||
320 | |||
321 | # check VEX codes | ||
322 | if (match(ext, vexonly_expr)) | ||
323 | flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY") | ||
324 | else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr)) | ||
325 | flags = add_flags(flags, "INAT_VEXOK") | ||
326 | |||
327 | # check prefixes | ||
328 | if (match(ext, prefix_expr)) { | ||
329 | if (!prefix_num[opcode]) | ||
330 | semantic_error("Unknown prefix: " opcode) | ||
331 | flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")") | ||
332 | } | ||
333 | if (length(flags) == 0) | ||
334 | continue | ||
335 | # check if last prefix | ||
336 | if (match(ext, lprefix1_expr)) { | ||
337 | lptable1[idx] = add_flags(lptable1[idx],flags) | ||
338 | variant = "INAT_VARIANT" | ||
339 | } | ||
340 | if (match(ext, lprefix2_expr)) { | ||
341 | lptable2[idx] = add_flags(lptable2[idx],flags) | ||
342 | variant = "INAT_VARIANT" | ||
343 | } | ||
344 | if (match(ext, lprefix3_expr)) { | ||
345 | lptable3[idx] = add_flags(lptable3[idx],flags) | ||
346 | variant = "INAT_VARIANT" | ||
347 | } | ||
348 | if (!match(ext, lprefix_expr)){ | ||
349 | table[idx] = add_flags(table[idx],flags) | ||
350 | } | ||
351 | } | ||
352 | if (variant) | ||
353 | table[idx] = add_flags(table[idx],variant) | ||
354 | } | ||
355 | |||
356 | END { | ||
357 | if (awkchecked != "") | ||
358 | exit 1 | ||
359 | # print escape opcode map's array | ||
360 | print "/* Escape opcode map array */" | ||
361 | print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \ | ||
362 | "[INAT_LSTPFX_MAX + 1] = {" | ||
363 | for (i = 0; i < geid; i++) | ||
364 | for (j = 0; j < max_lprefix; j++) | ||
365 | if (etable[i,j]) | ||
366 | print " ["i"]["j"] = "etable[i,j]"," | ||
367 | print "};\n" | ||
368 | # print group opcode map's array | ||
369 | print "/* Group opcode map array */" | ||
370 | print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\ | ||
371 | "[INAT_LSTPFX_MAX + 1] = {" | ||
372 | for (i = 0; i < ggid; i++) | ||
373 | for (j = 0; j < max_lprefix; j++) | ||
374 | if (gtable[i,j]) | ||
375 | print " ["i"]["j"] = "gtable[i,j]"," | ||
376 | print "};\n" | ||
377 | # print AVX opcode map's array | ||
378 | print "/* AVX opcode map array */" | ||
379 | print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\ | ||
380 | "[INAT_LSTPFX_MAX + 1] = {" | ||
381 | for (i = 0; i < gaid; i++) | ||
382 | for (j = 0; j < max_lprefix; j++) | ||
383 | if (atable[i,j]) | ||
384 | print " ["i"]["j"] = "atable[i,j]"," | ||
385 | print "};" | ||
386 | } | ||
diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c new file mode 100644 index 000000000000..906d94aa0a24 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.c | |||
@@ -0,0 +1,96 @@ | |||
1 | /* | ||
2 | * x86 instruction attribute tables | ||
3 | * | ||
4 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | */ | ||
21 | #include "insn.h" | ||
22 | |||
23 | /* Attribute tables are generated from opcode map */ | ||
24 | #include "inat-tables.c" | ||
25 | |||
26 | /* Attribute search APIs */ | ||
27 | insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode) | ||
28 | { | ||
29 | return inat_primary_table[opcode]; | ||
30 | } | ||
31 | |||
32 | int inat_get_last_prefix_id(insn_byte_t last_pfx) | ||
33 | { | ||
34 | insn_attr_t lpfx_attr; | ||
35 | |||
36 | lpfx_attr = inat_get_opcode_attribute(last_pfx); | ||
37 | return inat_last_prefix_id(lpfx_attr); | ||
38 | } | ||
39 | |||
40 | insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id, | ||
41 | insn_attr_t esc_attr) | ||
42 | { | ||
43 | const insn_attr_t *table; | ||
44 | int n; | ||
45 | |||
46 | n = inat_escape_id(esc_attr); | ||
47 | |||
48 | table = inat_escape_tables[n][0]; | ||
49 | if (!table) | ||
50 | return 0; | ||
51 | if (inat_has_variant(table[opcode]) && lpfx_id) { | ||
52 | table = inat_escape_tables[n][lpfx_id]; | ||
53 | if (!table) | ||
54 | return 0; | ||
55 | } | ||
56 | return table[opcode]; | ||
57 | } | ||
58 | |||
59 | insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id, | ||
60 | insn_attr_t grp_attr) | ||
61 | { | ||
62 | const insn_attr_t *table; | ||
63 | int n; | ||
64 | |||
65 | n = inat_group_id(grp_attr); | ||
66 | |||
67 | table = inat_group_tables[n][0]; | ||
68 | if (!table) | ||
69 | return inat_group_common_attribute(grp_attr); | ||
70 | if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) { | ||
71 | table = inat_group_tables[n][lpfx_id]; | ||
72 | if (!table) | ||
73 | return inat_group_common_attribute(grp_attr); | ||
74 | } | ||
75 | return table[X86_MODRM_REG(modrm)] | | ||
76 | inat_group_common_attribute(grp_attr); | ||
77 | } | ||
78 | |||
79 | insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m, | ||
80 | insn_byte_t vex_p) | ||
81 | { | ||
82 | const insn_attr_t *table; | ||
83 | if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX) | ||
84 | return 0; | ||
85 | /* First, check the master table */ | ||
86 | table = inat_avx_tables[vex_m][0]; | ||
87 | if (!table) | ||
88 | return 0; | ||
89 | if (!inat_is_group(table[opcode]) && vex_p) { | ||
90 | /* If this is not a group, get attribute directly */ | ||
91 | table = inat_avx_tables[vex_m][vex_p]; | ||
92 | if (!table) | ||
93 | return 0; | ||
94 | } | ||
95 | return table[opcode]; | ||
96 | } | ||
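
The lookup helpers above compose into a simple chain: start from the primary one-byte opcode table, then, if the attribute marks an escape, drop into the per-escape table selected by the last legacy prefix. A minimal sketch of that composition follows (the opcode bytes and helper name are only for illustration; insn.c below does the real work):

    #include "inat.h"

    /* Sketch: resolve the attribute of a two-byte opcode (e.g. 0f 10) seen
     * after a 0x66 operand-size prefix. */
    static insn_attr_t lookup_two_byte_opcode(insn_byte_t opcode, insn_byte_t last_pfx)
    {
            insn_attr_t attr = inat_get_opcode_attribute(0x0f);  /* escape byte */
            int lpfx_id = inat_get_last_prefix_id(last_pfx);

            if (inat_is_escape(attr))
                    attr = inat_get_escape_attribute(opcode, lpfx_id, attr);
            return attr;
    }
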
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h new file mode 100644 index 000000000000..611645e903a8 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat.h | |||
@@ -0,0 +1,221 @@ | |||
1 | #ifndef _ASM_X86_INAT_H | ||
2 | #define _ASM_X86_INAT_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | #include "inat_types.h" | ||
24 | |||
25 | /* | ||
26 | * Internal bits. Don't use bitmasks directly, because these bits are | ||
27 | * unstable. You should use checking functions. | ||
28 | */ | ||
29 | |||
30 | #define INAT_OPCODE_TABLE_SIZE 256 | ||
31 | #define INAT_GROUP_TABLE_SIZE 8 | ||
32 | |||
33 | /* Legacy last prefixes */ | ||
34 | #define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */ | ||
35 | #define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */ | ||
36 | #define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */ | ||
37 | /* Other Legacy prefixes */ | ||
38 | #define INAT_PFX_LOCK 4 /* 0xF0 */ | ||
39 | #define INAT_PFX_CS 5 /* 0x2E */ | ||
40 | #define INAT_PFX_DS 6 /* 0x3E */ | ||
41 | #define INAT_PFX_ES 7 /* 0x26 */ | ||
42 | #define INAT_PFX_FS 8 /* 0x64 */ | ||
43 | #define INAT_PFX_GS 9 /* 0x65 */ | ||
44 | #define INAT_PFX_SS 10 /* 0x36 */ | ||
45 | #define INAT_PFX_ADDRSZ 11 /* 0x67 */ | ||
46 | /* x86-64 REX prefix */ | ||
47 | #define INAT_PFX_REX 12 /* 0x4X */ | ||
48 | /* AVX VEX prefixes */ | ||
49 | #define INAT_PFX_VEX2 13 /* 2-byte VEX prefix */ | ||
50 | #define INAT_PFX_VEX3 14 /* 3-byte VEX prefix */ | ||
51 | |||
52 | #define INAT_LSTPFX_MAX 3 | ||
53 | #define INAT_LGCPFX_MAX 11 | ||
54 | |||
55 | /* Immediate size */ | ||
56 | #define INAT_IMM_BYTE 1 | ||
57 | #define INAT_IMM_WORD 2 | ||
58 | #define INAT_IMM_DWORD 3 | ||
59 | #define INAT_IMM_QWORD 4 | ||
60 | #define INAT_IMM_PTR 5 | ||
61 | #define INAT_IMM_VWORD32 6 | ||
62 | #define INAT_IMM_VWORD 7 | ||
63 | |||
64 | /* Legacy prefix */ | ||
65 | #define INAT_PFX_OFFS 0 | ||
66 | #define INAT_PFX_BITS 4 | ||
67 | #define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1) | ||
68 | #define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS) | ||
69 | /* Escape opcodes */ | ||
70 | #define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS) | ||
71 | #define INAT_ESC_BITS 2 | ||
72 | #define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1) | ||
73 | #define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS) | ||
74 | /* Group opcodes (1-16) */ | ||
75 | #define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS) | ||
76 | #define INAT_GRP_BITS 5 | ||
77 | #define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1) | ||
78 | #define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS) | ||
79 | /* Immediates */ | ||
80 | #define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS) | ||
81 | #define INAT_IMM_BITS 3 | ||
82 | #define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS) | ||
83 | /* Flags */ | ||
84 | #define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS) | ||
85 | #define INAT_MODRM (1 << (INAT_FLAG_OFFS)) | ||
86 | #define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1)) | ||
87 | #define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2)) | ||
88 | #define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3)) | ||
89 | #define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4)) | ||
90 | #define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5)) | ||
91 | #define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6)) | ||
92 | /* Attribute making macros for attribute tables */ | ||
93 | #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) | ||
94 | #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) | ||
95 | #define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM) | ||
96 | #define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS) | ||
97 | |||
98 | /* Attribute search APIs */ | ||
99 | extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode); | ||
100 | extern int inat_get_last_prefix_id(insn_byte_t last_pfx); | ||
101 | extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, | ||
102 | int lpfx_id, | ||
103 | insn_attr_t esc_attr); | ||
104 | extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm, | ||
105 | int lpfx_id, | ||
106 | insn_attr_t esc_attr); | ||
107 | extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, | ||
108 | insn_byte_t vex_m, | ||
109 | insn_byte_t vex_pp); | ||
110 | |||
111 | /* Attribute checking functions */ | ||
112 | static inline int inat_is_legacy_prefix(insn_attr_t attr) | ||
113 | { | ||
114 | attr &= INAT_PFX_MASK; | ||
115 | return attr && attr <= INAT_LGCPFX_MAX; | ||
116 | } | ||
117 | |||
118 | static inline int inat_is_address_size_prefix(insn_attr_t attr) | ||
119 | { | ||
120 | return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ; | ||
121 | } | ||
122 | |||
123 | static inline int inat_is_operand_size_prefix(insn_attr_t attr) | ||
124 | { | ||
125 | return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ; | ||
126 | } | ||
127 | |||
128 | static inline int inat_is_rex_prefix(insn_attr_t attr) | ||
129 | { | ||
130 | return (attr & INAT_PFX_MASK) == INAT_PFX_REX; | ||
131 | } | ||
132 | |||
133 | static inline int inat_last_prefix_id(insn_attr_t attr) | ||
134 | { | ||
135 | if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX) | ||
136 | return 0; | ||
137 | else | ||
138 | return attr & INAT_PFX_MASK; | ||
139 | } | ||
140 | |||
141 | static inline int inat_is_vex_prefix(insn_attr_t attr) | ||
142 | { | ||
143 | attr &= INAT_PFX_MASK; | ||
144 | return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3; | ||
145 | } | ||
146 | |||
147 | static inline int inat_is_vex3_prefix(insn_attr_t attr) | ||
148 | { | ||
149 | return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3; | ||
150 | } | ||
151 | |||
152 | static inline int inat_is_escape(insn_attr_t attr) | ||
153 | { | ||
154 | return attr & INAT_ESC_MASK; | ||
155 | } | ||
156 | |||
157 | static inline int inat_escape_id(insn_attr_t attr) | ||
158 | { | ||
159 | return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS; | ||
160 | } | ||
161 | |||
162 | static inline int inat_is_group(insn_attr_t attr) | ||
163 | { | ||
164 | return attr & INAT_GRP_MASK; | ||
165 | } | ||
166 | |||
167 | static inline int inat_group_id(insn_attr_t attr) | ||
168 | { | ||
169 | return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS; | ||
170 | } | ||
171 | |||
172 | static inline int inat_group_common_attribute(insn_attr_t attr) | ||
173 | { | ||
174 | return attr & ~INAT_GRP_MASK; | ||
175 | } | ||
176 | |||
177 | static inline int inat_has_immediate(insn_attr_t attr) | ||
178 | { | ||
179 | return attr & INAT_IMM_MASK; | ||
180 | } | ||
181 | |||
182 | static inline int inat_immediate_size(insn_attr_t attr) | ||
183 | { | ||
184 | return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS; | ||
185 | } | ||
186 | |||
187 | static inline int inat_has_modrm(insn_attr_t attr) | ||
188 | { | ||
189 | return attr & INAT_MODRM; | ||
190 | } | ||
191 | |||
192 | static inline int inat_is_force64(insn_attr_t attr) | ||
193 | { | ||
194 | return attr & INAT_FORCE64; | ||
195 | } | ||
196 | |||
197 | static inline int inat_has_second_immediate(insn_attr_t attr) | ||
198 | { | ||
199 | return attr & INAT_SCNDIMM; | ||
200 | } | ||
201 | |||
202 | static inline int inat_has_moffset(insn_attr_t attr) | ||
203 | { | ||
204 | return attr & INAT_MOFFSET; | ||
205 | } | ||
206 | |||
207 | static inline int inat_has_variant(insn_attr_t attr) | ||
208 | { | ||
209 | return attr & INAT_VARIANT; | ||
210 | } | ||
211 | |||
212 | static inline int inat_accept_vex(insn_attr_t attr) | ||
213 | { | ||
214 | return attr & INAT_VEXOK; | ||
215 | } | ||
216 | |||
217 | static inline int inat_must_vex(insn_attr_t attr) | ||
218 | { | ||
219 | return attr & INAT_VEXONLY; | ||
220 | } | ||
221 | #endif | ||
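
To make the bit layout above concrete, here is a short worked example (all values follow directly from the #defines in this header):

    /* INAT_PFX_OFFS = 0, INAT_PFX_BITS = 4 -> INAT_ESC_OFFS  = 4
     * INAT_ESC_BITS = 2                    -> INAT_GRP_OFFS  = 6
     * INAT_GRP_BITS = 5                    -> INAT_IMM_OFFS  = 11
     * INAT_IMM_BITS = 3                    -> INAT_FLAG_OFFS = 14
     *
     * So INAT_MAKE_GROUP(5) == (5 << 6) | INAT_MODRM == 0x140 | 0x4000 == 0x4140,
     * and inat_group_id(0x4140) recovers 5 while inat_has_modrm(0x4140) is non-zero.
     */
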
diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h new file mode 100644 index 000000000000..cb3c20ce39cf --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/inat_types.h | |||
@@ -0,0 +1,29 @@ | |||
1 | #ifndef _ASM_X86_INAT_TYPES_H | ||
2 | #define _ASM_X86_INAT_TYPES_H | ||
3 | /* | ||
4 | * x86 instruction attributes | ||
5 | * | ||
6 | * Written by Masami Hiramatsu <mhiramat@redhat.com> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, | ||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 | * GNU General Public License for more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | /* Instruction attributes */ | ||
25 | typedef unsigned int insn_attr_t; | ||
26 | typedef unsigned char insn_byte_t; | ||
27 | typedef signed int insn_value_t; | ||
28 | |||
29 | #endif | ||
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c new file mode 100644 index 000000000000..47314a64399c --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.c | |||
@@ -0,0 +1,594 @@ | |||
1 | /* | ||
2 | * x86 instruction analysis | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
17 | * | ||
18 | * Copyright (C) IBM Corporation, 2002, 2004, 2009 | ||
19 | */ | ||
20 | |||
21 | #ifdef __KERNEL__ | ||
22 | #include <linux/string.h> | ||
23 | #else | ||
24 | #include <string.h> | ||
25 | #endif | ||
26 | #include "inat.h" | ||
27 | #include "insn.h" | ||
28 | |||
29 | /* Verify that the next sizeof(t) bytes belong to the same instruction */ | ||
30 | #define validate_next(t, insn, n) \ | ||
31 | ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) | ||
32 | |||
33 | #define __get_next(t, insn) \ | ||
34 | ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) | ||
35 | |||
36 | #define __peek_nbyte_next(t, insn, n) \ | ||
37 | ({ t r = *(t*)((insn)->next_byte + n); r; }) | ||
38 | |||
39 | #define get_next(t, insn) \ | ||
40 | ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) | ||
41 | |||
42 | #define peek_nbyte_next(t, insn, n) \ | ||
43 | ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); }) | ||
44 | |||
45 | #define peek_next(t, insn) peek_nbyte_next(t, insn, 0) | ||
46 | |||
47 | /** | ||
48 | * insn_init() - initialize struct insn | ||
49 | * @insn: &struct insn to be initialized | ||
50 | * @kaddr: address (in kernel memory) of instruction (or copy thereof) | ||
51 | * @x86_64: !0 for 64-bit kernel or 64-bit app | ||
52 | */ | ||
53 | void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64) | ||
54 | { | ||
55 | /* | ||
56 | * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid | ||
57 | * even if the input buffer is long enough to hold them. | ||
58 | */ | ||
59 | if (buf_len > MAX_INSN_SIZE) | ||
60 | buf_len = MAX_INSN_SIZE; | ||
61 | |||
62 | memset(insn, 0, sizeof(*insn)); | ||
63 | insn->kaddr = kaddr; | ||
64 | insn->end_kaddr = kaddr + buf_len; | ||
65 | insn->next_byte = kaddr; | ||
66 | insn->x86_64 = x86_64 ? 1 : 0; | ||
67 | insn->opnd_bytes = 4; | ||
68 | if (x86_64) | ||
69 | insn->addr_bytes = 8; | ||
70 | else | ||
71 | insn->addr_bytes = 4; | ||
72 | } | ||
73 | |||
74 | /** | ||
75 | * insn_get_prefixes - scan x86 instruction prefix bytes | ||
76 | * @insn: &struct insn containing instruction | ||
77 | * | ||
78 | * Populates the @insn->prefixes bitmap, and updates @insn->next_byte | ||
79 | * to point to the (first) opcode. No effect if @insn->prefixes.got | ||
80 | * is already set. | ||
81 | */ | ||
82 | void insn_get_prefixes(struct insn *insn) | ||
83 | { | ||
84 | struct insn_field *prefixes = &insn->prefixes; | ||
85 | insn_attr_t attr; | ||
86 | insn_byte_t b, lb; | ||
87 | int i, nb; | ||
88 | |||
89 | if (prefixes->got) | ||
90 | return; | ||
91 | |||
92 | nb = 0; | ||
93 | lb = 0; | ||
94 | b = peek_next(insn_byte_t, insn); | ||
95 | attr = inat_get_opcode_attribute(b); | ||
96 | while (inat_is_legacy_prefix(attr)) { | ||
97 | /* Skip if same prefix */ | ||
98 | for (i = 0; i < nb; i++) | ||
99 | if (prefixes->bytes[i] == b) | ||
100 | goto found; | ||
101 | if (nb == 4) | ||
102 | /* Invalid instruction */ | ||
103 | break; | ||
104 | prefixes->bytes[nb++] = b; | ||
105 | if (inat_is_address_size_prefix(attr)) { | ||
106 | /* address size switches 2/4 or 4/8 */ | ||
107 | if (insn->x86_64) | ||
108 | insn->addr_bytes ^= 12; | ||
109 | else | ||
110 | insn->addr_bytes ^= 6; | ||
111 | } else if (inat_is_operand_size_prefix(attr)) { | ||
112 | /* operand size switches 2/4 */ | ||
113 | insn->opnd_bytes ^= 6; | ||
114 | } | ||
115 | found: | ||
116 | prefixes->nbytes++; | ||
117 | insn->next_byte++; | ||
118 | lb = b; | ||
119 | b = peek_next(insn_byte_t, insn); | ||
120 | attr = inat_get_opcode_attribute(b); | ||
121 | } | ||
122 | /* Set the last prefix */ | ||
123 | if (lb && lb != insn->prefixes.bytes[3]) { | ||
124 | if (unlikely(insn->prefixes.bytes[3])) { | ||
125 | /* Swap the last prefix */ | ||
126 | b = insn->prefixes.bytes[3]; | ||
127 | for (i = 0; i < nb; i++) | ||
128 | if (prefixes->bytes[i] == lb) | ||
129 | prefixes->bytes[i] = b; | ||
130 | } | ||
131 | insn->prefixes.bytes[3] = lb; | ||
132 | } | ||
133 | |||
134 | /* Decode REX prefix */ | ||
135 | if (insn->x86_64) { | ||
136 | b = peek_next(insn_byte_t, insn); | ||
137 | attr = inat_get_opcode_attribute(b); | ||
138 | if (inat_is_rex_prefix(attr)) { | ||
139 | insn->rex_prefix.value = b; | ||
140 | insn->rex_prefix.nbytes = 1; | ||
141 | insn->next_byte++; | ||
142 | if (X86_REX_W(b)) | ||
143 | /* REX.W overrides opnd_size */ | ||
144 | insn->opnd_bytes = 8; | ||
145 | } | ||
146 | } | ||
147 | insn->rex_prefix.got = 1; | ||
148 | |||
149 | /* Decode VEX prefix */ | ||
150 | b = peek_next(insn_byte_t, insn); | ||
151 | attr = inat_get_opcode_attribute(b); | ||
152 | if (inat_is_vex_prefix(attr)) { | ||
153 | insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1); | ||
154 | if (!insn->x86_64) { | ||
155 | /* | ||
156 | * In 32-bit mode, if the [7:6] bits (mod bits of | ||
157 | * ModRM) on the second byte are not 11b, it is | ||
158 | * LDS or LES. | ||
159 | */ | ||
160 | if (X86_MODRM_MOD(b2) != 3) | ||
161 | goto vex_end; | ||
162 | } | ||
163 | insn->vex_prefix.bytes[0] = b; | ||
164 | insn->vex_prefix.bytes[1] = b2; | ||
165 | if (inat_is_vex3_prefix(attr)) { | ||
166 | b2 = peek_nbyte_next(insn_byte_t, insn, 2); | ||
167 | insn->vex_prefix.bytes[2] = b2; | ||
168 | insn->vex_prefix.nbytes = 3; | ||
169 | insn->next_byte += 3; | ||
170 | if (insn->x86_64 && X86_VEX_W(b2)) | ||
171 | /* VEX.W overrides opnd_size */ | ||
172 | insn->opnd_bytes = 8; | ||
173 | } else { | ||
174 | /* | ||
175 | * For VEX2, fake VEX3-like byte#2. | ||
176 | * Makes it easier to decode vex.W, vex.vvvv, | ||
177 | * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. | ||
178 | */ | ||
179 | insn->vex_prefix.bytes[2] = b2 & 0x7f; | ||
180 | insn->vex_prefix.nbytes = 2; | ||
181 | insn->next_byte += 2; | ||
182 | } | ||
183 | } | ||
184 | vex_end: | ||
185 | insn->vex_prefix.got = 1; | ||
186 | |||
187 | prefixes->got = 1; | ||
188 | |||
189 | err_out: | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | /** | ||
194 | * insn_get_opcode - collect opcode(s) | ||
195 | * @insn: &struct insn containing instruction | ||
196 | * | ||
197 | * Populates @insn->opcode, updates @insn->next_byte to point past the | ||
198 | * opcode byte(s), and sets @insn->attr (except for groups). | ||
199 | * If necessary, first collects any preceding (prefix) bytes. | ||
200 | * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got | ||
201 | * is already 1. | ||
202 | */ | ||
203 | void insn_get_opcode(struct insn *insn) | ||
204 | { | ||
205 | struct insn_field *opcode = &insn->opcode; | ||
206 | insn_byte_t op; | ||
207 | int pfx_id; | ||
208 | if (opcode->got) | ||
209 | return; | ||
210 | if (!insn->prefixes.got) | ||
211 | insn_get_prefixes(insn); | ||
212 | |||
213 | /* Get first opcode */ | ||
214 | op = get_next(insn_byte_t, insn); | ||
215 | opcode->bytes[0] = op; | ||
216 | opcode->nbytes = 1; | ||
217 | |||
218 | /* Check if there is VEX prefix or not */ | ||
219 | if (insn_is_avx(insn)) { | ||
220 | insn_byte_t m, p; | ||
221 | m = insn_vex_m_bits(insn); | ||
222 | p = insn_vex_p_bits(insn); | ||
223 | insn->attr = inat_get_avx_attribute(op, m, p); | ||
224 | if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr)) | ||
225 | insn->attr = 0; /* This instruction is bad */ | ||
226 | goto end; /* VEX has only 1 byte for opcode */ | ||
227 | } | ||
228 | |||
229 | insn->attr = inat_get_opcode_attribute(op); | ||
230 | while (inat_is_escape(insn->attr)) { | ||
231 | /* Get escaped opcode */ | ||
232 | op = get_next(insn_byte_t, insn); | ||
233 | opcode->bytes[opcode->nbytes++] = op; | ||
234 | pfx_id = insn_last_prefix_id(insn); | ||
235 | insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr); | ||
236 | } | ||
237 | if (inat_must_vex(insn->attr)) | ||
238 | insn->attr = 0; /* This instruction is bad */ | ||
239 | end: | ||
240 | opcode->got = 1; | ||
241 | |||
242 | err_out: | ||
243 | return; | ||
244 | } | ||
245 | |||
246 | /** | ||
247 | * insn_get_modrm - collect ModRM byte, if any | ||
248 | * @insn: &struct insn containing instruction | ||
249 | * | ||
250 | * Populates @insn->modrm and updates @insn->next_byte to point past the | ||
251 | * ModRM byte, if any. If necessary, first collects the preceding bytes | ||
252 | * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1. | ||
253 | */ | ||
254 | void insn_get_modrm(struct insn *insn) | ||
255 | { | ||
256 | struct insn_field *modrm = &insn->modrm; | ||
257 | insn_byte_t pfx_id, mod; | ||
258 | if (modrm->got) | ||
259 | return; | ||
260 | if (!insn->opcode.got) | ||
261 | insn_get_opcode(insn); | ||
262 | |||
263 | if (inat_has_modrm(insn->attr)) { | ||
264 | mod = get_next(insn_byte_t, insn); | ||
265 | modrm->value = mod; | ||
266 | modrm->nbytes = 1; | ||
267 | if (inat_is_group(insn->attr)) { | ||
268 | pfx_id = insn_last_prefix_id(insn); | ||
269 | insn->attr = inat_get_group_attribute(mod, pfx_id, | ||
270 | insn->attr); | ||
271 | if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) | ||
272 | insn->attr = 0; /* This is bad */ | ||
273 | } | ||
274 | } | ||
275 | |||
276 | if (insn->x86_64 && inat_is_force64(insn->attr)) | ||
277 | insn->opnd_bytes = 8; | ||
278 | modrm->got = 1; | ||
279 | |||
280 | err_out: | ||
281 | return; | ||
282 | } | ||
283 | |||
284 | |||
285 | /** | ||
286 | * insn_rip_relative() - Does instruction use RIP-relative addressing mode? | ||
287 | * @insn: &struct insn containing instruction | ||
288 | * | ||
289 | * If necessary, first collects the instruction up to and including the | ||
290 | * ModRM byte. No effect if @insn->x86_64 is 0. | ||
291 | */ | ||
292 | int insn_rip_relative(struct insn *insn) | ||
293 | { | ||
294 | struct insn_field *modrm = &insn->modrm; | ||
295 | |||
296 | if (!insn->x86_64) | ||
297 | return 0; | ||
298 | if (!modrm->got) | ||
299 | insn_get_modrm(insn); | ||
300 | /* | ||
301 | * For rip-relative instructions, the mod field (top 2 bits) | ||
302 | * is zero and the r/m field (bottom 3 bits) is 0x5. | ||
303 | */ | ||
304 | return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); | ||
305 | } | ||
306 | |||
307 | /** | ||
308 | * insn_get_sib() - Get the SIB byte of instruction | ||
309 | * @insn: &struct insn containing instruction | ||
310 | * | ||
311 | * If necessary, first collects the instruction up to and including the | ||
312 | * ModRM byte. | ||
313 | */ | ||
314 | void insn_get_sib(struct insn *insn) | ||
315 | { | ||
316 | insn_byte_t modrm; | ||
317 | |||
318 | if (insn->sib.got) | ||
319 | return; | ||
320 | if (!insn->modrm.got) | ||
321 | insn_get_modrm(insn); | ||
322 | if (insn->modrm.nbytes) { | ||
323 | modrm = (insn_byte_t)insn->modrm.value; | ||
324 | if (insn->addr_bytes != 2 && | ||
325 | X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { | ||
326 | insn->sib.value = get_next(insn_byte_t, insn); | ||
327 | insn->sib.nbytes = 1; | ||
328 | } | ||
329 | } | ||
330 | insn->sib.got = 1; | ||
331 | |||
332 | err_out: | ||
333 | return; | ||
334 | } | ||
335 | |||
336 | |||
337 | /** | ||
338 | * insn_get_displacement() - Get the displacement of instruction | ||
339 | * @insn: &struct insn containing instruction | ||
340 | * | ||
341 | * If necessary, first collects the instruction up to and including the | ||
342 | * SIB byte. | ||
343 | * Displacement value is sign-expanded. | ||
344 | */ | ||
345 | void insn_get_displacement(struct insn *insn) | ||
346 | { | ||
347 | insn_byte_t mod, rm, base; | ||
348 | |||
349 | if (insn->displacement.got) | ||
350 | return; | ||
351 | if (!insn->sib.got) | ||
352 | insn_get_sib(insn); | ||
353 | if (insn->modrm.nbytes) { | ||
354 | /* | ||
355 | * Interpreting the modrm byte: | ||
356 | * mod = 00 - no displacement fields (exceptions below) | ||
357 | * mod = 01 - 1-byte displacement field | ||
358 | * mod = 10 - displacement field is 4 bytes, or 2 bytes if | ||
359 | * address size = 2 (0x67 prefix in 32-bit mode) | ||
360 | * mod = 11 - no memory operand | ||
361 | * | ||
362 | * If address size = 2... | ||
363 | * mod = 00, r/m = 110 - displacement field is 2 bytes | ||
364 | * | ||
365 | * If address size != 2... | ||
366 | * mod != 11, r/m = 100 - SIB byte exists | ||
367 | * mod = 00, SIB base = 101 - displacement field is 4 bytes | ||
368 | * mod = 00, r/m = 101 - rip-relative addressing, displacement | ||
369 | * field is 4 bytes | ||
370 | */ | ||
371 | mod = X86_MODRM_MOD(insn->modrm.value); | ||
372 | rm = X86_MODRM_RM(insn->modrm.value); | ||
373 | base = X86_SIB_BASE(insn->sib.value); | ||
374 | if (mod == 3) | ||
375 | goto out; | ||
376 | if (mod == 1) { | ||
377 | insn->displacement.value = get_next(char, insn); | ||
378 | insn->displacement.nbytes = 1; | ||
379 | } else if (insn->addr_bytes == 2) { | ||
380 | if ((mod == 0 && rm == 6) || mod == 2) { | ||
381 | insn->displacement.value = | ||
382 | get_next(short, insn); | ||
383 | insn->displacement.nbytes = 2; | ||
384 | } | ||
385 | } else { | ||
386 | if ((mod == 0 && rm == 5) || mod == 2 || | ||
387 | (mod == 0 && base == 5)) { | ||
388 | insn->displacement.value = get_next(int, insn); | ||
389 | insn->displacement.nbytes = 4; | ||
390 | } | ||
391 | } | ||
392 | } | ||
393 | out: | ||
394 | insn->displacement.got = 1; | ||
395 | |||
396 | err_out: | ||
397 | return; | ||
398 | } | ||
399 | |||
400 | /* Decode moffset16/32/64. Return 0 if failed */ | ||
401 | static int __get_moffset(struct insn *insn) | ||
402 | { | ||
403 | switch (insn->addr_bytes) { | ||
404 | case 2: | ||
405 | insn->moffset1.value = get_next(short, insn); | ||
406 | insn->moffset1.nbytes = 2; | ||
407 | break; | ||
408 | case 4: | ||
409 | insn->moffset1.value = get_next(int, insn); | ||
410 | insn->moffset1.nbytes = 4; | ||
411 | break; | ||
412 | case 8: | ||
413 | insn->moffset1.value = get_next(int, insn); | ||
414 | insn->moffset1.nbytes = 4; | ||
415 | insn->moffset2.value = get_next(int, insn); | ||
416 | insn->moffset2.nbytes = 4; | ||
417 | break; | ||
418 | default: /* opnd_bytes must be modified manually */ | ||
419 | goto err_out; | ||
420 | } | ||
421 | insn->moffset1.got = insn->moffset2.got = 1; | ||
422 | |||
423 | return 1; | ||
424 | |||
425 | err_out: | ||
426 | return 0; | ||
427 | } | ||
428 | |||
429 | /* Decode imm v32(Iz). Return 0 if failed */ | ||
430 | static int __get_immv32(struct insn *insn) | ||
431 | { | ||
432 | switch (insn->opnd_bytes) { | ||
433 | case 2: | ||
434 | insn->immediate.value = get_next(short, insn); | ||
435 | insn->immediate.nbytes = 2; | ||
436 | break; | ||
437 | case 4: | ||
438 | case 8: | ||
439 | insn->immediate.value = get_next(int, insn); | ||
440 | insn->immediate.nbytes = 4; | ||
441 | break; | ||
442 | default: /* opnd_bytes must be modified manually */ | ||
443 | goto err_out; | ||
444 | } | ||
445 | |||
446 | return 1; | ||
447 | |||
448 | err_out: | ||
449 | return 0; | ||
450 | } | ||
451 | |||
452 | /* Decode imm v64(Iv/Ov). Return 0 if failed */ | ||
453 | static int __get_immv(struct insn *insn) | ||
454 | { | ||
455 | switch (insn->opnd_bytes) { | ||
456 | case 2: | ||
457 | insn->immediate1.value = get_next(short, insn); | ||
458 | insn->immediate1.nbytes = 2; | ||
459 | break; | ||
460 | case 4: | ||
461 | insn->immediate1.value = get_next(int, insn); | ||
462 | insn->immediate1.nbytes = 4; | ||
463 | break; | ||
464 | case 8: | ||
465 | insn->immediate1.value = get_next(int, insn); | ||
466 | insn->immediate1.nbytes = 4; | ||
467 | insn->immediate2.value = get_next(int, insn); | ||
468 | insn->immediate2.nbytes = 4; | ||
469 | break; | ||
470 | default: /* opnd_bytes must be modified manually */ | ||
471 | goto err_out; | ||
472 | } | ||
473 | insn->immediate1.got = insn->immediate2.got = 1; | ||
474 | |||
475 | return 1; | ||
476 | err_out: | ||
477 | return 0; | ||
478 | } | ||
479 | |||
480 | /* Decode ptr16:16/32(Ap) */ | ||
481 | static int __get_immptr(struct insn *insn) | ||
482 | { | ||
483 | switch (insn->opnd_bytes) { | ||
484 | case 2: | ||
485 | insn->immediate1.value = get_next(short, insn); | ||
486 | insn->immediate1.nbytes = 2; | ||
487 | break; | ||
488 | case 4: | ||
489 | insn->immediate1.value = get_next(int, insn); | ||
490 | insn->immediate1.nbytes = 4; | ||
491 | break; | ||
492 | case 8: | ||
493 | /* ptr16:64 does not exist (no segment) */ | ||
494 | return 0; | ||
495 | default: /* opnd_bytes must be modified manually */ | ||
496 | goto err_out; | ||
497 | } | ||
498 | insn->immediate2.value = get_next(unsigned short, insn); | ||
499 | insn->immediate2.nbytes = 2; | ||
500 | insn->immediate1.got = insn->immediate2.got = 1; | ||
501 | |||
502 | return 1; | ||
503 | err_out: | ||
504 | return 0; | ||
505 | } | ||
506 | |||
507 | /** | ||
508 | * insn_get_immediate() - Get the immediates of instruction | ||
509 | * @insn: &struct insn containing instruction | ||
510 | * | ||
511 | * If necessary, first collects the instruction up to and including the | ||
512 | * displacement bytes. | ||
513 | * Basically, most immediates are sign-expanded. The unsigned value can be | ||
514 | * obtained by bit masking with ((1 << (nbytes * 8)) - 1). | ||
515 | */ | ||
516 | void insn_get_immediate(struct insn *insn) | ||
517 | { | ||
518 | if (insn->immediate.got) | ||
519 | return; | ||
520 | if (!insn->displacement.got) | ||
521 | insn_get_displacement(insn); | ||
522 | |||
523 | if (inat_has_moffset(insn->attr)) { | ||
524 | if (!__get_moffset(insn)) | ||
525 | goto err_out; | ||
526 | goto done; | ||
527 | } | ||
528 | |||
529 | if (!inat_has_immediate(insn->attr)) | ||
530 | /* no immediates */ | ||
531 | goto done; | ||
532 | |||
533 | switch (inat_immediate_size(insn->attr)) { | ||
534 | case INAT_IMM_BYTE: | ||
535 | insn->immediate.value = get_next(char, insn); | ||
536 | insn->immediate.nbytes = 1; | ||
537 | break; | ||
538 | case INAT_IMM_WORD: | ||
539 | insn->immediate.value = get_next(short, insn); | ||
540 | insn->immediate.nbytes = 2; | ||
541 | break; | ||
542 | case INAT_IMM_DWORD: | ||
543 | insn->immediate.value = get_next(int, insn); | ||
544 | insn->immediate.nbytes = 4; | ||
545 | break; | ||
546 | case INAT_IMM_QWORD: | ||
547 | insn->immediate1.value = get_next(int, insn); | ||
548 | insn->immediate1.nbytes = 4; | ||
549 | insn->immediate2.value = get_next(int, insn); | ||
550 | insn->immediate2.nbytes = 4; | ||
551 | break; | ||
552 | case INAT_IMM_PTR: | ||
553 | if (!__get_immptr(insn)) | ||
554 | goto err_out; | ||
555 | break; | ||
556 | case INAT_IMM_VWORD32: | ||
557 | if (!__get_immv32(insn)) | ||
558 | goto err_out; | ||
559 | break; | ||
560 | case INAT_IMM_VWORD: | ||
561 | if (!__get_immv(insn)) | ||
562 | goto err_out; | ||
563 | break; | ||
564 | default: | ||
565 | /* Here, insn must have an immediate, but failed */ | ||
566 | goto err_out; | ||
567 | } | ||
568 | if (inat_has_second_immediate(insn->attr)) { | ||
569 | insn->immediate2.value = get_next(char, insn); | ||
570 | insn->immediate2.nbytes = 1; | ||
571 | } | ||
572 | done: | ||
573 | insn->immediate.got = 1; | ||
574 | |||
575 | err_out: | ||
576 | return; | ||
577 | } | ||
578 | |||
579 | /** | ||
580 | * insn_get_length() - Get the length of instruction | ||
581 | * @insn: &struct insn containing instruction | ||
582 | * | ||
583 | * If necessary, first collects the instruction up to and including the | ||
584 | * immediate bytes. | ||
585 | */ | ||
586 | void insn_get_length(struct insn *insn) | ||
587 | { | ||
588 | if (insn->length) | ||
589 | return; | ||
590 | if (!insn->immediate.got) | ||
591 | insn_get_immediate(insn); | ||
592 | insn->length = (unsigned char)((unsigned long)insn->next_byte | ||
593 | - (unsigned long)insn->kaddr); | ||
594 | } | ||
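
Taken together, insn_init() plus the insn_get_*() functions give an on-demand decoder over a raw byte buffer. A minimal, self-contained usage sketch follows; the example bytes 48 89 e5 ("mov %rsp,%rbp" in 64-bit mode) and the printing are illustrative only:

    #include <stdio.h>
    #include "insn.h"

    int main(void)
    {
            const unsigned char buf[] = { 0x48, 0x89, 0xe5 };
            struct insn insn;

            insn_init(&insn, buf, sizeof(buf), 1);  /* 1 = 64-bit mode */
            insn_get_length(&insn);                 /* pulls in prefixes..immediates */

            if (!insn_complete(&insn))
                    return 1;
            printf("length %d, opcode 0x%02x, modrm 0x%02x\n",
                   insn.length, insn.opcode.bytes[0],
                   (unsigned char)insn.modrm.value);
            return 0;
    }
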
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h new file mode 100644 index 000000000000..dd12da0f4593 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/insn.h | |||
@@ -0,0 +1,201 @@ | |||
1 | #ifndef _ASM_X86_INSN_H | ||
2 | #define _ASM_X86_INSN_H | ||
3 | /* | ||
4 | * x86 instruction analysis | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
19 | * | ||
20 | * Copyright (C) IBM Corporation, 2009 | ||
21 | */ | ||
22 | |||
23 | /* insn_attr_t is defined in inat.h */ | ||
24 | #include "inat.h" | ||
25 | |||
26 | struct insn_field { | ||
27 | union { | ||
28 | insn_value_t value; | ||
29 | insn_byte_t bytes[4]; | ||
30 | }; | ||
31 | /* !0 if we've run insn_get_xxx() for this field */ | ||
32 | unsigned char got; | ||
33 | unsigned char nbytes; | ||
34 | }; | ||
35 | |||
36 | struct insn { | ||
37 | struct insn_field prefixes; /* | ||
38 | * Prefixes | ||
39 | * prefixes.bytes[3]: last prefix | ||
40 | */ | ||
41 | struct insn_field rex_prefix; /* REX prefix */ | ||
42 | struct insn_field vex_prefix; /* VEX prefix */ | ||
43 | struct insn_field opcode; /* | ||
44 | * opcode.bytes[0]: opcode1 | ||
45 | * opcode.bytes[1]: opcode2 | ||
46 | * opcode.bytes[2]: opcode3 | ||
47 | */ | ||
48 | struct insn_field modrm; | ||
49 | struct insn_field sib; | ||
50 | struct insn_field displacement; | ||
51 | union { | ||
52 | struct insn_field immediate; | ||
53 | struct insn_field moffset1; /* for 64bit MOV */ | ||
54 | struct insn_field immediate1; /* for 64bit imm or off16/32 */ | ||
55 | }; | ||
56 | union { | ||
57 | struct insn_field moffset2; /* for 64bit MOV */ | ||
58 | struct insn_field immediate2; /* for 64bit imm or seg16 */ | ||
59 | }; | ||
60 | |||
61 | insn_attr_t attr; | ||
62 | unsigned char opnd_bytes; | ||
63 | unsigned char addr_bytes; | ||
64 | unsigned char length; | ||
65 | unsigned char x86_64; | ||
66 | |||
67 | const insn_byte_t *kaddr; /* kernel address of insn to analyze */ | ||
68 | const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */ | ||
69 | const insn_byte_t *next_byte; | ||
70 | }; | ||
71 | |||
72 | #define MAX_INSN_SIZE 15 | ||
73 | |||
74 | #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) | ||
75 | #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) | ||
76 | #define X86_MODRM_RM(modrm) ((modrm) & 0x07) | ||
77 | |||
78 | #define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6) | ||
79 | #define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3) | ||
80 | #define X86_SIB_BASE(sib) ((sib) & 0x07) | ||
81 | |||
82 | #define X86_REX_W(rex) ((rex) & 8) | ||
83 | #define X86_REX_R(rex) ((rex) & 4) | ||
84 | #define X86_REX_X(rex) ((rex) & 2) | ||
85 | #define X86_REX_B(rex) ((rex) & 1) | ||
86 | |||
87 | /* VEX bit flags */ | ||
88 | #define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */ | ||
89 | #define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */ | ||
90 | #define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */ | ||
91 | #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ | ||
92 | #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ | ||
93 | /* VEX bit fields */ | ||
94 | #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ | ||
95 | #define X86_VEX2_M 1 /* VEX2.M always 1 */ | ||
96 | #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ | ||
97 | #define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */ | ||
98 | #define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */ | ||
99 | |||
100 | extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64); | ||
101 | extern void insn_get_prefixes(struct insn *insn); | ||
102 | extern void insn_get_opcode(struct insn *insn); | ||
103 | extern void insn_get_modrm(struct insn *insn); | ||
104 | extern void insn_get_sib(struct insn *insn); | ||
105 | extern void insn_get_displacement(struct insn *insn); | ||
106 | extern void insn_get_immediate(struct insn *insn); | ||
107 | extern void insn_get_length(struct insn *insn); | ||
108 | |||
109 | /* Attribute will be determined after getting ModRM (for opcode groups) */ | ||
110 | static inline void insn_get_attribute(struct insn *insn) | ||
111 | { | ||
112 | insn_get_modrm(insn); | ||
113 | } | ||
114 | |||
115 | /* Instruction uses RIP-relative addressing */ | ||
116 | extern int insn_rip_relative(struct insn *insn); | ||
117 | |||
118 | /* Init insn for kernel text */ | ||
119 | static inline void kernel_insn_init(struct insn *insn, | ||
120 | const void *kaddr, int buf_len) | ||
121 | { | ||
122 | #ifdef CONFIG_X86_64 | ||
123 | insn_init(insn, kaddr, buf_len, 1); | ||
124 | #else /* CONFIG_X86_32 */ | ||
125 | insn_init(insn, kaddr, buf_len, 0); | ||
126 | #endif | ||
127 | } | ||
128 | |||
129 | static inline int insn_is_avx(struct insn *insn) | ||
130 | { | ||
131 | if (!insn->prefixes.got) | ||
132 | insn_get_prefixes(insn); | ||
133 | return (insn->vex_prefix.value != 0); | ||
134 | } | ||
135 | |||
136 | /* Ensure this instruction is decoded completely */ | ||
137 | static inline int insn_complete(struct insn *insn) | ||
138 | { | ||
139 | return insn->opcode.got && insn->modrm.got && insn->sib.got && | ||
140 | insn->displacement.got && insn->immediate.got; | ||
141 | } | ||
142 | |||
143 | static inline insn_byte_t insn_vex_m_bits(struct insn *insn) | ||
144 | { | ||
145 | if (insn->vex_prefix.nbytes == 2) /* 2-byte VEX */ | ||
146 | return X86_VEX2_M; | ||
147 | else | ||
148 | return X86_VEX3_M(insn->vex_prefix.bytes[1]); | ||
149 | } | ||
150 | |||
151 | static inline insn_byte_t insn_vex_p_bits(struct insn *insn) | ||
152 | { | ||
153 | if (insn->vex_prefix.nbytes == 2) /* 2-byte VEX */ | ||
154 | return X86_VEX_P(insn->vex_prefix.bytes[1]); | ||
155 | else | ||
156 | return X86_VEX_P(insn->vex_prefix.bytes[2]); | ||
157 | } | ||
158 | |||
159 | /* Get the last prefix id from last prefix or VEX prefix */ | ||
160 | static inline int insn_last_prefix_id(struct insn *insn) | ||
161 | { | ||
162 | if (insn_is_avx(insn)) | ||
163 | return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */ | ||
164 | |||
165 | if (insn->prefixes.bytes[3]) | ||
166 | return inat_get_last_prefix_id(insn->prefixes.bytes[3]); | ||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | |||
171 | /* Offset of each field from kaddr */ | ||
172 | static inline int insn_offset_rex_prefix(struct insn *insn) | ||
173 | { | ||
174 | return insn->prefixes.nbytes; | ||
175 | } | ||
176 | static inline int insn_offset_vex_prefix(struct insn *insn) | ||
177 | { | ||
178 | return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes; | ||
179 | } | ||
180 | static inline int insn_offset_opcode(struct insn *insn) | ||
181 | { | ||
182 | return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes; | ||
183 | } | ||
184 | static inline int insn_offset_modrm(struct insn *insn) | ||
185 | { | ||
186 | return insn_offset_opcode(insn) + insn->opcode.nbytes; | ||
187 | } | ||
188 | static inline int insn_offset_sib(struct insn *insn) | ||
189 | { | ||
190 | return insn_offset_modrm(insn) + insn->modrm.nbytes; | ||
191 | } | ||
192 | static inline int insn_offset_displacement(struct insn *insn) | ||
193 | { | ||
194 | return insn_offset_sib(insn) + insn->sib.nbytes; | ||
195 | } | ||
196 | static inline int insn_offset_immediate(struct insn *insn) | ||
197 | { | ||
198 | return insn_offset_displacement(insn) + insn->displacement.nbytes; | ||
199 | } | ||
200 | |||
201 | #endif /* _ASM_X86_INSN_H */ | ||
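
The insn_offset_*() helpers at the end of this header simply accumulate the nbytes of all preceding fields. A worked example, assuming an already-decoded instruction 66 0f 58 c1 (a 0x66 prefix, two opcode bytes, one ModRM byte; chosen only for illustration):

    /* prefixes.nbytes   = 1 (66)     -> insn_offset_rex_prefix() == 1
     * rex_prefix.nbytes = 0          -> insn_offset_vex_prefix() == 1
     * vex_prefix.nbytes = 0          -> insn_offset_opcode()     == 1
     * opcode.nbytes     = 2 (0f 58)  -> insn_offset_modrm()      == 3
     * modrm.nbytes      = 1 (c1)     -> insn_offset_sib()        == 4
     */
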
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c new file mode 100644 index 000000000000..22ba50224319 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | |||
@@ -0,0 +1,2345 @@ | |||
1 | /* | ||
2 | * intel_pt_decoder.c: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef _GNU_SOURCE | ||
17 | #define _GNU_SOURCE | ||
18 | #endif | ||
19 | #include <stdlib.h> | ||
20 | #include <stdbool.h> | ||
21 | #include <string.h> | ||
22 | #include <errno.h> | ||
23 | #include <stdint.h> | ||
24 | #include <inttypes.h> | ||
25 | |||
26 | #include "../cache.h" | ||
27 | #include "../util.h" | ||
28 | |||
29 | #include "intel-pt-insn-decoder.h" | ||
30 | #include "intel-pt-pkt-decoder.h" | ||
31 | #include "intel-pt-decoder.h" | ||
32 | #include "intel-pt-log.h" | ||
33 | |||
34 | #define INTEL_PT_BLK_SIZE 1024 | ||
35 | |||
36 | #define BIT63 (((uint64_t)1 << 63)) | ||
37 | |||
38 | #define INTEL_PT_RETURN 1 | ||
39 | |||
40 | /* Maximum number of loops with no packets consumed, i.e. stuck in a loop */ | ||
41 | #define INTEL_PT_MAX_LOOPS 10000 | ||
42 | |||
43 | struct intel_pt_blk { | ||
44 | struct intel_pt_blk *prev; | ||
45 | uint64_t ip[INTEL_PT_BLK_SIZE]; | ||
46 | }; | ||
47 | |||
48 | struct intel_pt_stack { | ||
49 | struct intel_pt_blk *blk; | ||
50 | struct intel_pt_blk *spare; | ||
51 | int pos; | ||
52 | }; | ||
53 | |||
54 | enum intel_pt_pkt_state { | ||
55 | INTEL_PT_STATE_NO_PSB, | ||
56 | INTEL_PT_STATE_NO_IP, | ||
57 | INTEL_PT_STATE_ERR_RESYNC, | ||
58 | INTEL_PT_STATE_IN_SYNC, | ||
59 | INTEL_PT_STATE_TNT, | ||
60 | INTEL_PT_STATE_TIP, | ||
61 | INTEL_PT_STATE_TIP_PGD, | ||
62 | INTEL_PT_STATE_FUP, | ||
63 | INTEL_PT_STATE_FUP_NO_TIP, | ||
64 | }; | ||
65 | |||
66 | #ifdef INTEL_PT_STRICT | ||
67 | #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB | ||
68 | #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB | ||
69 | #define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB | ||
70 | #define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB | ||
71 | #else | ||
72 | #define INTEL_PT_STATE_ERR1 (decoder->pkt_state) | ||
73 | #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP | ||
74 | #define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC | ||
75 | #define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC | ||
76 | #endif | ||
77 | |||
78 | struct intel_pt_decoder { | ||
79 | int (*get_trace)(struct intel_pt_buffer *buffer, void *data); | ||
80 | int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, | ||
81 | uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, | ||
82 | uint64_t max_insn_cnt, void *data); | ||
83 | void *data; | ||
84 | struct intel_pt_state state; | ||
85 | const unsigned char *buf; | ||
86 | size_t len; | ||
87 | bool return_compression; | ||
88 | bool mtc_insn; | ||
89 | bool pge; | ||
90 | bool have_tma; | ||
91 | bool have_cyc; | ||
92 | uint64_t pos; | ||
93 | uint64_t last_ip; | ||
94 | uint64_t ip; | ||
95 | uint64_t cr3; | ||
96 | uint64_t timestamp; | ||
97 | uint64_t tsc_timestamp; | ||
98 | uint64_t ref_timestamp; | ||
99 | uint64_t ret_addr; | ||
100 | uint64_t ctc_timestamp; | ||
101 | uint64_t ctc_delta; | ||
102 | uint64_t cycle_cnt; | ||
103 | uint64_t cyc_ref_timestamp; | ||
104 | uint32_t last_mtc; | ||
105 | uint32_t tsc_ctc_ratio_n; | ||
106 | uint32_t tsc_ctc_ratio_d; | ||
107 | uint32_t tsc_ctc_mult; | ||
108 | uint32_t tsc_slip; | ||
109 | uint32_t ctc_rem_mask; | ||
110 | int mtc_shift; | ||
111 | struct intel_pt_stack stack; | ||
112 | enum intel_pt_pkt_state pkt_state; | ||
113 | struct intel_pt_pkt packet; | ||
114 | struct intel_pt_pkt tnt; | ||
115 | int pkt_step; | ||
116 | int pkt_len; | ||
117 | int last_packet_type; | ||
118 | unsigned int cbr; | ||
119 | unsigned int max_non_turbo_ratio; | ||
120 | double max_non_turbo_ratio_fp; | ||
121 | double cbr_cyc_to_tsc; | ||
122 | double calc_cyc_to_tsc; | ||
123 | bool have_calc_cyc_to_tsc; | ||
124 | int exec_mode; | ||
125 | unsigned int insn_bytes; | ||
126 | uint64_t sign_bit; | ||
127 | uint64_t sign_bits; | ||
128 | uint64_t period; | ||
129 | enum intel_pt_period_type period_type; | ||
130 | uint64_t tot_insn_cnt; | ||
131 | uint64_t period_insn_cnt; | ||
132 | uint64_t period_mask; | ||
133 | uint64_t period_ticks; | ||
134 | uint64_t last_masked_timestamp; | ||
135 | bool continuous_period; | ||
136 | bool overflow; | ||
137 | bool set_fup_tx_flags; | ||
138 | unsigned int fup_tx_flags; | ||
139 | unsigned int tx_flags; | ||
140 | uint64_t timestamp_insn_cnt; | ||
141 | uint64_t stuck_ip; | ||
142 | int no_progress; | ||
143 | int stuck_ip_prd; | ||
144 | int stuck_ip_cnt; | ||
145 | const unsigned char *next_buf; | ||
146 | size_t next_len; | ||
147 | unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ]; | ||
148 | }; | ||
149 | |||
150 | static uint64_t intel_pt_lower_power_of_2(uint64_t x) | ||
151 | { | ||
152 | int i; | ||
153 | |||
154 | for (i = 0; x != 1; i++) | ||
155 | x >>= 1; | ||
156 | |||
157 | return x << i; | ||
158 | } | ||
159 | |||
160 | static void intel_pt_setup_period(struct intel_pt_decoder *decoder) | ||
161 | { | ||
162 | if (decoder->period_type == INTEL_PT_PERIOD_TICKS) { | ||
163 | uint64_t period; | ||
164 | |||
165 | period = intel_pt_lower_power_of_2(decoder->period); | ||
166 | decoder->period_mask = ~(period - 1); | ||
167 | decoder->period_ticks = period; | ||
168 | } | ||
169 | } | ||
170 | |||
171 | static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d) | ||
172 | { | ||
173 | if (!d) | ||
174 | return 0; | ||
175 | return (t / d) * n + ((t % d) * n) / d; | ||
176 | } | ||
177 | |||
178 | struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) | ||
179 | { | ||
180 | struct intel_pt_decoder *decoder; | ||
181 | |||
182 | if (!params->get_trace || !params->walk_insn) | ||
183 | return NULL; | ||
184 | |||
185 | decoder = zalloc(sizeof(struct intel_pt_decoder)); | ||
186 | if (!decoder) | ||
187 | return NULL; | ||
188 | |||
189 | decoder->get_trace = params->get_trace; | ||
190 | decoder->walk_insn = params->walk_insn; | ||
191 | decoder->data = params->data; | ||
192 | decoder->return_compression = params->return_compression; | ||
193 | |||
194 | decoder->sign_bit = (uint64_t)1 << 47; | ||
195 | decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); | ||
196 | |||
197 | decoder->period = params->period; | ||
198 | decoder->period_type = params->period_type; | ||
199 | |||
200 | decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; | ||
201 | decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; | ||
202 | |||
203 | intel_pt_setup_period(decoder); | ||
204 | |||
205 | decoder->mtc_shift = params->mtc_period; | ||
206 | decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1; | ||
207 | |||
208 | decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n; | ||
209 | decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d; | ||
210 | |||
211 | if (!decoder->tsc_ctc_ratio_n) | ||
212 | decoder->tsc_ctc_ratio_d = 0; | ||
213 | |||
214 | if (decoder->tsc_ctc_ratio_d) { | ||
215 | if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) | ||
216 | decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / | ||
217 | decoder->tsc_ctc_ratio_d; | ||
218 | |||
219 | /* | ||
220 | * Allow for timestamps appearing to go backwards because a TSC | ||
221 | * packet has slipped past an MTC packet, so allow 2 MTC ticks | ||
222 | * or ... | ||
223 | */ | ||
224 | decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, | ||
225 | decoder->tsc_ctc_ratio_n, | ||
226 | decoder->tsc_ctc_ratio_d); | ||
227 | } | ||
228 | /* ... or 0x100 paranoia */ | ||
229 | if (decoder->tsc_slip < 0x100) | ||
230 | decoder->tsc_slip = 0x100; | ||
231 | |||
232 | intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); | ||
233 | intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); | ||
234 | intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d); | ||
235 | intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult); | ||
236 | intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip); | ||
237 | |||
238 | return decoder; | ||
239 | } | ||
240 | |||
241 | static void intel_pt_pop_blk(struct intel_pt_stack *stack) | ||
242 | { | ||
243 | struct intel_pt_blk *blk = stack->blk; | ||
244 | |||
245 | stack->blk = blk->prev; | ||
246 | if (!stack->spare) | ||
247 | stack->spare = blk; | ||
248 | else | ||
249 | free(blk); | ||
250 | } | ||
251 | |||
252 | static uint64_t intel_pt_pop(struct intel_pt_stack *stack) | ||
253 | { | ||
254 | if (!stack->pos) { | ||
255 | if (!stack->blk) | ||
256 | return 0; | ||
257 | intel_pt_pop_blk(stack); | ||
258 | if (!stack->blk) | ||
259 | return 0; | ||
260 | stack->pos = INTEL_PT_BLK_SIZE; | ||
261 | } | ||
262 | return stack->blk->ip[--stack->pos]; | ||
263 | } | ||
264 | |||
265 | static int intel_pt_alloc_blk(struct intel_pt_stack *stack) | ||
266 | { | ||
267 | struct intel_pt_blk *blk; | ||
268 | |||
269 | if (stack->spare) { | ||
270 | blk = stack->spare; | ||
271 | stack->spare = NULL; | ||
272 | } else { | ||
273 | blk = malloc(sizeof(struct intel_pt_blk)); | ||
274 | if (!blk) | ||
275 | return -ENOMEM; | ||
276 | } | ||
277 | |||
278 | blk->prev = stack->blk; | ||
279 | stack->blk = blk; | ||
280 | stack->pos = 0; | ||
281 | return 0; | ||
282 | } | ||
283 | |||
284 | static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip) | ||
285 | { | ||
286 | int err; | ||
287 | |||
288 | if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) { | ||
289 | err = intel_pt_alloc_blk(stack); | ||
290 | if (err) | ||
291 | return err; | ||
292 | } | ||
293 | |||
294 | stack->blk->ip[stack->pos++] = ip; | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | static void intel_pt_clear_stack(struct intel_pt_stack *stack) | ||
299 | { | ||
300 | while (stack->blk) | ||
301 | intel_pt_pop_blk(stack); | ||
302 | stack->pos = 0; | ||
303 | } | ||
304 | |||
305 | static void intel_pt_free_stack(struct intel_pt_stack *stack) | ||
306 | { | ||
307 | intel_pt_clear_stack(stack); | ||
308 | zfree(&stack->blk); | ||
309 | zfree(&stack->spare); | ||
310 | } | ||
311 | |||
312 | void intel_pt_decoder_free(struct intel_pt_decoder *decoder) | ||
313 | { | ||
314 | intel_pt_free_stack(&decoder->stack); | ||
315 | free(decoder); | ||
316 | } | ||
317 | |||
318 | static int intel_pt_ext_err(int code) | ||
319 | { | ||
320 | switch (code) { | ||
321 | case -ENOMEM: | ||
322 | return INTEL_PT_ERR_NOMEM; | ||
323 | case -ENOSYS: | ||
324 | return INTEL_PT_ERR_INTERN; | ||
325 | case -EBADMSG: | ||
326 | return INTEL_PT_ERR_BADPKT; | ||
327 | case -ENODATA: | ||
328 | return INTEL_PT_ERR_NODATA; | ||
329 | case -EILSEQ: | ||
330 | return INTEL_PT_ERR_NOINSN; | ||
331 | case -ENOENT: | ||
332 | return INTEL_PT_ERR_MISMAT; | ||
333 | case -EOVERFLOW: | ||
334 | return INTEL_PT_ERR_OVR; | ||
335 | case -ENOSPC: | ||
336 | return INTEL_PT_ERR_LOST; | ||
337 | case -ELOOP: | ||
338 | return INTEL_PT_ERR_NELOOP; | ||
339 | default: | ||
340 | return INTEL_PT_ERR_UNK; | ||
341 | } | ||
342 | } | ||
343 | |||
344 | static const char *intel_pt_err_msgs[] = { | ||
345 | [INTEL_PT_ERR_NOMEM] = "Memory allocation failed", | ||
346 | [INTEL_PT_ERR_INTERN] = "Internal error", | ||
347 | [INTEL_PT_ERR_BADPKT] = "Bad packet", | ||
348 | [INTEL_PT_ERR_NODATA] = "No more data", | ||
349 | [INTEL_PT_ERR_NOINSN] = "Failed to get instruction", | ||
350 | [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction", | ||
351 | [INTEL_PT_ERR_OVR] = "Overflow packet", | ||
352 | [INTEL_PT_ERR_LOST] = "Lost trace data", | ||
353 | [INTEL_PT_ERR_UNK] = "Unknown error!", | ||
354 | [INTEL_PT_ERR_NELOOP] = "Never-ending loop", | ||
355 | }; | ||
356 | |||
357 | int intel_pt__strerror(int code, char *buf, size_t buflen) | ||
358 | { | ||
359 | if (code < 1 || code > INTEL_PT_ERR_MAX) | ||
360 | code = INTEL_PT_ERR_UNK; | ||
361 | strlcpy(buf, intel_pt_err_msgs[code], buflen); | ||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, | ||
366 | const struct intel_pt_pkt *packet, | ||
367 | uint64_t last_ip) | ||
368 | { | ||
369 | uint64_t ip; | ||
370 | |||
371 | switch (packet->count) { | ||
372 | case 2: | ||
373 | ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | | ||
374 | packet->payload; | ||
375 | break; | ||
376 | case 4: | ||
377 | ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | | ||
378 | packet->payload; | ||
379 | break; | ||
380 | case 6: | ||
381 | ip = packet->payload; | ||
382 | break; | ||
383 | default: | ||
384 | return 0; | ||
385 | } | ||
386 | |||
387 | if (ip & decoder->sign_bit) | ||
388 | return ip | decoder->sign_bits; | ||
389 | |||
390 | return ip; | ||
391 | } | ||
392 | |||
393 | static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) | ||
394 | { | ||
395 | decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, | ||
396 | decoder->last_ip); | ||
397 | } | ||
398 | |||
399 | static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) | ||
400 | { | ||
401 | intel_pt_set_last_ip(decoder); | ||
402 | decoder->ip = decoder->last_ip; | ||
403 | } | ||
404 | |||
405 | static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder) | ||
406 | { | ||
407 | intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos, | ||
408 | decoder->buf); | ||
409 | } | ||
410 | |||
411 | static int intel_pt_bug(struct intel_pt_decoder *decoder) | ||
412 | { | ||
413 | intel_pt_log("ERROR: Internal error\n"); | ||
414 | decoder->pkt_state = INTEL_PT_STATE_NO_PSB; | ||
415 | return -ENOSYS; | ||
416 | } | ||
417 | |||
418 | static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder) | ||
419 | { | ||
420 | decoder->tx_flags = 0; | ||
421 | } | ||
422 | |||
423 | static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder) | ||
424 | { | ||
425 | decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX; | ||
426 | } | ||
427 | |||
428 | static int intel_pt_bad_packet(struct intel_pt_decoder *decoder) | ||
429 | { | ||
430 | intel_pt_clear_tx_flags(decoder); | ||
431 | decoder->have_tma = false; | ||
432 | decoder->pkt_len = 1; | ||
433 | decoder->pkt_step = 1; | ||
434 | intel_pt_decoder_log_packet(decoder); | ||
435 | if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) { | ||
436 | intel_pt_log("ERROR: Bad packet\n"); | ||
437 | decoder->pkt_state = INTEL_PT_STATE_ERR1; | ||
438 | } | ||
439 | return -EBADMSG; | ||
440 | } | ||
441 | |||
442 | static int intel_pt_get_data(struct intel_pt_decoder *decoder) | ||
443 | { | ||
444 | struct intel_pt_buffer buffer = { .buf = 0, }; | ||
445 | int ret; | ||
446 | |||
447 | decoder->pkt_step = 0; | ||
448 | |||
449 | intel_pt_log("Getting more data\n"); | ||
450 | ret = decoder->get_trace(&buffer, decoder->data); | ||
451 | if (ret) | ||
452 | return ret; | ||
453 | decoder->buf = buffer.buf; | ||
454 | decoder->len = buffer.len; | ||
455 | if (!decoder->len) { | ||
456 | intel_pt_log("No more data\n"); | ||
457 | return -ENODATA; | ||
458 | } | ||
459 | if (!buffer.consecutive) { | ||
460 | decoder->ip = 0; | ||
461 | decoder->pkt_state = INTEL_PT_STATE_NO_PSB; | ||
462 | decoder->ref_timestamp = buffer.ref_timestamp; | ||
463 | decoder->timestamp = 0; | ||
464 | decoder->have_tma = false; | ||
465 | decoder->state.trace_nr = buffer.trace_nr; | ||
466 | intel_pt_log("Reference timestamp 0x%" PRIx64 "\n", | ||
467 | decoder->ref_timestamp); | ||
468 | return -ENOLINK; | ||
469 | } | ||
470 | |||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static int intel_pt_get_next_data(struct intel_pt_decoder *decoder) | ||
475 | { | ||
476 | if (!decoder->next_buf) | ||
477 | return intel_pt_get_data(decoder); | ||
478 | |||
479 | decoder->buf = decoder->next_buf; | ||
480 | decoder->len = decoder->next_len; | ||
481 | decoder->next_buf = 0; | ||
482 | decoder->next_len = 0; | ||
483 | return 0; | ||
484 | } | ||
485 | |||
486 | static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder) | ||
487 | { | ||
488 | unsigned char *buf = decoder->temp_buf; | ||
489 | size_t old_len, len, n; | ||
490 | int ret; | ||
491 | |||
492 | old_len = decoder->len; | ||
493 | len = decoder->len; | ||
494 | memcpy(buf, decoder->buf, len); | ||
495 | |||
496 | ret = intel_pt_get_data(decoder); | ||
497 | if (ret) { | ||
498 | decoder->pos += old_len; | ||
499 | return ret < 0 ? ret : -EINVAL; | ||
500 | } | ||
501 | |||
502 | n = INTEL_PT_PKT_MAX_SZ - len; | ||
503 | if (n > decoder->len) | ||
504 | n = decoder->len; | ||
505 | memcpy(buf + len, decoder->buf, n); | ||
506 | len += n; | ||
507 | |||
508 | ret = intel_pt_get_packet(buf, len, &decoder->packet); | ||
509 | if (ret < (int)old_len) { | ||
510 | decoder->next_buf = decoder->buf; | ||
511 | decoder->next_len = decoder->len; | ||
512 | decoder->buf = buf; | ||
513 | decoder->len = old_len; | ||
514 | return intel_pt_bad_packet(decoder); | ||
515 | } | ||
516 | |||
517 | decoder->next_buf = decoder->buf + (ret - old_len); | ||
518 | decoder->next_len = decoder->len - (ret - old_len); | ||
519 | |||
520 | decoder->buf = buf; | ||
521 | decoder->len = ret; | ||
522 | |||
523 | return ret; | ||
524 | } | ||
525 | |||
526 | struct intel_pt_pkt_info { | ||
527 | struct intel_pt_decoder *decoder; | ||
528 | struct intel_pt_pkt packet; | ||
529 | uint64_t pos; | ||
530 | int pkt_len; | ||
531 | int last_packet_type; | ||
532 | void *data; | ||
533 | }; | ||
534 | |||
535 | typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); | ||
536 | |||
537 | /* Look ahead at packets in the current buffer */ | ||
538 | static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder, | ||
539 | intel_pt_pkt_cb_t cb, void *data) | ||
540 | { | ||
541 | struct intel_pt_pkt_info pkt_info; | ||
542 | const unsigned char *buf = decoder->buf; | ||
543 | size_t len = decoder->len; | ||
544 | int ret; | ||
545 | |||
546 | pkt_info.decoder = decoder; | ||
547 | pkt_info.pos = decoder->pos; | ||
548 | pkt_info.pkt_len = decoder->pkt_step; | ||
549 | pkt_info.last_packet_type = decoder->last_packet_type; | ||
550 | pkt_info.data = data; | ||
551 | |||
552 | while (1) { | ||
553 | do { | ||
554 | pkt_info.pos += pkt_info.pkt_len; | ||
555 | buf += pkt_info.pkt_len; | ||
556 | len -= pkt_info.pkt_len; | ||
557 | |||
558 | if (!len) | ||
559 | return INTEL_PT_NEED_MORE_BYTES; | ||
560 | |||
561 | ret = intel_pt_get_packet(buf, len, &pkt_info.packet); | ||
562 | if (!ret) | ||
563 | return INTEL_PT_NEED_MORE_BYTES; | ||
564 | if (ret < 0) | ||
565 | return ret; | ||
566 | |||
567 | pkt_info.pkt_len = ret; | ||
568 | } while (pkt_info.packet.type == INTEL_PT_PAD); | ||
569 | |||
570 | ret = cb(&pkt_info); | ||
571 | if (ret) | ||
572 | return 0; | ||
573 | |||
574 | pkt_info.last_packet_type = pkt_info.packet.type; | ||
575 | } | ||
576 | } | ||
577 | |||
578 | struct intel_pt_calc_cyc_to_tsc_info { | ||
579 | uint64_t cycle_cnt; | ||
580 | unsigned int cbr; | ||
581 | uint32_t last_mtc; | ||
582 | uint64_t ctc_timestamp; | ||
583 | uint64_t ctc_delta; | ||
584 | uint64_t tsc_timestamp; | ||
585 | uint64_t timestamp; | ||
586 | bool have_tma; | ||
587 | bool from_mtc; | ||
588 | double cbr_cyc_to_tsc; | ||
589 | }; | ||
590 | |||
591 | static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info) | ||
592 | { | ||
593 | struct intel_pt_decoder *decoder = pkt_info->decoder; | ||
594 | struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data; | ||
595 | uint64_t timestamp; | ||
596 | double cyc_to_tsc; | ||
597 | unsigned int cbr; | ||
598 | uint32_t mtc, mtc_delta, ctc, fc, ctc_rem; | ||
599 | |||
600 | switch (pkt_info->packet.type) { | ||
601 | case INTEL_PT_TNT: | ||
602 | case INTEL_PT_TIP_PGE: | ||
603 | case INTEL_PT_TIP: | ||
604 | case INTEL_PT_FUP: | ||
605 | case INTEL_PT_PSB: | ||
606 | case INTEL_PT_PIP: | ||
607 | case INTEL_PT_MODE_EXEC: | ||
608 | case INTEL_PT_MODE_TSX: | ||
609 | case INTEL_PT_PSBEND: | ||
610 | case INTEL_PT_PAD: | ||
611 | case INTEL_PT_VMCS: | ||
612 | case INTEL_PT_MNT: | ||
613 | return 0; | ||
614 | |||
615 | case INTEL_PT_MTC: | ||
616 | if (!data->have_tma) | ||
617 | return 0; | ||
618 | |||
619 | mtc = pkt_info->packet.payload; | ||
620 | if (mtc > data->last_mtc) | ||
621 | mtc_delta = mtc - data->last_mtc; | ||
622 | else | ||
623 | mtc_delta = mtc + 256 - data->last_mtc; | ||
624 | data->ctc_delta += mtc_delta << decoder->mtc_shift; | ||
625 | data->last_mtc = mtc; | ||
626 | |||
627 | if (decoder->tsc_ctc_mult) { | ||
628 | timestamp = data->ctc_timestamp + | ||
629 | data->ctc_delta * decoder->tsc_ctc_mult; | ||
630 | } else { | ||
631 | timestamp = data->ctc_timestamp + | ||
632 | multdiv(data->ctc_delta, | ||
633 | decoder->tsc_ctc_ratio_n, | ||
634 | decoder->tsc_ctc_ratio_d); | ||
635 | } | ||
636 | |||
637 | if (timestamp < data->timestamp) | ||
638 | return 1; | ||
639 | |||
640 | if (pkt_info->last_packet_type != INTEL_PT_CYC) { | ||
641 | data->timestamp = timestamp; | ||
642 | return 0; | ||
643 | } | ||
644 | |||
645 | break; | ||
646 | |||
647 | case INTEL_PT_TSC: | ||
648 | timestamp = pkt_info->packet.payload | | ||
649 | (data->timestamp & (0xffULL << 56)); | ||
650 | if (data->from_mtc && timestamp < data->timestamp && | ||
651 | data->timestamp - timestamp < decoder->tsc_slip) | ||
652 | return 1; | ||
653 | while (timestamp < data->timestamp) | ||
654 | timestamp += (1ULL << 56); | ||
655 | if (pkt_info->last_packet_type != INTEL_PT_CYC) { | ||
656 | if (data->from_mtc) | ||
657 | return 1; | ||
658 | data->tsc_timestamp = timestamp; | ||
659 | data->timestamp = timestamp; | ||
660 | return 0; | ||
661 | } | ||
662 | break; | ||
663 | |||
664 | case INTEL_PT_TMA: | ||
665 | if (data->from_mtc) | ||
666 | return 1; | ||
667 | |||
668 | if (!decoder->tsc_ctc_ratio_d) | ||
669 | return 0; | ||
670 | |||
671 | ctc = pkt_info->packet.payload; | ||
672 | fc = pkt_info->packet.count; | ||
673 | ctc_rem = ctc & decoder->ctc_rem_mask; | ||
674 | |||
675 | data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; | ||
676 | |||
677 | data->ctc_timestamp = data->tsc_timestamp - fc; | ||
678 | if (decoder->tsc_ctc_mult) { | ||
679 | data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; | ||
680 | } else { | ||
681 | data->ctc_timestamp -= | ||
682 | multdiv(ctc_rem, decoder->tsc_ctc_ratio_n, | ||
683 | decoder->tsc_ctc_ratio_d); | ||
684 | } | ||
685 | |||
686 | data->ctc_delta = 0; | ||
687 | data->have_tma = true; | ||
688 | |||
689 | return 0; | ||
690 | |||
691 | case INTEL_PT_CYC: | ||
692 | data->cycle_cnt += pkt_info->packet.payload; | ||
693 | return 0; | ||
694 | |||
695 | case INTEL_PT_CBR: | ||
696 | cbr = pkt_info->packet.payload; | ||
697 | if (data->cbr && data->cbr != cbr) | ||
698 | return 1; | ||
699 | data->cbr = cbr; | ||
700 | data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; | ||
701 | return 0; | ||
702 | |||
703 | case INTEL_PT_TIP_PGD: | ||
704 | case INTEL_PT_TRACESTOP: | ||
705 | case INTEL_PT_OVF: | ||
706 | case INTEL_PT_BAD: /* Does not happen */ | ||
707 | default: | ||
708 | return 1; | ||
709 | } | ||
710 | |||
711 | if (!data->cbr && decoder->cbr) { | ||
712 | data->cbr = decoder->cbr; | ||
713 | data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc; | ||
714 | } | ||
715 | |||
716 | if (!data->cycle_cnt) | ||
717 | return 1; | ||
718 | |||
719 | cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; | ||
720 | |||
721 | if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc && | ||
722 | cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { | ||
723 | intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n", | ||
724 | cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); | ||
725 | return 1; | ||
726 | } | ||
727 | |||
728 | decoder->calc_cyc_to_tsc = cyc_to_tsc; | ||
729 | decoder->have_calc_cyc_to_tsc = true; | ||
730 | |||
731 | if (data->cbr) { | ||
732 | intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n", | ||
733 | cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos); | ||
734 | } else { | ||
735 | intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n", | ||
736 | cyc_to_tsc, pkt_info->pos); | ||
737 | } | ||
738 | |||
739 | return 1; | ||
740 | } | ||
741 | |||
742 | static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder, | ||
743 | bool from_mtc) | ||
744 | { | ||
745 | struct intel_pt_calc_cyc_to_tsc_info data = { | ||
746 | .cycle_cnt = 0, | ||
747 | .cbr = 0, | ||
748 | .last_mtc = decoder->last_mtc, | ||
749 | .ctc_timestamp = decoder->ctc_timestamp, | ||
750 | .ctc_delta = decoder->ctc_delta, | ||
751 | .tsc_timestamp = decoder->tsc_timestamp, | ||
752 | .timestamp = decoder->timestamp, | ||
753 | .have_tma = decoder->have_tma, | ||
754 | .from_mtc = from_mtc, | ||
755 | .cbr_cyc_to_tsc = 0, | ||
756 | }; | ||
757 | |||
758 | intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); | ||
759 | } | ||
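
The lookahead above accumulates CYC cycle counts until the next reliable timestamp and divides the TSC delta by them, then discards the result if it is more than 25% above the value implied by the current core-to-bus ratio (max_non_turbo_ratio / CBR). A hedged sketch of just that plausibility check (helper name and guard are illustrative, not part of the decoder):

#include <stdbool.h>

/* Sketch of the sanity check applied to a calculated cycles-to-TSC ratio */
static bool cyc_to_tsc_plausible(double tsc_delta, double cycle_cnt,
				 double max_non_turbo_ratio, unsigned int cbr)
{
	double cyc_to_tsc = tsc_delta / cycle_cnt;
	double cbr_cyc_to_tsc = cbr ? max_non_turbo_ratio / cbr : 0;

	/* Reject values more than 25% above the CBR-based expectation */
	return !(cbr_cyc_to_tsc && cyc_to_tsc > cbr_cyc_to_tsc &&
		 cyc_to_tsc / cbr_cyc_to_tsc > 1.25);
}
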
760 | |||
761 | static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder) | ||
762 | { | ||
763 | int ret; | ||
764 | |||
765 | decoder->last_packet_type = decoder->packet.type; | ||
766 | |||
767 | do { | ||
768 | decoder->pos += decoder->pkt_step; | ||
769 | decoder->buf += decoder->pkt_step; | ||
770 | decoder->len -= decoder->pkt_step; | ||
771 | |||
772 | if (!decoder->len) { | ||
773 | ret = intel_pt_get_next_data(decoder); | ||
774 | if (ret) | ||
775 | return ret; | ||
776 | } | ||
777 | |||
778 | ret = intel_pt_get_packet(decoder->buf, decoder->len, | ||
779 | &decoder->packet); | ||
780 | if (ret == INTEL_PT_NEED_MORE_BYTES && | ||
781 | decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) { | ||
782 | ret = intel_pt_get_split_packet(decoder); | ||
783 | if (ret < 0) | ||
784 | return ret; | ||
785 | } | ||
786 | if (ret <= 0) | ||
787 | return intel_pt_bad_packet(decoder); | ||
788 | |||
789 | decoder->pkt_len = ret; | ||
790 | decoder->pkt_step = ret; | ||
791 | intel_pt_decoder_log_packet(decoder); | ||
792 | } while (decoder->packet.type == INTEL_PT_PAD); | ||
793 | |||
794 | return 0; | ||
795 | } | ||
796 | |||
797 | static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) | ||
798 | { | ||
799 | uint64_t timestamp, masked_timestamp; | ||
800 | |||
801 | timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; | ||
802 | masked_timestamp = timestamp & decoder->period_mask; | ||
803 | if (decoder->continuous_period) { | ||
804 | if (masked_timestamp != decoder->last_masked_timestamp) | ||
805 | return 1; | ||
806 | } else { | ||
807 | timestamp += 1; | ||
808 | masked_timestamp = timestamp & decoder->period_mask; | ||
809 | if (masked_timestamp != decoder->last_masked_timestamp) { | ||
810 | decoder->last_masked_timestamp = masked_timestamp; | ||
811 | decoder->continuous_period = true; | ||
812 | } | ||
813 | } | ||
814 | return decoder->period_ticks - (timestamp - masked_timestamp); | ||
815 | } | ||
816 | |||
817 | static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder) | ||
818 | { | ||
819 | switch (decoder->period_type) { | ||
820 | case INTEL_PT_PERIOD_INSTRUCTIONS: | ||
821 | return decoder->period - decoder->period_insn_cnt; | ||
822 | case INTEL_PT_PERIOD_TICKS: | ||
823 | return intel_pt_next_period(decoder); | ||
824 | case INTEL_PT_PERIOD_NONE: | ||
825 | case INTEL_PT_PERIOD_MTC: | ||
826 | default: | ||
827 | return 0; | ||
828 | } | ||
829 | } | ||
830 | |||
831 | static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) | ||
832 | { | ||
833 | uint64_t timestamp, masked_timestamp; | ||
834 | |||
835 | switch (decoder->period_type) { | ||
836 | case INTEL_PT_PERIOD_INSTRUCTIONS: | ||
837 | decoder->period_insn_cnt = 0; | ||
838 | break; | ||
839 | case INTEL_PT_PERIOD_TICKS: | ||
840 | timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; | ||
841 | masked_timestamp = timestamp & decoder->period_mask; | ||
842 | decoder->last_masked_timestamp = masked_timestamp; | ||
843 | break; | ||
844 | case INTEL_PT_PERIOD_NONE: | ||
845 | case INTEL_PT_PERIOD_MTC: | ||
846 | default: | ||
847 | break; | ||
848 | } | ||
849 | |||
850 | decoder->state.type |= INTEL_PT_INSTRUCTION; | ||
851 | } | ||
852 | |||
853 | static int intel_pt_walk_insn(struct intel_pt_decoder *decoder, | ||
854 | struct intel_pt_insn *intel_pt_insn, uint64_t ip) | ||
855 | { | ||
856 | uint64_t max_insn_cnt, insn_cnt = 0; | ||
857 | int err; | ||
858 | |||
859 | if (!decoder->mtc_insn) | ||
860 | decoder->mtc_insn = true; | ||
861 | |||
862 | max_insn_cnt = intel_pt_next_sample(decoder); | ||
863 | |||
864 | err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip, | ||
865 | max_insn_cnt, decoder->data); | ||
866 | |||
867 | decoder->tot_insn_cnt += insn_cnt; | ||
868 | decoder->timestamp_insn_cnt += insn_cnt; | ||
869 | decoder->period_insn_cnt += insn_cnt; | ||
870 | |||
871 | if (err) { | ||
872 | decoder->no_progress = 0; | ||
873 | decoder->pkt_state = INTEL_PT_STATE_ERR2; | ||
874 | intel_pt_log_at("ERROR: Failed to get instruction", | ||
875 | decoder->ip); | ||
876 | if (err == -ENOENT) | ||
877 | return -ENOLINK; | ||
878 | return -EILSEQ; | ||
879 | } | ||
880 | |||
881 | if (ip && decoder->ip == ip) { | ||
882 | err = -EAGAIN; | ||
883 | goto out; | ||
884 | } | ||
885 | |||
886 | if (max_insn_cnt && insn_cnt >= max_insn_cnt) | ||
887 | intel_pt_sample_insn(decoder); | ||
888 | |||
889 | if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) { | ||
890 | decoder->state.type = INTEL_PT_INSTRUCTION; | ||
891 | decoder->state.from_ip = decoder->ip; | ||
892 | decoder->state.to_ip = 0; | ||
893 | decoder->ip += intel_pt_insn->length; | ||
894 | err = INTEL_PT_RETURN; | ||
895 | goto out; | ||
896 | } | ||
897 | |||
898 | if (intel_pt_insn->op == INTEL_PT_OP_CALL) { | ||
899 | /* Zero-length calls are excluded */ | ||
900 | if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL || | ||
901 | intel_pt_insn->rel) { | ||
902 | err = intel_pt_push(&decoder->stack, decoder->ip + | ||
903 | intel_pt_insn->length); | ||
904 | if (err) | ||
905 | goto out; | ||
906 | } | ||
907 | } else if (intel_pt_insn->op == INTEL_PT_OP_RET) { | ||
908 | decoder->ret_addr = intel_pt_pop(&decoder->stack); | ||
909 | } | ||
910 | |||
911 | if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) { | ||
912 | int cnt = decoder->no_progress++; | ||
913 | |||
914 | decoder->state.from_ip = decoder->ip; | ||
915 | decoder->ip += intel_pt_insn->length + | ||
916 | intel_pt_insn->rel; | ||
917 | decoder->state.to_ip = decoder->ip; | ||
918 | err = INTEL_PT_RETURN; | ||
919 | |||
920 | 		/* | ||
921 | 		 * Check for being stuck in a loop.  This can happen if a | ||
922 | 		 * decoder error results in the decoder erroneously setting the | ||
923 | 		 * ip to an address that is itself in an infinite loop that | ||
924 | 		 * consumes no packets.  The only branches such a loop can take | ||
925 | 		 * are unconditional direct branches (see the sketch below). | ||
926 | 		 */ | ||
927 | if (cnt) { | ||
928 | if (cnt == 1) { | ||
929 | decoder->stuck_ip = decoder->state.to_ip; | ||
930 | decoder->stuck_ip_prd = 1; | ||
931 | decoder->stuck_ip_cnt = 1; | ||
932 | } else if (cnt > INTEL_PT_MAX_LOOPS || | ||
933 | decoder->state.to_ip == decoder->stuck_ip) { | ||
934 | intel_pt_log_at("ERROR: Never-ending loop", | ||
935 | decoder->state.to_ip); | ||
936 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | ||
937 | err = -ELOOP; | ||
938 | goto out; | ||
939 | } else if (!--decoder->stuck_ip_cnt) { | ||
940 | decoder->stuck_ip_prd += 1; | ||
941 | decoder->stuck_ip_cnt = decoder->stuck_ip_prd; | ||
942 | decoder->stuck_ip = decoder->state.to_ip; | ||
943 | } | ||
944 | } | ||
945 | goto out_no_progress; | ||
946 | } | ||
947 | out: | ||
948 | decoder->no_progress = 0; | ||
949 | out_no_progress: | ||
950 | decoder->state.insn_op = intel_pt_insn->op; | ||
951 | decoder->state.insn_len = intel_pt_insn->length; | ||
952 | |||
953 | if (decoder->tx_flags & INTEL_PT_IN_TX) | ||
954 | decoder->state.flags |= INTEL_PT_IN_TX; | ||
955 | |||
956 | return err; | ||
957 | } | ||
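
The stuck-IP bookkeeping in intel_pt_walk_insn() is a lightweight cycle detector: the branch target is checkpointed at intervals that grow by one each time (1, 2, 3, ...), and decoding is abandoned if a checkpointed target recurs or the run of no-progress branches exceeds INTEL_PT_MAX_LOOPS. A condensed sketch of the same logic, with an illustrative struct and limit that are not part of the decoder:

#include <stdbool.h>
#include <stdint.h>

#define MAX_LOOPS_SKETCH 10000		/* illustrative limit only */

struct loop_check {
	uint64_t stuck_ip;
	int cnt, prd;
	int no_progress;	/* assumed to be reset whenever progress is made */
};

/* Returns true when the branch targets look like a never-ending loop */
static bool loop_check_sketch(struct loop_check *c, uint64_t to_ip)
{
	int n = c->no_progress++;

	if (!n)
		return false;
	if (n == 1) {
		c->stuck_ip = to_ip;
		c->prd = c->cnt = 1;
	} else if (n > MAX_LOOPS_SKETCH || to_ip == c->stuck_ip) {
		return true;
	} else if (!--c->cnt) {
		c->cnt = ++c->prd;
		c->stuck_ip = to_ip;
	}
	return false;
}
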
958 | |||
959 | static int intel_pt_walk_fup(struct intel_pt_decoder *decoder) | ||
960 | { | ||
961 | struct intel_pt_insn intel_pt_insn; | ||
962 | uint64_t ip; | ||
963 | int err; | ||
964 | |||
965 | ip = decoder->last_ip; | ||
966 | |||
967 | while (1) { | ||
968 | err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip); | ||
969 | if (err == INTEL_PT_RETURN) | ||
970 | return 0; | ||
971 | if (err == -EAGAIN) { | ||
972 | if (decoder->set_fup_tx_flags) { | ||
973 | decoder->set_fup_tx_flags = false; | ||
974 | decoder->tx_flags = decoder->fup_tx_flags; | ||
975 | decoder->state.type = INTEL_PT_TRANSACTION; | ||
976 | decoder->state.from_ip = decoder->ip; | ||
977 | decoder->state.to_ip = 0; | ||
978 | decoder->state.flags = decoder->fup_tx_flags; | ||
979 | return 0; | ||
980 | } | ||
981 | return err; | ||
982 | } | ||
983 | decoder->set_fup_tx_flags = false; | ||
984 | if (err) | ||
985 | return err; | ||
986 | |||
987 | if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { | ||
988 | intel_pt_log_at("ERROR: Unexpected indirect branch", | ||
989 | decoder->ip); | ||
990 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | ||
991 | return -ENOENT; | ||
992 | } | ||
993 | |||
994 | if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { | ||
995 | intel_pt_log_at("ERROR: Unexpected conditional branch", | ||
996 | decoder->ip); | ||
997 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | ||
998 | return -ENOENT; | ||
999 | } | ||
1000 | |||
1001 | intel_pt_bug(decoder); | ||
1002 | } | ||
1003 | } | ||
1004 | |||
1005 | static int intel_pt_walk_tip(struct intel_pt_decoder *decoder) | ||
1006 | { | ||
1007 | struct intel_pt_insn intel_pt_insn; | ||
1008 | int err; | ||
1009 | |||
1010 | err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); | ||
1011 | if (err == INTEL_PT_RETURN) | ||
1012 | return 0; | ||
1013 | if (err) | ||
1014 | return err; | ||
1015 | |||
1016 | if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { | ||
1017 | if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) { | ||
1018 | decoder->pge = false; | ||
1019 | decoder->continuous_period = false; | ||
1020 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
1021 | decoder->state.from_ip = decoder->ip; | ||
1022 | decoder->state.to_ip = 0; | ||
1023 | if (decoder->packet.count != 0) | ||
1024 | decoder->ip = decoder->last_ip; | ||
1025 | } else { | ||
1026 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
1027 | decoder->state.from_ip = decoder->ip; | ||
1028 | if (decoder->packet.count == 0) { | ||
1029 | decoder->state.to_ip = 0; | ||
1030 | } else { | ||
1031 | decoder->state.to_ip = decoder->last_ip; | ||
1032 | decoder->ip = decoder->last_ip; | ||
1033 | } | ||
1034 | } | ||
1035 | return 0; | ||
1036 | } | ||
1037 | |||
1038 | if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { | ||
1039 | intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch", | ||
1040 | decoder->ip); | ||
1041 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | ||
1042 | return -ENOENT; | ||
1043 | } | ||
1044 | |||
1045 | return intel_pt_bug(decoder); | ||
1046 | } | ||
1047 | |||
1048 | static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) | ||
1049 | { | ||
1050 | struct intel_pt_insn intel_pt_insn; | ||
1051 | int err; | ||
1052 | |||
1053 | while (1) { | ||
1054 | err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0); | ||
1055 | if (err == INTEL_PT_RETURN) | ||
1056 | return 0; | ||
1057 | if (err) | ||
1058 | return err; | ||
1059 | |||
1060 | if (intel_pt_insn.op == INTEL_PT_OP_RET) { | ||
1061 | if (!decoder->return_compression) { | ||
1062 | intel_pt_log_at("ERROR: RET when expecting conditional branch", | ||
1063 | decoder->ip); | ||
1064 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
1065 | return -ENOENT; | ||
1066 | } | ||
1067 | if (!decoder->ret_addr) { | ||
1068 | intel_pt_log_at("ERROR: Bad RET compression (stack empty)", | ||
1069 | decoder->ip); | ||
1070 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
1071 | return -ENOENT; | ||
1072 | } | ||
1073 | if (!(decoder->tnt.payload & BIT63)) { | ||
1074 | intel_pt_log_at("ERROR: Bad RET compression (TNT=N)", | ||
1075 | decoder->ip); | ||
1076 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
1077 | return -ENOENT; | ||
1078 | } | ||
1079 | decoder->tnt.count -= 1; | ||
1080 | if (!decoder->tnt.count) | ||
1081 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
1082 | decoder->tnt.payload <<= 1; | ||
1083 | decoder->state.from_ip = decoder->ip; | ||
1084 | decoder->ip = decoder->ret_addr; | ||
1085 | decoder->state.to_ip = decoder->ip; | ||
1086 | return 0; | ||
1087 | } | ||
1088 | |||
1089 | if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) { | ||
1090 | /* Handle deferred TIPs */ | ||
1091 | err = intel_pt_get_next_packet(decoder); | ||
1092 | if (err) | ||
1093 | return err; | ||
1094 | if (decoder->packet.type != INTEL_PT_TIP || | ||
1095 | decoder->packet.count == 0) { | ||
1096 | intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch", | ||
1097 | decoder->ip); | ||
1098 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
1099 | decoder->pkt_step = 0; | ||
1100 | return -ENOENT; | ||
1101 | } | ||
1102 | intel_pt_set_last_ip(decoder); | ||
1103 | decoder->state.from_ip = decoder->ip; | ||
1104 | decoder->state.to_ip = decoder->last_ip; | ||
1105 | decoder->ip = decoder->last_ip; | ||
1106 | return 0; | ||
1107 | } | ||
1108 | |||
1109 | if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { | ||
1110 | decoder->tnt.count -= 1; | ||
1111 | if (!decoder->tnt.count) | ||
1112 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
1113 | if (decoder->tnt.payload & BIT63) { | ||
1114 | decoder->tnt.payload <<= 1; | ||
1115 | decoder->state.from_ip = decoder->ip; | ||
1116 | decoder->ip += intel_pt_insn.length + | ||
1117 | intel_pt_insn.rel; | ||
1118 | decoder->state.to_ip = decoder->ip; | ||
1119 | return 0; | ||
1120 | } | ||
1121 | /* Instruction sample for a non-taken branch */ | ||
1122 | if (decoder->state.type & INTEL_PT_INSTRUCTION) { | ||
1123 | decoder->tnt.payload <<= 1; | ||
1124 | decoder->state.type = INTEL_PT_INSTRUCTION; | ||
1125 | decoder->state.from_ip = decoder->ip; | ||
1126 | decoder->state.to_ip = 0; | ||
1127 | decoder->ip += intel_pt_insn.length; | ||
1128 | return 0; | ||
1129 | } | ||
1130 | decoder->ip += intel_pt_insn.length; | ||
1131 | if (!decoder->tnt.count) | ||
1132 | return -EAGAIN; | ||
1133 | decoder->tnt.payload <<= 1; | ||
1134 | continue; | ||
1135 | } | ||
1136 | |||
1137 | return intel_pt_bug(decoder); | ||
1138 | } | ||
1139 | } | ||
1140 | |||
1141 | static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip) | ||
1142 | { | ||
1143 | unsigned int fup_tx_flags; | ||
1144 | int err; | ||
1145 | |||
1146 | fup_tx_flags = decoder->packet.payload & | ||
1147 | (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX); | ||
1148 | err = intel_pt_get_next_packet(decoder); | ||
1149 | if (err) | ||
1150 | return err; | ||
1151 | if (decoder->packet.type == INTEL_PT_FUP) { | ||
1152 | decoder->fup_tx_flags = fup_tx_flags; | ||
1153 | decoder->set_fup_tx_flags = true; | ||
1154 | if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX)) | ||
1155 | *no_tip = true; | ||
1156 | } else { | ||
1157 | intel_pt_log_at("ERROR: Missing FUP after MODE.TSX", | ||
1158 | decoder->pos); | ||
1159 | intel_pt_update_in_tx(decoder); | ||
1160 | } | ||
1161 | return 0; | ||
1162 | } | ||
1163 | |||
1164 | static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder) | ||
1165 | { | ||
1166 | uint64_t timestamp; | ||
1167 | |||
1168 | decoder->have_tma = false; | ||
1169 | |||
1170 | if (decoder->ref_timestamp) { | ||
1171 | timestamp = decoder->packet.payload | | ||
1172 | (decoder->ref_timestamp & (0xffULL << 56)); | ||
1173 | if (timestamp < decoder->ref_timestamp) { | ||
1174 | if (decoder->ref_timestamp - timestamp > (1ULL << 55)) | ||
1175 | timestamp += (1ULL << 56); | ||
1176 | } else { | ||
1177 | if (timestamp - decoder->ref_timestamp > (1ULL << 55)) | ||
1178 | timestamp -= (1ULL << 56); | ||
1179 | } | ||
1180 | decoder->tsc_timestamp = timestamp; | ||
1181 | decoder->timestamp = timestamp; | ||
1182 | decoder->ref_timestamp = 0; | ||
1183 | decoder->timestamp_insn_cnt = 0; | ||
1184 | } else if (decoder->timestamp) { | ||
1185 | timestamp = decoder->packet.payload | | ||
1186 | (decoder->timestamp & (0xffULL << 56)); | ||
1187 | decoder->tsc_timestamp = timestamp; | ||
1188 | if (timestamp < decoder->timestamp && | ||
1189 | decoder->timestamp - timestamp < decoder->tsc_slip) { | ||
1190 | intel_pt_log_to("Suppressing backwards timestamp", | ||
1191 | timestamp); | ||
1192 | timestamp = decoder->timestamp; | ||
1193 | } | ||
1194 | while (timestamp < decoder->timestamp) { | ||
1195 | intel_pt_log_to("Wraparound timestamp", timestamp); | ||
1196 | timestamp += (1ULL << 56); | ||
1197 | decoder->tsc_timestamp = timestamp; | ||
1198 | } | ||
1199 | decoder->timestamp = timestamp; | ||
1200 | decoder->timestamp_insn_cnt = 0; | ||
1201 | } | ||
1202 | |||
1203 | if (decoder->last_packet_type == INTEL_PT_CYC) { | ||
1204 | decoder->cyc_ref_timestamp = decoder->timestamp; | ||
1205 | decoder->cycle_cnt = 0; | ||
1206 | decoder->have_calc_cyc_to_tsc = false; | ||
1207 | intel_pt_calc_cyc_to_tsc(decoder, false); | ||
1208 | } | ||
1209 | |||
1210 | intel_pt_log_to("Setting timestamp", decoder->timestamp); | ||
1211 | } | ||
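
TSC packets carry only the low seven bytes of the timestamp counter, so the top byte is borrowed from a full reference timestamp and the result is moved by one 2^56 period if it lands more than half a wrap (2^55) away from that reference. A compact sketch of the reconstruction, outside the decoder:

#include <stdint.h>

/* Sketch: extend a 56-bit TSC payload using a full 64-bit reference value */
static uint64_t extend_tsc_sketch(uint64_t payload56, uint64_t ref_timestamp)
{
	uint64_t timestamp = payload56 | (ref_timestamp & (0xffULL << 56));

	if (timestamp < ref_timestamp) {
		if (ref_timestamp - timestamp > (1ULL << 55))
			timestamp += (1ULL << 56);
	} else {
		if (timestamp - ref_timestamp > (1ULL << 55))
			timestamp -= (1ULL << 56);
	}
	return timestamp;
}
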
1212 | |||
1213 | static int intel_pt_overflow(struct intel_pt_decoder *decoder) | ||
1214 | { | ||
1215 | intel_pt_log("ERROR: Buffer overflow\n"); | ||
1216 | intel_pt_clear_tx_flags(decoder); | ||
1217 | decoder->have_tma = false; | ||
1218 | decoder->cbr = 0; | ||
1219 | decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC; | ||
1220 | decoder->overflow = true; | ||
1221 | return -EOVERFLOW; | ||
1222 | } | ||
1223 | |||
1224 | static void intel_pt_calc_tma(struct intel_pt_decoder *decoder) | ||
1225 | { | ||
1226 | uint32_t ctc = decoder->packet.payload; | ||
1227 | uint32_t fc = decoder->packet.count; | ||
1228 | uint32_t ctc_rem = ctc & decoder->ctc_rem_mask; | ||
1229 | |||
1230 | if (!decoder->tsc_ctc_ratio_d) | ||
1231 | return; | ||
1232 | |||
1233 | decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff; | ||
1234 | decoder->ctc_timestamp = decoder->tsc_timestamp - fc; | ||
1235 | if (decoder->tsc_ctc_mult) { | ||
1236 | decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult; | ||
1237 | } else { | ||
1238 | decoder->ctc_timestamp -= multdiv(ctc_rem, | ||
1239 | decoder->tsc_ctc_ratio_n, | ||
1240 | decoder->tsc_ctc_ratio_d); | ||
1241 | } | ||
1242 | decoder->ctc_delta = 0; | ||
1243 | decoder->have_tma = true; | ||
1244 | intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n", | ||
1245 | decoder->ctc_timestamp, decoder->last_mtc, ctc_rem); | ||
1246 | } | ||
1247 | |||
1248 | static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder) | ||
1249 | { | ||
1250 | uint64_t timestamp; | ||
1251 | uint32_t mtc, mtc_delta; | ||
1252 | |||
1253 | if (!decoder->have_tma) | ||
1254 | return; | ||
1255 | |||
1256 | mtc = decoder->packet.payload; | ||
1257 | |||
1258 | if (mtc > decoder->last_mtc) | ||
1259 | mtc_delta = mtc - decoder->last_mtc; | ||
1260 | else | ||
1261 | mtc_delta = mtc + 256 - decoder->last_mtc; | ||
1262 | |||
1263 | decoder->ctc_delta += mtc_delta << decoder->mtc_shift; | ||
1264 | |||
1265 | if (decoder->tsc_ctc_mult) { | ||
1266 | timestamp = decoder->ctc_timestamp + | ||
1267 | decoder->ctc_delta * decoder->tsc_ctc_mult; | ||
1268 | } else { | ||
1269 | timestamp = decoder->ctc_timestamp + | ||
1270 | multdiv(decoder->ctc_delta, | ||
1271 | decoder->tsc_ctc_ratio_n, | ||
1272 | decoder->tsc_ctc_ratio_d); | ||
1273 | } | ||
1274 | |||
1275 | if (timestamp < decoder->timestamp) | ||
1276 | intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", | ||
1277 | timestamp, decoder->timestamp); | ||
1278 | else | ||
1279 | decoder->timestamp = timestamp; | ||
1280 | |||
1281 | decoder->timestamp_insn_cnt = 0; | ||
1282 | decoder->last_mtc = mtc; | ||
1283 | |||
1284 | if (decoder->last_packet_type == INTEL_PT_CYC) { | ||
1285 | decoder->cyc_ref_timestamp = decoder->timestamp; | ||
1286 | decoder->cycle_cnt = 0; | ||
1287 | decoder->have_calc_cyc_to_tsc = false; | ||
1288 | intel_pt_calc_cyc_to_tsc(decoder, true); | ||
1289 | } | ||
1290 | } | ||
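
Each MTC packet advances the crystal clock (CTC) count, wrapping modulo 256; the accumulated delta is shifted by the configured MTC period and converted to TSC ticks either with the precomputed multiplier or with the CPUID-derived N/D ratio. A worked sketch of the conversion (the helper names, shift and ratio values are purely illustrative):

#include <stdint.h>

/* Overflow-avoiding t * n / d, as a sketch of the decoder's multdiv() helper */
static uint64_t multdiv_sketch(uint64_t t, uint32_t n, uint32_t d)
{
	return d ? (t / d) * n + ((t % d) * n) / d : 0;
}

/* TSC ticks represented by an MTC-derived CTC delta, added to the CTC base
 * timestamp.  e.g. mtc_shift 3, ratio 8/3, 5 MTC ticks:
 * (5 << 3) * 8 / 3 ~= 106 TSC ticks.
 */
static uint64_t mtc_delta_to_tsc_sketch(uint32_t mtc, uint32_t last_mtc,
					unsigned int mtc_shift,
					uint32_t ratio_n, uint32_t ratio_d)
{
	uint32_t mtc_delta = mtc > last_mtc ? mtc - last_mtc
					    : mtc + 256 - last_mtc;

	return multdiv_sketch((uint64_t)mtc_delta << mtc_shift,
			      ratio_n, ratio_d);
}
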
1291 | |||
1292 | static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder) | ||
1293 | { | ||
1294 | unsigned int cbr = decoder->packet.payload; | ||
1295 | |||
1296 | if (decoder->cbr == cbr) | ||
1297 | return; | ||
1298 | |||
1299 | decoder->cbr = cbr; | ||
1300 | decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; | ||
1301 | } | ||
1302 | |||
1303 | static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder) | ||
1304 | { | ||
1305 | uint64_t timestamp = decoder->cyc_ref_timestamp; | ||
1306 | |||
1307 | decoder->have_cyc = true; | ||
1308 | |||
1309 | decoder->cycle_cnt += decoder->packet.payload; | ||
1310 | |||
1311 | if (!decoder->cyc_ref_timestamp) | ||
1312 | return; | ||
1313 | |||
1314 | if (decoder->have_calc_cyc_to_tsc) | ||
1315 | timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc; | ||
1316 | else if (decoder->cbr) | ||
1317 | timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc; | ||
1318 | else | ||
1319 | return; | ||
1320 | |||
1321 | if (timestamp < decoder->timestamp) | ||
1322 | intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n", | ||
1323 | timestamp, decoder->timestamp); | ||
1324 | else | ||
1325 | decoder->timestamp = timestamp; | ||
1326 | } | ||
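
CYC packets count core cycles, so turning them into TSC time needs a ticks-per-cycle factor: the calibrated value when one has been computed, otherwise max_non_turbo_ratio divided by the current core-to-bus ratio (CBR), and no estimate at all when neither is known. A hedged sketch of that choice, with an illustrative "zero means unknown" convention:

#include <stdint.h>

/* Sketch: CYC-based timestamp estimate; calc_cyc_to_tsc of 0 means unknown */
static uint64_t cyc_timestamp_sketch(uint64_t cyc_ref_timestamp,
				     uint64_t cycle_cnt, double calc_cyc_to_tsc,
				     double max_non_turbo_ratio, unsigned int cbr)
{
	double cyc_to_tsc;

	if (calc_cyc_to_tsc)
		cyc_to_tsc = calc_cyc_to_tsc;
	else if (cbr)
		cyc_to_tsc = max_non_turbo_ratio / cbr;
	else
		return cyc_ref_timestamp;	/* no usable conversion factor */

	return cyc_ref_timestamp + (uint64_t)(cycle_cnt * cyc_to_tsc);
}
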
1327 | |||
1328 | /* Walk PSB+ packets when already in sync. */ | ||
1329 | static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) | ||
1330 | { | ||
1331 | int err; | ||
1332 | |||
1333 | while (1) { | ||
1334 | err = intel_pt_get_next_packet(decoder); | ||
1335 | if (err) | ||
1336 | return err; | ||
1337 | |||
1338 | switch (decoder->packet.type) { | ||
1339 | case INTEL_PT_PSBEND: | ||
1340 | return 0; | ||
1341 | |||
1342 | case INTEL_PT_TIP_PGD: | ||
1343 | case INTEL_PT_TIP_PGE: | ||
1344 | case INTEL_PT_TIP: | ||
1345 | case INTEL_PT_TNT: | ||
1346 | case INTEL_PT_TRACESTOP: | ||
1347 | case INTEL_PT_BAD: | ||
1348 | case INTEL_PT_PSB: | ||
1349 | decoder->have_tma = false; | ||
1350 | intel_pt_log("ERROR: Unexpected packet\n"); | ||
1351 | return -EAGAIN; | ||
1352 | |||
1353 | case INTEL_PT_OVF: | ||
1354 | return intel_pt_overflow(decoder); | ||
1355 | |||
1356 | case INTEL_PT_TSC: | ||
1357 | intel_pt_calc_tsc_timestamp(decoder); | ||
1358 | break; | ||
1359 | |||
1360 | case INTEL_PT_TMA: | ||
1361 | intel_pt_calc_tma(decoder); | ||
1362 | break; | ||
1363 | |||
1364 | case INTEL_PT_CBR: | ||
1365 | intel_pt_calc_cbr(decoder); | ||
1366 | break; | ||
1367 | |||
1368 | case INTEL_PT_MODE_EXEC: | ||
1369 | decoder->exec_mode = decoder->packet.payload; | ||
1370 | break; | ||
1371 | |||
1372 | case INTEL_PT_PIP: | ||
1373 | decoder->cr3 = decoder->packet.payload & (BIT63 - 1); | ||
1374 | break; | ||
1375 | |||
1376 | case INTEL_PT_FUP: | ||
1377 | decoder->pge = true; | ||
1378 | intel_pt_set_last_ip(decoder); | ||
1379 | break; | ||
1380 | |||
1381 | case INTEL_PT_MODE_TSX: | ||
1382 | intel_pt_update_in_tx(decoder); | ||
1383 | break; | ||
1384 | |||
1385 | case INTEL_PT_MTC: | ||
1386 | intel_pt_calc_mtc_timestamp(decoder); | ||
1387 | if (decoder->period_type == INTEL_PT_PERIOD_MTC) | ||
1388 | decoder->state.type |= INTEL_PT_INSTRUCTION; | ||
1389 | break; | ||
1390 | |||
1391 | case INTEL_PT_CYC: | ||
1392 | case INTEL_PT_VMCS: | ||
1393 | case INTEL_PT_MNT: | ||
1394 | case INTEL_PT_PAD: | ||
1395 | default: | ||
1396 | break; | ||
1397 | } | ||
1398 | } | ||
1399 | } | ||
1400 | |||
1401 | static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder) | ||
1402 | { | ||
1403 | int err; | ||
1404 | |||
1405 | if (decoder->tx_flags & INTEL_PT_ABORT_TX) { | ||
1406 | decoder->tx_flags = 0; | ||
1407 | decoder->state.flags &= ~INTEL_PT_IN_TX; | ||
1408 | decoder->state.flags |= INTEL_PT_ABORT_TX; | ||
1409 | } else { | ||
1410 | decoder->state.flags |= INTEL_PT_ASYNC; | ||
1411 | } | ||
1412 | |||
1413 | while (1) { | ||
1414 | err = intel_pt_get_next_packet(decoder); | ||
1415 | if (err) | ||
1416 | return err; | ||
1417 | |||
1418 | switch (decoder->packet.type) { | ||
1419 | case INTEL_PT_TNT: | ||
1420 | case INTEL_PT_FUP: | ||
1421 | case INTEL_PT_TRACESTOP: | ||
1422 | case INTEL_PT_PSB: | ||
1423 | case INTEL_PT_TSC: | ||
1424 | case INTEL_PT_TMA: | ||
1425 | case INTEL_PT_CBR: | ||
1426 | case INTEL_PT_MODE_TSX: | ||
1427 | case INTEL_PT_BAD: | ||
1428 | case INTEL_PT_PSBEND: | ||
1429 | intel_pt_log("ERROR: Missing TIP after FUP\n"); | ||
1430 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
1431 | return -ENOENT; | ||
1432 | |||
1433 | case INTEL_PT_OVF: | ||
1434 | return intel_pt_overflow(decoder); | ||
1435 | |||
1436 | case INTEL_PT_TIP_PGD: | ||
1437 | decoder->state.from_ip = decoder->ip; | ||
1438 | decoder->state.to_ip = 0; | ||
1439 | if (decoder->packet.count != 0) { | ||
1440 | intel_pt_set_ip(decoder); | ||
1441 | intel_pt_log("Omitting PGD ip " x64_fmt "\n", | ||
1442 | decoder->ip); | ||
1443 | } | ||
1444 | decoder->pge = false; | ||
1445 | decoder->continuous_period = false; | ||
1446 | return 0; | ||
1447 | |||
1448 | case INTEL_PT_TIP_PGE: | ||
1449 | decoder->pge = true; | ||
1450 | intel_pt_log("Omitting PGE ip " x64_fmt "\n", | ||
1451 | decoder->ip); | ||
1452 | decoder->state.from_ip = 0; | ||
1453 | if (decoder->packet.count == 0) { | ||
1454 | decoder->state.to_ip = 0; | ||
1455 | } else { | ||
1456 | intel_pt_set_ip(decoder); | ||
1457 | decoder->state.to_ip = decoder->ip; | ||
1458 | } | ||
1459 | return 0; | ||
1460 | |||
1461 | case INTEL_PT_TIP: | ||
1462 | decoder->state.from_ip = decoder->ip; | ||
1463 | if (decoder->packet.count == 0) { | ||
1464 | decoder->state.to_ip = 0; | ||
1465 | } else { | ||
1466 | intel_pt_set_ip(decoder); | ||
1467 | decoder->state.to_ip = decoder->ip; | ||
1468 | } | ||
1469 | return 0; | ||
1470 | |||
1471 | case INTEL_PT_PIP: | ||
1472 | decoder->cr3 = decoder->packet.payload & (BIT63 - 1); | ||
1473 | break; | ||
1474 | |||
1475 | case INTEL_PT_MTC: | ||
1476 | intel_pt_calc_mtc_timestamp(decoder); | ||
1477 | if (decoder->period_type == INTEL_PT_PERIOD_MTC) | ||
1478 | decoder->state.type |= INTEL_PT_INSTRUCTION; | ||
1479 | break; | ||
1480 | |||
1481 | case INTEL_PT_CYC: | ||
1482 | intel_pt_calc_cyc_timestamp(decoder); | ||
1483 | break; | ||
1484 | |||
1485 | case INTEL_PT_MODE_EXEC: | ||
1486 | decoder->exec_mode = decoder->packet.payload; | ||
1487 | break; | ||
1488 | |||
1489 | case INTEL_PT_VMCS: | ||
1490 | case INTEL_PT_MNT: | ||
1491 | case INTEL_PT_PAD: | ||
1492 | break; | ||
1493 | |||
1494 | default: | ||
1495 | return intel_pt_bug(decoder); | ||
1496 | } | ||
1497 | } | ||
1498 | } | ||
1499 | |||
1500 | static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) | ||
1501 | { | ||
1502 | bool no_tip = false; | ||
1503 | int err; | ||
1504 | |||
1505 | while (1) { | ||
1506 | err = intel_pt_get_next_packet(decoder); | ||
1507 | if (err) | ||
1508 | return err; | ||
1509 | next: | ||
1510 | switch (decoder->packet.type) { | ||
1511 | case INTEL_PT_TNT: | ||
1512 | if (!decoder->packet.count) | ||
1513 | break; | ||
1514 | decoder->tnt = decoder->packet; | ||
1515 | decoder->pkt_state = INTEL_PT_STATE_TNT; | ||
1516 | err = intel_pt_walk_tnt(decoder); | ||
1517 | if (err == -EAGAIN) | ||
1518 | break; | ||
1519 | return err; | ||
1520 | |||
1521 | case INTEL_PT_TIP_PGD: | ||
1522 | if (decoder->packet.count != 0) | ||
1523 | intel_pt_set_last_ip(decoder); | ||
1524 | decoder->pkt_state = INTEL_PT_STATE_TIP_PGD; | ||
1525 | return intel_pt_walk_tip(decoder); | ||
1526 | |||
1527 | case INTEL_PT_TIP_PGE: { | ||
1528 | decoder->pge = true; | ||
1529 | if (decoder->packet.count == 0) { | ||
1530 | intel_pt_log_at("Skipping zero TIP.PGE", | ||
1531 | decoder->pos); | ||
1532 | break; | ||
1533 | } | ||
1534 | intel_pt_set_ip(decoder); | ||
1535 | decoder->state.from_ip = 0; | ||
1536 | decoder->state.to_ip = decoder->ip; | ||
1537 | return 0; | ||
1538 | } | ||
1539 | |||
1540 | case INTEL_PT_OVF: | ||
1541 | return intel_pt_overflow(decoder); | ||
1542 | |||
1543 | case INTEL_PT_TIP: | ||
1544 | if (decoder->packet.count != 0) | ||
1545 | intel_pt_set_last_ip(decoder); | ||
1546 | decoder->pkt_state = INTEL_PT_STATE_TIP; | ||
1547 | return intel_pt_walk_tip(decoder); | ||
1548 | |||
1549 | case INTEL_PT_FUP: | ||
1550 | if (decoder->packet.count == 0) { | ||
1551 | intel_pt_log_at("Skipping zero FUP", | ||
1552 | decoder->pos); | ||
1553 | no_tip = false; | ||
1554 | break; | ||
1555 | } | ||
1556 | intel_pt_set_last_ip(decoder); | ||
1557 | err = intel_pt_walk_fup(decoder); | ||
1558 | if (err != -EAGAIN) { | ||
1559 | if (err) | ||
1560 | return err; | ||
1561 | if (no_tip) | ||
1562 | decoder->pkt_state = | ||
1563 | INTEL_PT_STATE_FUP_NO_TIP; | ||
1564 | else | ||
1565 | decoder->pkt_state = INTEL_PT_STATE_FUP; | ||
1566 | return 0; | ||
1567 | } | ||
1568 | if (no_tip) { | ||
1569 | no_tip = false; | ||
1570 | break; | ||
1571 | } | ||
1572 | return intel_pt_walk_fup_tip(decoder); | ||
1573 | |||
1574 | case INTEL_PT_TRACESTOP: | ||
1575 | decoder->pge = false; | ||
1576 | decoder->continuous_period = false; | ||
1577 | intel_pt_clear_tx_flags(decoder); | ||
1578 | decoder->have_tma = false; | ||
1579 | break; | ||
1580 | |||
1581 | case INTEL_PT_PSB: | ||
1582 | intel_pt_clear_stack(&decoder->stack); | ||
1583 | err = intel_pt_walk_psbend(decoder); | ||
1584 | if (err == -EAGAIN) | ||
1585 | goto next; | ||
1586 | if (err) | ||
1587 | return err; | ||
1588 | break; | ||
1589 | |||
1590 | case INTEL_PT_PIP: | ||
1591 | decoder->cr3 = decoder->packet.payload & (BIT63 - 1); | ||
1592 | break; | ||
1593 | |||
1594 | case INTEL_PT_MTC: | ||
1595 | intel_pt_calc_mtc_timestamp(decoder); | ||
1596 | if (decoder->period_type != INTEL_PT_PERIOD_MTC) | ||
1597 | break; | ||
1598 | /* | ||
1599 | * Ensure that there has been an instruction since the | ||
1600 | * last MTC. | ||
1601 | */ | ||
1602 | if (!decoder->mtc_insn) | ||
1603 | break; | ||
1604 | decoder->mtc_insn = false; | ||
1605 | /* Ensure that there is a timestamp */ | ||
1606 | if (!decoder->timestamp) | ||
1607 | break; | ||
1608 | decoder->state.type = INTEL_PT_INSTRUCTION; | ||
1609 | decoder->state.from_ip = decoder->ip; | ||
1610 | decoder->state.to_ip = 0; | ||
1611 | decoder->mtc_insn = false; | ||
1612 | return 0; | ||
1613 | |||
1614 | case INTEL_PT_TSC: | ||
1615 | intel_pt_calc_tsc_timestamp(decoder); | ||
1616 | break; | ||
1617 | |||
1618 | case INTEL_PT_TMA: | ||
1619 | intel_pt_calc_tma(decoder); | ||
1620 | break; | ||
1621 | |||
1622 | case INTEL_PT_CYC: | ||
1623 | intel_pt_calc_cyc_timestamp(decoder); | ||
1624 | break; | ||
1625 | |||
1626 | case INTEL_PT_CBR: | ||
1627 | intel_pt_calc_cbr(decoder); | ||
1628 | break; | ||
1629 | |||
1630 | case INTEL_PT_MODE_EXEC: | ||
1631 | decoder->exec_mode = decoder->packet.payload; | ||
1632 | break; | ||
1633 | |||
1634 | case INTEL_PT_MODE_TSX: | ||
1635 | /* MODE_TSX need not be followed by FUP */ | ||
1636 | if (!decoder->pge) { | ||
1637 | intel_pt_update_in_tx(decoder); | ||
1638 | break; | ||
1639 | } | ||
1640 | err = intel_pt_mode_tsx(decoder, &no_tip); | ||
1641 | if (err) | ||
1642 | return err; | ||
1643 | goto next; | ||
1644 | |||
1645 | case INTEL_PT_BAD: /* Does not happen */ | ||
1646 | return intel_pt_bug(decoder); | ||
1647 | |||
1648 | case INTEL_PT_PSBEND: | ||
1649 | case INTEL_PT_VMCS: | ||
1650 | case INTEL_PT_MNT: | ||
1651 | case INTEL_PT_PAD: | ||
1652 | break; | ||
1653 | |||
1654 | default: | ||
1655 | return intel_pt_bug(decoder); | ||
1656 | } | ||
1657 | } | ||
1658 | } | ||
1659 | |||
1660 | /* Walk PSB+ packets to get in sync. */ | ||
1661 | static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) | ||
1662 | { | ||
1663 | int err; | ||
1664 | |||
1665 | while (1) { | ||
1666 | err = intel_pt_get_next_packet(decoder); | ||
1667 | if (err) | ||
1668 | return err; | ||
1669 | |||
1670 | switch (decoder->packet.type) { | ||
1671 | case INTEL_PT_TIP_PGD: | ||
1672 | 			decoder->continuous_period = false;	/* Fall through */ | ||
1673 | case INTEL_PT_TIP_PGE: | ||
1674 | case INTEL_PT_TIP: | ||
1675 | intel_pt_log("ERROR: Unexpected packet\n"); | ||
1676 | return -ENOENT; | ||
1677 | |||
1678 | case INTEL_PT_FUP: | ||
1679 | decoder->pge = true; | ||
1680 | if (decoder->last_ip || decoder->packet.count == 6 || | ||
1681 | decoder->packet.count == 0) { | ||
1682 | uint64_t current_ip = decoder->ip; | ||
1683 | |||
1684 | intel_pt_set_ip(decoder); | ||
1685 | if (current_ip) | ||
1686 | intel_pt_log_to("Setting IP", | ||
1687 | decoder->ip); | ||
1688 | } | ||
1689 | break; | ||
1690 | |||
1691 | case INTEL_PT_MTC: | ||
1692 | intel_pt_calc_mtc_timestamp(decoder); | ||
1693 | break; | ||
1694 | |||
1695 | case INTEL_PT_TSC: | ||
1696 | intel_pt_calc_tsc_timestamp(decoder); | ||
1697 | break; | ||
1698 | |||
1699 | case INTEL_PT_TMA: | ||
1700 | intel_pt_calc_tma(decoder); | ||
1701 | break; | ||
1702 | |||
1703 | case INTEL_PT_CYC: | ||
1704 | intel_pt_calc_cyc_timestamp(decoder); | ||
1705 | break; | ||
1706 | |||
1707 | case INTEL_PT_CBR: | ||
1708 | intel_pt_calc_cbr(decoder); | ||
1709 | break; | ||
1710 | |||
1711 | case INTEL_PT_PIP: | ||
1712 | decoder->cr3 = decoder->packet.payload & (BIT63 - 1); | ||
1713 | break; | ||
1714 | |||
1715 | case INTEL_PT_MODE_EXEC: | ||
1716 | decoder->exec_mode = decoder->packet.payload; | ||
1717 | break; | ||
1718 | |||
1719 | case INTEL_PT_MODE_TSX: | ||
1720 | intel_pt_update_in_tx(decoder); | ||
1721 | break; | ||
1722 | |||
1723 | case INTEL_PT_TRACESTOP: | ||
1724 | decoder->pge = false; | ||
1725 | decoder->continuous_period = false; | ||
1726 | 			intel_pt_clear_tx_flags(decoder);	/* Fall through */ | ||
1727 | case INTEL_PT_TNT: | ||
1728 | decoder->have_tma = false; | ||
1729 | intel_pt_log("ERROR: Unexpected packet\n"); | ||
1730 | if (decoder->ip) | ||
1731 | decoder->pkt_state = INTEL_PT_STATE_ERR4; | ||
1732 | else | ||
1733 | decoder->pkt_state = INTEL_PT_STATE_ERR3; | ||
1734 | return -ENOENT; | ||
1735 | |||
1736 | case INTEL_PT_BAD: /* Does not happen */ | ||
1737 | return intel_pt_bug(decoder); | ||
1738 | |||
1739 | case INTEL_PT_OVF: | ||
1740 | return intel_pt_overflow(decoder); | ||
1741 | |||
1742 | case INTEL_PT_PSBEND: | ||
1743 | return 0; | ||
1744 | |||
1745 | case INTEL_PT_PSB: | ||
1746 | case INTEL_PT_VMCS: | ||
1747 | case INTEL_PT_MNT: | ||
1748 | case INTEL_PT_PAD: | ||
1749 | default: | ||
1750 | break; | ||
1751 | } | ||
1752 | } | ||
1753 | } | ||
1754 | |||
1755 | static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) | ||
1756 | { | ||
1757 | int err; | ||
1758 | |||
1759 | while (1) { | ||
1760 | err = intel_pt_get_next_packet(decoder); | ||
1761 | if (err) | ||
1762 | return err; | ||
1763 | |||
1764 | switch (decoder->packet.type) { | ||
1765 | case INTEL_PT_TIP_PGD: | ||
1766 | 			decoder->continuous_period = false;	/* Fall through */ | ||
1767 | case INTEL_PT_TIP_PGE: | ||
1768 | case INTEL_PT_TIP: | ||
1769 | decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; | ||
1770 | if (decoder->last_ip || decoder->packet.count == 6 || | ||
1771 | decoder->packet.count == 0) | ||
1772 | intel_pt_set_ip(decoder); | ||
1773 | if (decoder->ip) | ||
1774 | return 0; | ||
1775 | break; | ||
1776 | |||
1777 | case INTEL_PT_FUP: | ||
1778 | if (decoder->overflow) { | ||
1779 | if (decoder->last_ip || | ||
1780 | decoder->packet.count == 6 || | ||
1781 | decoder->packet.count == 0) | ||
1782 | intel_pt_set_ip(decoder); | ||
1783 | if (decoder->ip) | ||
1784 | return 0; | ||
1785 | } | ||
1786 | if (decoder->packet.count) | ||
1787 | intel_pt_set_last_ip(decoder); | ||
1788 | break; | ||
1789 | |||
1790 | case INTEL_PT_MTC: | ||
1791 | intel_pt_calc_mtc_timestamp(decoder); | ||
1792 | break; | ||
1793 | |||
1794 | case INTEL_PT_TSC: | ||
1795 | intel_pt_calc_tsc_timestamp(decoder); | ||
1796 | break; | ||
1797 | |||
1798 | case INTEL_PT_TMA: | ||
1799 | intel_pt_calc_tma(decoder); | ||
1800 | break; | ||
1801 | |||
1802 | case INTEL_PT_CYC: | ||
1803 | intel_pt_calc_cyc_timestamp(decoder); | ||
1804 | break; | ||
1805 | |||
1806 | case INTEL_PT_CBR: | ||
1807 | intel_pt_calc_cbr(decoder); | ||
1808 | break; | ||
1809 | |||
1810 | case INTEL_PT_PIP: | ||
1811 | decoder->cr3 = decoder->packet.payload & (BIT63 - 1); | ||
1812 | break; | ||
1813 | |||
1814 | case INTEL_PT_MODE_EXEC: | ||
1815 | decoder->exec_mode = decoder->packet.payload; | ||
1816 | break; | ||
1817 | |||
1818 | case INTEL_PT_MODE_TSX: | ||
1819 | intel_pt_update_in_tx(decoder); | ||
1820 | break; | ||
1821 | |||
1822 | case INTEL_PT_OVF: | ||
1823 | return intel_pt_overflow(decoder); | ||
1824 | |||
1825 | case INTEL_PT_BAD: /* Does not happen */ | ||
1826 | return intel_pt_bug(decoder); | ||
1827 | |||
1828 | case INTEL_PT_TRACESTOP: | ||
1829 | decoder->pge = false; | ||
1830 | decoder->continuous_period = false; | ||
1831 | intel_pt_clear_tx_flags(decoder); | ||
1832 | decoder->have_tma = false; | ||
1833 | break; | ||
1834 | |||
1835 | case INTEL_PT_PSB: | ||
1836 | err = intel_pt_walk_psb(decoder); | ||
1837 | if (err) | ||
1838 | return err; | ||
1839 | if (decoder->ip) { | ||
1840 | /* Do not have a sample */ | ||
1841 | decoder->state.type = 0; | ||
1842 | return 0; | ||
1843 | } | ||
1844 | break; | ||
1845 | |||
1846 | case INTEL_PT_TNT: | ||
1847 | case INTEL_PT_PSBEND: | ||
1848 | case INTEL_PT_VMCS: | ||
1849 | case INTEL_PT_MNT: | ||
1850 | case INTEL_PT_PAD: | ||
1851 | default: | ||
1852 | break; | ||
1853 | } | ||
1854 | } | ||
1855 | } | ||
1856 | |||
1857 | static int intel_pt_sync_ip(struct intel_pt_decoder *decoder) | ||
1858 | { | ||
1859 | int err; | ||
1860 | |||
1861 | intel_pt_log("Scanning for full IP\n"); | ||
1862 | err = intel_pt_walk_to_ip(decoder); | ||
1863 | if (err) | ||
1864 | return err; | ||
1865 | |||
1866 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
1867 | decoder->overflow = false; | ||
1868 | |||
1869 | decoder->state.from_ip = 0; | ||
1870 | decoder->state.to_ip = decoder->ip; | ||
1871 | intel_pt_log_to("Setting IP", decoder->ip); | ||
1872 | |||
1873 | return 0; | ||
1874 | } | ||
1875 | |||
1876 | static int intel_pt_part_psb(struct intel_pt_decoder *decoder) | ||
1877 | { | ||
1878 | const unsigned char *end = decoder->buf + decoder->len; | ||
1879 | size_t i; | ||
1880 | |||
1881 | for (i = INTEL_PT_PSB_LEN - 1; i; i--) { | ||
1882 | if (i > decoder->len) | ||
1883 | continue; | ||
1884 | if (!memcmp(end - i, INTEL_PT_PSB_STR, i)) | ||
1885 | return i; | ||
1886 | } | ||
1887 | return 0; | ||
1888 | } | ||
1889 | |||
1890 | static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb) | ||
1891 | { | ||
1892 | size_t rest_psb = INTEL_PT_PSB_LEN - part_psb; | ||
1893 | const char *psb = INTEL_PT_PSB_STR; | ||
1894 | |||
1895 | if (rest_psb > decoder->len || | ||
1896 | memcmp(decoder->buf, psb + part_psb, rest_psb)) | ||
1897 | return 0; | ||
1898 | |||
1899 | return rest_psb; | ||
1900 | } | ||
1901 | |||
1902 | static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder, | ||
1903 | int part_psb) | ||
1904 | { | ||
1905 | int rest_psb, ret; | ||
1906 | |||
1907 | decoder->pos += decoder->len; | ||
1908 | decoder->len = 0; | ||
1909 | |||
1910 | ret = intel_pt_get_next_data(decoder); | ||
1911 | if (ret) | ||
1912 | return ret; | ||
1913 | |||
1914 | rest_psb = intel_pt_rest_psb(decoder, part_psb); | ||
1915 | if (!rest_psb) | ||
1916 | return 0; | ||
1917 | |||
1918 | decoder->pos -= part_psb; | ||
1919 | decoder->next_buf = decoder->buf + rest_psb; | ||
1920 | decoder->next_len = decoder->len - rest_psb; | ||
1921 | memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); | ||
1922 | decoder->buf = decoder->temp_buf; | ||
1923 | decoder->len = INTEL_PT_PSB_LEN; | ||
1924 | |||
1925 | return 0; | ||
1926 | } | ||
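
A PSB can straddle two trace buffers, so the scan keeps the longest PSB prefix found at the end of one buffer and checks whether the next buffer starts with the matching remainder. A self-contained sketch of that split check, parameterised on the PSB byte string rather than repeating the constant here (the helper is illustrative only):

#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Sketch: does a PSB start at the end of buf_a and finish at the start of buf_b? */
static bool psb_spans_buffers(const unsigned char *buf_a, size_t len_a,
			      const unsigned char *buf_b, size_t len_b,
			      const unsigned char *psb, size_t psb_len)
{
	size_t part;

	for (part = psb_len - 1; part; part--) {
		if (part > len_a)
			continue;
		if (memcmp(buf_a + len_a - part, psb, part))
			continue;
		/* Tail of buf_a is a PSB prefix; does buf_b supply the rest? */
		if (psb_len - part <= len_b &&
		    !memcmp(buf_b, psb + part, psb_len - part))
			return true;
	}
	return false;
}
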
1927 | |||
1928 | static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder) | ||
1929 | { | ||
1930 | unsigned char *next; | ||
1931 | int ret; | ||
1932 | |||
1933 | intel_pt_log("Scanning for PSB\n"); | ||
1934 | while (1) { | ||
1935 | if (!decoder->len) { | ||
1936 | ret = intel_pt_get_next_data(decoder); | ||
1937 | if (ret) | ||
1938 | return ret; | ||
1939 | } | ||
1940 | |||
1941 | next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR, | ||
1942 | INTEL_PT_PSB_LEN); | ||
1943 | if (!next) { | ||
1944 | int part_psb; | ||
1945 | |||
1946 | part_psb = intel_pt_part_psb(decoder); | ||
1947 | if (part_psb) { | ||
1948 | ret = intel_pt_get_split_psb(decoder, part_psb); | ||
1949 | if (ret) | ||
1950 | return ret; | ||
1951 | } else { | ||
1952 | decoder->pos += decoder->len; | ||
1953 | decoder->len = 0; | ||
1954 | } | ||
1955 | continue; | ||
1956 | } | ||
1957 | |||
1958 | decoder->pkt_step = next - decoder->buf; | ||
1959 | return intel_pt_get_next_packet(decoder); | ||
1960 | } | ||
1961 | } | ||
1962 | |||
1963 | static int intel_pt_sync(struct intel_pt_decoder *decoder) | ||
1964 | { | ||
1965 | int err; | ||
1966 | |||
1967 | decoder->pge = false; | ||
1968 | decoder->continuous_period = false; | ||
1969 | decoder->last_ip = 0; | ||
1970 | decoder->ip = 0; | ||
1971 | intel_pt_clear_stack(&decoder->stack); | ||
1972 | |||
1973 | err = intel_pt_scan_for_psb(decoder); | ||
1974 | if (err) | ||
1975 | return err; | ||
1976 | |||
1977 | decoder->pkt_state = INTEL_PT_STATE_NO_IP; | ||
1978 | |||
1979 | err = intel_pt_walk_psb(decoder); | ||
1980 | if (err) | ||
1981 | return err; | ||
1982 | |||
1983 | if (decoder->ip) { | ||
1984 | decoder->state.type = 0; /* Do not have a sample */ | ||
1985 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
1986 | } else { | ||
1987 | return intel_pt_sync_ip(decoder); | ||
1988 | } | ||
1989 | |||
1990 | return 0; | ||
1991 | } | ||
1992 | |||
1993 | static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) | ||
1994 | { | ||
1995 | uint64_t est = decoder->timestamp_insn_cnt << 1; | ||
1996 | |||
1997 | if (!decoder->cbr || !decoder->max_non_turbo_ratio) | ||
1998 | goto out; | ||
1999 | |||
2000 | est *= decoder->max_non_turbo_ratio; | ||
2001 | est /= decoder->cbr; | ||
2002 | out: | ||
2003 | return decoder->timestamp + est; | ||
2004 | } | ||
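
Between explicit timestamps the decoder estimates elapsed time by doubling the instruction count since the last timestamp and scaling it from core cycles to TSC ticks by max_non_turbo_ratio / cbr. For example, 1000 instructions with a max non-turbo ratio of 40 and CBR 20 add an estimated 1000 * 2 * 40 / 20 = 4000 ticks. A minimal sketch of the same arithmetic:

#include <stdint.h>

/* Sketch of the estimated-timestamp arithmetic used above */
static uint64_t est_timestamp_sketch(uint64_t timestamp, uint64_t insn_cnt,
				     uint32_t max_non_turbo_ratio,
				     uint32_t cbr)
{
	uint64_t est = insn_cnt << 1;

	if (cbr && max_non_turbo_ratio) {
		est *= max_non_turbo_ratio;
		est /= cbr;
	}
	return timestamp + est;
}
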
2005 | |||
2006 | const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) | ||
2007 | { | ||
2008 | int err; | ||
2009 | |||
2010 | do { | ||
2011 | decoder->state.type = INTEL_PT_BRANCH; | ||
2012 | decoder->state.flags = 0; | ||
2013 | |||
2014 | switch (decoder->pkt_state) { | ||
2015 | case INTEL_PT_STATE_NO_PSB: | ||
2016 | err = intel_pt_sync(decoder); | ||
2017 | break; | ||
2018 | case INTEL_PT_STATE_NO_IP: | ||
2019 | decoder->last_ip = 0; | ||
2020 | /* Fall through */ | ||
2021 | case INTEL_PT_STATE_ERR_RESYNC: | ||
2022 | err = intel_pt_sync_ip(decoder); | ||
2023 | break; | ||
2024 | case INTEL_PT_STATE_IN_SYNC: | ||
2025 | err = intel_pt_walk_trace(decoder); | ||
2026 | break; | ||
2027 | case INTEL_PT_STATE_TNT: | ||
2028 | err = intel_pt_walk_tnt(decoder); | ||
2029 | if (err == -EAGAIN) | ||
2030 | err = intel_pt_walk_trace(decoder); | ||
2031 | break; | ||
2032 | case INTEL_PT_STATE_TIP: | ||
2033 | case INTEL_PT_STATE_TIP_PGD: | ||
2034 | err = intel_pt_walk_tip(decoder); | ||
2035 | break; | ||
2036 | case INTEL_PT_STATE_FUP: | ||
2037 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
2038 | err = intel_pt_walk_fup(decoder); | ||
2039 | if (err == -EAGAIN) | ||
2040 | err = intel_pt_walk_fup_tip(decoder); | ||
2041 | else if (!err) | ||
2042 | decoder->pkt_state = INTEL_PT_STATE_FUP; | ||
2043 | break; | ||
2044 | case INTEL_PT_STATE_FUP_NO_TIP: | ||
2045 | decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; | ||
2046 | err = intel_pt_walk_fup(decoder); | ||
2047 | if (err == -EAGAIN) | ||
2048 | err = intel_pt_walk_trace(decoder); | ||
2049 | break; | ||
2050 | default: | ||
2051 | err = intel_pt_bug(decoder); | ||
2052 | break; | ||
2053 | } | ||
2054 | } while (err == -ENOLINK); | ||
2055 | |||
2056 | decoder->state.err = err ? intel_pt_ext_err(err) : 0; | ||
2057 | decoder->state.timestamp = decoder->timestamp; | ||
2058 | decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); | ||
2059 | decoder->state.cr3 = decoder->cr3; | ||
2060 | decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; | ||
2061 | |||
2062 | if (err) | ||
2063 | decoder->state.from_ip = decoder->ip; | ||
2064 | |||
2065 | return &decoder->state; | ||
2066 | } | ||
2067 | |||
2068 | static bool intel_pt_at_psb(unsigned char *buf, size_t len) | ||
2069 | { | ||
2070 | if (len < INTEL_PT_PSB_LEN) | ||
2071 | return false; | ||
2072 | return memmem(buf, INTEL_PT_PSB_LEN, INTEL_PT_PSB_STR, | ||
2073 | INTEL_PT_PSB_LEN); | ||
2074 | } | ||
2075 | |||
2076 | /** | ||
2077 | * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet. | ||
2078 | * @buf: pointer to buffer pointer | ||
2079 | * @len: size of buffer | ||
2080 | * | ||
2081 | * Updates the buffer pointer to point to the start of the next PSB packet if | ||
2082 | * there is one, otherwise the buffer pointer is unchanged. If @buf is updated, | ||
2083 | * @len is adjusted accordingly. | ||
2084 | * | ||
2085 | * Return: %true if a PSB packet is found, %false otherwise. | ||
2086 | */ | ||
2087 | static bool intel_pt_next_psb(unsigned char **buf, size_t *len) | ||
2088 | { | ||
2089 | unsigned char *next; | ||
2090 | |||
2091 | next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); | ||
2092 | if (next) { | ||
2093 | *len -= next - *buf; | ||
2094 | *buf = next; | ||
2095 | return true; | ||
2096 | } | ||
2097 | return false; | ||
2098 | } | ||
2099 | |||
2100 | /** | ||
2101 | * intel_pt_step_psb - move buffer pointer to the start of the following PSB | ||
2102 | * packet. | ||
2103 | * @buf: pointer to buffer pointer | ||
2104 | * @len: size of buffer | ||
2105 | * | ||
2106 | * Updates the buffer pointer to point to the start of the following PSB packet | ||
2107 | * (skipping the PSB at @buf itself) if there is one, otherwise the buffer | ||
2108 | * pointer is unchanged. If @buf is updated, @len is adjusted accordingly. | ||
2109 | * | ||
2110 | * Return: %true if a PSB packet is found, %false otherwise. | ||
2111 | */ | ||
2112 | static bool intel_pt_step_psb(unsigned char **buf, size_t *len) | ||
2113 | { | ||
2114 | unsigned char *next; | ||
2115 | |||
2116 | if (!*len) | ||
2117 | return false; | ||
2118 | |||
2119 | next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN); | ||
2120 | if (next) { | ||
2121 | *len -= next - *buf; | ||
2122 | *buf = next; | ||
2123 | return true; | ||
2124 | } | ||
2125 | return false; | ||
2126 | } | ||
2127 | |||
2128 | /** | ||
2129 | * intel_pt_last_psb - find the last PSB packet in a buffer. | ||
2130 | * @buf: buffer | ||
2131 | * @len: size of buffer | ||
2132 | * | ||
2133 | * This function finds the last PSB in a buffer. | ||
2134 | * | ||
2135 | * Return: A pointer to the last PSB in @buf if found, %NULL otherwise. | ||
2136 | */ | ||
2137 | static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len) | ||
2138 | { | ||
2139 | const char *n = INTEL_PT_PSB_STR; | ||
2140 | unsigned char *p; | ||
2141 | size_t k; | ||
2142 | |||
2143 | if (len < INTEL_PT_PSB_LEN) | ||
2144 | return NULL; | ||
2145 | |||
2146 | k = len - INTEL_PT_PSB_LEN + 1; | ||
2147 | while (1) { | ||
2148 | p = memrchr(buf, n[0], k); | ||
2149 | if (!p) | ||
2150 | return NULL; | ||
2151 | if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1)) | ||
2152 | return p; | ||
2153 | k = p - buf; | ||
2154 | if (!k) | ||
2155 | return NULL; | ||
2156 | } | ||
2157 | } | ||
2158 | |||
2159 | /** | ||
2160 | * intel_pt_next_tsc - find and return next TSC. | ||
2161 | * @buf: buffer | ||
2162 | * @len: size of buffer | ||
2163 | * @tsc: TSC value returned | ||
2164 | * | ||
2165 | * Find a TSC packet in @buf and return the TSC value. This function assumes | ||
2166 | * that @buf starts at a PSB and that the PSB+ will contain a TSC, so it stops | ||
2167 | * if a PSBEND packet is found. | ||
2168 | * | ||
2169 | * Return: %true if TSC is found, %false otherwise. | ||
2170 | */ | ||
2171 | static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc) | ||
2172 | { | ||
2173 | struct intel_pt_pkt packet; | ||
2174 | int ret; | ||
2175 | |||
2176 | while (len) { | ||
2177 | ret = intel_pt_get_packet(buf, len, &packet); | ||
2178 | if (ret <= 0) | ||
2179 | return false; | ||
2180 | if (packet.type == INTEL_PT_TSC) { | ||
2181 | *tsc = packet.payload; | ||
2182 | return true; | ||
2183 | } | ||
2184 | if (packet.type == INTEL_PT_PSBEND) | ||
2185 | return false; | ||
2186 | buf += ret; | ||
2187 | len -= ret; | ||
2188 | } | ||
2189 | return false; | ||
2190 | } | ||
2191 | |||
2192 | /** | ||
2193 | * intel_pt_tsc_cmp - compare 7-byte TSCs. | ||
2194 | * @tsc1: first TSC to compare | ||
2195 | * @tsc2: second TSC to compare | ||
2196 | * | ||
2197 | * This function compares 7-byte TSC values allowing for the possibility that | ||
2198 | * TSC wrapped around. Generally it is not possible to know if TSC has wrapped | ||
2199 | * around so for that purpose this function assumes the absolute difference is | ||
2200 | * less than half the maximum difference. | ||
2201 | * | ||
2202 | * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is | ||
2203 | * after @tsc2. | ||
2204 | */ | ||
2205 | static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2) | ||
2206 | { | ||
2207 | const uint64_t halfway = (1ULL << 55); | ||
2208 | |||
2209 | if (tsc1 == tsc2) | ||
2210 | return 0; | ||
2211 | |||
2212 | if (tsc1 < tsc2) { | ||
2213 | if (tsc2 - tsc1 < halfway) | ||
2214 | return -1; | ||
2215 | else | ||
2216 | return 1; | ||
2217 | } else { | ||
2218 | if (tsc1 - tsc2 < halfway) | ||
2219 | return 1; | ||
2220 | else | ||
2221 | return -1; | ||
2222 | } | ||
2223 | } | ||
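/*
 * Illustrative example (hypothetical values): with the halfway point at
 * 2^55, intel_pt_tsc_cmp(0x00ffffffffffffff, 0x1) returns -1; the
 * numerically larger value is treated as the earlier one because the small
 * value is assumed to follow a TSC wraparound.
 */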
2224 | |||
2225 | /** | ||
2226 | * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data | ||
2227 | * using TSC. | ||
2228 | * @buf_a: first buffer | ||
2229 | * @len_a: size of first buffer | ||
2230 | * @buf_b: second buffer | ||
2231 | * @len_b: size of second buffer | ||
2232 | * | ||
2233 | * If the trace contains TSC we can look at the last TSC of @buf_a and the | ||
2234 | * first TSC of @buf_b in order to determine if the buffers overlap, and then | ||
2235 | * walk forward in @buf_b until a later TSC is found. A precondition is that | ||
2236 | * @buf_a and @buf_b are positioned at a PSB. | ||
2237 | * | ||
2238 | * Return: A pointer into @buf_b from where non-overlapped data starts, or | ||
2239 | * @buf_b + @len_b if there is no non-overlapped data. | ||
2240 | */ | ||
2241 | static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a, | ||
2242 | size_t len_a, | ||
2243 | unsigned char *buf_b, | ||
2244 | size_t len_b) | ||
2245 | { | ||
2246 | uint64_t tsc_a, tsc_b; | ||
2247 | unsigned char *p; | ||
2248 | size_t len; | ||
2249 | |||
2250 | p = intel_pt_last_psb(buf_a, len_a); | ||
2251 | if (!p) | ||
2252 | return buf_b; /* No PSB in buf_a => no overlap */ | ||
2253 | |||
2254 | len = len_a - (p - buf_a); | ||
2255 | if (!intel_pt_next_tsc(p, len, &tsc_a)) { | ||
2256 | /* The last PSB+ in buf_a is incomplete, so go back one more */ | ||
2257 | len_a -= len; | ||
2258 | p = intel_pt_last_psb(buf_a, len_a); | ||
2259 | if (!p) | ||
2260 | return buf_b; /* No full PSB+ => assume no overlap */ | ||
2261 | len = len_a - (p - buf_a); | ||
2262 | if (!intel_pt_next_tsc(p, len, &tsc_a)) | ||
2263 | return buf_b; /* No TSC in buf_a => assume no overlap */ | ||
2264 | } | ||
2265 | |||
2266 | while (1) { | ||
2267 | /* Ignore PSB+ with no TSC */ | ||
2268 | if (intel_pt_next_tsc(buf_b, len_b, &tsc_b) && | ||
2269 | intel_pt_tsc_cmp(tsc_a, tsc_b) < 0) | ||
2270 | return buf_b; /* tsc_a < tsc_b => no overlap */ | ||
2271 | |||
2272 | if (!intel_pt_step_psb(&buf_b, &len_b)) | ||
2273 | return buf_b + len_b; /* No PSB in buf_b => no data */ | ||
2274 | } | ||
2275 | } | ||
2276 | |||
2277 | /** | ||
2278 | * intel_pt_find_overlap - determine start of non-overlapped trace data. | ||
2279 | * @buf_a: first buffer | ||
2280 | * @len_a: size of first buffer | ||
2281 | * @buf_b: second buffer | ||
2282 | * @len_b: size of second buffer | ||
2283 | * @have_tsc: can use TSC packets to detect overlap | ||
2284 | * | ||
2285 | * When trace samples or snapshots are recorded there is the possibility that | ||
2286 | * the data overlaps. Note that, for the purposes of decoding, data is only | ||
2287 | * useful if it begins with a PSB packet. | ||
2288 | * | ||
2289 | * Return: A pointer into @buf_b from where non-overlapped data starts, or | ||
2290 | * @buf_b + @len_b if there is no non-overlapped data. | ||
2291 | */ | ||
2292 | unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | ||
2293 | unsigned char *buf_b, size_t len_b, | ||
2294 | bool have_tsc) | ||
2295 | { | ||
2296 | unsigned char *found; | ||
2297 | |||
2298 | /* Buffer 'b' must start at PSB so throw away everything before that */ | ||
2299 | if (!intel_pt_next_psb(&buf_b, &len_b)) | ||
2300 | return buf_b + len_b; /* No PSB */ | ||
2301 | |||
2302 | if (!intel_pt_next_psb(&buf_a, &len_a)) | ||
2303 | return buf_b; /* No overlap */ | ||
2304 | |||
2305 | if (have_tsc) { | ||
2306 | found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b); | ||
2307 | if (found) | ||
2308 | return found; | ||
2309 | } | ||
2310 | |||
2311 | /* | ||
2312 | * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes, | ||
2313 | * we can ignore the first part of buffer 'a'. | ||
2314 | */ | ||
2315 | while (len_b < len_a) { | ||
2316 | if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
2317 | return buf_b; /* No overlap */ | ||
2318 | } | ||
2319 | |||
2320 | /* Now len_b >= len_a */ | ||
2321 | if (len_b > len_a) { | ||
2322 | /* The leftover buffer 'b' must start at a PSB */ | ||
2323 | while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { | ||
2324 | if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
2325 | return buf_b; /* No overlap */ | ||
2326 | } | ||
2327 | } | ||
2328 | |||
2329 | while (1) { | ||
2330 | /* Potential overlap so check the bytes */ | ||
2331 | found = memmem(buf_a, len_a, buf_b, len_a); | ||
2332 | if (found) | ||
2333 | return buf_b + len_a; | ||
2334 | |||
2335 | /* Try again at next PSB in buffer 'a' */ | ||
2336 | if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
2337 | return buf_b; /* No overlap */ | ||
2338 | |||
2339 | /* The leftover buffer 'b' must start at a PSB */ | ||
2340 | while (!intel_pt_at_psb(buf_b + len_a, len_b - len_a)) { | ||
2341 | if (!intel_pt_step_psb(&buf_a, &len_a)) | ||
2342 | return buf_b; /* No overlap */ | ||
2343 | } | ||
2344 | } | ||
2345 | } | ||
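A minimal sketch of how a caller might use intel_pt_find_overlap() above when
stitching two snapshot buffers together; prev_buf/cur_buf and the
consume_trace() helper are hypothetical names, not part of this patch:

	unsigned char *start;

	start = intel_pt_find_overlap(prev_buf, prev_len, cur_buf, cur_len,
				      have_tsc);
	if (start < cur_buf + cur_len)
		consume_trace(start, cur_len - (start - cur_buf));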
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h new file mode 100644 index 000000000000..02c38fec1c37 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | |||
@@ -0,0 +1,109 @@ | |||
1 | /* | ||
2 | * intel_pt_decoder.h: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef INCLUDE__INTEL_PT_DECODER_H__ | ||
17 | #define INCLUDE__INTEL_PT_DECODER_H__ | ||
18 | |||
19 | #include <stdint.h> | ||
20 | #include <stddef.h> | ||
21 | #include <stdbool.h> | ||
22 | |||
23 | #include "intel-pt-insn-decoder.h" | ||
24 | |||
25 | #define INTEL_PT_IN_TX (1 << 0) | ||
26 | #define INTEL_PT_ABORT_TX (1 << 1) | ||
27 | #define INTEL_PT_ASYNC (1 << 2) | ||
28 | |||
29 | enum intel_pt_sample_type { | ||
30 | INTEL_PT_BRANCH = 1 << 0, | ||
31 | INTEL_PT_INSTRUCTION = 1 << 1, | ||
32 | INTEL_PT_TRANSACTION = 1 << 2, | ||
33 | }; | ||
34 | |||
35 | enum intel_pt_period_type { | ||
36 | INTEL_PT_PERIOD_NONE, | ||
37 | INTEL_PT_PERIOD_INSTRUCTIONS, | ||
38 | INTEL_PT_PERIOD_TICKS, | ||
39 | INTEL_PT_PERIOD_MTC, | ||
40 | }; | ||
41 | |||
42 | enum { | ||
43 | INTEL_PT_ERR_NOMEM = 1, | ||
44 | INTEL_PT_ERR_INTERN, | ||
45 | INTEL_PT_ERR_BADPKT, | ||
46 | INTEL_PT_ERR_NODATA, | ||
47 | INTEL_PT_ERR_NOINSN, | ||
48 | INTEL_PT_ERR_MISMAT, | ||
49 | INTEL_PT_ERR_OVR, | ||
50 | INTEL_PT_ERR_LOST, | ||
51 | INTEL_PT_ERR_UNK, | ||
52 | INTEL_PT_ERR_NELOOP, | ||
53 | INTEL_PT_ERR_MAX, | ||
54 | }; | ||
55 | |||
56 | struct intel_pt_state { | ||
57 | enum intel_pt_sample_type type; | ||
58 | int err; | ||
59 | uint64_t from_ip; | ||
60 | uint64_t to_ip; | ||
61 | uint64_t cr3; | ||
62 | uint64_t tot_insn_cnt; | ||
63 | uint64_t timestamp; | ||
64 | uint64_t est_timestamp; | ||
65 | uint64_t trace_nr; | ||
66 | uint32_t flags; | ||
67 | enum intel_pt_insn_op insn_op; | ||
68 | int insn_len; | ||
69 | }; | ||
70 | |||
71 | struct intel_pt_insn; | ||
72 | |||
73 | struct intel_pt_buffer { | ||
74 | const unsigned char *buf; | ||
75 | size_t len; | ||
76 | bool consecutive; | ||
77 | uint64_t ref_timestamp; | ||
78 | uint64_t trace_nr; | ||
79 | }; | ||
80 | |||
81 | struct intel_pt_params { | ||
82 | int (*get_trace)(struct intel_pt_buffer *buffer, void *data); | ||
83 | int (*walk_insn)(struct intel_pt_insn *intel_pt_insn, | ||
84 | uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip, | ||
85 | uint64_t max_insn_cnt, void *data); | ||
86 | void *data; | ||
87 | bool return_compression; | ||
88 | uint64_t period; | ||
89 | enum intel_pt_period_type period_type; | ||
90 | unsigned max_non_turbo_ratio; | ||
91 | unsigned int mtc_period; | ||
92 | uint32_t tsc_ctc_ratio_n; | ||
93 | uint32_t tsc_ctc_ratio_d; | ||
94 | }; | ||
95 | |||
96 | struct intel_pt_decoder; | ||
97 | |||
98 | struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params); | ||
99 | void intel_pt_decoder_free(struct intel_pt_decoder *decoder); | ||
100 | |||
101 | const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder); | ||
102 | |||
103 | unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a, | ||
104 | unsigned char *buf_b, size_t len_b, | ||
105 | bool have_tsc); | ||
106 | |||
107 | int intel_pt__strerror(int code, char *buf, size_t buflen); | ||
108 | |||
109 | #endif | ||
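A minimal sketch of the intended call sequence for the API declared above,
assuming intel-pt-decoder.h is included; struct my_source and the my_*
callbacks are hypothetical placeholders, not part of this interface:

	struct my_source { const unsigned char *buf; size_t len; };

	static int my_get_trace(struct intel_pt_buffer *buffer, void *data)
	{
		struct my_source *src = data;

		buffer->buf = src->buf;
		buffer->len = src->len;
		buffer->consecutive = false;
		buffer->ref_timestamp = 0;
		buffer->trace_nr = 0;
		src->len = 0;	/* hand out the data only once */
		return 0;
	}

	static int my_walk_insn(struct intel_pt_insn *intel_pt_insn,
				uint64_t *insn_cnt_ptr, uint64_t *ip,
				uint64_t to_ip, uint64_t max_insn_cnt,
				void *data)
	{
		/* A real callback would read object code at *ip and fill
		 * *intel_pt_insn, e.g. via intel_pt_get_insn(); stubbed here. */
		return -1;
	}

	static void decode_all(struct my_source *src)
	{
		struct intel_pt_params params = {
			.get_trace = my_get_trace,
			.walk_insn = my_walk_insn,
			.data      = src,
		};
		struct intel_pt_decoder *decoder;
		const struct intel_pt_state *state;

		decoder = intel_pt_decoder_new(&params);
		if (!decoder)
			return;
		do {
			state = intel_pt_decode(decoder);
		} while (!state->err);
		intel_pt_decoder_free(decoder);
	}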
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c new file mode 100644 index 000000000000..d23138c06665 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | |||
@@ -0,0 +1,249 @@ | |||
1 | /* | ||
2 | * intel_pt_insn_decoder.c: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <string.h> | ||
18 | #include <endian.h> | ||
19 | #include <byteswap.h> | ||
20 | |||
21 | #include "event.h" | ||
22 | |||
23 | #include "insn.h" | ||
24 | |||
25 | #include "inat.c" | ||
26 | #include "insn.c" | ||
27 | |||
28 | #include "intel-pt-insn-decoder.h" | ||
29 | |||
30 | /* Based on branch_type() from perf_event_intel_lbr.c */ | ||
31 | static void intel_pt_insn_decoder(struct insn *insn, | ||
32 | struct intel_pt_insn *intel_pt_insn) | ||
33 | { | ||
34 | enum intel_pt_insn_op op = INTEL_PT_OP_OTHER; | ||
35 | enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH; | ||
36 | int ext; | ||
37 | |||
38 | if (insn_is_avx(insn)) { | ||
39 | intel_pt_insn->op = INTEL_PT_OP_OTHER; | ||
40 | intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH; | ||
41 | intel_pt_insn->length = insn->length; | ||
42 | return; | ||
43 | } | ||
44 | |||
45 | switch (insn->opcode.bytes[0]) { | ||
46 | case 0xf: | ||
47 | switch (insn->opcode.bytes[1]) { | ||
48 | case 0x05: /* syscall */ | ||
49 | case 0x34: /* sysenter */ | ||
50 | op = INTEL_PT_OP_SYSCALL; | ||
51 | branch = INTEL_PT_BR_INDIRECT; | ||
52 | break; | ||
53 | case 0x07: /* sysret */ | ||
54 | case 0x35: /* sysexit */ | ||
55 | op = INTEL_PT_OP_SYSRET; | ||
56 | branch = INTEL_PT_BR_INDIRECT; | ||
57 | break; | ||
58 | case 0x80 ... 0x8f: /* jcc */ | ||
59 | op = INTEL_PT_OP_JCC; | ||
60 | branch = INTEL_PT_BR_CONDITIONAL; | ||
61 | break; | ||
62 | default: | ||
63 | break; | ||
64 | } | ||
65 | break; | ||
66 | case 0x70 ... 0x7f: /* jcc */ | ||
67 | op = INTEL_PT_OP_JCC; | ||
68 | branch = INTEL_PT_BR_CONDITIONAL; | ||
69 | break; | ||
70 | case 0xc2: /* near ret */ | ||
71 | case 0xc3: /* near ret */ | ||
72 | case 0xca: /* far ret */ | ||
73 | case 0xcb: /* far ret */ | ||
74 | op = INTEL_PT_OP_RET; | ||
75 | branch = INTEL_PT_BR_INDIRECT; | ||
76 | break; | ||
77 | case 0xcf: /* iret */ | ||
78 | op = INTEL_PT_OP_IRET; | ||
79 | branch = INTEL_PT_BR_INDIRECT; | ||
80 | break; | ||
81 | case 0xcc ... 0xce: /* int */ | ||
82 | op = INTEL_PT_OP_INT; | ||
83 | branch = INTEL_PT_BR_INDIRECT; | ||
84 | break; | ||
85 | case 0xe8: /* call near rel */ | ||
86 | op = INTEL_PT_OP_CALL; | ||
87 | branch = INTEL_PT_BR_UNCONDITIONAL; | ||
88 | break; | ||
89 | case 0x9a: /* call far absolute */ | ||
90 | op = INTEL_PT_OP_CALL; | ||
91 | branch = INTEL_PT_BR_INDIRECT; | ||
92 | break; | ||
93 | case 0xe0 ... 0xe2: /* loop */ | ||
94 | op = INTEL_PT_OP_LOOP; | ||
95 | branch = INTEL_PT_BR_CONDITIONAL; | ||
96 | break; | ||
97 | case 0xe3: /* jcc */ | ||
98 | op = INTEL_PT_OP_JCC; | ||
99 | branch = INTEL_PT_BR_CONDITIONAL; | ||
100 | break; | ||
101 | case 0xe9: /* jmp */ | ||
102 | case 0xeb: /* jmp */ | ||
103 | op = INTEL_PT_OP_JMP; | ||
104 | branch = INTEL_PT_BR_UNCONDITIONAL; | ||
105 | break; | ||
106 | case 0xea: /* far jmp */ | ||
107 | op = INTEL_PT_OP_JMP; | ||
108 | branch = INTEL_PT_BR_INDIRECT; | ||
109 | break; | ||
110 | case 0xff: /* call near absolute, call far absolute ind */ | ||
111 | ext = (insn->modrm.bytes[0] >> 3) & 0x7; | ||
112 | switch (ext) { | ||
113 | case 2: /* near ind call */ | ||
114 | case 3: /* far ind call */ | ||
115 | op = INTEL_PT_OP_CALL; | ||
116 | branch = INTEL_PT_BR_INDIRECT; | ||
117 | break; | ||
118 | case 4: | ||
119 | case 5: | ||
120 | op = INTEL_PT_OP_JMP; | ||
121 | branch = INTEL_PT_BR_INDIRECT; | ||
122 | break; | ||
123 | default: | ||
124 | break; | ||
125 | } | ||
126 | break; | ||
127 | default: | ||
128 | break; | ||
129 | } | ||
130 | |||
131 | intel_pt_insn->op = op; | ||
132 | intel_pt_insn->branch = branch; | ||
133 | intel_pt_insn->length = insn->length; | ||
134 | |||
135 | if (branch == INTEL_PT_BR_CONDITIONAL || | ||
136 | branch == INTEL_PT_BR_UNCONDITIONAL) { | ||
137 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
138 | switch (insn->immediate.nbytes) { | ||
139 | case 1: | ||
140 | intel_pt_insn->rel = insn->immediate.value; | ||
141 | break; | ||
142 | case 2: | ||
143 | intel_pt_insn->rel = | ||
144 | bswap_16((short)insn->immediate.value); | ||
145 | break; | ||
146 | case 4: | ||
147 | intel_pt_insn->rel = bswap_32(insn->immediate.value); | ||
148 | break; | ||
149 | default: | ||
150 | intel_pt_insn->rel = 0; | ||
151 | break; | ||
152 | } | ||
153 | #else | ||
154 | intel_pt_insn->rel = insn->immediate.value; | ||
155 | #endif | ||
156 | } | ||
157 | } | ||
158 | |||
159 | int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, | ||
160 | struct intel_pt_insn *intel_pt_insn) | ||
161 | { | ||
162 | struct insn insn; | ||
163 | |||
164 | insn_init(&insn, buf, len, x86_64); | ||
165 | insn_get_length(&insn); | ||
166 | if (!insn_complete(&insn) || insn.length > len) | ||
167 | return -1; | ||
168 | intel_pt_insn_decoder(&insn, intel_pt_insn); | ||
169 | if (insn.length < INTEL_PT_INSN_DBG_BUF_SZ) | ||
170 | memcpy(intel_pt_insn->buf, buf, insn.length); | ||
171 | else | ||
172 | memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_DBG_BUF_SZ); | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | const char *branch_name[] = { | ||
177 | [INTEL_PT_OP_OTHER] = "Other", | ||
178 | [INTEL_PT_OP_CALL] = "Call", | ||
179 | [INTEL_PT_OP_RET] = "Ret", | ||
180 | [INTEL_PT_OP_JCC] = "Jcc", | ||
181 | [INTEL_PT_OP_JMP] = "Jmp", | ||
182 | [INTEL_PT_OP_LOOP] = "Loop", | ||
183 | [INTEL_PT_OP_IRET] = "IRet", | ||
184 | [INTEL_PT_OP_INT] = "Int", | ||
185 | [INTEL_PT_OP_SYSCALL] = "Syscall", | ||
186 | [INTEL_PT_OP_SYSRET] = "Sysret", | ||
187 | }; | ||
188 | |||
189 | const char *intel_pt_insn_name(enum intel_pt_insn_op op) | ||
190 | { | ||
191 | return branch_name[op]; | ||
192 | } | ||
193 | |||
194 | int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, | ||
195 | size_t buf_len) | ||
196 | { | ||
197 | switch (intel_pt_insn->branch) { | ||
198 | case INTEL_PT_BR_CONDITIONAL: | ||
199 | case INTEL_PT_BR_UNCONDITIONAL: | ||
200 | return snprintf(buf, buf_len, "%s %s%d", | ||
201 | intel_pt_insn_name(intel_pt_insn->op), | ||
202 | intel_pt_insn->rel > 0 ? "+" : "", | ||
203 | intel_pt_insn->rel); | ||
204 | case INTEL_PT_BR_NO_BRANCH: | ||
205 | case INTEL_PT_BR_INDIRECT: | ||
206 | return snprintf(buf, buf_len, "%s", | ||
207 | intel_pt_insn_name(intel_pt_insn->op)); | ||
208 | default: | ||
209 | break; | ||
210 | } | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | size_t intel_pt_insn_max_size(void) | ||
215 | { | ||
216 | return MAX_INSN_SIZE; | ||
217 | } | ||
218 | |||
219 | int intel_pt_insn_type(enum intel_pt_insn_op op) | ||
220 | { | ||
221 | switch (op) { | ||
222 | case INTEL_PT_OP_OTHER: | ||
223 | return 0; | ||
224 | case INTEL_PT_OP_CALL: | ||
225 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL; | ||
226 | case INTEL_PT_OP_RET: | ||
227 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN; | ||
228 | case INTEL_PT_OP_JCC: | ||
229 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; | ||
230 | case INTEL_PT_OP_JMP: | ||
231 | return PERF_IP_FLAG_BRANCH; | ||
232 | case INTEL_PT_OP_LOOP: | ||
233 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL; | ||
234 | case INTEL_PT_OP_IRET: | ||
235 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | | ||
236 | PERF_IP_FLAG_INTERRUPT; | ||
237 | case INTEL_PT_OP_INT: | ||
238 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | | ||
239 | PERF_IP_FLAG_INTERRUPT; | ||
240 | case INTEL_PT_OP_SYSCALL: | ||
241 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | | ||
242 | PERF_IP_FLAG_SYSCALLRET; | ||
243 | case INTEL_PT_OP_SYSRET: | ||
244 | return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | | ||
245 | PERF_IP_FLAG_SYSCALLRET; | ||
246 | default: | ||
247 | return 0; | ||
248 | } | ||
249 | } | ||
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h new file mode 100644 index 000000000000..b0adbf37323e --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * intel_pt_insn_decoder.h: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__ | ||
17 | #define INCLUDE__INTEL_PT_INSN_DECODER_H__ | ||
18 | |||
19 | #include <stddef.h> | ||
20 | #include <stdint.h> | ||
21 | |||
22 | #define INTEL_PT_INSN_DESC_MAX 32 | ||
23 | #define INTEL_PT_INSN_DBG_BUF_SZ 16 | ||
24 | |||
25 | enum intel_pt_insn_op { | ||
26 | INTEL_PT_OP_OTHER, | ||
27 | INTEL_PT_OP_CALL, | ||
28 | INTEL_PT_OP_RET, | ||
29 | INTEL_PT_OP_JCC, | ||
30 | INTEL_PT_OP_JMP, | ||
31 | INTEL_PT_OP_LOOP, | ||
32 | INTEL_PT_OP_IRET, | ||
33 | INTEL_PT_OP_INT, | ||
34 | INTEL_PT_OP_SYSCALL, | ||
35 | INTEL_PT_OP_SYSRET, | ||
36 | }; | ||
37 | |||
38 | enum intel_pt_insn_branch { | ||
39 | INTEL_PT_BR_NO_BRANCH, | ||
40 | INTEL_PT_BR_INDIRECT, | ||
41 | INTEL_PT_BR_CONDITIONAL, | ||
42 | INTEL_PT_BR_UNCONDITIONAL, | ||
43 | }; | ||
44 | |||
45 | struct intel_pt_insn { | ||
46 | enum intel_pt_insn_op op; | ||
47 | enum intel_pt_insn_branch branch; | ||
48 | int length; | ||
49 | int32_t rel; | ||
50 | unsigned char buf[INTEL_PT_INSN_DBG_BUF_SZ]; | ||
51 | }; | ||
52 | |||
53 | int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64, | ||
54 | struct intel_pt_insn *intel_pt_insn); | ||
55 | |||
56 | const char *intel_pt_insn_name(enum intel_pt_insn_op op); | ||
57 | |||
58 | int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf, | ||
59 | size_t buf_len); | ||
60 | |||
61 | size_t intel_pt_insn_max_size(void); | ||
62 | |||
63 | int intel_pt_insn_type(enum intel_pt_insn_op op); | ||
64 | |||
65 | #endif | ||
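A minimal sketch of decoding one branch instruction with the helpers declared
above; the 5-byte sequence is a hypothetical near call with a +16 relative
offset:

	const unsigned char code[] = { 0xe8, 0x10, 0x00, 0x00, 0x00 };
	struct intel_pt_insn insn;
	char desc[INTEL_PT_INSN_DESC_MAX];

	if (!intel_pt_get_insn(code, sizeof(code), /*x86_64=*/1, &insn)) {
		/* insn.op == INTEL_PT_OP_CALL, insn.branch ==
		 * INTEL_PT_BR_UNCONDITIONAL, insn.length == 5, insn.rel == 16 */
		intel_pt_insn_desc(&insn, desc, sizeof(desc)); /* "Call +16" */
	}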
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c new file mode 100644 index 000000000000..d09c7d9f9050 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * intel_pt_log.c: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <stdint.h> | ||
18 | #include <inttypes.h> | ||
19 | #include <stdarg.h> | ||
20 | #include <stdbool.h> | ||
21 | #include <string.h> | ||
22 | |||
23 | #include "intel-pt-log.h" | ||
24 | #include "intel-pt-insn-decoder.h" | ||
25 | |||
26 | #include "intel-pt-pkt-decoder.h" | ||
27 | |||
28 | #define MAX_LOG_NAME 256 | ||
29 | |||
30 | static FILE *f; | ||
31 | static char log_name[MAX_LOG_NAME]; | ||
32 | static bool enable_logging; | ||
33 | |||
34 | void intel_pt_log_enable(void) | ||
35 | { | ||
36 | enable_logging = true; | ||
37 | } | ||
38 | |||
39 | void intel_pt_log_disable(void) | ||
40 | { | ||
41 | if (f) | ||
42 | fflush(f); | ||
43 | enable_logging = false; | ||
44 | } | ||
45 | |||
46 | void intel_pt_log_set_name(const char *name) | ||
47 | { | ||
48 | snprintf(log_name, MAX_LOG_NAME - 4, "%s", name); /* always NUL-terminated, room left for ".log" */ | ||
49 | strcat(log_name, ".log"); | ||
50 | } | ||
51 | |||
52 | static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos, | ||
53 | int indent) | ||
54 | { | ||
55 | int i; | ||
56 | |||
57 | for (i = 0; i < indent; i++) | ||
58 | fprintf(f, " "); | ||
59 | |||
60 | fprintf(f, " %08" PRIx64 ": ", pos); | ||
61 | for (i = 0; i < len; i++) | ||
62 | fprintf(f, " %02x", buf[i]); | ||
63 | for (; i < 16; i++) | ||
64 | fprintf(f, " "); | ||
65 | fprintf(f, " "); | ||
66 | } | ||
67 | |||
68 | static void intel_pt_print_no_data(uint64_t pos, int indent) | ||
69 | { | ||
70 | int i; | ||
71 | |||
72 | for (i = 0; i < indent; i++) | ||
73 | fprintf(f, " "); | ||
74 | |||
75 | fprintf(f, " %08" PRIx64 ": ", pos); | ||
76 | for (i = 0; i < 16; i++) | ||
77 | fprintf(f, " "); | ||
78 | fprintf(f, " "); | ||
79 | } | ||
80 | |||
81 | static int intel_pt_log_open(void) | ||
82 | { | ||
83 | if (!enable_logging) | ||
84 | return -1; | ||
85 | |||
86 | if (f) | ||
87 | return 0; | ||
88 | |||
89 | if (!log_name[0]) | ||
90 | return -1; | ||
91 | |||
92 | f = fopen(log_name, "w+"); | ||
93 | if (!f) { | ||
94 | enable_logging = false; | ||
95 | return -1; | ||
96 | } | ||
97 | |||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, | ||
102 | uint64_t pos, const unsigned char *buf) | ||
103 | { | ||
104 | char desc[INTEL_PT_PKT_DESC_MAX]; | ||
105 | |||
106 | if (intel_pt_log_open()) | ||
107 | return; | ||
108 | |||
109 | intel_pt_print_data(buf, pkt_len, pos, 0); | ||
110 | intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX); | ||
111 | fprintf(f, "%s\n", desc); | ||
112 | } | ||
113 | |||
114 | void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip) | ||
115 | { | ||
116 | char desc[INTEL_PT_INSN_DESC_MAX]; | ||
117 | size_t len = intel_pt_insn->length; | ||
118 | |||
119 | if (intel_pt_log_open()) | ||
120 | return; | ||
121 | |||
122 | if (len > INTEL_PT_INSN_DBG_BUF_SZ) | ||
123 | len = INTEL_PT_INSN_DBG_BUF_SZ; | ||
124 | intel_pt_print_data(intel_pt_insn->buf, len, ip, 8); | ||
125 | if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) | ||
126 | fprintf(f, "%s\n", desc); | ||
127 | else | ||
128 | fprintf(f, "Bad instruction!\n"); | ||
129 | } | ||
130 | |||
131 | void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, uint64_t ip) | ||
132 | { | ||
133 | char desc[INTEL_PT_INSN_DESC_MAX]; | ||
134 | |||
135 | if (intel_pt_log_open()) | ||
136 | return; | ||
137 | |||
138 | intel_pt_print_no_data(ip, 8); | ||
139 | if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0) | ||
140 | fprintf(f, "%s\n", desc); | ||
141 | else | ||
142 | fprintf(f, "Bad instruction!\n"); | ||
143 | } | ||
144 | |||
145 | void intel_pt_log(const char *fmt, ...) | ||
146 | { | ||
147 | va_list args; | ||
148 | |||
149 | if (intel_pt_log_open()) | ||
150 | return; | ||
151 | |||
152 | va_start(args, fmt); | ||
153 | vfprintf(f, fmt, args); | ||
154 | va_end(args); | ||
155 | } | ||
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h new file mode 100644 index 000000000000..db3942f83677 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h | |||
@@ -0,0 +1,52 @@ | |||
1 | /* | ||
2 | * intel_pt_log.h: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef INCLUDE__INTEL_PT_LOG_H__ | ||
17 | #define INCLUDE__INTEL_PT_LOG_H__ | ||
18 | |||
19 | #include <stdint.h> | ||
20 | #include <inttypes.h> | ||
21 | |||
22 | struct intel_pt_pkt; | ||
23 | |||
24 | void intel_pt_log_enable(void); | ||
25 | void intel_pt_log_disable(void); | ||
26 | void intel_pt_log_set_name(const char *name); | ||
27 | |||
28 | void intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len, | ||
29 | uint64_t pos, const unsigned char *buf); | ||
30 | |||
31 | struct intel_pt_insn; | ||
32 | |||
33 | void intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip); | ||
34 | void intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn, | ||
35 | uint64_t ip); | ||
36 | |||
37 | __attribute__((format(printf, 1, 2))) | ||
38 | void intel_pt_log(const char *fmt, ...); | ||
39 | |||
40 | #define x64_fmt "0x%" PRIx64 | ||
41 | |||
42 | static inline void intel_pt_log_at(const char *msg, uint64_t u) | ||
43 | { | ||
44 | intel_pt_log("%s at " x64_fmt "\n", msg, u); | ||
45 | } | ||
46 | |||
47 | static inline void intel_pt_log_to(const char *msg, uint64_t u) | ||
48 | { | ||
49 | intel_pt_log("%s to " x64_fmt "\n", msg, u); | ||
50 | } | ||
51 | |||
52 | #endif | ||
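A minimal sketch of the expected call order for the logging helpers declared
above; the log name and addresses are arbitrary example values:

	intel_pt_log_set_name("intel_pt");	/* output goes to "intel_pt.log" */
	intel_pt_log_enable();
	intel_pt_log("synchronized at " x64_fmt "\n", (uint64_t)0x400000);
	intel_pt_log_at("TSC packet", 0x1234);
	intel_pt_log_disable();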
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c new file mode 100644 index 000000000000..b1257c816310 --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | |||
@@ -0,0 +1,518 @@ | |||
1 | /* | ||
2 | * intel_pt_pkt_decoder.c: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <string.h> | ||
18 | #include <endian.h> | ||
19 | #include <byteswap.h> | ||
20 | |||
21 | #include "intel-pt-pkt-decoder.h" | ||
22 | |||
23 | #define BIT(n) (1 << (n)) | ||
24 | |||
25 | #define BIT63 ((uint64_t)1 << 63) | ||
26 | |||
27 | #define NR_FLAG BIT63 | ||
28 | |||
29 | #if __BYTE_ORDER == __BIG_ENDIAN | ||
30 | #define le16_to_cpu bswap_16 | ||
31 | #define le32_to_cpu bswap_32 | ||
32 | #define le64_to_cpu bswap_64 | ||
33 | #define memcpy_le64(d, s, n) do { \ | ||
34 | memcpy((d), (s), (n)); \ | ||
35 | *(d) = le64_to_cpu(*(d)); \ | ||
36 | } while (0) | ||
37 | #else | ||
38 | #define le16_to_cpu | ||
39 | #define le32_to_cpu | ||
40 | #define le64_to_cpu | ||
41 | #define memcpy_le64 memcpy | ||
42 | #endif | ||
43 | |||
44 | static const char * const packet_name[] = { | ||
45 | [INTEL_PT_BAD] = "Bad Packet!", | ||
46 | [INTEL_PT_PAD] = "PAD", | ||
47 | [INTEL_PT_TNT] = "TNT", | ||
48 | [INTEL_PT_TIP_PGD] = "TIP.PGD", | ||
49 | [INTEL_PT_TIP_PGE] = "TIP.PGE", | ||
50 | [INTEL_PT_TSC] = "TSC", | ||
51 | [INTEL_PT_TMA] = "TMA", | ||
52 | [INTEL_PT_MODE_EXEC] = "MODE.Exec", | ||
53 | [INTEL_PT_MODE_TSX] = "MODE.TSX", | ||
54 | [INTEL_PT_MTC] = "MTC", | ||
55 | [INTEL_PT_TIP] = "TIP", | ||
56 | [INTEL_PT_FUP] = "FUP", | ||
57 | [INTEL_PT_CYC] = "CYC", | ||
58 | [INTEL_PT_VMCS] = "VMCS", | ||
59 | [INTEL_PT_PSB] = "PSB", | ||
60 | [INTEL_PT_PSBEND] = "PSBEND", | ||
61 | [INTEL_PT_CBR] = "CBR", | ||
62 | [INTEL_PT_TRACESTOP] = "TraceSTOP", | ||
63 | [INTEL_PT_PIP] = "PIP", | ||
64 | [INTEL_PT_OVF] = "OVF", | ||
65 | [INTEL_PT_MNT] = "MNT", | ||
66 | }; | ||
67 | |||
68 | const char *intel_pt_pkt_name(enum intel_pt_pkt_type type) | ||
69 | { | ||
70 | return packet_name[type]; | ||
71 | } | ||
72 | |||
73 | static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len, | ||
74 | struct intel_pt_pkt *packet) | ||
75 | { | ||
76 | uint64_t payload; | ||
77 | int count; | ||
78 | |||
79 | if (len < 8) | ||
80 | return INTEL_PT_NEED_MORE_BYTES; | ||
81 | |||
82 | payload = le64_to_cpu(*(uint64_t *)buf); | ||
83 | |||
84 | for (count = 47; count; count--) { | ||
85 | if (payload & BIT63) | ||
86 | break; | ||
87 | payload <<= 1; | ||
88 | } | ||
89 | |||
90 | packet->type = INTEL_PT_TNT; | ||
91 | packet->count = count; | ||
92 | packet->payload = payload << 1; | ||
93 | return 8; | ||
94 | } | ||
95 | |||
96 | static int intel_pt_get_pip(const unsigned char *buf, size_t len, | ||
97 | struct intel_pt_pkt *packet) | ||
98 | { | ||
99 | uint64_t payload = 0; | ||
100 | |||
101 | if (len < 8) | ||
102 | return INTEL_PT_NEED_MORE_BYTES; | ||
103 | |||
104 | packet->type = INTEL_PT_PIP; | ||
105 | memcpy_le64(&payload, buf + 2, 6); | ||
106 | packet->payload = payload >> 1; | ||
107 | if (payload & 1) | ||
108 | packet->payload |= NR_FLAG; | ||
109 | |||
110 | return 8; | ||
111 | } | ||
112 | |||
113 | static int intel_pt_get_tracestop(struct intel_pt_pkt *packet) | ||
114 | { | ||
115 | packet->type = INTEL_PT_TRACESTOP; | ||
116 | return 2; | ||
117 | } | ||
118 | |||
119 | static int intel_pt_get_cbr(const unsigned char *buf, size_t len, | ||
120 | struct intel_pt_pkt *packet) | ||
121 | { | ||
122 | if (len < 4) | ||
123 | return INTEL_PT_NEED_MORE_BYTES; | ||
124 | packet->type = INTEL_PT_CBR; | ||
125 | packet->payload = buf[2]; | ||
126 | return 4; | ||
127 | } | ||
128 | |||
129 | static int intel_pt_get_vmcs(const unsigned char *buf, size_t len, | ||
130 | struct intel_pt_pkt *packet) | ||
131 | { | ||
132 | unsigned int count = (52 - 5) >> 3; | ||
133 | |||
134 | if (count < 1 || count > 7) | ||
135 | return INTEL_PT_BAD_PACKET; | ||
136 | |||
137 | if (len < count + 2) | ||
138 | return INTEL_PT_NEED_MORE_BYTES; | ||
139 | |||
140 | packet->type = INTEL_PT_VMCS; | ||
141 | packet->count = count; | ||
142 | memcpy_le64(&packet->payload, buf + 2, count); | ||
143 | |||
144 | return count + 2; | ||
145 | } | ||
146 | |||
147 | static int intel_pt_get_ovf(struct intel_pt_pkt *packet) | ||
148 | { | ||
149 | packet->type = INTEL_PT_OVF; | ||
150 | return 2; | ||
151 | } | ||
152 | |||
153 | static int intel_pt_get_psb(const unsigned char *buf, size_t len, | ||
154 | struct intel_pt_pkt *packet) | ||
155 | { | ||
156 | int i; | ||
157 | |||
158 | if (len < 16) | ||
159 | return INTEL_PT_NEED_MORE_BYTES; | ||
160 | |||
161 | for (i = 2; i < 16; i += 2) { | ||
162 | if (buf[i] != 2 || buf[i + 1] != 0x82) | ||
163 | return INTEL_PT_BAD_PACKET; | ||
164 | } | ||
165 | |||
166 | packet->type = INTEL_PT_PSB; | ||
167 | return 16; | ||
168 | } | ||
169 | |||
170 | static int intel_pt_get_psbend(struct intel_pt_pkt *packet) | ||
171 | { | ||
172 | packet->type = INTEL_PT_PSBEND; | ||
173 | return 2; | ||
174 | } | ||
175 | |||
176 | static int intel_pt_get_tma(const unsigned char *buf, size_t len, | ||
177 | struct intel_pt_pkt *packet) | ||
178 | { | ||
179 | if (len < 7) | ||
180 | return INTEL_PT_NEED_MORE_BYTES; | ||
181 | |||
182 | packet->type = INTEL_PT_TMA; | ||
183 | packet->payload = buf[2] | (buf[3] << 8); | ||
184 | packet->count = buf[5] | ((buf[6] & BIT(0)) << 8); | ||
185 | return 7; | ||
186 | } | ||
187 | |||
188 | static int intel_pt_get_pad(struct intel_pt_pkt *packet) | ||
189 | { | ||
190 | packet->type = INTEL_PT_PAD; | ||
191 | return 1; | ||
192 | } | ||
193 | |||
194 | static int intel_pt_get_mnt(const unsigned char *buf, size_t len, | ||
195 | struct intel_pt_pkt *packet) | ||
196 | { | ||
197 | if (len < 11) | ||
198 | return INTEL_PT_NEED_MORE_BYTES; | ||
199 | packet->type = INTEL_PT_MNT; | ||
200 | memcpy_le64(&packet->payload, buf + 3, 8); | ||
201 | return 11; | ||
203 | } | ||
204 | |||
205 | static int intel_pt_get_3byte(const unsigned char *buf, size_t len, | ||
206 | struct intel_pt_pkt *packet) | ||
207 | { | ||
208 | if (len < 3) | ||
209 | return INTEL_PT_NEED_MORE_BYTES; | ||
210 | |||
211 | switch (buf[2]) { | ||
212 | case 0x88: /* MNT */ | ||
213 | return intel_pt_get_mnt(buf, len, packet); | ||
214 | default: | ||
215 | return INTEL_PT_BAD_PACKET; | ||
216 | } | ||
217 | } | ||
218 | |||
219 | static int intel_pt_get_ext(const unsigned char *buf, size_t len, | ||
220 | struct intel_pt_pkt *packet) | ||
221 | { | ||
222 | if (len < 2) | ||
223 | return INTEL_PT_NEED_MORE_BYTES; | ||
224 | |||
225 | switch (buf[1]) { | ||
226 | case 0xa3: /* Long TNT */ | ||
227 | return intel_pt_get_long_tnt(buf, len, packet); | ||
228 | case 0x43: /* PIP */ | ||
229 | return intel_pt_get_pip(buf, len, packet); | ||
230 | case 0x83: /* TraceStop */ | ||
231 | return intel_pt_get_tracestop(packet); | ||
232 | case 0x03: /* CBR */ | ||
233 | return intel_pt_get_cbr(buf, len, packet); | ||
234 | case 0xc8: /* VMCS */ | ||
235 | return intel_pt_get_vmcs(buf, len, packet); | ||
236 | case 0xf3: /* OVF */ | ||
237 | return intel_pt_get_ovf(packet); | ||
238 | case 0x82: /* PSB */ | ||
239 | return intel_pt_get_psb(buf, len, packet); | ||
240 | case 0x23: /* PSBEND */ | ||
241 | return intel_pt_get_psbend(packet); | ||
242 | case 0x73: /* TMA */ | ||
243 | return intel_pt_get_tma(buf, len, packet); | ||
244 | case 0xC3: /* 3-byte header */ | ||
245 | return intel_pt_get_3byte(buf, len, packet); | ||
246 | default: | ||
247 | return INTEL_PT_BAD_PACKET; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | static int intel_pt_get_short_tnt(unsigned int byte, | ||
252 | struct intel_pt_pkt *packet) | ||
253 | { | ||
254 | int count; | ||
255 | |||
256 | for (count = 6; count; count--) { | ||
257 | if (byte & BIT(7)) | ||
258 | break; | ||
259 | byte <<= 1; | ||
260 | } | ||
261 | |||
262 | packet->type = INTEL_PT_TNT; | ||
263 | packet->count = count; | ||
264 | packet->payload = (uint64_t)byte << 57; | ||
265 | |||
266 | return 1; | ||
267 | } | ||
268 | |||
269 | static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf, | ||
270 | size_t len, struct intel_pt_pkt *packet) | ||
271 | { | ||
272 | unsigned int offs = 1, shift; | ||
273 | uint64_t payload = byte >> 3; | ||
274 | |||
275 | byte >>= 2; | ||
276 | len -= 1; | ||
277 | for (shift = 5; byte & 1; shift += 7) { | ||
278 | if (offs > 9) | ||
279 | return INTEL_PT_BAD_PACKET; | ||
280 | if (len < offs) | ||
281 | return INTEL_PT_NEED_MORE_BYTES; | ||
282 | byte = buf[offs++]; | ||
283 | payload |= (byte >> 1) << shift; | ||
284 | } | ||
285 | |||
286 | packet->type = INTEL_PT_CYC; | ||
287 | packet->payload = payload; | ||
288 | return offs; | ||
289 | } | ||
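/*
 * Illustrative example (hypothetical input): the one-byte CYC packet 0x13
 * has the CYC identifier in bits [1:0], a clear continuation bit (bit 2) and
 * value bits [7:3] = 2, so the function above consumes 1 byte and sets
 * packet->payload to 2.
 */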
290 | |||
291 | static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, | ||
292 | const unsigned char *buf, size_t len, | ||
293 | struct intel_pt_pkt *packet) | ||
294 | { | ||
295 | switch (byte >> 5) { | ||
296 | case 0: | ||
297 | packet->count = 0; | ||
298 | break; | ||
299 | case 1: | ||
300 | if (len < 3) | ||
301 | return INTEL_PT_NEED_MORE_BYTES; | ||
302 | packet->count = 2; | ||
303 | packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); | ||
304 | break; | ||
305 | case 2: | ||
306 | if (len < 5) | ||
307 | return INTEL_PT_NEED_MORE_BYTES; | ||
308 | packet->count = 4; | ||
309 | packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); | ||
310 | break; | ||
311 | case 3: | ||
312 | case 6: | ||
313 | if (len < 7) | ||
314 | return INTEL_PT_NEED_MORE_BYTES; | ||
315 | packet->count = 6; | ||
316 | memcpy_le64(&packet->payload, buf + 1, 6); | ||
317 | break; | ||
318 | default: | ||
319 | return INTEL_PT_BAD_PACKET; | ||
320 | } | ||
321 | |||
322 | packet->type = type; | ||
323 | |||
324 | return packet->count + 1; | ||
325 | } | ||
326 | |||
327 | static int intel_pt_get_mode(const unsigned char *buf, size_t len, | ||
328 | struct intel_pt_pkt *packet) | ||
329 | { | ||
330 | if (len < 2) | ||
331 | return INTEL_PT_NEED_MORE_BYTES; | ||
332 | |||
333 | switch (buf[1] >> 5) { | ||
334 | case 0: | ||
335 | packet->type = INTEL_PT_MODE_EXEC; | ||
336 | switch (buf[1] & 3) { | ||
337 | case 0: | ||
338 | packet->payload = 16; | ||
339 | break; | ||
340 | case 1: | ||
341 | packet->payload = 64; | ||
342 | break; | ||
343 | case 2: | ||
344 | packet->payload = 32; | ||
345 | break; | ||
346 | default: | ||
347 | return INTEL_PT_BAD_PACKET; | ||
348 | } | ||
349 | break; | ||
350 | case 1: | ||
351 | packet->type = INTEL_PT_MODE_TSX; | ||
352 | if ((buf[1] & 3) == 3) | ||
353 | return INTEL_PT_BAD_PACKET; | ||
354 | packet->payload = buf[1] & 3; | ||
355 | break; | ||
356 | default: | ||
357 | return INTEL_PT_BAD_PACKET; | ||
358 | } | ||
359 | |||
360 | return 2; | ||
361 | } | ||
362 | |||
363 | static int intel_pt_get_tsc(const unsigned char *buf, size_t len, | ||
364 | struct intel_pt_pkt *packet) | ||
365 | { | ||
366 | if (len < 8) | ||
367 | return INTEL_PT_NEED_MORE_BYTES; | ||
368 | packet->type = INTEL_PT_TSC; | ||
369 | memcpy_le64(&packet->payload, buf + 1, 7); | ||
370 | return 8; | ||
371 | } | ||
372 | |||
373 | static int intel_pt_get_mtc(const unsigned char *buf, size_t len, | ||
374 | struct intel_pt_pkt *packet) | ||
375 | { | ||
376 | if (len < 2) | ||
377 | return INTEL_PT_NEED_MORE_BYTES; | ||
378 | packet->type = INTEL_PT_MTC; | ||
379 | packet->payload = buf[1]; | ||
380 | return 2; | ||
381 | } | ||
382 | |||
383 | static int intel_pt_do_get_packet(const unsigned char *buf, size_t len, | ||
384 | struct intel_pt_pkt *packet) | ||
385 | { | ||
386 | unsigned int byte; | ||
387 | |||
388 | memset(packet, 0, sizeof(struct intel_pt_pkt)); | ||
389 | |||
390 | if (!len) | ||
391 | return INTEL_PT_NEED_MORE_BYTES; | ||
392 | |||
393 | byte = buf[0]; | ||
394 | if (!(byte & BIT(0))) { | ||
395 | if (byte == 0) | ||
396 | return intel_pt_get_pad(packet); | ||
397 | if (byte == 2) | ||
398 | return intel_pt_get_ext(buf, len, packet); | ||
399 | return intel_pt_get_short_tnt(byte, packet); | ||
400 | } | ||
401 | |||
402 | if ((byte & 2)) | ||
403 | return intel_pt_get_cyc(byte, buf, len, packet); | ||
404 | |||
405 | switch (byte & 0x1f) { | ||
406 | case 0x0D: | ||
407 | return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet); | ||
408 | case 0x11: | ||
409 | return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len, | ||
410 | packet); | ||
411 | case 0x01: | ||
412 | return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len, | ||
413 | packet); | ||
414 | case 0x1D: | ||
415 | return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet); | ||
416 | case 0x19: | ||
417 | switch (byte) { | ||
418 | case 0x99: | ||
419 | return intel_pt_get_mode(buf, len, packet); | ||
420 | case 0x19: | ||
421 | return intel_pt_get_tsc(buf, len, packet); | ||
422 | case 0x59: | ||
423 | return intel_pt_get_mtc(buf, len, packet); | ||
424 | default: | ||
425 | return INTEL_PT_BAD_PACKET; | ||
426 | } | ||
427 | default: | ||
428 | return INTEL_PT_BAD_PACKET; | ||
429 | } | ||
430 | } | ||
431 | |||
432 | int intel_pt_get_packet(const unsigned char *buf, size_t len, | ||
433 | struct intel_pt_pkt *packet) | ||
434 | { | ||
435 | int ret; | ||
436 | |||
437 | ret = intel_pt_do_get_packet(buf, len, packet); | ||
438 | if (ret > 0) { | ||
439 | while (ret < 8 && len > (size_t)ret && !buf[ret]) | ||
440 | ret += 1; | ||
441 | } | ||
442 | return ret; | ||
443 | } | ||
444 | |||
445 | int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, | ||
446 | size_t buf_len) | ||
447 | { | ||
448 | int ret, i, nr; | ||
449 | unsigned long long payload = packet->payload; | ||
450 | const char *name = intel_pt_pkt_name(packet->type); | ||
451 | |||
452 | switch (packet->type) { | ||
453 | case INTEL_PT_BAD: | ||
454 | case INTEL_PT_PAD: | ||
455 | case INTEL_PT_PSB: | ||
456 | case INTEL_PT_PSBEND: | ||
457 | case INTEL_PT_TRACESTOP: | ||
458 | case INTEL_PT_OVF: | ||
459 | return snprintf(buf, buf_len, "%s", name); | ||
460 | case INTEL_PT_TNT: { | ||
461 | size_t blen = buf_len; | ||
462 | |||
463 | ret = snprintf(buf, blen, "%s ", name); | ||
464 | if (ret < 0) | ||
465 | return ret; | ||
466 | buf += ret; | ||
467 | blen -= ret; | ||
468 | for (i = 0; i < packet->count; i++) { | ||
469 | if (payload & BIT63) | ||
470 | ret = snprintf(buf, blen, "T"); | ||
471 | else | ||
472 | ret = snprintf(buf, blen, "N"); | ||
473 | if (ret < 0) | ||
474 | return ret; | ||
475 | buf += ret; | ||
476 | blen -= ret; | ||
477 | payload <<= 1; | ||
478 | } | ||
479 | ret = snprintf(buf, blen, " (%d)", packet->count); | ||
480 | if (ret < 0) | ||
481 | return ret; | ||
482 | blen -= ret; | ||
483 | return buf_len - blen; | ||
484 | } | ||
485 | case INTEL_PT_TIP_PGD: | ||
486 | case INTEL_PT_TIP_PGE: | ||
487 | case INTEL_PT_TIP: | ||
488 | case INTEL_PT_FUP: | ||
489 | if (!(packet->count)) | ||
490 | return snprintf(buf, buf_len, "%s no ip", name); | ||
491 | case INTEL_PT_CYC: | ||
492 | case INTEL_PT_VMCS: | ||
493 | case INTEL_PT_MTC: | ||
494 | case INTEL_PT_MNT: | ||
495 | case INTEL_PT_CBR: | ||
496 | case INTEL_PT_TSC: | ||
497 | return snprintf(buf, buf_len, "%s 0x%llx", name, payload); | ||
498 | case INTEL_PT_TMA: | ||
499 | return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name, | ||
500 | (unsigned)payload, packet->count); | ||
501 | case INTEL_PT_MODE_EXEC: | ||
502 | return snprintf(buf, buf_len, "%s %lld", name, payload); | ||
503 | case INTEL_PT_MODE_TSX: | ||
504 | return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u", | ||
505 | name, (unsigned)(payload >> 1) & 1, | ||
506 | (unsigned)payload & 1); | ||
507 | case INTEL_PT_PIP: | ||
508 | nr = packet->payload & NR_FLAG ? 1 : 0; | ||
509 | payload &= ~NR_FLAG; | ||
510 | ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)", | ||
511 | name, payload, nr); | ||
512 | return ret; | ||
513 | default: | ||
514 | break; | ||
515 | } | ||
516 | return snprintf(buf, buf_len, "%s 0x%llx (%d)", | ||
517 | name, payload, packet->count); | ||
518 | } | ||
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h new file mode 100644 index 000000000000..781bb79883bd --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h | |||
@@ -0,0 +1,70 @@ | |||
1 | /* | ||
2 | * intel_pt_pkt_decoder.h: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2014, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__ | ||
17 | #define INCLUDE__INTEL_PT_PKT_DECODER_H__ | ||
18 | |||
19 | #include <stddef.h> | ||
20 | #include <stdint.h> | ||
21 | |||
22 | #define INTEL_PT_PKT_DESC_MAX 256 | ||
23 | |||
24 | #define INTEL_PT_NEED_MORE_BYTES -1 | ||
25 | #define INTEL_PT_BAD_PACKET -2 | ||
26 | |||
27 | #define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \ | ||
28 | "\002\202\002\202\002\202\002\202" | ||
29 | #define INTEL_PT_PSB_LEN 16 | ||
30 | |||
31 | #define INTEL_PT_PKT_MAX_SZ 16 | ||
32 | |||
33 | enum intel_pt_pkt_type { | ||
34 | INTEL_PT_BAD, | ||
35 | INTEL_PT_PAD, | ||
36 | INTEL_PT_TNT, | ||
37 | INTEL_PT_TIP_PGD, | ||
38 | INTEL_PT_TIP_PGE, | ||
39 | INTEL_PT_TSC, | ||
40 | INTEL_PT_TMA, | ||
41 | INTEL_PT_MODE_EXEC, | ||
42 | INTEL_PT_MODE_TSX, | ||
43 | INTEL_PT_MTC, | ||
44 | INTEL_PT_TIP, | ||
45 | INTEL_PT_FUP, | ||
46 | INTEL_PT_CYC, | ||
47 | INTEL_PT_VMCS, | ||
48 | INTEL_PT_PSB, | ||
49 | INTEL_PT_PSBEND, | ||
50 | INTEL_PT_CBR, | ||
51 | INTEL_PT_TRACESTOP, | ||
52 | INTEL_PT_PIP, | ||
53 | INTEL_PT_OVF, | ||
54 | INTEL_PT_MNT, | ||
55 | }; | ||
56 | |||
57 | struct intel_pt_pkt { | ||
58 | enum intel_pt_pkt_type type; | ||
59 | int count; | ||
60 | uint64_t payload; | ||
61 | }; | ||
62 | |||
63 | const char *intel_pt_pkt_name(enum intel_pt_pkt_type); | ||
64 | |||
65 | int intel_pt_get_packet(const unsigned char *buf, size_t len, | ||
66 | struct intel_pt_pkt *packet); | ||
67 | |||
68 | int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len); | ||
69 | |||
70 | #endif | ||
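A minimal sketch of walking a raw trace buffer with the packet decoder
declared above and printing one description per packet; dump_packets() is a
hypothetical helper, not part of this interface:

	#include <stdio.h>

	static void dump_packets(const unsigned char *buf, size_t len)
	{
		struct intel_pt_pkt packet;
		char desc[INTEL_PT_PKT_DESC_MAX];
		int ret;

		while (len) {
			ret = intel_pt_get_packet(buf, len, &packet);
			if (ret <= 0)
				break;	/* bad packet or more bytes needed */
			if (intel_pt_pkt_desc(&packet, desc, sizeof(desc)) > 0)
				printf("%s\n", desc);
			buf += ret;
			len -= ret;
		}
	}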
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt new file mode 100644 index 000000000000..816488c0b97e --- /dev/null +++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | |||
@@ -0,0 +1,970 @@ | |||
1 | # x86 Opcode Maps | ||
2 | # | ||
3 | # This is (mostly) based on the following documentation. | ||
4 | # - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C | ||
5 | # (#326018-047US, June 2013) | ||
6 | # | ||
7 | #<Opcode maps> | ||
8 | # Table: table-name | ||
9 | # Referrer: escaped-name | ||
10 | # AVXcode: avx-code | ||
11 | # opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
12 | # (or) | ||
13 | # opcode: escape # escaped-name | ||
14 | # EndTable | ||
15 | # | ||
16 | #<group maps> | ||
17 | # GrpTable: GrpXXX | ||
18 | # reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...] | ||
19 | # EndTable | ||
20 | # | ||
21 | # AVX Superscripts | ||
22 | # (v): this opcode requires VEX prefix. | ||
23 | # (v1): this opcode only supports 128bit VEX. | ||
24 | # | ||
25 | # Last Prefix Superscripts | ||
26 | # - (66): the last prefix is 0x66 | ||
27 | # - (F3): the last prefix is 0xF3 | ||
28 | # - (F2): the last prefix is 0xF2 | ||
29 | # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) | ||
30 | # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. | ||
31 | |||
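# Illustrative reading (not an entry itself): a line such as
#   e8: CALL Jz (f64)
# means opcode byte 0xe8 is CALL, taking a Jz relative-offset operand, with
# the operand size forced to 64 bits in 64-bit mode (f64, see the forced64
# note further down).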
32 | Table: one byte opcode | ||
33 | Referrer: | ||
34 | AVXcode: | ||
35 | # 0x00 - 0x0f | ||
36 | 00: ADD Eb,Gb | ||
37 | 01: ADD Ev,Gv | ||
38 | 02: ADD Gb,Eb | ||
39 | 03: ADD Gv,Ev | ||
40 | 04: ADD AL,Ib | ||
41 | 05: ADD rAX,Iz | ||
42 | 06: PUSH ES (i64) | ||
43 | 07: POP ES (i64) | ||
44 | 08: OR Eb,Gb | ||
45 | 09: OR Ev,Gv | ||
46 | 0a: OR Gb,Eb | ||
47 | 0b: OR Gv,Ev | ||
48 | 0c: OR AL,Ib | ||
49 | 0d: OR rAX,Iz | ||
50 | 0e: PUSH CS (i64) | ||
51 | 0f: escape # 2-byte escape | ||
52 | # 0x10 - 0x1f | ||
53 | 10: ADC Eb,Gb | ||
54 | 11: ADC Ev,Gv | ||
55 | 12: ADC Gb,Eb | ||
56 | 13: ADC Gv,Ev | ||
57 | 14: ADC AL,Ib | ||
58 | 15: ADC rAX,Iz | ||
59 | 16: PUSH SS (i64) | ||
60 | 17: POP SS (i64) | ||
61 | 18: SBB Eb,Gb | ||
62 | 19: SBB Ev,Gv | ||
63 | 1a: SBB Gb,Eb | ||
64 | 1b: SBB Gv,Ev | ||
65 | 1c: SBB AL,Ib | ||
66 | 1d: SBB rAX,Iz | ||
67 | 1e: PUSH DS (i64) | ||
68 | 1f: POP DS (i64) | ||
69 | # 0x20 - 0x2f | ||
70 | 20: AND Eb,Gb | ||
71 | 21: AND Ev,Gv | ||
72 | 22: AND Gb,Eb | ||
73 | 23: AND Gv,Ev | ||
74 | 24: AND AL,Ib | ||
75 | 25: AND rAX,Iz | ||
76 | 26: SEG=ES (Prefix) | ||
77 | 27: DAA (i64) | ||
78 | 28: SUB Eb,Gb | ||
79 | 29: SUB Ev,Gv | ||
80 | 2a: SUB Gb,Eb | ||
81 | 2b: SUB Gv,Ev | ||
82 | 2c: SUB AL,Ib | ||
83 | 2d: SUB rAX,Iz | ||
84 | 2e: SEG=CS (Prefix) | ||
85 | 2f: DAS (i64) | ||
86 | # 0x30 - 0x3f | ||
87 | 30: XOR Eb,Gb | ||
88 | 31: XOR Ev,Gv | ||
89 | 32: XOR Gb,Eb | ||
90 | 33: XOR Gv,Ev | ||
91 | 34: XOR AL,Ib | ||
92 | 35: XOR rAX,Iz | ||
93 | 36: SEG=SS (Prefix) | ||
94 | 37: AAA (i64) | ||
95 | 38: CMP Eb,Gb | ||
96 | 39: CMP Ev,Gv | ||
97 | 3a: CMP Gb,Eb | ||
98 | 3b: CMP Gv,Ev | ||
99 | 3c: CMP AL,Ib | ||
100 | 3d: CMP rAX,Iz | ||
101 | 3e: SEG=DS (Prefix) | ||
102 | 3f: AAS (i64) | ||
103 | # 0x40 - 0x4f | ||
104 | 40: INC eAX (i64) | REX (o64) | ||
105 | 41: INC eCX (i64) | REX.B (o64) | ||
106 | 42: INC eDX (i64) | REX.X (o64) | ||
107 | 43: INC eBX (i64) | REX.XB (o64) | ||
108 | 44: INC eSP (i64) | REX.R (o64) | ||
109 | 45: INC eBP (i64) | REX.RB (o64) | ||
110 | 46: INC eSI (i64) | REX.RX (o64) | ||
111 | 47: INC eDI (i64) | REX.RXB (o64) | ||
112 | 48: DEC eAX (i64) | REX.W (o64) | ||
113 | 49: DEC eCX (i64) | REX.WB (o64) | ||
114 | 4a: DEC eDX (i64) | REX.WX (o64) | ||
115 | 4b: DEC eBX (i64) | REX.WXB (o64) | ||
116 | 4c: DEC eSP (i64) | REX.WR (o64) | ||
117 | 4d: DEC eBP (i64) | REX.WRB (o64) | ||
118 | 4e: DEC eSI (i64) | REX.WRX (o64) | ||
119 | 4f: DEC eDI (i64) | REX.WRXB (o64) | ||
120 | # 0x50 - 0x5f | ||
121 | 50: PUSH rAX/r8 (d64) | ||
122 | 51: PUSH rCX/r9 (d64) | ||
123 | 52: PUSH rDX/r10 (d64) | ||
124 | 53: PUSH rBX/r11 (d64) | ||
125 | 54: PUSH rSP/r12 (d64) | ||
126 | 55: PUSH rBP/r13 (d64) | ||
127 | 56: PUSH rSI/r14 (d64) | ||
128 | 57: PUSH rDI/r15 (d64) | ||
129 | 58: POP rAX/r8 (d64) | ||
130 | 59: POP rCX/r9 (d64) | ||
131 | 5a: POP rDX/r10 (d64) | ||
132 | 5b: POP rBX/r11 (d64) | ||
133 | 5c: POP rSP/r12 (d64) | ||
134 | 5d: POP rBP/r13 (d64) | ||
135 | 5e: POP rSI/r14 (d64) | ||
136 | 5f: POP rDI/r15 (d64) | ||
137 | # 0x60 - 0x6f | ||
138 | 60: PUSHA/PUSHAD (i64) | ||
139 | 61: POPA/POPAD (i64) | ||
140 | 62: BOUND Gv,Ma (i64) | ||
141 | 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) | ||
142 | 64: SEG=FS (Prefix) | ||
143 | 65: SEG=GS (Prefix) | ||
144 | 66: Operand-Size (Prefix) | ||
145 | 67: Address-Size (Prefix) | ||
146 | 68: PUSH Iz (d64) | ||
147 | 69: IMUL Gv,Ev,Iz | ||
148 | 6a: PUSH Ib (d64) | ||
149 | 6b: IMUL Gv,Ev,Ib | ||
150 | 6c: INS/INSB Yb,DX | ||
151 | 6d: INS/INSW/INSD Yz,DX | ||
152 | 6e: OUTS/OUTSB DX,Xb | ||
153 | 6f: OUTS/OUTSW/OUTSD DX,Xz | ||
154 | # 0x70 - 0x7f | ||
155 | 70: JO Jb | ||
156 | 71: JNO Jb | ||
157 | 72: JB/JNAE/JC Jb | ||
158 | 73: JNB/JAE/JNC Jb | ||
159 | 74: JZ/JE Jb | ||
160 | 75: JNZ/JNE Jb | ||
161 | 76: JBE/JNA Jb | ||
162 | 77: JNBE/JA Jb | ||
163 | 78: JS Jb | ||
164 | 79: JNS Jb | ||
165 | 7a: JP/JPE Jb | ||
166 | 7b: JNP/JPO Jb | ||
167 | 7c: JL/JNGE Jb | ||
168 | 7d: JNL/JGE Jb | ||
169 | 7e: JLE/JNG Jb | ||
170 | 7f: JNLE/JG Jb | ||
171 | # 0x80 - 0x8f | ||
172 | 80: Grp1 Eb,Ib (1A) | ||
173 | 81: Grp1 Ev,Iz (1A) | ||
174 | 82: Grp1 Eb,Ib (1A),(i64) | ||
175 | 83: Grp1 Ev,Ib (1A) | ||
176 | 84: TEST Eb,Gb | ||
177 | 85: TEST Ev,Gv | ||
178 | 86: XCHG Eb,Gb | ||
179 | 87: XCHG Ev,Gv | ||
180 | 88: MOV Eb,Gb | ||
181 | 89: MOV Ev,Gv | ||
182 | 8a: MOV Gb,Eb | ||
183 | 8b: MOV Gv,Ev | ||
184 | 8c: MOV Ev,Sw | ||
185 | 8d: LEA Gv,M | ||
186 | 8e: MOV Sw,Ew | ||
187 | 8f: Grp1A (1A) | POP Ev (d64) | ||
188 | # 0x90 - 0x9f | ||
189 | 90: NOP | PAUSE (F3) | XCHG r8,rAX | ||
190 | 91: XCHG rCX/r9,rAX | ||
191 | 92: XCHG rDX/r10,rAX | ||
192 | 93: XCHG rBX/r11,rAX | ||
193 | 94: XCHG rSP/r12,rAX | ||
194 | 95: XCHG rBP/r13,rAX | ||
195 | 96: XCHG rSI/r14,rAX | ||
196 | 97: XCHG rDI/r15,rAX | ||
197 | 98: CBW/CWDE/CDQE | ||
198 | 99: CWD/CDQ/CQO | ||
199 | 9a: CALLF Ap (i64) | ||
200 | 9b: FWAIT/WAIT | ||
201 | 9c: PUSHF/D/Q Fv (d64) | ||
202 | 9d: POPF/D/Q Fv (d64) | ||
203 | 9e: SAHF | ||
204 | 9f: LAHF | ||
205 | # 0xa0 - 0xaf | ||
206 | a0: MOV AL,Ob | ||
207 | a1: MOV rAX,Ov | ||
208 | a2: MOV Ob,AL | ||
209 | a3: MOV Ov,rAX | ||
210 | a4: MOVS/B Yb,Xb | ||
211 | a5: MOVS/W/D/Q Yv,Xv | ||
212 | a6: CMPS/B Xb,Yb | ||
213 | a7: CMPS/W/D Xv,Yv | ||
214 | a8: TEST AL,Ib | ||
215 | a9: TEST rAX,Iz | ||
216 | aa: STOS/B Yb,AL | ||
217 | ab: STOS/W/D/Q Yv,rAX | ||
218 | ac: LODS/B AL,Xb | ||
219 | ad: LODS/W/D/Q rAX,Xv | ||
220 | ae: SCAS/B AL,Yb | ||
221 | # Note: The May 2011 Intel manual shows Xv for the second parameter of the | ||
222 | # next instruction but Yv is correct | ||
223 | af: SCAS/W/D/Q rAX,Yv | ||
224 | # 0xb0 - 0xbf | ||
225 | b0: MOV AL/R8L,Ib | ||
226 | b1: MOV CL/R9L,Ib | ||
227 | b2: MOV DL/R10L,Ib | ||
228 | b3: MOV BL/R11L,Ib | ||
229 | b4: MOV AH/R12L,Ib | ||
230 | b5: MOV CH/R13L,Ib | ||
231 | b6: MOV DH/R14L,Ib | ||
232 | b7: MOV BH/R15L,Ib | ||
233 | b8: MOV rAX/r8,Iv | ||
234 | b9: MOV rCX/r9,Iv | ||
235 | ba: MOV rDX/r10,Iv | ||
236 | bb: MOV rBX/r11,Iv | ||
237 | bc: MOV rSP/r12,Iv | ||
238 | bd: MOV rBP/r13,Iv | ||
239 | be: MOV rSI/r14,Iv | ||
240 | bf: MOV rDI/r15,Iv | ||
241 | # 0xc0 - 0xcf | ||
242 | c0: Grp2 Eb,Ib (1A) | ||
243 | c1: Grp2 Ev,Ib (1A) | ||
244 | c2: RETN Iw (f64) | ||
245 | c3: RETN | ||
246 | c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) | ||
247 | c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) | ||
248 | c6: Grp11A Eb,Ib (1A) | ||
249 | c7: Grp11B Ev,Iz (1A) | ||
250 | c8: ENTER Iw,Ib | ||
251 | c9: LEAVE (d64) | ||
252 | ca: RETF Iw | ||
253 | cb: RETF | ||
254 | cc: INT3 | ||
255 | cd: INT Ib | ||
256 | ce: INTO (i64) | ||
257 | cf: IRET/D/Q | ||
258 | # 0xd0 - 0xdf | ||
259 | d0: Grp2 Eb,1 (1A) | ||
260 | d1: Grp2 Ev,1 (1A) | ||
261 | d2: Grp2 Eb,CL (1A) | ||
262 | d3: Grp2 Ev,CL (1A) | ||
263 | d4: AAM Ib (i64) | ||
264 | d5: AAD Ib (i64) | ||
265 | d6: | ||
266 | d7: XLAT/XLATB | ||
267 | d8: ESC | ||
268 | d9: ESC | ||
269 | da: ESC | ||
270 | db: ESC | ||
271 | dc: ESC | ||
272 | dd: ESC | ||
273 | de: ESC | ||
274 | df: ESC | ||
275 | # 0xe0 - 0xef | ||
276 | # Note: "forced64" is Intel CPU behavior: they ignore the 0x66 prefix | ||
277 | # in 64-bit mode. AMD CPUs accept the 0x66 prefix, which causes RIP truncation | ||
278 | # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. | ||
279 | e0: LOOPNE/LOOPNZ Jb (f64) | ||
280 | e1: LOOPE/LOOPZ Jb (f64) | ||
281 | e2: LOOP Jb (f64) | ||
282 | e3: JrCXZ Jb (f64) | ||
283 | e4: IN AL,Ib | ||
284 | e5: IN eAX,Ib | ||
285 | e6: OUT Ib,AL | ||
286 | e7: OUT Ib,eAX | ||
287 | # With a 0x66 prefix in 64-bit mode, the immediate offset in "near" | ||
288 | # jumps and calls is 16-bit for AMD CPUs. For CALL, the | ||
289 | # push of the return address is 16-bit wide and RSP is decremented by 2, | ||
290 | # but RSP is not truncated to 16 bits, unlike RIP. | ||
291 | e8: CALL Jz (f64) | ||
292 | e9: JMP-near Jz (f64) | ||
293 | ea: JMP-far Ap (i64) | ||
294 | eb: JMP-short Jb (f64) | ||
295 | ec: IN AL,DX | ||
296 | ed: IN eAX,DX | ||
297 | ee: OUT DX,AL | ||
298 | ef: OUT DX,eAX | ||
299 | # 0xf0 - 0xff | ||
300 | f0: LOCK (Prefix) | ||
301 | f1: | ||
302 | f2: REPNE (Prefix) | XACQUIRE (Prefix) | ||
303 | f3: REP/REPE (Prefix) | XRELEASE (Prefix) | ||
304 | f4: HLT | ||
305 | f5: CMC | ||
306 | f6: Grp3_1 Eb (1A) | ||
307 | f7: Grp3_2 Ev (1A) | ||
308 | f8: CLC | ||
309 | f9: STC | ||
310 | fa: CLI | ||
311 | fb: STI | ||
312 | fc: CLD | ||
313 | fd: STD | ||
314 | fe: Grp4 (1A) | ||
315 | ff: Grp5 (1A) | ||
316 | EndTable | ||
317 | |||
318 | Table: 2-byte opcode (0x0f) | ||
319 | Referrer: 2-byte escape | ||
320 | AVXcode: 1 | ||
321 | # 0x0f 0x00-0x0f | ||
322 | 00: Grp6 (1A) | ||
323 | 01: Grp7 (1A) | ||
324 | 02: LAR Gv,Ew | ||
325 | 03: LSL Gv,Ew | ||
326 | 04: | ||
327 | 05: SYSCALL (o64) | ||
328 | 06: CLTS | ||
329 | 07: SYSRET (o64) | ||
330 | 08: INVD | ||
331 | 09: WBINVD | ||
332 | 0a: | ||
333 | 0b: UD2 (1B) | ||
334 | 0c: | ||
335 | # AMD's prefetch group. Intel supports prefetchw(/1) only. | ||
336 | 0d: GrpP | ||
337 | 0e: FEMMS | ||
338 | # 3DNow! uses the last imm byte as opcode extension. | ||
339 | 0f: 3DNow! Pq,Qq,Ib | ||
340 | # 0x0f 0x10-0x1f | ||
341 | # NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no operands | ||
342 | # but they actually do. Also, vmovss and vmovsd only accept 128-bit operands. | ||
343 | # MOVSS/MOVSD have three forms in the SDM; this map just shows a typical one. | ||
344 | # Many AVX instructions lack the v1 superscript, according to Intel AVX Programming | ||
345 | # Reference A.1 | ||
346 | 10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1) | ||
347 | 11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1) | ||
348 | 12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2) | ||
349 | 13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1) | ||
350 | 14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66) | ||
351 | 15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66) | ||
352 | 16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3) | ||
353 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) | ||
354 | 18: Grp16 (1A) | ||
355 | 19: | ||
356 | 1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv | ||
357 | 1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,Gv,Gv | ||
358 | 1c: | ||
359 | 1d: | ||
360 | 1e: | ||
361 | 1f: NOP Ev | ||
362 | # 0x0f 0x20-0x2f | ||
363 | 20: MOV Rd,Cd | ||
364 | 21: MOV Rd,Dd | ||
365 | 22: MOV Cd,Rd | ||
366 | 23: MOV Dd,Rd | ||
367 | 24: | ||
368 | 25: | ||
369 | 26: | ||
370 | 27: | ||
371 | 28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66) | ||
372 | 29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66) | ||
373 | 2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1) | ||
374 | 2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66) | ||
375 | 2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1) | ||
376 | 2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1) | ||
377 | 2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1) | ||
378 | 2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1) | ||
379 | # 0x0f 0x30-0x3f | ||
380 | 30: WRMSR | ||
381 | 31: RDTSC | ||
382 | 32: RDMSR | ||
383 | 33: RDPMC | ||
384 | 34: SYSENTER | ||
385 | 35: SYSEXIT | ||
386 | 36: | ||
387 | 37: GETSEC | ||
388 | 38: escape # 3-byte escape 1 | ||
389 | 39: | ||
390 | 3a: escape # 3-byte escape 2 | ||
391 | 3b: | ||
392 | 3c: | ||
393 | 3d: | ||
394 | 3e: | ||
395 | 3f: | ||
396 | # 0x0f 0x40-0x4f | ||
397 | 40: CMOVO Gv,Ev | ||
398 | 41: CMOVNO Gv,Ev | ||
399 | 42: CMOVB/C/NAE Gv,Ev | ||
400 | 43: CMOVAE/NB/NC Gv,Ev | ||
401 | 44: CMOVE/Z Gv,Ev | ||
402 | 45: CMOVNE/NZ Gv,Ev | ||
403 | 46: CMOVBE/NA Gv,Ev | ||
404 | 47: CMOVA/NBE Gv,Ev | ||
405 | 48: CMOVS Gv,Ev | ||
406 | 49: CMOVNS Gv,Ev | ||
407 | 4a: CMOVP/PE Gv,Ev | ||
408 | 4b: CMOVNP/PO Gv,Ev | ||
409 | 4c: CMOVL/NGE Gv,Ev | ||
410 | 4d: CMOVNL/GE Gv,Ev | ||
411 | 4e: CMOVLE/NG Gv,Ev | ||
412 | 4f: CMOVNLE/G Gv,Ev | ||
413 | # 0x0f 0x50-0x5f | ||
414 | 50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66) | ||
415 | 51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1) | ||
416 | 52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1) | ||
417 | 53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1) | ||
418 | 54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66) | ||
419 | 55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66) | ||
420 | 56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66) | ||
421 | 57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66) | ||
422 | 58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1) | ||
423 | 59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1) | ||
424 | 5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1) | ||
425 | 5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3) | ||
426 | 5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1) | ||
427 | 5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1) | ||
428 | 5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1) | ||
429 | 5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1) | ||
430 | # 0x0f 0x60-0x6f | ||
431 | 60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1) | ||
432 | 61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1) | ||
433 | 62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1) | ||
434 | 63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1) | ||
435 | 64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1) | ||
436 | 65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1) | ||
437 | 66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1) | ||
438 | 67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1) | ||
439 | 68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1) | ||
440 | 69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1) | ||
441 | 6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1) | ||
442 | 6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1) | ||
443 | 6c: vpunpcklqdq Vx,Hx,Wx (66),(v1) | ||
444 | 6d: vpunpckhqdq Vx,Hx,Wx (66),(v1) | ||
445 | 6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1) | ||
446 | 6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3) | ||
447 | # 0x0f 0x70-0x7f | ||
448 | 70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1) | ||
449 | 71: Grp12 (1A) | ||
450 | 72: Grp13 (1A) | ||
451 | 73: Grp14 (1A) | ||
452 | 74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1) | ||
453 | 75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1) | ||
454 | 76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1) | ||
455 | # Note: Remove (v), because vzeroall and vzeroupper become emms without VEX. | ||
456 | 77: emms | vzeroupper | vzeroall | ||
457 | 78: VMREAD Ey,Gy | ||
458 | 79: VMWRITE Gy,Ey | ||
459 | 7a: | ||
460 | 7b: | ||
461 | 7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2) | ||
462 | 7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2) | ||
463 | 7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1) | ||
464 | 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3) | ||
465 | # 0x0f 0x80-0x8f | ||
466 | # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). | ||
467 | 80: JO Jz (f64) | ||
468 | 81: JNO Jz (f64) | ||
469 | 82: JB/JC/JNAE Jz (f64) | ||
470 | 83: JAE/JNB/JNC Jz (f64) | ||
471 | 84: JE/JZ Jz (f64) | ||
472 | 85: JNE/JNZ Jz (f64) | ||
473 | 86: JBE/JNA Jz (f64) | ||
474 | 87: JA/JNBE Jz (f64) | ||
475 | 88: JS Jz (f64) | ||
476 | 89: JNS Jz (f64) | ||
477 | 8a: JP/JPE Jz (f64) | ||
478 | 8b: JNP/JPO Jz (f64) | ||
479 | 8c: JL/JNGE Jz (f64) | ||
480 | 8d: JNL/JGE Jz (f64) | ||
481 | 8e: JLE/JNG Jz (f64) | ||
482 | 8f: JNLE/JG Jz (f64) | ||
483 | # 0x0f 0x90-0x9f | ||
484 | 90: SETO Eb | ||
485 | 91: SETNO Eb | ||
486 | 92: SETB/C/NAE Eb | ||
487 | 93: SETAE/NB/NC Eb | ||
488 | 94: SETE/Z Eb | ||
489 | 95: SETNE/NZ Eb | ||
490 | 96: SETBE/NA Eb | ||
491 | 97: SETA/NBE Eb | ||
492 | 98: SETS Eb | ||
493 | 99: SETNS Eb | ||
494 | 9a: SETP/PE Eb | ||
495 | 9b: SETNP/PO Eb | ||
496 | 9c: SETL/NGE Eb | ||
497 | 9d: SETNL/GE Eb | ||
498 | 9e: SETLE/NG Eb | ||
499 | 9f: SETNLE/G Eb | ||
500 | # 0x0f 0xa0-0xaf | ||
501 | a0: PUSH FS (d64) | ||
502 | a1: POP FS (d64) | ||
503 | a2: CPUID | ||
504 | a3: BT Ev,Gv | ||
505 | a4: SHLD Ev,Gv,Ib | ||
506 | a5: SHLD Ev,Gv,CL | ||
507 | a6: GrpPDLK | ||
508 | a7: GrpRNG | ||
509 | a8: PUSH GS (d64) | ||
510 | a9: POP GS (d64) | ||
511 | aa: RSM | ||
512 | ab: BTS Ev,Gv | ||
513 | ac: SHRD Ev,Gv,Ib | ||
514 | ad: SHRD Ev,Gv,CL | ||
515 | ae: Grp15 (1A),(1C) | ||
516 | af: IMUL Gv,Ev | ||
517 | # 0x0f 0xb0-0xbf | ||
518 | b0: CMPXCHG Eb,Gb | ||
519 | b1: CMPXCHG Ev,Gv | ||
520 | b2: LSS Gv,Mp | ||
521 | b3: BTR Ev,Gv | ||
522 | b4: LFS Gv,Mp | ||
523 | b5: LGS Gv,Mp | ||
524 | b6: MOVZX Gv,Eb | ||
525 | b7: MOVZX Gv,Ew | ||
526 | b8: JMPE (!F3) | POPCNT Gv,Ev (F3) | ||
527 | b9: Grp10 (1A) | ||
528 | ba: Grp8 Ev,Ib (1A) | ||
529 | bb: BTC Ev,Gv | ||
530 | bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3) | ||
531 | bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3) | ||
532 | be: MOVSX Gv,Eb | ||
533 | bf: MOVSX Gv,Ew | ||
534 | # 0x0f 0xc0-0xcf | ||
535 | c0: XADD Eb,Gb | ||
536 | c1: XADD Ev,Gv | ||
537 | c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1) | ||
538 | c3: movnti My,Gy | ||
539 | c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1) | ||
540 | c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1) | ||
541 | c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66) | ||
542 | c7: Grp9 (1A) | ||
543 | c8: BSWAP RAX/EAX/R8/R8D | ||
544 | c9: BSWAP RCX/ECX/R9/R9D | ||
545 | ca: BSWAP RDX/EDX/R10/R10D | ||
546 | cb: BSWAP RBX/EBX/R11/R11D | ||
547 | cc: BSWAP RSP/ESP/R12/R12D | ||
548 | cd: BSWAP RBP/EBP/R13/R13D | ||
549 | ce: BSWAP RSI/ESI/R14/R14D | ||
550 | cf: BSWAP RDI/EDI/R15/R15D | ||
551 | # 0x0f 0xd0-0xdf | ||
552 | d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2) | ||
553 | d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1) | ||
554 | d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1) | ||
555 | d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1) | ||
556 | d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1) | ||
557 | d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1) | ||
558 | d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2) | ||
559 | d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1) | ||
560 | d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1) | ||
561 | d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1) | ||
562 | da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1) | ||
563 | db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | ||
564 | dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1) | ||
565 | dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1) | ||
566 | de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1) | ||
567 | df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | ||
568 | # 0x0f 0xe0-0xef | ||
569 | e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1) | ||
570 | e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1) | ||
571 | e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1) | ||
572 | e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1) | ||
573 | e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1) | ||
574 | e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1) | ||
575 | e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2) | ||
576 | e7: movntq Mq,Pq | vmovntdq Mx,Vx (66) | ||
577 | e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1) | ||
578 | e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1) | ||
579 | ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1) | ||
580 | eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | ||
581 | ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1) | ||
582 | ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1) | ||
583 | ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1) | ||
584 | ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | ||
585 | # 0x0f 0xf0-0xff | ||
586 | f0: vlddqu Vx,Mx (F2) | ||
587 | f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1) | ||
588 | f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1) | ||
589 | f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1) | ||
590 | f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1) | ||
591 | f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1) | ||
592 | f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1) | ||
593 | f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1) | ||
594 | f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1) | ||
595 | f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1) | ||
596 | fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1) | ||
597 | fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1) | ||
598 | fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1) | ||
599 | fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1) | ||
600 | fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1) | ||
601 | ff: | ||
602 | EndTable | ||
603 | |||
604 | Table: 3-byte opcode 1 (0x0f 0x38) | ||
605 | Referrer: 3-byte escape 1 | ||
606 | AVXcode: 2 | ||
607 | # 0x0f 0x38 0x00-0x0f | ||
608 | 00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1) | ||
609 | 01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1) | ||
610 | 02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1) | ||
611 | 03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1) | ||
612 | 04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1) | ||
613 | 05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1) | ||
614 | 06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1) | ||
615 | 07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1) | ||
616 | 08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1) | ||
617 | 09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1) | ||
618 | 0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1) | ||
619 | 0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1) | ||
620 | 0c: vpermilps Vx,Hx,Wx (66),(v) | ||
621 | 0d: vpermilpd Vx,Hx,Wx (66),(v) | ||
622 | 0e: vtestps Vx,Wx (66),(v) | ||
623 | 0f: vtestpd Vx,Wx (66),(v) | ||
624 | # 0x0f 0x38 0x10-0x1f | ||
625 | 10: pblendvb Vdq,Wdq (66) | ||
626 | 11: | ||
627 | 12: | ||
628 | 13: vcvtph2ps Vx,Wx,Ib (66),(v) | ||
629 | 14: blendvps Vdq,Wdq (66) | ||
630 | 15: blendvpd Vdq,Wdq (66) | ||
631 | 16: vpermps Vqq,Hqq,Wqq (66),(v) | ||
632 | 17: vptest Vx,Wx (66) | ||
633 | 18: vbroadcastss Vx,Wd (66),(v) | ||
634 | 19: vbroadcastsd Vqq,Wq (66),(v) | ||
635 | 1a: vbroadcastf128 Vqq,Mdq (66),(v) | ||
636 | 1b: | ||
637 | 1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1) | ||
638 | 1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1) | ||
639 | 1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1) | ||
640 | 1f: | ||
641 | # 0x0f 0x38 0x20-0x2f | ||
642 | 20: vpmovsxbw Vx,Ux/Mq (66),(v1) | ||
643 | 21: vpmovsxbd Vx,Ux/Md (66),(v1) | ||
644 | 22: vpmovsxbq Vx,Ux/Mw (66),(v1) | ||
645 | 23: vpmovsxwd Vx,Ux/Mq (66),(v1) | ||
646 | 24: vpmovsxwq Vx,Ux/Md (66),(v1) | ||
647 | 25: vpmovsxdq Vx,Ux/Mq (66),(v1) | ||
648 | 26: | ||
649 | 27: | ||
650 | 28: vpmuldq Vx,Hx,Wx (66),(v1) | ||
651 | 29: vpcmpeqq Vx,Hx,Wx (66),(v1) | ||
652 | 2a: vmovntdqa Vx,Mx (66),(v1) | ||
653 | 2b: vpackusdw Vx,Hx,Wx (66),(v1) | ||
654 | 2c: vmaskmovps Vx,Hx,Mx (66),(v) | ||
655 | 2d: vmaskmovpd Vx,Hx,Mx (66),(v) | ||
656 | 2e: vmaskmovps Mx,Hx,Vx (66),(v) | ||
657 | 2f: vmaskmovpd Mx,Hx,Vx (66),(v) | ||
658 | # 0x0f 0x38 0x30-0x3f | ||
659 | 30: vpmovzxbw Vx,Ux/Mq (66),(v1) | ||
660 | 31: vpmovzxbd Vx,Ux/Md (66),(v1) | ||
661 | 32: vpmovzxbq Vx,Ux/Mw (66),(v1) | ||
662 | 33: vpmovzxwd Vx,Ux/Mq (66),(v1) | ||
663 | 34: vpmovzxwq Vx,Ux/Md (66),(v1) | ||
664 | 35: vpmovzxdq Vx,Ux/Mq (66),(v1) | ||
665 | 36: vpermd Vqq,Hqq,Wqq (66),(v) | ||
666 | 37: vpcmpgtq Vx,Hx,Wx (66),(v1) | ||
667 | 38: vpminsb Vx,Hx,Wx (66),(v1) | ||
668 | 39: vpminsd Vx,Hx,Wx (66),(v1) | ||
669 | 3a: vpminuw Vx,Hx,Wx (66),(v1) | ||
670 | 3b: vpminud Vx,Hx,Wx (66),(v1) | ||
671 | 3c: vpmaxsb Vx,Hx,Wx (66),(v1) | ||
672 | 3d: vpmaxsd Vx,Hx,Wx (66),(v1) | ||
673 | 3e: vpmaxuw Vx,Hx,Wx (66),(v1) | ||
674 | 3f: vpmaxud Vx,Hx,Wx (66),(v1) | ||
675 | # 0x0f 0x38 0x40-0x8f | ||
676 | 40: vpmulld Vx,Hx,Wx (66),(v1) | ||
677 | 41: vphminposuw Vdq,Wdq (66),(v1) | ||
678 | 42: | ||
679 | 43: | ||
680 | 44: | ||
681 | 45: vpsrlvd/q Vx,Hx,Wx (66),(v) | ||
682 | 46: vpsravd Vx,Hx,Wx (66),(v) | ||
683 | 47: vpsllvd/q Vx,Hx,Wx (66),(v) | ||
684 | # Skip 0x48-0x57 | ||
685 | 58: vpbroadcastd Vx,Wx (66),(v) | ||
686 | 59: vpbroadcastq Vx,Wx (66),(v) | ||
687 | 5a: vbroadcasti128 Vqq,Mdq (66),(v) | ||
688 | # Skip 0x5b-0x77 | ||
689 | 78: vpbroadcastb Vx,Wx (66),(v) | ||
690 | 79: vpbroadcastw Vx,Wx (66),(v) | ||
691 | # Skip 0x7a-0x7f | ||
692 | 80: INVEPT Gy,Mdq (66) | ||
693 | 81: INVVPID Gy,Mdq (66) | ||
694 | 82: INVPCID Gy,Mdq (66) | ||
695 | 8c: vpmaskmovd/q Vx,Hx,Mx (66),(v) | ||
696 | 8e: vpmaskmovd/q Mx,Vx,Hx (66),(v) | ||
697 | # 0x0f 0x38 0x90-0xbf (FMA) | ||
698 | 90: vgatherdd/q Vx,Hx,Wx (66),(v) | ||
699 | 91: vgatherqd/q Vx,Hx,Wx (66),(v) | ||
700 | 92: vgatherdps/d Vx,Hx,Wx (66),(v) | ||
701 | 93: vgatherqps/d Vx,Hx,Wx (66),(v) | ||
702 | 94: | ||
703 | 95: | ||
704 | 96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v) | ||
705 | 97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v) | ||
706 | 98: vfmadd132ps/d Vx,Hx,Wx (66),(v) | ||
707 | 99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1) | ||
708 | 9a: vfmsub132ps/d Vx,Hx,Wx (66),(v) | ||
709 | 9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1) | ||
710 | 9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v) | ||
711 | 9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1) | ||
712 | 9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v) | ||
713 | 9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1) | ||
714 | a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v) | ||
715 | a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v) | ||
716 | a8: vfmadd213ps/d Vx,Hx,Wx (66),(v) | ||
717 | a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1) | ||
718 | aa: vfmsub213ps/d Vx,Hx,Wx (66),(v) | ||
719 | ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1) | ||
720 | ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v) | ||
721 | ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1) | ||
722 | ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v) | ||
723 | af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1) | ||
724 | b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v) | ||
725 | b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v) | ||
726 | b8: vfmadd231ps/d Vx,Hx,Wx (66),(v) | ||
727 | b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
728 | ba: vfmsub231ps/d Vx,Hx,Wx (66),(v) | ||
729 | bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
730 | bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v) | ||
731 | bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
732 | be: vfnmsub231ps/d Vx,Hx,Wx (66),(v) | ||
733 | bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1) | ||
734 | # 0x0f 0x38 0xc0-0xff | ||
735 | db: VAESIMC Vdq,Wdq (66),(v1) | ||
736 | dc: VAESENC Vdq,Hdq,Wdq (66),(v1) | ||
737 | dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1) | ||
738 | de: VAESDEC Vdq,Hdq,Wdq (66),(v1) | ||
739 | df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1) | ||
740 | f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2) | ||
741 | f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2) | ||
742 | f2: ANDN Gy,By,Ey (v) | ||
743 | f3: Grp17 (1A) | ||
744 | f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | ||
745 | f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | ||
746 | f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v) | ||
747 | EndTable | ||
748 | |||
749 | Table: 3-byte opcode 2 (0x0f 0x3a) | ||
750 | Referrer: 3-byte escape 2 | ||
751 | AVXcode: 3 | ||
752 | # 0x0f 0x3a 0x00-0xff | ||
753 | 00: vpermq Vqq,Wqq,Ib (66),(v) | ||
754 | 01: vpermpd Vqq,Wqq,Ib (66),(v) | ||
755 | 02: vpblendd Vx,Hx,Wx,Ib (66),(v) | ||
756 | 03: | ||
757 | 04: vpermilps Vx,Wx,Ib (66),(v) | ||
758 | 05: vpermilpd Vx,Wx,Ib (66),(v) | ||
759 | 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) | ||
760 | 07: | ||
761 | 08: vroundps Vx,Wx,Ib (66) | ||
762 | 09: vroundpd Vx,Wx,Ib (66) | ||
763 | 0a: vroundss Vss,Wss,Ib (66),(v1) | ||
764 | 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | ||
765 | 0c: vblendps Vx,Hx,Wx,Ib (66) | ||
766 | 0d: vblendpd Vx,Hx,Wx,Ib (66) | ||
767 | 0e: vpblendw Vx,Hx,Wx,Ib (66),(v1) | ||
768 | 0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1) | ||
769 | 14: vpextrb Rd/Mb,Vdq,Ib (66),(v1) | ||
770 | 15: vpextrw Rd/Mw,Vdq,Ib (66),(v1) | ||
771 | 16: vpextrd/q Ey,Vdq,Ib (66),(v1) | ||
772 | 17: vextractps Ed,Vdq,Ib (66),(v1) | ||
773 | 18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | ||
774 | 19: vextractf128 Wdq,Vqq,Ib (66),(v) | ||
775 | 1d: vcvtps2ph Wx,Vx,Ib (66),(v) | ||
776 | 20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1) | ||
777 | 21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1) | ||
778 | 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) | ||
779 | 38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | ||
780 | 39: vextracti128 Wdq,Vqq,Ib (66),(v) | ||
781 | 40: vdpps Vx,Hx,Wx,Ib (66) | ||
782 | 41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1) | ||
783 | 42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | ||
784 | 44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1) | ||
785 | 46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v) | ||
786 | 4a: vblendvps Vx,Hx,Wx,Lx (66),(v) | ||
787 | 4b: vblendvpd Vx,Hx,Wx,Lx (66),(v) | ||
788 | 4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1) | ||
789 | 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) | ||
790 | 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) | ||
791 | 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) | ||
792 | 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) | ||
793 | df: VAESKEYGENASSIST Vdq,Wdq,Ib (66),(v1) | ||
794 | f0: RORX Gy,Ey,Ib (F2),(v) | ||
795 | EndTable | ||
796 | |||
797 | GrpTable: Grp1 | ||
798 | 0: ADD | ||
799 | 1: OR | ||
800 | 2: ADC | ||
801 | 3: SBB | ||
802 | 4: AND | ||
803 | 5: SUB | ||
804 | 6: XOR | ||
805 | 7: CMP | ||
806 | EndTable | ||
807 | |||
808 | GrpTable: Grp1A | ||
809 | 0: POP | ||
810 | EndTable | ||
811 | |||
812 | GrpTable: Grp2 | ||
813 | 0: ROL | ||
814 | 1: ROR | ||
815 | 2: RCL | ||
816 | 3: RCR | ||
817 | 4: SHL/SAL | ||
818 | 5: SHR | ||
819 | 6: | ||
820 | 7: SAR | ||
821 | EndTable | ||
822 | |||
823 | GrpTable: Grp3_1 | ||
824 | 0: TEST Eb,Ib | ||
825 | 1: | ||
826 | 2: NOT Eb | ||
827 | 3: NEG Eb | ||
828 | 4: MUL AL,Eb | ||
829 | 5: IMUL AL,Eb | ||
830 | 6: DIV AL,Eb | ||
831 | 7: IDIV AL,Eb | ||
832 | EndTable | ||
833 | |||
834 | GrpTable: Grp3_2 | ||
835 | 0: TEST Ev,Iz | ||
836 | 1: | ||
837 | 2: NOT Ev | ||
838 | 3: NEG Ev | ||
839 | 4: MUL rAX,Ev | ||
840 | 5: IMUL rAX,Ev | ||
841 | 6: DIV rAX,Ev | ||
842 | 7: IDIV rAX,Ev | ||
843 | EndTable | ||
844 | |||
845 | GrpTable: Grp4 | ||
846 | 0: INC Eb | ||
847 | 1: DEC Eb | ||
848 | EndTable | ||
849 | |||
850 | GrpTable: Grp5 | ||
851 | 0: INC Ev | ||
852 | 1: DEC Ev | ||
853 | # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). | ||
854 | 2: CALLN Ev (f64) | ||
855 | 3: CALLF Ep | ||
856 | 4: JMPN Ev (f64) | ||
857 | 5: JMPF Mp | ||
858 | 6: PUSH Ev (d64) | ||
859 | 7: | ||
860 | EndTable | ||
861 | |||
862 | GrpTable: Grp6 | ||
863 | 0: SLDT Rv/Mw | ||
864 | 1: STR Rv/Mw | ||
865 | 2: LLDT Ew | ||
866 | 3: LTR Ew | ||
867 | 4: VERR Ew | ||
868 | 5: VERW Ew | ||
869 | EndTable | ||
870 | |||
871 | GrpTable: Grp7 | ||
872 | 0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | ||
873 | 1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ||
874 | 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ||
875 | 3: LIDT Ms | ||
876 | 4: SMSW Mw/Rv | ||
877 | 5: | ||
878 | 6: LMSW Ew | ||
879 | 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) | ||
880 | EndTable | ||
881 | |||
882 | GrpTable: Grp8 | ||
883 | 4: BT | ||
884 | 5: BTS | ||
885 | 6: BTR | ||
886 | 7: BTC | ||
887 | EndTable | ||
888 | |||
889 | GrpTable: Grp9 | ||
890 | 1: CMPXCHG8B/16B Mq/Mdq | ||
891 | 6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | ||
892 | 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) | ||
893 | EndTable | ||
894 | |||
895 | GrpTable: Grp10 | ||
896 | EndTable | ||
897 | |||
898 | # Grp11A and Grp11B are expressed as Grp11 in Intel SDM | ||
899 | GrpTable: Grp11A | ||
900 | 0: MOV Eb,Ib | ||
901 | 7: XABORT Ib (000),(11B) | ||
902 | EndTable | ||
903 | |||
904 | GrpTable: Grp11B | ||
905 | 0: MOV Eb,Iz | ||
906 | 7: XBEGIN Jz (000),(11B) | ||
907 | EndTable | ||
908 | |||
909 | GrpTable: Grp12 | ||
910 | 2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1) | ||
911 | 4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1) | ||
912 | 6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1) | ||
913 | EndTable | ||
914 | |||
915 | GrpTable: Grp13 | ||
916 | 2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1) | ||
917 | 4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | ||
918 | 6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1) | ||
919 | EndTable | ||
920 | |||
921 | GrpTable: Grp14 | ||
922 | 2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1) | ||
923 | 3: vpsrldq Hx,Ux,Ib (66),(11B),(v1) | ||
924 | 6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1) | ||
925 | 7: vpslldq Hx,Ux,Ib (66),(11B),(v1) | ||
926 | EndTable | ||
927 | |||
928 | GrpTable: Grp15 | ||
929 | 0: fxsave | RDFSBASE Ry (F3),(11B) | ||
930 | 1: fxrstor | RDGSBASE Ry (F3),(11B) | ||
931 | 2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B) | ||
932 | 3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B) | ||
933 | 4: XSAVE | ||
934 | 5: XRSTOR | lfence (11B) | ||
935 | 6: XSAVEOPT | mfence (11B) | ||
936 | 7: clflush | sfence (11B) | ||
937 | EndTable | ||
938 | |||
939 | GrpTable: Grp16 | ||
940 | 0: prefetch NTA | ||
941 | 1: prefetch T0 | ||
942 | 2: prefetch T1 | ||
943 | 3: prefetch T2 | ||
944 | EndTable | ||
945 | |||
946 | GrpTable: Grp17 | ||
947 | 1: BLSR By,Ey (v) | ||
948 | 2: BLSMSK By,Ey (v) | ||
949 | 3: BLSI By,Ey (v) | ||
950 | EndTable | ||
951 | |||
952 | # AMD's Prefetch Group | ||
953 | GrpTable: GrpP | ||
954 | 0: PREFETCH | ||
955 | 1: PREFETCHW | ||
956 | EndTable | ||
957 | |||
958 | GrpTable: GrpPDLK | ||
959 | 0: MONTMUL | ||
960 | 1: XSHA1 | ||
961 | 2: XSHA2 | ||
962 | EndTable | ||
963 | |||
964 | GrpTable: GrpRNG | ||
965 | 0: xstore-rng | ||
966 | 1: xcrypt-ecb | ||
967 | 2: xcrypt-cbc | ||
968 | 4: xcrypt-cfb | ||
969 | 5: xcrypt-ofb | ||
970 | EndTable | ||
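A note on reading the opcode tables above: each entry may list several alternative forms separated by '|'; the parenthesised annotations (66), (F3) and (F2) name the mandatory last prefix that selects a form, (!F3) marks a form that applies only without that prefix, and Grp references defer to the GrpTable entries further down. The kernel never parses these entries at run time; inat-tables.c is generated from this file at build time. As a rough, hypothetical sketch of the selection rule only (the function and example strings below are made up for illustration and are not part of this patch):

	#include <stdio.h>
	#include <string.h>

	/*
	 * Illustrative sketch only, not part of this patch: return the first
	 * '|'-separated form of an opcode-map entry whose prefix annotation
	 * matches lastpfx ("66", "F3", "F2", or "" for no mandatory prefix).
	 */
	static char *pick_form(const char *entry, const char *lastpfx)
	{
		static char buf[256];
		char want[8];
		char *form;

		snprintf(buf, sizeof(buf), "%s", entry);
		snprintf(want, sizeof(want), "(%s)", lastpfx);
		for (form = strtok(buf, "|"); form; form = strtok(NULL, "|")) {
			int has_pfx = strstr(form, "(66)") || strstr(form, "(F3)") ||
				      strstr(form, "(F2)");

			if (*lastpfx ? strstr(form, want) != NULL : !has_pfx)
				return form;
		}
		return NULL;
	}

	int main(void)
	{
		/* The 0x0f 0xbc entry above: BSF without F3, TZCNT with F3 */
		const char *e = "BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)";

		printf("no prefix: %s\n", pick_form(e, ""));
		printf("F3 prefix: %s\n", pick_form(e, "F3"));
		return 0;
	}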
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c new file mode 100644 index 000000000000..535d86f8e4d1 --- /dev/null +++ b/tools/perf/util/intel-pt.c | |||
@@ -0,0 +1,1956 @@ | |||
1 | /* | ||
2 | * intel-pt.c: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2015, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <stdio.h> | ||
17 | #include <stdbool.h> | ||
18 | #include <errno.h> | ||
19 | #include <linux/kernel.h> | ||
20 | #include <linux/types.h> | ||
21 | |||
22 | #include "../perf.h" | ||
23 | #include "session.h" | ||
24 | #include "machine.h" | ||
25 | #include "tool.h" | ||
26 | #include "event.h" | ||
27 | #include "evlist.h" | ||
28 | #include "evsel.h" | ||
29 | #include "map.h" | ||
30 | #include "color.h" | ||
31 | #include "util.h" | ||
32 | #include "thread.h" | ||
33 | #include "thread-stack.h" | ||
34 | #include "symbol.h" | ||
35 | #include "callchain.h" | ||
36 | #include "dso.h" | ||
37 | #include "debug.h" | ||
38 | #include "auxtrace.h" | ||
39 | #include "tsc.h" | ||
40 | #include "intel-pt.h" | ||
41 | |||
42 | #include "intel-pt-decoder/intel-pt-log.h" | ||
43 | #include "intel-pt-decoder/intel-pt-decoder.h" | ||
44 | #include "intel-pt-decoder/intel-pt-insn-decoder.h" | ||
45 | #include "intel-pt-decoder/intel-pt-pkt-decoder.h" | ||
46 | |||
47 | #define MAX_TIMESTAMP (~0ULL) | ||
48 | |||
49 | struct intel_pt { | ||
50 | struct auxtrace auxtrace; | ||
51 | struct auxtrace_queues queues; | ||
52 | struct auxtrace_heap heap; | ||
53 | u32 auxtrace_type; | ||
54 | struct perf_session *session; | ||
55 | struct machine *machine; | ||
56 | struct perf_evsel *switch_evsel; | ||
57 | struct thread *unknown_thread; | ||
58 | bool timeless_decoding; | ||
59 | bool sampling_mode; | ||
60 | bool snapshot_mode; | ||
61 | bool per_cpu_mmaps; | ||
62 | bool have_tsc; | ||
63 | bool data_queued; | ||
64 | bool est_tsc; | ||
65 | bool sync_switch; | ||
66 | int have_sched_switch; | ||
67 | u32 pmu_type; | ||
68 | u64 kernel_start; | ||
69 | u64 switch_ip; | ||
70 | u64 ptss_ip; | ||
71 | |||
72 | struct perf_tsc_conversion tc; | ||
73 | bool cap_user_time_zero; | ||
74 | |||
75 | struct itrace_synth_opts synth_opts; | ||
76 | |||
77 | bool sample_instructions; | ||
78 | u64 instructions_sample_type; | ||
79 | u64 instructions_sample_period; | ||
80 | u64 instructions_id; | ||
81 | |||
82 | bool sample_branches; | ||
83 | u32 branches_filter; | ||
84 | u64 branches_sample_type; | ||
85 | u64 branches_id; | ||
86 | |||
87 | bool sample_transactions; | ||
88 | u64 transactions_sample_type; | ||
89 | u64 transactions_id; | ||
90 | |||
91 | bool synth_needs_swap; | ||
92 | |||
93 | u64 tsc_bit; | ||
94 | u64 mtc_bit; | ||
95 | u64 mtc_freq_bits; | ||
96 | u32 tsc_ctc_ratio_n; | ||
97 | u32 tsc_ctc_ratio_d; | ||
98 | u64 cyc_bit; | ||
99 | u64 noretcomp_bit; | ||
100 | unsigned max_non_turbo_ratio; | ||
101 | }; | ||
102 | |||
103 | enum switch_state { | ||
104 | INTEL_PT_SS_NOT_TRACING, | ||
105 | INTEL_PT_SS_UNKNOWN, | ||
106 | INTEL_PT_SS_TRACING, | ||
107 | INTEL_PT_SS_EXPECTING_SWITCH_EVENT, | ||
108 | INTEL_PT_SS_EXPECTING_SWITCH_IP, | ||
109 | }; | ||
110 | |||
111 | struct intel_pt_queue { | ||
112 | struct intel_pt *pt; | ||
113 | unsigned int queue_nr; | ||
114 | struct auxtrace_buffer *buffer; | ||
115 | void *decoder; | ||
116 | const struct intel_pt_state *state; | ||
117 | struct ip_callchain *chain; | ||
118 | union perf_event *event_buf; | ||
119 | bool on_heap; | ||
120 | bool stop; | ||
121 | bool step_through_buffers; | ||
122 | bool use_buffer_pid_tid; | ||
123 | pid_t pid, tid; | ||
124 | int cpu; | ||
125 | int switch_state; | ||
126 | pid_t next_tid; | ||
127 | struct thread *thread; | ||
128 | bool exclude_kernel; | ||
129 | bool have_sample; | ||
130 | u64 time; | ||
131 | u64 timestamp; | ||
132 | u32 flags; | ||
133 | u16 insn_len; | ||
134 | u64 last_insn_cnt; | ||
135 | }; | ||
136 | |||
137 | static void intel_pt_dump(struct intel_pt *pt __maybe_unused, | ||
138 | unsigned char *buf, size_t len) | ||
139 | { | ||
140 | struct intel_pt_pkt packet; | ||
141 | size_t pos = 0; | ||
142 | int ret, pkt_len, i; | ||
143 | char desc[INTEL_PT_PKT_DESC_MAX]; | ||
144 | const char *color = PERF_COLOR_BLUE; | ||
145 | |||
146 | color_fprintf(stdout, color, | ||
147 | ". ... Intel Processor Trace data: size %zu bytes\n", | ||
148 | len); | ||
149 | |||
150 | while (len) { | ||
151 | ret = intel_pt_get_packet(buf, len, &packet); | ||
152 | if (ret > 0) | ||
153 | pkt_len = ret; | ||
154 | else | ||
155 | pkt_len = 1; | ||
156 | printf("."); | ||
157 | color_fprintf(stdout, color, " %08zx: ", pos); | ||
158 | for (i = 0; i < pkt_len; i++) | ||
159 | color_fprintf(stdout, color, " %02x", buf[i]); | ||
160 | for (; i < 16; i++) | ||
161 | color_fprintf(stdout, color, " "); | ||
162 | if (ret > 0) { | ||
163 | ret = intel_pt_pkt_desc(&packet, desc, | ||
164 | INTEL_PT_PKT_DESC_MAX); | ||
165 | if (ret > 0) | ||
166 | color_fprintf(stdout, color, " %s\n", desc); | ||
167 | } else { | ||
168 | color_fprintf(stdout, color, " Bad packet!\n"); | ||
169 | } | ||
170 | pos += pkt_len; | ||
171 | buf += pkt_len; | ||
172 | len -= pkt_len; | ||
173 | } | ||
174 | } | ||
175 | |||
176 | static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf, | ||
177 | size_t len) | ||
178 | { | ||
179 | printf(".\n"); | ||
180 | intel_pt_dump(pt, buf, len); | ||
181 | } | ||
182 | |||
183 | static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a, | ||
184 | struct auxtrace_buffer *b) | ||
185 | { | ||
186 | void *start; | ||
187 | |||
188 | start = intel_pt_find_overlap(a->data, a->size, b->data, b->size, | ||
189 | pt->have_tsc); | ||
190 | if (!start) | ||
191 | return -EINVAL; | ||
192 | b->use_size = b->data + b->size - start; | ||
193 | b->use_data = start; | ||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static void intel_pt_use_buffer_pid_tid(struct intel_pt_queue *ptq, | ||
198 | struct auxtrace_queue *queue, | ||
199 | struct auxtrace_buffer *buffer) | ||
200 | { | ||
201 | if (queue->cpu == -1 && buffer->cpu != -1) | ||
202 | ptq->cpu = buffer->cpu; | ||
203 | |||
204 | ptq->pid = buffer->pid; | ||
205 | ptq->tid = buffer->tid; | ||
206 | |||
207 | intel_pt_log("queue %u cpu %d pid %d tid %d\n", | ||
208 | ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); | ||
209 | |||
210 | thread__zput(ptq->thread); | ||
211 | |||
212 | if (ptq->tid != -1) { | ||
213 | if (ptq->pid != -1) | ||
214 | ptq->thread = machine__findnew_thread(ptq->pt->machine, | ||
215 | ptq->pid, | ||
216 | ptq->tid); | ||
217 | else | ||
218 | ptq->thread = machine__find_thread(ptq->pt->machine, -1, | ||
219 | ptq->tid); | ||
220 | } | ||
221 | } | ||
222 | |||
223 | /* This function assumes data is processed sequentially only */ | ||
224 | static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data) | ||
225 | { | ||
226 | struct intel_pt_queue *ptq = data; | ||
227 | struct auxtrace_buffer *buffer = ptq->buffer, *old_buffer = buffer; | ||
228 | struct auxtrace_queue *queue; | ||
229 | |||
230 | if (ptq->stop) { | ||
231 | b->len = 0; | ||
232 | return 0; | ||
233 | } | ||
234 | |||
235 | queue = &ptq->pt->queues.queue_array[ptq->queue_nr]; | ||
236 | |||
237 | buffer = auxtrace_buffer__next(queue, buffer); | ||
238 | if (!buffer) { | ||
239 | if (old_buffer) | ||
240 | auxtrace_buffer__drop_data(old_buffer); | ||
241 | b->len = 0; | ||
242 | return 0; | ||
243 | } | ||
244 | |||
245 | ptq->buffer = buffer; | ||
246 | |||
247 | if (!buffer->data) { | ||
248 | int fd = perf_data_file__fd(ptq->pt->session->file); | ||
249 | |||
250 | buffer->data = auxtrace_buffer__get_data(buffer, fd); | ||
251 | if (!buffer->data) | ||
252 | return -ENOMEM; | ||
253 | } | ||
254 | |||
255 | if (ptq->pt->snapshot_mode && !buffer->consecutive && old_buffer && | ||
256 | intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer)) | ||
257 | return -ENOMEM; | ||
258 | |||
259 | if (old_buffer) | ||
260 | auxtrace_buffer__drop_data(old_buffer); | ||
261 | |||
262 | if (buffer->use_data) { | ||
263 | b->len = buffer->use_size; | ||
264 | b->buf = buffer->use_data; | ||
265 | } else { | ||
266 | b->len = buffer->size; | ||
267 | b->buf = buffer->data; | ||
268 | } | ||
269 | b->ref_timestamp = buffer->reference; | ||
270 | |||
271 | if (!old_buffer || ptq->pt->sampling_mode || (ptq->pt->snapshot_mode && | ||
272 | !buffer->consecutive)) { | ||
273 | b->consecutive = false; | ||
274 | b->trace_nr = buffer->buffer_nr + 1; | ||
275 | } else { | ||
276 | b->consecutive = true; | ||
277 | } | ||
278 | |||
279 | if (ptq->use_buffer_pid_tid && (ptq->pid != buffer->pid || | ||
280 | ptq->tid != buffer->tid)) | ||
281 | intel_pt_use_buffer_pid_tid(ptq, queue, buffer); | ||
282 | |||
283 | if (ptq->step_through_buffers) | ||
284 | ptq->stop = true; | ||
285 | |||
286 | if (!b->len) | ||
287 | return intel_pt_get_trace(b, data); | ||
288 | |||
289 | return 0; | ||
290 | } | ||
291 | |||
292 | struct intel_pt_cache_entry { | ||
293 | struct auxtrace_cache_entry entry; | ||
294 | u64 insn_cnt; | ||
295 | u64 byte_cnt; | ||
296 | enum intel_pt_insn_op op; | ||
297 | enum intel_pt_insn_branch branch; | ||
298 | int length; | ||
299 | int32_t rel; | ||
300 | }; | ||
301 | |||
302 | static int intel_pt_config_div(const char *var, const char *value, void *data) | ||
303 | { | ||
304 | int *d = data; | ||
305 | long val; | ||
306 | |||
307 | if (!strcmp(var, "intel-pt.cache-divisor")) { | ||
308 | val = strtol(value, NULL, 0); | ||
309 | if (val > 0 && val <= INT_MAX) | ||
310 | *d = val; | ||
311 | } | ||
312 | |||
313 | return 0; | ||
314 | } | ||
315 | |||
316 | static int intel_pt_cache_divisor(void) | ||
317 | { | ||
318 | static int d; | ||
319 | |||
320 | if (d) | ||
321 | return d; | ||
322 | |||
323 | perf_config(intel_pt_config_div, &d); | ||
324 | |||
325 | if (!d) | ||
326 | d = 64; | ||
327 | |||
328 | return d; | ||
329 | } | ||
330 | |||
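/*
 * Choose the number of auxtrace cache hash bits from the dso size scaled
 * down by the configurable intel-pt.cache-divisor (default 64): roughly
 * log2 of the scaled size, with small objects getting 10 bits and very
 * large ones capped at 21.
 */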
331 | static unsigned int intel_pt_cache_size(struct dso *dso, | ||
332 | struct machine *machine) | ||
333 | { | ||
334 | off_t size; | ||
335 | |||
336 | size = dso__data_size(dso, machine); | ||
337 | size /= intel_pt_cache_divisor(); | ||
338 | if (size < 1000) | ||
339 | return 10; | ||
340 | if (size > (1 << 21)) | ||
341 | return 21; | ||
342 | return 32 - __builtin_clz(size); | ||
343 | } | ||
344 | |||
345 | static struct auxtrace_cache *intel_pt_cache(struct dso *dso, | ||
346 | struct machine *machine) | ||
347 | { | ||
348 | struct auxtrace_cache *c; | ||
349 | unsigned int bits; | ||
350 | |||
351 | if (dso->auxtrace_cache) | ||
352 | return dso->auxtrace_cache; | ||
353 | |||
354 | bits = intel_pt_cache_size(dso, machine); | ||
355 | |||
356 | /* Ignoring cache creation failure */ | ||
357 | c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200); | ||
358 | |||
359 | dso->auxtrace_cache = c; | ||
360 | |||
361 | return c; | ||
362 | } | ||
363 | |||
364 | static int intel_pt_cache_add(struct dso *dso, struct machine *machine, | ||
365 | u64 offset, u64 insn_cnt, u64 byte_cnt, | ||
366 | struct intel_pt_insn *intel_pt_insn) | ||
367 | { | ||
368 | struct auxtrace_cache *c = intel_pt_cache(dso, machine); | ||
369 | struct intel_pt_cache_entry *e; | ||
370 | int err; | ||
371 | |||
372 | if (!c) | ||
373 | return -ENOMEM; | ||
374 | |||
375 | e = auxtrace_cache__alloc_entry(c); | ||
376 | if (!e) | ||
377 | return -ENOMEM; | ||
378 | |||
379 | e->insn_cnt = insn_cnt; | ||
380 | e->byte_cnt = byte_cnt; | ||
381 | e->op = intel_pt_insn->op; | ||
382 | e->branch = intel_pt_insn->branch; | ||
383 | e->length = intel_pt_insn->length; | ||
384 | e->rel = intel_pt_insn->rel; | ||
385 | |||
386 | err = auxtrace_cache__add(c, offset, &e->entry); | ||
387 | if (err) | ||
388 | auxtrace_cache__free_entry(c, e); | ||
389 | |||
390 | return err; | ||
391 | } | ||
392 | |||
393 | static struct intel_pt_cache_entry * | ||
394 | intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset) | ||
395 | { | ||
396 | struct auxtrace_cache *c = intel_pt_cache(dso, machine); | ||
397 | |||
398 | if (!c) | ||
399 | return NULL; | ||
400 | |||
401 | return auxtrace_cache__lookup(dso->auxtrace_cache, offset); | ||
402 | } | ||
403 | |||
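/*
 * Walk the object code one instruction at a time starting at *ip, stopping
 * at the first branch, when to_ip is reached, or after max_insn_cnt
 * instructions.  Walks that stay within a single map and end at a branch
 * are cached per dso, keyed by file offset, so the same stretch of code
 * does not have to be decoded again.
 */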
404 | static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn, | ||
405 | uint64_t *insn_cnt_ptr, uint64_t *ip, | ||
406 | uint64_t to_ip, uint64_t max_insn_cnt, | ||
407 | void *data) | ||
408 | { | ||
409 | struct intel_pt_queue *ptq = data; | ||
410 | struct machine *machine = ptq->pt->machine; | ||
411 | struct thread *thread; | ||
412 | struct addr_location al; | ||
413 | unsigned char buf[1024]; | ||
414 | size_t bufsz; | ||
415 | ssize_t len; | ||
416 | int x86_64; | ||
417 | u8 cpumode; | ||
418 | u64 offset, start_offset, start_ip; | ||
419 | u64 insn_cnt = 0; | ||
420 | bool one_map = true; | ||
421 | |||
422 | if (to_ip && *ip == to_ip) | ||
423 | goto out_no_cache; | ||
424 | |||
425 | bufsz = intel_pt_insn_max_size(); | ||
426 | |||
427 | if (*ip >= ptq->pt->kernel_start) | ||
428 | cpumode = PERF_RECORD_MISC_KERNEL; | ||
429 | else | ||
430 | cpumode = PERF_RECORD_MISC_USER; | ||
431 | |||
432 | thread = ptq->thread; | ||
433 | if (!thread) { | ||
434 | if (cpumode != PERF_RECORD_MISC_KERNEL) | ||
435 | return -EINVAL; | ||
436 | thread = ptq->pt->unknown_thread; | ||
437 | } | ||
438 | |||
439 | while (1) { | ||
440 | thread__find_addr_map(thread, cpumode, MAP__FUNCTION, *ip, &al); | ||
441 | if (!al.map || !al.map->dso) | ||
442 | return -EINVAL; | ||
443 | |||
444 | if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && | ||
445 | dso__data_status_seen(al.map->dso, | ||
446 | DSO_DATA_STATUS_SEEN_ITRACE)) | ||
447 | return -ENOENT; | ||
448 | |||
449 | offset = al.map->map_ip(al.map, *ip); | ||
450 | |||
451 | if (!to_ip && one_map) { | ||
452 | struct intel_pt_cache_entry *e; | ||
453 | |||
454 | e = intel_pt_cache_lookup(al.map->dso, machine, offset); | ||
455 | if (e && | ||
456 | (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) { | ||
457 | *insn_cnt_ptr = e->insn_cnt; | ||
458 | *ip += e->byte_cnt; | ||
459 | intel_pt_insn->op = e->op; | ||
460 | intel_pt_insn->branch = e->branch; | ||
461 | intel_pt_insn->length = e->length; | ||
462 | intel_pt_insn->rel = e->rel; | ||
463 | intel_pt_log_insn_no_data(intel_pt_insn, *ip); | ||
464 | return 0; | ||
465 | } | ||
466 | } | ||
467 | |||
468 | start_offset = offset; | ||
469 | start_ip = *ip; | ||
470 | |||
471 | /* Load maps to ensure dso->is_64_bit has been updated */ | ||
472 | map__load(al.map, machine->symbol_filter); | ||
473 | |||
474 | x86_64 = al.map->dso->is_64_bit; | ||
475 | |||
476 | while (1) { | ||
477 | len = dso__data_read_offset(al.map->dso, machine, | ||
478 | offset, buf, bufsz); | ||
479 | if (len <= 0) | ||
480 | return -EINVAL; | ||
481 | |||
482 | if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn)) | ||
483 | return -EINVAL; | ||
484 | |||
485 | intel_pt_log_insn(intel_pt_insn, *ip); | ||
486 | |||
487 | insn_cnt += 1; | ||
488 | |||
489 | if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH) | ||
490 | goto out; | ||
491 | |||
492 | if (max_insn_cnt && insn_cnt >= max_insn_cnt) | ||
493 | goto out_no_cache; | ||
494 | |||
495 | *ip += intel_pt_insn->length; | ||
496 | |||
497 | if (to_ip && *ip == to_ip) | ||
498 | goto out_no_cache; | ||
499 | |||
500 | if (*ip >= al.map->end) | ||
501 | break; | ||
502 | |||
503 | offset += intel_pt_insn->length; | ||
504 | } | ||
505 | one_map = false; | ||
506 | } | ||
507 | out: | ||
508 | *insn_cnt_ptr = insn_cnt; | ||
509 | |||
510 | if (!one_map) | ||
511 | goto out_no_cache; | ||
512 | |||
513 | /* | ||
514 | * Didn't lookup in the 'to_ip' case, so do it now to prevent duplicate | ||
515 | * entries. | ||
516 | */ | ||
517 | if (to_ip) { | ||
518 | struct intel_pt_cache_entry *e; | ||
519 | |||
520 | e = intel_pt_cache_lookup(al.map->dso, machine, start_offset); | ||
521 | if (e) | ||
522 | return 0; | ||
523 | } | ||
524 | |||
525 | /* Ignore cache errors */ | ||
526 | intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt, | ||
527 | *ip - start_ip, intel_pt_insn); | ||
528 | |||
529 | return 0; | ||
530 | |||
531 | out_no_cache: | ||
532 | *insn_cnt_ptr = insn_cnt; | ||
533 | return 0; | ||
534 | } | ||
535 | |||
536 | static bool intel_pt_get_config(struct intel_pt *pt, | ||
537 | struct perf_event_attr *attr, u64 *config) | ||
538 | { | ||
539 | if (attr->type == pt->pmu_type) { | ||
540 | if (config) | ||
541 | *config = attr->config; | ||
542 | return true; | ||
543 | } | ||
544 | |||
545 | return false; | ||
546 | } | ||
547 | |||
548 | static bool intel_pt_exclude_kernel(struct intel_pt *pt) | ||
549 | { | ||
550 | struct perf_evsel *evsel; | ||
551 | |||
552 | evlist__for_each(pt->session->evlist, evsel) { | ||
553 | if (intel_pt_get_config(pt, &evsel->attr, NULL) && | ||
554 | !evsel->attr.exclude_kernel) | ||
555 | return false; | ||
556 | } | ||
557 | return true; | ||
558 | } | ||
559 | |||
560 | static bool intel_pt_return_compression(struct intel_pt *pt) | ||
561 | { | ||
562 | struct perf_evsel *evsel; | ||
563 | u64 config; | ||
564 | |||
565 | if (!pt->noretcomp_bit) | ||
566 | return true; | ||
567 | |||
568 | evlist__for_each(pt->session->evlist, evsel) { | ||
569 | if (intel_pt_get_config(pt, &evsel->attr, &config) && | ||
570 | (config & pt->noretcomp_bit)) | ||
571 | return false; | ||
572 | } | ||
573 | return true; | ||
574 | } | ||
575 | |||
576 | static unsigned int intel_pt_mtc_period(struct intel_pt *pt) | ||
577 | { | ||
578 | struct perf_evsel *evsel; | ||
579 | unsigned int shift; | ||
580 | u64 config; | ||
581 | |||
582 | if (!pt->mtc_freq_bits) | ||
583 | return 0; | ||
584 | |||
585 | for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++) | ||
586 | config >>= 1; | ||
587 | |||
588 | evlist__for_each(pt->session->evlist, evsel) { | ||
589 | if (intel_pt_get_config(pt, &evsel->attr, &config)) | ||
590 | return (config & pt->mtc_freq_bits) >> shift; | ||
591 | } | ||
592 | return 0; | ||
593 | } | ||
594 | |||
595 | static bool intel_pt_timeless_decoding(struct intel_pt *pt) | ||
596 | { | ||
597 | struct perf_evsel *evsel; | ||
598 | bool timeless_decoding = true; | ||
599 | u64 config; | ||
600 | |||
601 | if (!pt->tsc_bit || !pt->cap_user_time_zero) | ||
602 | return true; | ||
603 | |||
604 | evlist__for_each(pt->session->evlist, evsel) { | ||
605 | if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME)) | ||
606 | return true; | ||
607 | if (intel_pt_get_config(pt, &evsel->attr, &config)) { | ||
608 | if (config & pt->tsc_bit) | ||
609 | timeless_decoding = false; | ||
610 | else | ||
611 | return true; | ||
612 | } | ||
613 | } | ||
614 | return timeless_decoding; | ||
615 | } | ||
616 | |||
617 | static bool intel_pt_tracing_kernel(struct intel_pt *pt) | ||
618 | { | ||
619 | struct perf_evsel *evsel; | ||
620 | |||
621 | evlist__for_each(pt->session->evlist, evsel) { | ||
622 | if (intel_pt_get_config(pt, &evsel->attr, NULL) && | ||
623 | !evsel->attr.exclude_kernel) | ||
624 | return true; | ||
625 | } | ||
626 | return false; | ||
627 | } | ||
628 | |||
629 | static bool intel_pt_have_tsc(struct intel_pt *pt) | ||
630 | { | ||
631 | struct perf_evsel *evsel; | ||
632 | bool have_tsc = false; | ||
633 | u64 config; | ||
634 | |||
635 | if (!pt->tsc_bit) | ||
636 | return false; | ||
637 | |||
638 | evlist__for_each(pt->session->evlist, evsel) { | ||
639 | if (intel_pt_get_config(pt, &evsel->attr, &config)) { | ||
640 | if (config & pt->tsc_bit) | ||
641 | have_tsc = true; | ||
642 | else | ||
643 | return false; | ||
644 | } | ||
645 | } | ||
646 | return have_tsc; | ||
647 | } | ||
648 | |||
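/*
 * Convert a nanosecond period to TSC ticks using the inverse of the perf
 * time conversion (time ~= (ticks * time_mult) >> time_shift), i.e.
 * ticks ~= (ns << time_shift) / time_mult.  Splitting ns into the quotient
 * and remainder of time_mult keeps the intermediate shift from overflowing
 * 64 bits for large periods.
 */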
649 | static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) | ||
650 | { | ||
651 | u64 quot, rem; | ||
652 | |||
653 | quot = ns / pt->tc.time_mult; | ||
654 | rem = ns % pt->tc.time_mult; | ||
655 | return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) / | ||
656 | pt->tc.time_mult; | ||
657 | } | ||
658 | |||
659 | static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, | ||
660 | unsigned int queue_nr) | ||
661 | { | ||
662 | struct intel_pt_params params = { .get_trace = 0, }; | ||
663 | struct intel_pt_queue *ptq; | ||
664 | |||
665 | ptq = zalloc(sizeof(struct intel_pt_queue)); | ||
666 | if (!ptq) | ||
667 | return NULL; | ||
668 | |||
669 | if (pt->synth_opts.callchain) { | ||
670 | size_t sz = sizeof(struct ip_callchain); | ||
671 | |||
672 | sz += pt->synth_opts.callchain_sz * sizeof(u64); | ||
673 | ptq->chain = zalloc(sz); | ||
674 | if (!ptq->chain) | ||
675 | goto out_free; | ||
676 | } | ||
677 | |||
678 | ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); | ||
679 | if (!ptq->event_buf) | ||
680 | goto out_free; | ||
681 | |||
682 | ptq->pt = pt; | ||
683 | ptq->queue_nr = queue_nr; | ||
684 | ptq->exclude_kernel = intel_pt_exclude_kernel(pt); | ||
685 | ptq->pid = -1; | ||
686 | ptq->tid = -1; | ||
687 | ptq->cpu = -1; | ||
688 | ptq->next_tid = -1; | ||
689 | |||
690 | params.get_trace = intel_pt_get_trace; | ||
691 | params.walk_insn = intel_pt_walk_next_insn; | ||
692 | params.data = ptq; | ||
693 | params.return_compression = intel_pt_return_compression(pt); | ||
694 | params.max_non_turbo_ratio = pt->max_non_turbo_ratio; | ||
695 | params.mtc_period = intel_pt_mtc_period(pt); | ||
696 | params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; | ||
697 | params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d; | ||
698 | |||
699 | if (pt->synth_opts.instructions) { | ||
700 | if (pt->synth_opts.period) { | ||
701 | switch (pt->synth_opts.period_type) { | ||
702 | case PERF_ITRACE_PERIOD_INSTRUCTIONS: | ||
703 | params.period_type = | ||
704 | INTEL_PT_PERIOD_INSTRUCTIONS; | ||
705 | params.period = pt->synth_opts.period; | ||
706 | break; | ||
707 | case PERF_ITRACE_PERIOD_TICKS: | ||
708 | params.period_type = INTEL_PT_PERIOD_TICKS; | ||
709 | params.period = pt->synth_opts.period; | ||
710 | break; | ||
711 | case PERF_ITRACE_PERIOD_NANOSECS: | ||
712 | params.period_type = INTEL_PT_PERIOD_TICKS; | ||
713 | params.period = intel_pt_ns_to_ticks(pt, | ||
714 | pt->synth_opts.period); | ||
715 | break; | ||
716 | default: | ||
717 | break; | ||
718 | } | ||
719 | } | ||
720 | |||
721 | if (!params.period) { | ||
722 | params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS; | ||
723 | params.period = 1000; | ||
724 | } | ||
725 | } | ||
726 | |||
727 | ptq->decoder = intel_pt_decoder_new(¶ms); | ||
728 | if (!ptq->decoder) | ||
729 | goto out_free; | ||
730 | |||
731 | return ptq; | ||
732 | |||
733 | out_free: | ||
734 | zfree(&ptq->event_buf); | ||
735 | zfree(&ptq->chain); | ||
736 | free(ptq); | ||
737 | return NULL; | ||
738 | } | ||
739 | |||
740 | static void intel_pt_free_queue(void *priv) | ||
741 | { | ||
742 | struct intel_pt_queue *ptq = priv; | ||
743 | |||
744 | if (!ptq) | ||
745 | return; | ||
746 | thread__zput(ptq->thread); | ||
747 | intel_pt_decoder_free(ptq->decoder); | ||
748 | zfree(&ptq->event_buf); | ||
749 | zfree(&ptq->chain); | ||
750 | free(ptq); | ||
751 | } | ||
752 | |||
753 | static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt, | ||
754 | struct auxtrace_queue *queue) | ||
755 | { | ||
756 | struct intel_pt_queue *ptq = queue->priv; | ||
757 | |||
758 | if (queue->tid == -1 || pt->have_sched_switch) { | ||
759 | ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu); | ||
760 | thread__zput(ptq->thread); | ||
761 | } | ||
762 | |||
763 | if (!ptq->thread && ptq->tid != -1) | ||
764 | ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid); | ||
765 | |||
766 | if (ptq->thread) { | ||
767 | ptq->pid = ptq->thread->pid_; | ||
768 | if (queue->cpu == -1) | ||
769 | ptq->cpu = ptq->thread->cpu; | ||
770 | } | ||
771 | } | ||
772 | |||
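/*
 * Derive perf sample flags from the decoder state: transaction aborts,
 * asynchronous branches (interrupts) and trace begin/end markers, plus the
 * in-transaction flag and instruction length for ordinary branches.
 */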
773 | static void intel_pt_sample_flags(struct intel_pt_queue *ptq) | ||
774 | { | ||
775 | if (ptq->state->flags & INTEL_PT_ABORT_TX) { | ||
776 | ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT; | ||
777 | } else if (ptq->state->flags & INTEL_PT_ASYNC) { | ||
778 | if (ptq->state->to_ip) | ||
779 | ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | | ||
780 | PERF_IP_FLAG_ASYNC | | ||
781 | PERF_IP_FLAG_INTERRUPT; | ||
782 | else | ||
783 | ptq->flags = PERF_IP_FLAG_BRANCH | | ||
784 | PERF_IP_FLAG_TRACE_END; | ||
785 | ptq->insn_len = 0; | ||
786 | } else { | ||
787 | if (ptq->state->from_ip) | ||
788 | ptq->flags = intel_pt_insn_type(ptq->state->insn_op); | ||
789 | else | ||
790 | ptq->flags = PERF_IP_FLAG_BRANCH | | ||
791 | PERF_IP_FLAG_TRACE_BEGIN; | ||
792 | if (ptq->state->flags & INTEL_PT_IN_TX) | ||
793 | ptq->flags |= PERF_IP_FLAG_IN_TX; | ||
794 | ptq->insn_len = ptq->state->insn_len; | ||
795 | } | ||
796 | } | ||
797 | |||
798 | static int intel_pt_setup_queue(struct intel_pt *pt, | ||
799 | struct auxtrace_queue *queue, | ||
800 | unsigned int queue_nr) | ||
801 | { | ||
802 | struct intel_pt_queue *ptq = queue->priv; | ||
803 | |||
804 | if (list_empty(&queue->head)) | ||
805 | return 0; | ||
806 | |||
807 | if (!ptq) { | ||
808 | ptq = intel_pt_alloc_queue(pt, queue_nr); | ||
809 | if (!ptq) | ||
810 | return -ENOMEM; | ||
811 | queue->priv = ptq; | ||
812 | |||
813 | if (queue->cpu != -1) | ||
814 | ptq->cpu = queue->cpu; | ||
815 | ptq->tid = queue->tid; | ||
816 | |||
817 | if (pt->sampling_mode) { | ||
818 | if (pt->timeless_decoding) | ||
819 | ptq->step_through_buffers = true; | ||
820 | if (pt->timeless_decoding || !pt->have_sched_switch) | ||
821 | ptq->use_buffer_pid_tid = true; | ||
822 | } | ||
823 | } | ||
824 | |||
825 | if (!ptq->on_heap && | ||
826 | (!pt->sync_switch || | ||
827 | ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) { | ||
828 | const struct intel_pt_state *state; | ||
829 | int ret; | ||
830 | |||
831 | if (pt->timeless_decoding) | ||
832 | return 0; | ||
833 | |||
834 | intel_pt_log("queue %u getting timestamp\n", queue_nr); | ||
835 | intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", | ||
836 | queue_nr, ptq->cpu, ptq->pid, ptq->tid); | ||
837 | while (1) { | ||
838 | state = intel_pt_decode(ptq->decoder); | ||
839 | if (state->err) { | ||
840 | if (state->err == INTEL_PT_ERR_NODATA) { | ||
841 | intel_pt_log("queue %u has no timestamp\n", | ||
842 | queue_nr); | ||
843 | return 0; | ||
844 | } | ||
845 | continue; | ||
846 | } | ||
847 | if (state->timestamp) | ||
848 | break; | ||
849 | } | ||
850 | |||
851 | ptq->timestamp = state->timestamp; | ||
852 | intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n", | ||
853 | queue_nr, ptq->timestamp); | ||
854 | ptq->state = state; | ||
855 | ptq->have_sample = true; | ||
856 | intel_pt_sample_flags(ptq); | ||
857 | ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp); | ||
858 | if (ret) | ||
859 | return ret; | ||
860 | ptq->on_heap = true; | ||
861 | } | ||
862 | |||
863 | return 0; | ||
864 | } | ||
865 | |||
866 | static int intel_pt_setup_queues(struct intel_pt *pt) | ||
867 | { | ||
868 | unsigned int i; | ||
869 | int ret; | ||
870 | |||
871 | for (i = 0; i < pt->queues.nr_queues; i++) { | ||
872 | ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i); | ||
873 | if (ret) | ||
874 | return ret; | ||
875 | } | ||
876 | return 0; | ||
877 | } | ||
878 | |||
879 | static int intel_pt_inject_event(union perf_event *event, | ||
880 | struct perf_sample *sample, u64 type, | ||
881 | bool swapped) | ||
882 | { | ||
883 | event->header.size = perf_event__sample_event_size(sample, type, 0); | ||
884 | return perf_event__synthesize_sample(event, type, 0, sample, swapped); | ||
885 | } | ||
886 | |||
887 | static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) | ||
888 | { | ||
889 | int ret; | ||
890 | struct intel_pt *pt = ptq->pt; | ||
891 | union perf_event *event = ptq->event_buf; | ||
892 | struct perf_sample sample = { .ip = 0, }; | ||
893 | |||
894 | event->sample.header.type = PERF_RECORD_SAMPLE; | ||
895 | event->sample.header.misc = PERF_RECORD_MISC_USER; | ||
896 | event->sample.header.size = sizeof(struct perf_event_header); | ||
897 | |||
898 | if (!pt->timeless_decoding) | ||
899 | sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); | ||
900 | |||
901 | sample.ip = ptq->state->from_ip; | ||
902 | sample.pid = ptq->pid; | ||
903 | sample.tid = ptq->tid; | ||
904 | sample.addr = ptq->state->to_ip; | ||
905 | sample.id = ptq->pt->branches_id; | ||
906 | sample.stream_id = ptq->pt->branches_id; | ||
907 | sample.period = 1; | ||
908 | sample.cpu = ptq->cpu; | ||
909 | sample.flags = ptq->flags; | ||
910 | sample.insn_len = ptq->insn_len; | ||
911 | |||
912 | if (pt->branches_filter && !(pt->branches_filter & ptq->flags)) | ||
913 | return 0; | ||
914 | |||
915 | if (pt->synth_opts.inject) { | ||
916 | ret = intel_pt_inject_event(event, &sample, | ||
917 | pt->branches_sample_type, | ||
918 | pt->synth_needs_swap); | ||
919 | if (ret) | ||
920 | return ret; | ||
921 | } | ||
922 | |||
923 | ret = perf_session__deliver_synth_event(pt->session, event, &sample); | ||
924 | if (ret) | ||
925 | pr_err("Intel Processor Trace: failed to deliver branch event, error %d\n", | ||
926 | ret); | ||
927 | |||
928 | return ret; | ||
929 | } | ||
930 | |||
931 | static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) | ||
932 | { | ||
933 | int ret; | ||
934 | struct intel_pt *pt = ptq->pt; | ||
935 | union perf_event *event = ptq->event_buf; | ||
936 | struct perf_sample sample = { .ip = 0, }; | ||
937 | |||
938 | event->sample.header.type = PERF_RECORD_SAMPLE; | ||
939 | event->sample.header.misc = PERF_RECORD_MISC_USER; | ||
940 | event->sample.header.size = sizeof(struct perf_event_header); | ||
941 | |||
942 | if (!pt->timeless_decoding) | ||
943 | sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); | ||
944 | |||
945 | sample.ip = ptq->state->from_ip; | ||
946 | sample.pid = ptq->pid; | ||
947 | sample.tid = ptq->tid; | ||
948 | sample.addr = ptq->state->to_ip; | ||
949 | sample.id = ptq->pt->instructions_id; | ||
950 | sample.stream_id = ptq->pt->instructions_id; | ||
951 | sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; | ||
952 | sample.cpu = ptq->cpu; | ||
953 | sample.flags = ptq->flags; | ||
954 | sample.insn_len = ptq->insn_len; | ||
955 | |||
956 | ptq->last_insn_cnt = ptq->state->tot_insn_cnt; | ||
957 | |||
958 | if (pt->synth_opts.callchain) { | ||
959 | thread_stack__sample(ptq->thread, ptq->chain, | ||
960 | pt->synth_opts.callchain_sz, sample.ip); | ||
961 | sample.callchain = ptq->chain; | ||
962 | } | ||
963 | |||
964 | if (pt->synth_opts.inject) { | ||
965 | ret = intel_pt_inject_event(event, &sample, | ||
966 | pt->instructions_sample_type, | ||
967 | pt->synth_needs_swap); | ||
968 | if (ret) | ||
969 | return ret; | ||
970 | } | ||
971 | |||
972 | ret = perf_session__deliver_synth_event(pt->session, event, &sample); | ||
973 | if (ret) | ||
974 | pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n", | ||
975 | ret); | ||
976 | |||
977 | return ret; | ||
978 | } | ||
979 | |||
980 | static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq) | ||
981 | { | ||
982 | int ret; | ||
983 | struct intel_pt *pt = ptq->pt; | ||
984 | union perf_event *event = ptq->event_buf; | ||
985 | struct perf_sample sample = { .ip = 0, }; | ||
986 | |||
987 | event->sample.header.type = PERF_RECORD_SAMPLE; | ||
988 | event->sample.header.misc = PERF_RECORD_MISC_USER; | ||
989 | event->sample.header.size = sizeof(struct perf_event_header); | ||
990 | |||
991 | if (!pt->timeless_decoding) | ||
992 | sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc); | ||
993 | |||
994 | sample.ip = ptq->state->from_ip; | ||
995 | sample.pid = ptq->pid; | ||
996 | sample.tid = ptq->tid; | ||
997 | sample.addr = ptq->state->to_ip; | ||
998 | sample.id = ptq->pt->transactions_id; | ||
999 | sample.stream_id = ptq->pt->transactions_id; | ||
1000 | sample.period = 1; | ||
1001 | sample.cpu = ptq->cpu; | ||
1002 | sample.flags = ptq->flags; | ||
1003 | sample.insn_len = ptq->insn_len; | ||
1004 | |||
1005 | if (pt->synth_opts.callchain) { | ||
1006 | thread_stack__sample(ptq->thread, ptq->chain, | ||
1007 | pt->synth_opts.callchain_sz, sample.ip); | ||
1008 | sample.callchain = ptq->chain; | ||
1009 | } | ||
1010 | |||
1011 | if (pt->synth_opts.inject) { | ||
1012 | ret = intel_pt_inject_event(event, &sample, | ||
1013 | pt->transactions_sample_type, | ||
1014 | pt->synth_needs_swap); | ||
1015 | if (ret) | ||
1016 | return ret; | ||
1017 | } | ||
1018 | |||
1019 | ret = perf_session__deliver_synth_event(pt->session, event, &sample); | ||
1020 | if (ret) | ||
1021 | pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n", | ||
1022 | ret); | ||
1023 | |||
1024 | return ret; | ||
1025 | } | ||
1026 | |||
1027 | static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu, | ||
1028 | pid_t pid, pid_t tid, u64 ip) | ||
1029 | { | ||
1030 | union perf_event event; | ||
1031 | char msg[MAX_AUXTRACE_ERROR_MSG]; | ||
1032 | int err; | ||
1033 | |||
1034 | intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG); | ||
1035 | |||
1036 | auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE, | ||
1037 | code, cpu, pid, tid, ip, msg); | ||
1038 | |||
1039 | err = perf_session__deliver_synth_event(pt->session, &event, NULL); | ||
1040 | if (err) | ||
1041 | pr_err("Intel Processor Trace: failed to deliver error event, error %d\n", | ||
1042 | err); | ||
1043 | |||
1044 | return err; | ||
1045 | } | ||
1046 | |||
1047 | static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq) | ||
1048 | { | ||
1049 | struct auxtrace_queue *queue; | ||
1050 | pid_t tid = ptq->next_tid; | ||
1051 | int err; | ||
1052 | |||
1053 | if (tid == -1) | ||
1054 | return 0; | ||
1055 | |||
1056 | intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid); | ||
1057 | |||
1058 | err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid); | ||
1059 | |||
1060 | queue = &pt->queues.queue_array[ptq->queue_nr]; | ||
1061 | intel_pt_set_pid_tid_cpu(pt, queue); | ||
1062 | |||
1063 | ptq->next_tid = -1; | ||
1064 | |||
1065 | return err; | ||
1066 | } | ||
1067 | |||
1068 | static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip) | ||
1069 | { | ||
1070 | struct intel_pt *pt = ptq->pt; | ||
1071 | |||
1072 | return ip == pt->switch_ip && | ||
1073 | (ptq->flags & PERF_IP_FLAG_BRANCH) && | ||
1074 | !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC | | ||
1075 | PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT)); | ||
1076 | } | ||
1077 | |||
1078 | static int intel_pt_sample(struct intel_pt_queue *ptq) | ||
1079 | { | ||
1080 | const struct intel_pt_state *state = ptq->state; | ||
1081 | struct intel_pt *pt = ptq->pt; | ||
1082 | int err; | ||
1083 | |||
1084 | if (!ptq->have_sample) | ||
1085 | return 0; | ||
1086 | |||
1087 | ptq->have_sample = false; | ||
1088 | |||
1089 | if (pt->sample_instructions && | ||
1090 | (state->type & INTEL_PT_INSTRUCTION)) { | ||
1091 | err = intel_pt_synth_instruction_sample(ptq); | ||
1092 | if (err) | ||
1093 | return err; | ||
1094 | } | ||
1095 | |||
1096 | if (pt->sample_transactions && | ||
1097 | (state->type & INTEL_PT_TRANSACTION)) { | ||
1098 | err = intel_pt_synth_transaction_sample(ptq); | ||
1099 | if (err) | ||
1100 | return err; | ||
1101 | } | ||
1102 | |||
1103 | if (!(state->type & INTEL_PT_BRANCH)) | ||
1104 | return 0; | ||
1105 | |||
1106 | if (pt->synth_opts.callchain) | ||
1107 | thread_stack__event(ptq->thread, ptq->flags, state->from_ip, | ||
1108 | state->to_ip, ptq->insn_len, | ||
1109 | state->trace_nr); | ||
1110 | else | ||
1111 | thread_stack__set_trace_nr(ptq->thread, state->trace_nr); | ||
1112 | |||
1113 | if (pt->sample_branches) { | ||
1114 | err = intel_pt_synth_branch_sample(ptq); | ||
1115 | if (err) | ||
1116 | return err; | ||
1117 | } | ||
1118 | |||
1119 | if (!pt->sync_switch) | ||
1120 | return 0; | ||
1121 | |||
1122 | if (intel_pt_is_switch_ip(ptq, state->to_ip)) { | ||
1123 | switch (ptq->switch_state) { | ||
1124 | case INTEL_PT_SS_UNKNOWN: | ||
1125 | case INTEL_PT_SS_EXPECTING_SWITCH_IP: | ||
1126 | err = intel_pt_next_tid(pt, ptq); | ||
1127 | if (err) | ||
1128 | return err; | ||
1129 | ptq->switch_state = INTEL_PT_SS_TRACING; | ||
1130 | break; | ||
1131 | default: | ||
1132 | ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT; | ||
1133 | return 1; | ||
1134 | } | ||
1135 | } else if (!state->to_ip) { | ||
1136 | ptq->switch_state = INTEL_PT_SS_NOT_TRACING; | ||
1137 | } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) { | ||
1138 | ptq->switch_state = INTEL_PT_SS_UNKNOWN; | ||
1139 | } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN && | ||
1140 | state->to_ip == pt->ptss_ip && | ||
1141 | (ptq->flags & PERF_IP_FLAG_CALL)) { | ||
1142 | ptq->switch_state = INTEL_PT_SS_TRACING; | ||
1143 | } | ||
1144 | |||
1145 | return 0; | ||
1146 | } | ||
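/*
 * Note on the sync_switch state machine used above (the transitions can
 * be read from this function and from intel_pt_process_switch() below):
 * a sched_switch sample moves a queue from TRACING or UNKNOWN to
 * EXPECTING_SWITCH_IP, and the decoder reaching switch_ip then returns
 * it to TRACING. If the decoder reaches switch_ip while already
 * TRACING, the queue parks in EXPECTING_SWITCH_EVENT until the matching
 * sched_switch sample arrives, and a branch to IP zero means tracing
 * has stopped, i.e. NOT_TRACING.
 */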
1147 | |||
1148 | static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip) | ||
1149 | { | ||
1150 | struct map *map; | ||
1151 | struct symbol *sym, *start; | ||
1152 | u64 ip, switch_ip = 0; | ||
1153 | |||
1154 | if (ptss_ip) | ||
1155 | *ptss_ip = 0; | ||
1156 | |||
1157 | map = machine__kernel_map(machine, MAP__FUNCTION); | ||
1158 | if (!map) | ||
1159 | return 0; | ||
1160 | |||
1161 | if (map__load(map, machine->symbol_filter)) | ||
1162 | return 0; | ||
1163 | |||
1164 | start = dso__first_symbol(map->dso, MAP__FUNCTION); | ||
1165 | |||
1166 | for (sym = start; sym; sym = dso__next_symbol(sym)) { | ||
1167 | if (sym->binding == STB_GLOBAL && | ||
1168 | !strcmp(sym->name, "__switch_to")) { | ||
1169 | ip = map->unmap_ip(map, sym->start); | ||
1170 | if (ip >= map->start && ip < map->end) { | ||
1171 | switch_ip = ip; | ||
1172 | break; | ||
1173 | } | ||
1174 | } | ||
1175 | } | ||
1176 | |||
1177 | if (!switch_ip || !ptss_ip) | ||
1178 | return 0; | ||
1179 | |||
1180 | for (sym = start; sym; sym = dso__next_symbol(sym)) { | ||
1181 | if (!strcmp(sym->name, "perf_trace_sched_switch")) { | ||
1182 | ip = map->unmap_ip(map, sym->start); | ||
1183 | if (ip >= map->start && ip < map->end) { | ||
1184 | *ptss_ip = ip; | ||
1185 | break; | ||
1186 | } | ||
1187 | } | ||
1188 | } | ||
1189 | |||
1190 | return switch_ip; | ||
1191 | } | ||
1192 | |||
1193 | static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp) | ||
1194 | { | ||
1195 | const struct intel_pt_state *state = ptq->state; | ||
1196 | struct intel_pt *pt = ptq->pt; | ||
1197 | int err; | ||
1198 | |||
1199 | if (!pt->kernel_start) { | ||
1200 | pt->kernel_start = machine__kernel_start(pt->machine); | ||
1201 | if (pt->per_cpu_mmaps && pt->have_sched_switch && | ||
1202 | !pt->timeless_decoding && intel_pt_tracing_kernel(pt) && | ||
1203 | !pt->sampling_mode) { | ||
1204 | pt->switch_ip = intel_pt_switch_ip(pt->machine, | ||
1205 | &pt->ptss_ip); | ||
1206 | if (pt->switch_ip) { | ||
1207 | intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n", | ||
1208 | pt->switch_ip, pt->ptss_ip); | ||
1209 | pt->sync_switch = true; | ||
1210 | } | ||
1211 | } | ||
1212 | } | ||
1213 | |||
1214 | intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n", | ||
1215 | ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid); | ||
1216 | while (1) { | ||
1217 | err = intel_pt_sample(ptq); | ||
1218 | if (err) | ||
1219 | return err; | ||
1220 | |||
1221 | state = intel_pt_decode(ptq->decoder); | ||
1222 | if (state->err) { | ||
1223 | if (state->err == INTEL_PT_ERR_NODATA) | ||
1224 | return 1; | ||
1225 | if (pt->sync_switch && | ||
1226 | state->from_ip >= pt->kernel_start) { | ||
1227 | pt->sync_switch = false; | ||
1228 | intel_pt_next_tid(pt, ptq); | ||
1229 | } | ||
1230 | if (pt->synth_opts.errors) { | ||
1231 | err = intel_pt_synth_error(pt, state->err, | ||
1232 | ptq->cpu, ptq->pid, | ||
1233 | ptq->tid, | ||
1234 | state->from_ip); | ||
1235 | if (err) | ||
1236 | return err; | ||
1237 | } | ||
1238 | continue; | ||
1239 | } | ||
1240 | |||
1241 | ptq->state = state; | ||
1242 | ptq->have_sample = true; | ||
1243 | intel_pt_sample_flags(ptq); | ||
1244 | |||
1245 | /* Use estimated TSC upon return to user space */ | ||
1246 | if (pt->est_tsc && | ||
1247 | (state->from_ip >= pt->kernel_start || !state->from_ip) && | ||
1248 | state->to_ip && state->to_ip < pt->kernel_start) { | ||
1249 | intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", | ||
1250 | state->timestamp, state->est_timestamp); | ||
1251 | ptq->timestamp = state->est_timestamp; | ||
1252 | /* Use estimated TSC in unknown switch state */ | ||
1253 | } else if (pt->sync_switch && | ||
1254 | ptq->switch_state == INTEL_PT_SS_UNKNOWN && | ||
1255 | intel_pt_is_switch_ip(ptq, state->to_ip) && | ||
1256 | ptq->next_tid == -1) { | ||
1257 | intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n", | ||
1258 | state->timestamp, state->est_timestamp); | ||
1259 | ptq->timestamp = state->est_timestamp; | ||
1260 | } else if (state->timestamp > ptq->timestamp) { | ||
1261 | ptq->timestamp = state->timestamp; | ||
1262 | } | ||
1263 | |||
1264 | if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) { | ||
1265 | *timestamp = ptq->timestamp; | ||
1266 | return 0; | ||
1267 | } | ||
1268 | } | ||
1269 | return 0; | ||
1270 | } | ||
1271 | |||
1272 | static inline int intel_pt_update_queues(struct intel_pt *pt) | ||
1273 | { | ||
1274 | if (pt->queues.new_data) { | ||
1275 | pt->queues.new_data = false; | ||
1276 | return intel_pt_setup_queues(pt); | ||
1277 | } | ||
1278 | return 0; | ||
1279 | } | ||
1280 | |||
1281 | static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp) | ||
1282 | { | ||
1283 | unsigned int queue_nr; | ||
1284 | u64 ts; | ||
1285 | int ret; | ||
1286 | |||
1287 | while (1) { | ||
1288 | struct auxtrace_queue *queue; | ||
1289 | struct intel_pt_queue *ptq; | ||
1290 | |||
1291 | if (!pt->heap.heap_cnt) | ||
1292 | return 0; | ||
1293 | |||
1294 | if (pt->heap.heap_array[0].ordinal >= timestamp) | ||
1295 | return 0; | ||
1296 | |||
1297 | queue_nr = pt->heap.heap_array[0].queue_nr; | ||
1298 | queue = &pt->queues.queue_array[queue_nr]; | ||
1299 | ptq = queue->priv; | ||
1300 | |||
1301 | intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n", | ||
1302 | queue_nr, pt->heap.heap_array[0].ordinal, | ||
1303 | timestamp); | ||
1304 | |||
1305 | auxtrace_heap__pop(&pt->heap); | ||
1306 | |||
1307 | if (pt->heap.heap_cnt) { | ||
1308 | ts = pt->heap.heap_array[0].ordinal + 1; | ||
1309 | if (ts > timestamp) | ||
1310 | ts = timestamp; | ||
1311 | } else { | ||
1312 | ts = timestamp; | ||
1313 | } | ||
1314 | |||
1315 | intel_pt_set_pid_tid_cpu(pt, queue); | ||
1316 | |||
1317 | ret = intel_pt_run_decoder(ptq, &ts); | ||
1318 | |||
1319 | if (ret < 0) { | ||
1320 | auxtrace_heap__add(&pt->heap, queue_nr, ts); | ||
1321 | return ret; | ||
1322 | } | ||
1323 | |||
1324 | if (!ret) { | ||
1325 | ret = auxtrace_heap__add(&pt->heap, queue_nr, ts); | ||
1326 | if (ret < 0) | ||
1327 | return ret; | ||
1328 | } else { | ||
1329 | ptq->on_heap = false; | ||
1330 | } | ||
1331 | } | ||
1332 | |||
1333 | return 0; | ||
1334 | } | ||
1335 | |||
1336 | static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid, | ||
1337 | u64 time_) | ||
1338 | { | ||
1339 | struct auxtrace_queues *queues = &pt->queues; | ||
1340 | unsigned int i; | ||
1341 | u64 ts = 0; | ||
1342 | |||
1343 | for (i = 0; i < queues->nr_queues; i++) { | ||
1344 | struct auxtrace_queue *queue = &pt->queues.queue_array[i]; | ||
1345 | struct intel_pt_queue *ptq = queue->priv; | ||
1346 | |||
1347 | if (ptq && (tid == -1 || ptq->tid == tid)) { | ||
1348 | ptq->time = time_; | ||
1349 | intel_pt_set_pid_tid_cpu(pt, queue); | ||
1350 | intel_pt_run_decoder(ptq, &ts); | ||
1351 | } | ||
1352 | } | ||
1353 | return 0; | ||
1354 | } | ||
1355 | |||
1356 | static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample) | ||
1357 | { | ||
1358 | return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu, | ||
1359 | sample->pid, sample->tid, 0); | ||
1360 | } | ||
1361 | |||
1362 | static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu) | ||
1363 | { | ||
1364 | unsigned i, j; | ||
1365 | |||
1366 | if (cpu < 0 || !pt->queues.nr_queues) | ||
1367 | return NULL; | ||
1368 | |||
1369 | if ((unsigned)cpu >= pt->queues.nr_queues) | ||
1370 | i = pt->queues.nr_queues - 1; | ||
1371 | else | ||
1372 | i = cpu; | ||
1373 | |||
1374 | if (pt->queues.queue_array[i].cpu == cpu) | ||
1375 | return pt->queues.queue_array[i].priv; | ||
1376 | |||
1377 | for (j = 0; i > 0; j++) { | ||
1378 | if (pt->queues.queue_array[--i].cpu == cpu) | ||
1379 | return pt->queues.queue_array[i].priv; | ||
1380 | } | ||
1381 | |||
1382 | for (; j < pt->queues.nr_queues; j++) { | ||
1383 | if (pt->queues.queue_array[j].cpu == cpu) | ||
1384 | return pt->queues.queue_array[j].priv; | ||
1385 | } | ||
1386 | |||
1387 | return NULL; | ||
1388 | } | ||
1389 | |||
1390 | static int intel_pt_process_switch(struct intel_pt *pt, | ||
1391 | struct perf_sample *sample) | ||
1392 | { | ||
1393 | struct intel_pt_queue *ptq; | ||
1394 | struct perf_evsel *evsel; | ||
1395 | pid_t tid; | ||
1396 | int cpu, err; | ||
1397 | |||
1398 | evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id); | ||
1399 | if (evsel != pt->switch_evsel) | ||
1400 | return 0; | ||
1401 | |||
1402 | tid = perf_evsel__intval(evsel, sample, "next_pid"); | ||
1403 | cpu = sample->cpu; | ||
1404 | |||
1405 | intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", | ||
1406 | cpu, tid, sample->time, perf_time_to_tsc(sample->time, | ||
1407 | &pt->tc)); | ||
1408 | |||
1409 | if (!pt->sync_switch) | ||
1410 | goto out; | ||
1411 | |||
1412 | ptq = intel_pt_cpu_to_ptq(pt, cpu); | ||
1413 | if (!ptq) | ||
1414 | goto out; | ||
1415 | |||
1416 | switch (ptq->switch_state) { | ||
1417 | case INTEL_PT_SS_NOT_TRACING: | ||
1418 | ptq->next_tid = -1; | ||
1419 | break; | ||
1420 | case INTEL_PT_SS_UNKNOWN: | ||
1421 | case INTEL_PT_SS_TRACING: | ||
1422 | ptq->next_tid = tid; | ||
1423 | ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP; | ||
1424 | return 0; | ||
1425 | case INTEL_PT_SS_EXPECTING_SWITCH_EVENT: | ||
1426 | if (!ptq->on_heap) { | ||
1427 | ptq->timestamp = perf_time_to_tsc(sample->time, | ||
1428 | &pt->tc); | ||
1429 | err = auxtrace_heap__add(&pt->heap, ptq->queue_nr, | ||
1430 | ptq->timestamp); | ||
1431 | if (err) | ||
1432 | return err; | ||
1433 | ptq->on_heap = true; | ||
1434 | } | ||
1435 | ptq->switch_state = INTEL_PT_SS_TRACING; | ||
1436 | break; | ||
1437 | case INTEL_PT_SS_EXPECTING_SWITCH_IP: | ||
1438 | ptq->next_tid = tid; | ||
1439 | intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu); | ||
1440 | break; | ||
1441 | default: | ||
1442 | break; | ||
1443 | } | ||
1444 | out: | ||
1445 | return machine__set_current_tid(pt->machine, cpu, -1, tid); | ||
1446 | } | ||
1447 | |||
1448 | static int intel_pt_process_itrace_start(struct intel_pt *pt, | ||
1449 | union perf_event *event, | ||
1450 | struct perf_sample *sample) | ||
1451 | { | ||
1452 | if (!pt->per_cpu_mmaps) | ||
1453 | return 0; | ||
1454 | |||
1455 | intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n", | ||
1456 | sample->cpu, event->itrace_start.pid, | ||
1457 | event->itrace_start.tid, sample->time, | ||
1458 | perf_time_to_tsc(sample->time, &pt->tc)); | ||
1459 | |||
1460 | return machine__set_current_tid(pt->machine, sample->cpu, | ||
1461 | event->itrace_start.pid, | ||
1462 | event->itrace_start.tid); | ||
1463 | } | ||
1464 | |||
1465 | static int intel_pt_process_event(struct perf_session *session, | ||
1466 | union perf_event *event, | ||
1467 | struct perf_sample *sample, | ||
1468 | struct perf_tool *tool) | ||
1469 | { | ||
1470 | struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, | ||
1471 | auxtrace); | ||
1472 | u64 timestamp; | ||
1473 | int err = 0; | ||
1474 | |||
1475 | if (dump_trace) | ||
1476 | return 0; | ||
1477 | |||
1478 | if (!tool->ordered_events) { | ||
1479 | pr_err("Intel Processor Trace requires ordered events\n"); | ||
1480 | return -EINVAL; | ||
1481 | } | ||
1482 | |||
1483 | if (sample->time && sample->time != (u64)-1) | ||
1484 | timestamp = perf_time_to_tsc(sample->time, &pt->tc); | ||
1485 | else | ||
1486 | timestamp = 0; | ||
1487 | |||
1488 | if (timestamp || pt->timeless_decoding) { | ||
1489 | err = intel_pt_update_queues(pt); | ||
1490 | if (err) | ||
1491 | return err; | ||
1492 | } | ||
1493 | |||
1494 | if (pt->timeless_decoding) { | ||
1495 | if (event->header.type == PERF_RECORD_EXIT) { | ||
1496 | err = intel_pt_process_timeless_queues(pt, | ||
1497 | event->fork.tid, | ||
1498 | sample->time); | ||
1499 | } | ||
1500 | } else if (timestamp) { | ||
1501 | err = intel_pt_process_queues(pt, timestamp); | ||
1502 | } | ||
1503 | if (err) | ||
1504 | return err; | ||
1505 | |||
1506 | if (event->header.type == PERF_RECORD_AUX && | ||
1507 | (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) && | ||
1508 | pt->synth_opts.errors) { | ||
1509 | err = intel_pt_lost(pt, sample); | ||
1510 | if (err) | ||
1511 | return err; | ||
1512 | } | ||
1513 | |||
1514 | if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE) | ||
1515 | err = intel_pt_process_switch(pt, sample); | ||
1516 | else if (event->header.type == PERF_RECORD_ITRACE_START) | ||
1517 | err = intel_pt_process_itrace_start(pt, event, sample); | ||
1518 | |||
1519 | intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n", | ||
1520 | perf_event__name(event->header.type), event->header.type, | ||
1521 | sample->cpu, sample->time, timestamp); | ||
1522 | |||
1523 | return err; | ||
1524 | } | ||
1525 | |||
1526 | static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool) | ||
1527 | { | ||
1528 | struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, | ||
1529 | auxtrace); | ||
1530 | int ret; | ||
1531 | |||
1532 | if (dump_trace) | ||
1533 | return 0; | ||
1534 | |||
1535 | if (!tool->ordered_events) | ||
1536 | return -EINVAL; | ||
1537 | |||
1538 | ret = intel_pt_update_queues(pt); | ||
1539 | if (ret < 0) | ||
1540 | return ret; | ||
1541 | |||
1542 | if (pt->timeless_decoding) | ||
1543 | return intel_pt_process_timeless_queues(pt, -1, | ||
1544 | MAX_TIMESTAMP - 1); | ||
1545 | |||
1546 | return intel_pt_process_queues(pt, MAX_TIMESTAMP); | ||
1547 | } | ||
1548 | |||
1549 | static void intel_pt_free_events(struct perf_session *session) | ||
1550 | { | ||
1551 | struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, | ||
1552 | auxtrace); | ||
1553 | struct auxtrace_queues *queues = &pt->queues; | ||
1554 | unsigned int i; | ||
1555 | |||
1556 | for (i = 0; i < queues->nr_queues; i++) { | ||
1557 | intel_pt_free_queue(queues->queue_array[i].priv); | ||
1558 | queues->queue_array[i].priv = NULL; | ||
1559 | } | ||
1560 | intel_pt_log_disable(); | ||
1561 | auxtrace_queues__free(queues); | ||
1562 | } | ||
1563 | |||
1564 | static void intel_pt_free(struct perf_session *session) | ||
1565 | { | ||
1566 | struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, | ||
1567 | auxtrace); | ||
1568 | |||
1569 | auxtrace_heap__free(&pt->heap); | ||
1570 | intel_pt_free_events(session); | ||
1571 | session->auxtrace = NULL; | ||
1572 | thread__delete(pt->unknown_thread); | ||
1573 | free(pt); | ||
1574 | } | ||
1575 | |||
1576 | static int intel_pt_process_auxtrace_event(struct perf_session *session, | ||
1577 | union perf_event *event, | ||
1578 | struct perf_tool *tool __maybe_unused) | ||
1579 | { | ||
1580 | struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt, | ||
1581 | auxtrace); | ||
1582 | |||
1583 | if (pt->sampling_mode) | ||
1584 | return 0; | ||
1585 | |||
1586 | if (!pt->data_queued) { | ||
1587 | struct auxtrace_buffer *buffer; | ||
1588 | off_t data_offset; | ||
1589 | int fd = perf_data_file__fd(session->file); | ||
1590 | int err; | ||
1591 | |||
1592 | if (perf_data_file__is_pipe(session->file)) { | ||
1593 | data_offset = 0; | ||
1594 | } else { | ||
1595 | data_offset = lseek(fd, 0, SEEK_CUR); | ||
1596 | if (data_offset == -1) | ||
1597 | return -errno; | ||
1598 | } | ||
1599 | |||
1600 | err = auxtrace_queues__add_event(&pt->queues, session, event, | ||
1601 | data_offset, &buffer); | ||
1602 | if (err) | ||
1603 | return err; | ||
1604 | |||
1605 | /* Dump here now that we have copied a piped trace out of the pipe */ | ||
1606 | if (dump_trace) { | ||
1607 | if (auxtrace_buffer__get_data(buffer, fd)) { | ||
1608 | intel_pt_dump_event(pt, buffer->data, | ||
1609 | buffer->size); | ||
1610 | auxtrace_buffer__put_data(buffer); | ||
1611 | } | ||
1612 | } | ||
1613 | } | ||
1614 | |||
1615 | return 0; | ||
1616 | } | ||
1617 | |||
1618 | struct intel_pt_synth { | ||
1619 | struct perf_tool dummy_tool; | ||
1620 | struct perf_session *session; | ||
1621 | }; | ||
1622 | |||
1623 | static int intel_pt_event_synth(struct perf_tool *tool, | ||
1624 | union perf_event *event, | ||
1625 | struct perf_sample *sample __maybe_unused, | ||
1626 | struct machine *machine __maybe_unused) | ||
1627 | { | ||
1628 | struct intel_pt_synth *intel_pt_synth = | ||
1629 | container_of(tool, struct intel_pt_synth, dummy_tool); | ||
1630 | |||
1631 | return perf_session__deliver_synth_event(intel_pt_synth->session, event, | ||
1632 | NULL); | ||
1633 | } | ||
1634 | |||
1635 | static int intel_pt_synth_event(struct perf_session *session, | ||
1636 | struct perf_event_attr *attr, u64 id) | ||
1637 | { | ||
1638 | struct intel_pt_synth intel_pt_synth; | ||
1639 | |||
1640 | memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth)); | ||
1641 | intel_pt_synth.session = session; | ||
1642 | |||
1643 | return perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1, | ||
1644 | &id, intel_pt_event_synth); | ||
1645 | } | ||
1646 | |||
1647 | static int intel_pt_synth_events(struct intel_pt *pt, | ||
1648 | struct perf_session *session) | ||
1649 | { | ||
1650 | struct perf_evlist *evlist = session->evlist; | ||
1651 | struct perf_evsel *evsel; | ||
1652 | struct perf_event_attr attr; | ||
1653 | bool found = false; | ||
1654 | u64 id; | ||
1655 | int err; | ||
1656 | |||
1657 | evlist__for_each(evlist, evsel) { | ||
1658 | if (evsel->attr.type == pt->pmu_type && evsel->ids) { | ||
1659 | found = true; | ||
1660 | break; | ||
1661 | } | ||
1662 | } | ||
1663 | |||
1664 | if (!found) { | ||
1665 | pr_debug("There are no selected events with Intel Processor Trace data\n"); | ||
1666 | return 0; | ||
1667 | } | ||
1668 | |||
1669 | memset(&attr, 0, sizeof(struct perf_event_attr)); | ||
1670 | attr.size = sizeof(struct perf_event_attr); | ||
1671 | attr.type = PERF_TYPE_HARDWARE; | ||
1672 | attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; | ||
1673 | attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | | ||
1674 | PERF_SAMPLE_PERIOD; | ||
1675 | if (pt->timeless_decoding) | ||
1676 | attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; | ||
1677 | else | ||
1678 | attr.sample_type |= PERF_SAMPLE_TIME; | ||
1679 | if (!pt->per_cpu_mmaps) | ||
1680 | attr.sample_type &= ~(u64)PERF_SAMPLE_CPU; | ||
1681 | attr.exclude_user = evsel->attr.exclude_user; | ||
1682 | attr.exclude_kernel = evsel->attr.exclude_kernel; | ||
1683 | attr.exclude_hv = evsel->attr.exclude_hv; | ||
1684 | attr.exclude_host = evsel->attr.exclude_host; | ||
1685 | attr.exclude_guest = evsel->attr.exclude_guest; | ||
1686 | attr.sample_id_all = evsel->attr.sample_id_all; | ||
1687 | attr.read_format = evsel->attr.read_format; | ||
1688 | |||
1689 | id = evsel->id[0] + 1000000000; | ||
1690 | if (!id) | ||
1691 | id = 1; | ||
1692 | |||
1693 | if (pt->synth_opts.instructions) { | ||
1694 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; | ||
1695 | if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS) | ||
1696 | attr.sample_period = | ||
1697 | intel_pt_ns_to_ticks(pt, pt->synth_opts.period); | ||
1698 | else | ||
1699 | attr.sample_period = pt->synth_opts.period; | ||
1700 | pt->instructions_sample_period = attr.sample_period; | ||
1701 | if (pt->synth_opts.callchain) | ||
1702 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; | ||
1703 | pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | ||
1704 | id, (u64)attr.sample_type); | ||
1705 | err = intel_pt_synth_event(session, &attr, id); | ||
1706 | if (err) { | ||
1707 | pr_err("%s: failed to synthesize 'instructions' event type\n", | ||
1708 | __func__); | ||
1709 | return err; | ||
1710 | } | ||
1711 | pt->sample_instructions = true; | ||
1712 | pt->instructions_sample_type = attr.sample_type; | ||
1713 | pt->instructions_id = id; | ||
1714 | id += 1; | ||
1715 | } | ||
1716 | |||
1717 | if (pt->synth_opts.transactions) { | ||
1718 | attr.config = PERF_COUNT_HW_INSTRUCTIONS; | ||
1719 | attr.sample_period = 1; | ||
1720 | if (pt->synth_opts.callchain) | ||
1721 | attr.sample_type |= PERF_SAMPLE_CALLCHAIN; | ||
1722 | pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | ||
1723 | id, (u64)attr.sample_type); | ||
1724 | err = intel_pt_synth_event(session, &attr, id); | ||
1725 | if (err) { | ||
1726 | pr_err("%s: failed to synthesize 'transactions' event type\n", | ||
1727 | __func__); | ||
1728 | return err; | ||
1729 | } | ||
1730 | pt->sample_transactions = true; | ||
1731 | pt->transactions_id = id; | ||
1732 | id += 1; | ||
1733 | evlist__for_each(evlist, evsel) { | ||
1734 | if (evsel->id && evsel->id[0] == pt->transactions_id) { | ||
1735 | if (evsel->name) | ||
1736 | zfree(&evsel->name); | ||
1737 | evsel->name = strdup("transactions"); | ||
1738 | break; | ||
1739 | } | ||
1740 | } | ||
1741 | } | ||
1742 | |||
1743 | if (pt->synth_opts.branches) { | ||
1744 | attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; | ||
1745 | attr.sample_period = 1; | ||
1746 | attr.sample_type |= PERF_SAMPLE_ADDR; | ||
1747 | attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN; | ||
1748 | pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n", | ||
1749 | id, (u64)attr.sample_type); | ||
1750 | err = intel_pt_synth_event(session, &attr, id); | ||
1751 | if (err) { | ||
1752 | pr_err("%s: failed to synthesize 'branches' event type\n", | ||
1753 | __func__); | ||
1754 | return err; | ||
1755 | } | ||
1756 | pt->sample_branches = true; | ||
1757 | pt->branches_sample_type = attr.sample_type; | ||
1758 | pt->branches_id = id; | ||
1759 | } | ||
1760 | |||
1761 | pt->synth_needs_swap = evsel->needs_swap; | ||
1762 | |||
1763 | return 0; | ||
1764 | } | ||
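/*
 * Illustrative usage (option letters per the itrace documentation in
 * this series, not shown in this hunk): 'perf report --itrace=i100ns'
 * sets synth_opts.instructions with a 100ns period, while 'b', 'x',
 * 'e' and 'g' select branches, transactions, errors and callchains
 * respectively, which is what drives the attrs synthesized above.
 */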
1765 | |||
1766 | static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist) | ||
1767 | { | ||
1768 | struct perf_evsel *evsel; | ||
1769 | |||
1770 | evlist__for_each_reverse(evlist, evsel) { | ||
1771 | const char *name = perf_evsel__name(evsel); | ||
1772 | |||
1773 | if (!strcmp(name, "sched:sched_switch")) | ||
1774 | return evsel; | ||
1775 | } | ||
1776 | |||
1777 | return NULL; | ||
1778 | } | ||
1779 | |||
1780 | static const char * const intel_pt_info_fmts[] = { | ||
1781 | [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", | ||
1782 | [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", | ||
1783 | [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", | ||
1784 | [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", | ||
1785 | [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", | ||
1786 | [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", | ||
1787 | [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n", | ||
1788 | [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n", | ||
1789 | [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", | ||
1790 | [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n", | ||
1791 | [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n", | ||
1792 | [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n", | ||
1793 | [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n", | ||
1794 | [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n", | ||
1795 | }; | ||
1796 | |||
1797 | static void intel_pt_print_info(u64 *arr, int start, int finish) | ||
1798 | { | ||
1799 | int i; | ||
1800 | |||
1801 | if (!dump_trace) | ||
1802 | return; | ||
1803 | |||
1804 | for (i = start; i <= finish; i++) | ||
1805 | fprintf(stdout, intel_pt_info_fmts[i], arr[i]); | ||
1806 | } | ||
1807 | |||
1808 | int intel_pt_process_auxtrace_info(union perf_event *event, | ||
1809 | struct perf_session *session) | ||
1810 | { | ||
1811 | struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; | ||
1812 | size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS; | ||
1813 | struct intel_pt *pt; | ||
1814 | int err; | ||
1815 | |||
1816 | if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + | ||
1817 | min_sz) | ||
1818 | return -EINVAL; | ||
1819 | |||
1820 | pt = zalloc(sizeof(struct intel_pt)); | ||
1821 | if (!pt) | ||
1822 | return -ENOMEM; | ||
1823 | |||
1824 | err = auxtrace_queues__init(&pt->queues); | ||
1825 | if (err) | ||
1826 | goto err_free; | ||
1827 | |||
1828 | intel_pt_log_set_name(INTEL_PT_PMU_NAME); | ||
1829 | |||
1830 | pt->session = session; | ||
1831 | pt->machine = &session->machines.host; /* No kvm support */ | ||
1832 | pt->auxtrace_type = auxtrace_info->type; | ||
1833 | pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE]; | ||
1834 | pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT]; | ||
1835 | pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT]; | ||
1836 | pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO]; | ||
1837 | pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO]; | ||
1838 | pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT]; | ||
1839 | pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT]; | ||
1840 | pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH]; | ||
1841 | pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE]; | ||
1842 | pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS]; | ||
1843 | intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE, | ||
1844 | INTEL_PT_PER_CPU_MMAPS); | ||
1845 | |||
1846 | if (auxtrace_info->header.size >= sizeof(struct auxtrace_info_event) + | ||
1847 | (sizeof(u64) * INTEL_PT_CYC_BIT)) { | ||
1848 | pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT]; | ||
1849 | pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS]; | ||
1850 | pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N]; | ||
1851 | pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D]; | ||
1852 | pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT]; | ||
1853 | intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT, | ||
1854 | INTEL_PT_CYC_BIT); | ||
1855 | } | ||
1856 | |||
1857 | pt->timeless_decoding = intel_pt_timeless_decoding(pt); | ||
1858 | pt->have_tsc = intel_pt_have_tsc(pt); | ||
1859 | pt->sampling_mode = false; | ||
1860 | pt->est_tsc = !pt->timeless_decoding; | ||
1861 | |||
1862 | pt->unknown_thread = thread__new(999999999, 999999999); | ||
1863 | if (!pt->unknown_thread) { | ||
1864 | err = -ENOMEM; | ||
1865 | goto err_free_queues; | ||
1866 | } | ||
1867 | err = thread__set_comm(pt->unknown_thread, "unknown", 0); | ||
1868 | if (err) | ||
1869 | goto err_delete_thread; | ||
1870 | if (thread__init_map_groups(pt->unknown_thread, pt->machine)) { | ||
1871 | err = -ENOMEM; | ||
1872 | goto err_delete_thread; | ||
1873 | } | ||
1874 | |||
1875 | pt->auxtrace.process_event = intel_pt_process_event; | ||
1876 | pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event; | ||
1877 | pt->auxtrace.flush_events = intel_pt_flush; | ||
1878 | pt->auxtrace.free_events = intel_pt_free_events; | ||
1879 | pt->auxtrace.free = intel_pt_free; | ||
1880 | session->auxtrace = &pt->auxtrace; | ||
1881 | |||
1882 | if (dump_trace) | ||
1883 | return 0; | ||
1884 | |||
1885 | if (pt->have_sched_switch == 1) { | ||
1886 | pt->switch_evsel = intel_pt_find_sched_switch(session->evlist); | ||
1887 | if (!pt->switch_evsel) { | ||
1888 | pr_err("%s: missing sched_switch event\n", __func__); | ||
1889 | goto err_delete_thread; | ||
1890 | } | ||
1891 | } | ||
1892 | |||
1893 | if (session->itrace_synth_opts && session->itrace_synth_opts->set) { | ||
1894 | pt->synth_opts = *session->itrace_synth_opts; | ||
1895 | } else { | ||
1896 | itrace_synth_opts__set_default(&pt->synth_opts); | ||
1897 | if (use_browser != -1) { | ||
1898 | pt->synth_opts.branches = false; | ||
1899 | pt->synth_opts.callchain = true; | ||
1900 | } | ||
1901 | } | ||
1902 | |||
1903 | if (pt->synth_opts.log) | ||
1904 | intel_pt_log_enable(); | ||
1905 | |||
1906 | /* Maximum non-turbo ratio is TSC freq / 100 MHz */ | ||
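/*
 * Worked example (figures illustrative): for a 3.4 GHz TSC,
 * tsc_freq = 3400000000, so (3400000000 + 50000000) / 100000000 = 34,
 * i.e. the TSC frequency in units of 100 MHz, rounded to nearest.
 */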
1907 | if (pt->tc.time_mult) { | ||
1908 | u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000); | ||
1909 | |||
1910 | pt->max_non_turbo_ratio = (tsc_freq + 50000000) / 100000000; | ||
1911 | intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq); | ||
1912 | intel_pt_log("Maximum non-turbo ratio %u\n", | ||
1913 | pt->max_non_turbo_ratio); | ||
1914 | } | ||
1915 | |||
1916 | if (pt->synth_opts.calls) | ||
1917 | pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | | ||
1918 | PERF_IP_FLAG_TRACE_END; | ||
1919 | if (pt->synth_opts.returns) | ||
1920 | pt->branches_filter |= PERF_IP_FLAG_RETURN | | ||
1921 | PERF_IP_FLAG_TRACE_BEGIN; | ||
1922 | |||
1923 | if (pt->synth_opts.callchain && !symbol_conf.use_callchain) { | ||
1924 | symbol_conf.use_callchain = true; | ||
1925 | if (callchain_register_param(&callchain_param) < 0) { | ||
1926 | symbol_conf.use_callchain = false; | ||
1927 | pt->synth_opts.callchain = false; | ||
1928 | } | ||
1929 | } | ||
1930 | |||
1931 | err = intel_pt_synth_events(pt, session); | ||
1932 | if (err) | ||
1933 | goto err_delete_thread; | ||
1934 | |||
1935 | err = auxtrace_queues__process_index(&pt->queues, session); | ||
1936 | if (err) | ||
1937 | goto err_delete_thread; | ||
1938 | |||
1939 | if (pt->queues.populated) | ||
1940 | pt->data_queued = true; | ||
1941 | |||
1942 | if (pt->timeless_decoding) | ||
1943 | pr_debug2("Intel PT decoding without timestamps\n"); | ||
1944 | |||
1945 | return 0; | ||
1946 | |||
1947 | err_delete_thread: | ||
1948 | thread__delete(pt->unknown_thread); | ||
1949 | err_free_queues: | ||
1950 | intel_pt_log_disable(); | ||
1951 | auxtrace_queues__free(&pt->queues); | ||
1952 | session->auxtrace = NULL; | ||
1953 | err_free: | ||
1954 | free(pt); | ||
1955 | return err; | ||
1956 | } | ||
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h new file mode 100644 index 000000000000..0065949df693 --- /dev/null +++ b/tools/perf/util/intel-pt.h | |||
@@ -0,0 +1,56 @@ | |||
1 | /* | ||
2 | * intel_pt.h: Intel Processor Trace support | ||
3 | * Copyright (c) 2013-2015, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #ifndef INCLUDE__PERF_INTEL_PT_H__ | ||
17 | #define INCLUDE__PERF_INTEL_PT_H__ | ||
18 | |||
19 | #define INTEL_PT_PMU_NAME "intel_pt" | ||
20 | |||
21 | enum { | ||
22 | INTEL_PT_PMU_TYPE, | ||
23 | INTEL_PT_TIME_SHIFT, | ||
24 | INTEL_PT_TIME_MULT, | ||
25 | INTEL_PT_TIME_ZERO, | ||
26 | INTEL_PT_CAP_USER_TIME_ZERO, | ||
27 | INTEL_PT_TSC_BIT, | ||
28 | INTEL_PT_NORETCOMP_BIT, | ||
29 | INTEL_PT_HAVE_SCHED_SWITCH, | ||
30 | INTEL_PT_SNAPSHOT_MODE, | ||
31 | INTEL_PT_PER_CPU_MMAPS, | ||
32 | INTEL_PT_MTC_BIT, | ||
33 | INTEL_PT_MTC_FREQ_BITS, | ||
34 | INTEL_PT_TSC_CTC_N, | ||
35 | INTEL_PT_TSC_CTC_D, | ||
36 | INTEL_PT_CYC_BIT, | ||
37 | INTEL_PT_AUXTRACE_PRIV_MAX, | ||
38 | }; | ||
39 | |||
40 | #define INTEL_PT_AUXTRACE_PRIV_SIZE (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) | ||
41 | |||
42 | struct auxtrace_record; | ||
43 | struct perf_tool; | ||
44 | union perf_event; | ||
45 | struct perf_session; | ||
46 | struct perf_event_attr; | ||
47 | struct perf_pmu; | ||
48 | |||
49 | struct auxtrace_record *intel_pt_recording_init(int *err); | ||
50 | |||
51 | int intel_pt_process_auxtrace_info(union perf_event *event, | ||
52 | struct perf_session *session); | ||
53 | |||
54 | struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu); | ||
55 | |||
56 | #endif | ||
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c new file mode 100644 index 000000000000..4f6a4780bd5f --- /dev/null +++ b/tools/perf/util/llvm-utils.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> | ||
3 | * Copyright (C) 2015, Huawei Inc. | ||
4 | */ | ||
5 | |||
6 | #include <stdio.h> | ||
7 | #include <sys/utsname.h> | ||
8 | #include "util.h" | ||
9 | #include "debug.h" | ||
10 | #include "llvm-utils.h" | ||
11 | #include "cache.h" | ||
12 | |||
13 | #define CLANG_BPF_CMD_DEFAULT_TEMPLATE \ | ||
14 | "$CLANG_EXEC -D__KERNEL__ $CLANG_OPTIONS " \ | ||
15 | "$KERNEL_INC_OPTIONS -Wno-unused-value " \ | ||
16 | "-Wno-pointer-sign -working-directory " \ | ||
17 | "$WORKING_DIR -c \"$CLANG_SOURCE\" -target bpf -O2 -o -" | ||
18 | |||
19 | struct llvm_param llvm_param = { | ||
20 | .clang_path = "clang", | ||
21 | .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE, | ||
22 | .clang_opt = NULL, | ||
23 | .kbuild_dir = NULL, | ||
24 | .kbuild_opts = NULL, | ||
25 | .user_set_param = false, | ||
26 | }; | ||
27 | |||
28 | int perf_llvm_config(const char *var, const char *value) | ||
29 | { | ||
30 | if (prefixcmp(var, "llvm.")) | ||
31 | return 0; | ||
32 | var += sizeof("llvm.") - 1; | ||
33 | |||
34 | if (!strcmp(var, "clang-path")) | ||
35 | llvm_param.clang_path = strdup(value); | ||
36 | else if (!strcmp(var, "clang-bpf-cmd-template")) | ||
37 | llvm_param.clang_bpf_cmd_template = strdup(value); | ||
38 | else if (!strcmp(var, "clang-opt")) | ||
39 | llvm_param.clang_opt = strdup(value); | ||
40 | else if (!strcmp(var, "kbuild-dir")) | ||
41 | llvm_param.kbuild_dir = strdup(value); | ||
42 | else if (!strcmp(var, "kbuild-opts")) | ||
43 | llvm_param.kbuild_opts = strdup(value); | ||
44 | else | ||
45 | return -1; | ||
46 | llvm_param.user_set_param = true; | ||
47 | return 0; | ||
48 | } | ||
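/*
 * For reference, a sketch (values purely illustrative) of the
 * ~/.perfconfig section consumed by perf_llvm_config() above:
 *
 *	[llvm]
 *		clang-path = /usr/local/bin/clang
 *		clang-opt = -g
 *		kbuild-dir = /lib/modules/4.2.0/build
 *
 * Any recognized key overrides the corresponding llvm_param field and
 * sets user_set_param to true.
 */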
49 | |||
50 | static int | ||
51 | search_program(const char *def, const char *name, | ||
52 | char *output) | ||
53 | { | ||
54 | char *env, *path, *tmp = NULL; | ||
55 | char buf[PATH_MAX]; | ||
56 | int ret; | ||
57 | |||
58 | output[0] = '\0'; | ||
59 | if (def && def[0] != '\0') { | ||
60 | if (def[0] == '/') { | ||
61 | if (access(def, F_OK) == 0) { | ||
62 | strlcpy(output, def, PATH_MAX); | ||
63 | return 0; | ||
64 | } | ||
65 | } else if (def[0] != '\0') | ||
66 | name = def; | ||
67 | } | ||
68 | |||
69 | env = getenv("PATH"); | ||
70 | if (!env) | ||
71 | return -1; | ||
72 | env = strdup(env); | ||
73 | if (!env) | ||
74 | return -1; | ||
75 | |||
76 | ret = -ENOENT; | ||
77 | path = strtok_r(env, ":", &tmp); | ||
78 | while (path) { | ||
79 | scnprintf(buf, sizeof(buf), "%s/%s", path, name); | ||
80 | if (access(buf, F_OK) == 0) { | ||
81 | strlcpy(output, buf, PATH_MAX); | ||
82 | ret = 0; | ||
83 | break; | ||
84 | } | ||
85 | path = strtok_r(NULL, ":", &tmp); | ||
86 | } | ||
87 | |||
88 | free(env); | ||
89 | return ret; | ||
90 | } | ||
91 | |||
92 | #define READ_SIZE 4096 | ||
93 | static int | ||
94 | read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz) | ||
95 | { | ||
96 | int err = 0; | ||
97 | void *buf = NULL; | ||
98 | FILE *file = NULL; | ||
99 | size_t read_sz = 0, buf_sz = 0; | ||
100 | |||
101 | file = popen(cmd, "r"); | ||
102 | if (!file) { | ||
103 | pr_err("ERROR: unable to popen cmd: %s\n", | ||
104 | strerror(errno)); | ||
105 | return -EINVAL; | ||
106 | } | ||
107 | |||
108 | while (!feof(file) && !ferror(file)) { | ||
109 | /* | ||
110 | * Make buf_sz always have one byte of extra space so we | ||
111 | * can put '\0' there. | ||
112 | */ | ||
113 | if (buf_sz - read_sz < READ_SIZE + 1) { | ||
114 | void *new_buf; | ||
115 | |||
116 | buf_sz = read_sz + READ_SIZE + 1; | ||
117 | new_buf = realloc(buf, buf_sz); | ||
118 | |||
119 | if (!new_buf) { | ||
120 | pr_err("ERROR: failed to realloc memory\n"); | ||
121 | err = -ENOMEM; | ||
122 | goto errout; | ||
123 | } | ||
124 | |||
125 | buf = new_buf; | ||
126 | } | ||
127 | read_sz += fread(buf + read_sz, 1, READ_SIZE, file); | ||
128 | } | ||
129 | |||
130 | if (buf_sz - read_sz < 1) { | ||
131 | pr_err("ERROR: internal error\n"); | ||
132 | err = -EINVAL; | ||
133 | goto errout; | ||
134 | } | ||
135 | |||
136 | if (ferror(file)) { | ||
137 | pr_err("ERROR: error occurred when reading from pipe: %s\n", | ||
138 | strerror(errno)); | ||
139 | err = -EIO; | ||
140 | goto errout; | ||
141 | } | ||
142 | |||
143 | err = WEXITSTATUS(pclose(file)); | ||
144 | file = NULL; | ||
145 | if (err) { | ||
146 | err = -EINVAL; | ||
147 | goto errout; | ||
148 | } | ||
149 | |||
150 | /* | ||
151 | * If buf is a string, give it a terminating '\0' to make our life | ||
152 | * easier. If buf is not a string, that '\0' lies beyond the size | ||
153 | * indicated by read_sz, so the caller won't even notice it. | ||
154 | */ | ||
155 | ((char *)buf)[read_sz] = '\0'; | ||
156 | |||
157 | if (!p_buf) | ||
158 | free(buf); | ||
159 | else | ||
160 | *p_buf = buf; | ||
161 | |||
162 | if (p_read_sz) | ||
163 | *p_read_sz = read_sz; | ||
164 | return 0; | ||
165 | |||
166 | errout: | ||
167 | if (file) | ||
168 | pclose(file); | ||
169 | free(buf); | ||
170 | if (p_buf) | ||
171 | *p_buf = NULL; | ||
172 | if (p_read_sz) | ||
173 | *p_read_sz = 0; | ||
174 | return err; | ||
175 | } | ||
176 | |||
177 | static inline void | ||
178 | force_set_env(const char *var, const char *value) | ||
179 | { | ||
180 | if (value) { | ||
181 | setenv(var, value, 1); | ||
182 | pr_debug("set env: %s=%s\n", var, value); | ||
183 | } else { | ||
184 | unsetenv(var); | ||
185 | pr_debug("unset env: %s\n", var); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | static void | ||
190 | version_notice(void) | ||
191 | { | ||
192 | pr_err( | ||
193 | " \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n" | ||
194 | " \tYou may want to try git trunk:\n" | ||
195 | " \t\tgit clone http://llvm.org/git/llvm.git\n" | ||
196 | " \t\t and\n" | ||
197 | " \t\tgit clone http://llvm.org/git/clang.git\n\n" | ||
198 | " \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n" | ||
199 | " \tdebian/ubuntu:\n" | ||
200 | " \t\thttp://llvm.org/apt\n\n" | ||
201 | " \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n" | ||
202 | " \toption in [llvm] section of ~/.perfconfig to:\n\n" | ||
203 | " \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS \\\n" | ||
204 | " \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n" | ||
205 | " \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n" | ||
206 | " \t(Replace /path/to/llc with path to your llc)\n\n" | ||
207 | ); | ||
208 | } | ||
209 | |||
210 | static int detect_kbuild_dir(char **kbuild_dir) | ||
211 | { | ||
212 | const char *test_dir = llvm_param.kbuild_dir; | ||
213 | const char *prefix_dir = ""; | ||
214 | const char *suffix_dir = ""; | ||
215 | |||
216 | char *autoconf_path; | ||
217 | struct utsname utsname; | ||
218 | |||
219 | int err; | ||
220 | |||
221 | if (!test_dir) { | ||
222 | err = uname(&utsname); | ||
223 | if (err) { | ||
224 | pr_warning("uname failed: %s\n", strerror(errno)); | ||
225 | return -EINVAL; | ||
226 | } | ||
227 | |||
228 | test_dir = utsname.release; | ||
229 | prefix_dir = "/lib/modules/"; | ||
230 | suffix_dir = "/build"; | ||
231 | } | ||
232 | |||
233 | err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h", | ||
234 | prefix_dir, test_dir, suffix_dir); | ||
235 | if (err < 0) | ||
236 | return -ENOMEM; | ||
237 | |||
238 | if (access(autoconf_path, R_OK) == 0) { | ||
239 | free(autoconf_path); | ||
240 | |||
241 | err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir, | ||
242 | suffix_dir); | ||
243 | if (err < 0) | ||
244 | return -ENOMEM; | ||
245 | return 0; | ||
246 | } | ||
247 | free(autoconf_path); | ||
248 | return -ENOENT; | ||
249 | } | ||
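/*
 * Worked example (release string illustrative): with no kbuild-dir
 * configured and uname reporting "4.2.0", the path tested is
 * "/lib/modules/4.2.0/build/include/generated/autoconf.h"; if it is
 * readable, *kbuild_dir becomes "/lib/modules/4.2.0/build".
 */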
250 | |||
251 | static const char *kinc_fetch_script = | ||
252 | "#!/usr/bin/env sh\n" | ||
253 | "if ! test -d \"$KBUILD_DIR\"\n" | ||
254 | "then\n" | ||
255 | " exit -1\n" | ||
256 | "fi\n" | ||
257 | "if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n" | ||
258 | "then\n" | ||
259 | " exit -1\n" | ||
260 | "fi\n" | ||
261 | "TMPDIR=`mktemp -d`\n" | ||
262 | "if test -z \"$TMPDIR\"\n" | ||
263 | "then\n" | ||
264 | " exit -1\n" | ||
265 | "fi\n" | ||
266 | "cat << EOF > $TMPDIR/Makefile\n" | ||
267 | "obj-y := dummy.o\n" | ||
268 | "\\$(obj)/%.o: \\$(src)/%.c\n" | ||
269 | "\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n" | ||
270 | "EOF\n" | ||
271 | "touch $TMPDIR/dummy.c\n" | ||
272 | "make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n" | ||
273 | "RET=$?\n" | ||
274 | "rm -rf $TMPDIR\n" | ||
275 | "exit $RET\n"; | ||
276 | |||
277 | static inline void | ||
278 | get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts) | ||
279 | { | ||
280 | int err; | ||
281 | |||
282 | if (!kbuild_dir || !kbuild_include_opts) | ||
283 | return; | ||
284 | |||
285 | *kbuild_dir = NULL; | ||
286 | *kbuild_include_opts = NULL; | ||
287 | |||
288 | if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) { | ||
289 | pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n"); | ||
290 | pr_debug("Skip kbuild options detection.\n"); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | err = detect_kbuild_dir(kbuild_dir); | ||
295 | if (err) { | ||
296 | pr_warning( | ||
297 | "WARNING:\tunable to get correct kernel building directory.\n" | ||
298 | "Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n" | ||
299 | " \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n" | ||
300 | " \tdetection.\n\n"); | ||
301 | return; | ||
302 | } | ||
303 | |||
304 | pr_debug("Kernel build dir is set to %s\n", *kbuild_dir); | ||
305 | force_set_env("KBUILD_DIR", *kbuild_dir); | ||
306 | force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts); | ||
307 | err = read_from_pipe(kinc_fetch_script, | ||
308 | (void **)kbuild_include_opts, | ||
309 | NULL); | ||
310 | if (err) { | ||
311 | pr_warning( | ||
312 | "WARNING:\tunable to get kernel include directories from '%s'\n" | ||
313 | "Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n" | ||
314 | " \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n" | ||
315 | " \toption in [llvm] to \"\" to suppress this detection.\n\n", | ||
316 | *kbuild_dir); | ||
317 | |||
318 | free(*kbuild_dir); | ||
319 | *kbuild_dir = NULL; | ||
320 | return; | ||
321 | } | ||
322 | |||
323 | pr_debug("include option is set to %s\n", *kbuild_include_opts); | ||
324 | } | ||
325 | |||
326 | int llvm__compile_bpf(const char *path, void **p_obj_buf, | ||
327 | size_t *p_obj_buf_sz) | ||
328 | { | ||
329 | int err; | ||
330 | char clang_path[PATH_MAX]; | ||
331 | const char *clang_opt = llvm_param.clang_opt; | ||
332 | const char *template = llvm_param.clang_bpf_cmd_template; | ||
333 | char *kbuild_dir = NULL, *kbuild_include_opts = NULL; | ||
334 | void *obj_buf = NULL; | ||
335 | size_t obj_buf_sz; | ||
336 | |||
337 | if (!template) | ||
338 | template = CLANG_BPF_CMD_DEFAULT_TEMPLATE; | ||
339 | |||
340 | err = search_program(llvm_param.clang_path, | ||
341 | "clang", clang_path); | ||
342 | if (err) { | ||
343 | pr_err( | ||
344 | "ERROR:\tunable to find clang.\n" | ||
345 | "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n" | ||
346 | " \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n"); | ||
347 | version_notice(); | ||
348 | return -ENOENT; | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * This is optional. Even if it fails we can continue our | ||
353 | * work, so there is no need to check the error return. | ||
354 | */ | ||
355 | get_kbuild_opts(&kbuild_dir, &kbuild_include_opts); | ||
356 | |||
357 | force_set_env("CLANG_EXEC", clang_path); | ||
358 | force_set_env("CLANG_OPTIONS", clang_opt); | ||
359 | force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts); | ||
360 | force_set_env("WORKING_DIR", kbuild_dir ? : "."); | ||
361 | |||
362 | /* | ||
363 | * Since we may reset clang's working dir, the path of the source | ||
364 | * file should be converted to an absolute path, except when we | ||
365 | * want stdin to be the source file (testing). | ||
366 | */ | ||
367 | force_set_env("CLANG_SOURCE", | ||
368 | (path[0] == '-') ? path : | ||
369 | make_nonrelative_path(path)); | ||
370 | |||
371 | pr_debug("llvm compiling command template: %s\n", template); | ||
372 | err = read_from_pipe(template, &obj_buf, &obj_buf_sz); | ||
373 | if (err) { | ||
374 | pr_err("ERROR:\tunable to compile %s\n", path); | ||
375 | pr_err("Hint:\tCheck error message shown above.\n"); | ||
376 | pr_err("Hint:\tYou can also pre-compile it into .o using:\n"); | ||
377 | pr_err(" \t\tclang -target bpf -O2 -c %s\n", path); | ||
378 | pr_err(" \twith proper -I and -D options.\n"); | ||
379 | goto errout; | ||
380 | } | ||
381 | |||
382 | free(kbuild_dir); | ||
383 | free(kbuild_include_opts); | ||
384 | if (!p_obj_buf) | ||
385 | free(obj_buf); | ||
386 | else | ||
387 | *p_obj_buf = obj_buf; | ||
388 | |||
389 | if (p_obj_buf_sz) | ||
390 | *p_obj_buf_sz = obj_buf_sz; | ||
391 | return 0; | ||
392 | errout: | ||
393 | free(kbuild_dir); | ||
394 | free(kbuild_include_opts); | ||
395 | free(obj_buf); | ||
396 | if (p_obj_buf) | ||
397 | *p_obj_buf = NULL; | ||
398 | if (p_obj_buf_sz) | ||
399 | *p_obj_buf_sz = 0; | ||
400 | return err; | ||
401 | } | ||
402 | |||
403 | int llvm__search_clang(void) | ||
404 | { | ||
405 | char clang_path[PATH_MAX]; | ||
406 | |||
407 | return search_program(llvm_param.clang_path, "clang", clang_path); | ||
408 | } | ||
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h new file mode 100644 index 000000000000..5b3cf1c229e2 --- /dev/null +++ b/tools/perf/util/llvm-utils.h | |||
@@ -0,0 +1,49 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com> | ||
3 | * Copyright (C) 2015, Huawei Inc. | ||
4 | */ | ||
5 | #ifndef __LLVM_UTILS_H | ||
6 | #define __LLVM_UTILS_H | ||
7 | |||
8 | #include "debug.h" | ||
9 | |||
10 | struct llvm_param { | ||
11 | /* Path of clang executable */ | ||
12 | const char *clang_path; | ||
13 | /* | ||
14 | * Template of the clang command for compiling BPF. 5 environment | ||
15 | * variables can be used: | ||
16 | * $CLANG_EXEC: Path to clang. | ||
17 | * $CLANG_OPTIONS: Extra options to clang. | ||
18 | * $KERNEL_INC_OPTIONS: Kernel include directories. | ||
19 | * $WORKING_DIR: Kernel source directory. | ||
20 | * $CLANG_SOURCE: Source file to be compiled. | ||
21 | */ | ||
22 | const char *clang_bpf_cmd_template; | ||
23 | /* Will be filled in $CLANG_OPTIONS */ | ||
24 | const char *clang_opt; | ||
25 | /* Where to find kbuild system */ | ||
26 | const char *kbuild_dir; | ||
27 | /* | ||
28 | * Arguments passed to make, like 'ARCH=arm' if doing cross | ||
29 | * compiling. Should not be used for dynamic compiling. | ||
30 | */ | ||
31 | const char *kbuild_opts; | ||
32 | /* | ||
33 | * Default is false. If one of the above fields is set by the user | ||
34 | * explicitly then user_set_param is set to true. This is used | ||
35 | * by 'perf test': if the user doesn't set anything in .perfconfig | ||
36 | * and clang is not found, don't trigger the llvm test. | ||
37 | */ | ||
38 | bool user_set_param; | ||
39 | }; | ||
40 | |||
41 | extern struct llvm_param llvm_param; | ||
42 | extern int perf_llvm_config(const char *var, const char *value); | ||
43 | |||
44 | extern int llvm__compile_bpf(const char *path, void **p_obj_buf, | ||
45 | size_t *p_obj_buf_sz); | ||
46 | |||
47 | /* This function is for test__llvm() use only */ | ||
48 | extern int llvm__search_clang(void); | ||
49 | #endif | ||
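As a usage illustration of the interface declared above, here is a minimal, hypothetical caller sketch; it is not part of this patch, and the function name compile_example is an assumption for illustration only:

#include <stdlib.h>
#include "llvm-utils.h"

/* Compile a BPF C source file into an in-memory object buffer. */
static int compile_example(const char *path)
{
	void *obj_buf = NULL;
	size_t obj_buf_sz = 0;
	int err;

	err = llvm__compile_bpf(path, &obj_buf, &obj_buf_sz);
	if (err)
		return err;

	/* obj_buf now holds obj_buf_sz bytes of BPF object code */
	free(obj_buf);
	return 0;
}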
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7ff682770fdb..6309f7ceb08f 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c | |||
@@ -250,7 +250,7 @@ struct machine *machines__findnew(struct machines *machines, pid_t pid) | |||
250 | static struct strlist *seen; | 250 | static struct strlist *seen; |
251 | 251 | ||
252 | if (!seen) | 252 | if (!seen) |
253 | seen = strlist__new(true, NULL); | 253 | seen = strlist__new(NULL, NULL); |
254 | 254 | ||
255 | if (!strlist__has_entry(seen, path)) { | 255 | if (!strlist__has_entry(seen, path)) { |
256 | pr_err("Can't access file %s\n", path); | 256 | pr_err("Can't access file %s\n", path); |
@@ -550,6 +550,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused, | |||
550 | return 0; | 550 | return 0; |
551 | } | 551 | } |
552 | 552 | ||
553 | int machine__process_switch_event(struct machine *machine __maybe_unused, | ||
554 | union perf_event *event) | ||
555 | { | ||
556 | if (dump_trace) | ||
557 | perf_event__fprintf_switch(event, stdout); | ||
558 | return 0; | ||
559 | } | ||
560 | |||
553 | struct map *machine__findnew_module_map(struct machine *machine, u64 start, | 561 | struct map *machine__findnew_module_map(struct machine *machine, u64 start, |
554 | const char *filename) | 562 | const char *filename) |
555 | { | 563 | { |
@@ -1387,6 +1395,24 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event | |||
1387 | event->fork.ptid); | 1395 | event->fork.ptid); |
1388 | int err = 0; | 1396 | int err = 0; |
1389 | 1397 | ||
1398 | if (dump_trace) | ||
1399 | perf_event__fprintf_task(event, stdout); | ||
1400 | |||
1401 | /* | ||
1402 | * There may be an existing thread that is not actually the parent, | ||
1403 | * either because we are processing events out of order, or because the | ||
1404 | * (fork) event that would have removed the thread was lost. Assume the | ||
1405 | * latter case and continue on as best we can. | ||
1406 | */ | ||
1407 | if (parent->pid_ != (pid_t)event->fork.ppid) { | ||
1408 | dump_printf("removing erroneous parent thread %d/%d\n", | ||
1409 | parent->pid_, parent->tid); | ||
1410 | machine__remove_thread(machine, parent); | ||
1411 | thread__put(parent); | ||
1412 | parent = machine__findnew_thread(machine, event->fork.ppid, | ||
1413 | event->fork.ptid); | ||
1414 | } | ||
1415 | |||
1390 | /* if a thread currently exists for the thread id remove it */ | 1416 | /* if a thread currently exists for the thread id remove it */ |
1391 | if (thread != NULL) { | 1417 | if (thread != NULL) { |
1392 | machine__remove_thread(machine, thread); | 1418 | machine__remove_thread(machine, thread); |
@@ -1395,8 +1421,6 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event | |||
1395 | 1421 | ||
1396 | thread = machine__findnew_thread(machine, event->fork.pid, | 1422 | thread = machine__findnew_thread(machine, event->fork.pid, |
1397 | event->fork.tid); | 1423 | event->fork.tid); |
1398 | if (dump_trace) | ||
1399 | perf_event__fprintf_task(event, stdout); | ||
1400 | 1424 | ||
1401 | if (thread == NULL || parent == NULL || | 1425 | if (thread == NULL || parent == NULL || |
1402 | thread__fork(thread, parent, sample->time) < 0) { | 1426 | thread__fork(thread, parent, sample->time) < 0) { |
@@ -1451,6 +1475,9 @@ int machine__process_event(struct machine *machine, union perf_event *event, | |||
1451 | ret = machine__process_itrace_start_event(machine, event); break; | 1475 | ret = machine__process_itrace_start_event(machine, event); break; |
1452 | case PERF_RECORD_LOST_SAMPLES: | 1476 | case PERF_RECORD_LOST_SAMPLES: |
1453 | ret = machine__process_lost_samples_event(machine, event, sample); break; | 1477 | ret = machine__process_lost_samples_event(machine, event, sample); break; |
1478 | case PERF_RECORD_SWITCH: | ||
1479 | case PERF_RECORD_SWITCH_CPU_WIDE: | ||
1480 | ret = machine__process_switch_event(machine, event); break; | ||
1454 | default: | 1481 | default: |
1455 | ret = -1; | 1482 | ret = -1; |
1456 | break; | 1483 | break; |
@@ -1993,3 +2020,17 @@ struct dso *machine__findnew_dso(struct machine *machine, const char *filename) | |||
1993 | { | 2020 | { |
1994 | return dsos__findnew(&machine->dsos, filename); | 2021 | return dsos__findnew(&machine->dsos, filename); |
1995 | } | 2022 | } |
2023 | |||
2024 | char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp) | ||
2025 | { | ||
2026 | struct machine *machine = vmachine; | ||
2027 | struct map *map; | ||
2028 | struct symbol *sym = map_groups__find_symbol(&machine->kmaps, MAP__FUNCTION, *addrp, &map, NULL); | ||
2029 | |||
2030 | if (sym == NULL) | ||
2031 | return NULL; | ||
2032 | |||
2033 | *modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL; | ||
2034 | *addrp = map->unmap_ip(map, sym->start); | ||
2035 | return sym->name; | ||
2036 | } | ||
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 887798e511e9..ea5cb4a621db 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h | |||
@@ -87,6 +87,8 @@ int machine__process_aux_event(struct machine *machine, | |||
87 | union perf_event *event); | 87 | union perf_event *event); |
88 | int machine__process_itrace_start_event(struct machine *machine, | 88 | int machine__process_itrace_start_event(struct machine *machine, |
89 | union perf_event *event); | 89 | union perf_event *event); |
90 | int machine__process_switch_event(struct machine *machine __maybe_unused, | ||
91 | union perf_event *event); | ||
90 | int machine__process_mmap_event(struct machine *machine, union perf_event *event, | 92 | int machine__process_mmap_event(struct machine *machine, union perf_event *event, |
91 | struct perf_sample *sample); | 93 | struct perf_sample *sample); |
92 | int machine__process_mmap2_event(struct machine *machine, union perf_event *event, | 94 | int machine__process_mmap2_event(struct machine *machine, union perf_event *event, |
@@ -237,5 +239,9 @@ int machine__synthesize_threads(struct machine *machine, struct target *target, | |||
237 | pid_t machine__get_current_tid(struct machine *machine, int cpu); | 239 | pid_t machine__get_current_tid(struct machine *machine, int cpu); |
238 | int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, | 240 | int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, |
239 | pid_t tid); | 241 | pid_t tid); |
242 | /* | ||
243 | * For use with libtraceevent's pevent_set_function_resolver() | ||
244 | */ | ||
245 | char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp); | ||
240 | 246 | ||
241 | #endif /* __PERF_MACHINE_H */ | 247 | #endif /* __PERF_MACHINE_H */ |
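
Note: machine__resolve_kernel_addr() has exactly the shape libtraceevent expects from a function resolver (a void * cookie, an address to adjust, and a module-name out parameter). A hedged sketch of registering it, assuming libtraceevent's pevent_set_function_resolver() of the same era and a pevent/machine pair supplied by the caller:

/* Sketch: let libtraceevent print kernel addresses as symbol names by
 * installing the resolver above.  The include path and the exact
 * pevent_set_function_resolver() signature are assumptions about the
 * contemporary libtraceevent API.
 */
#include <traceevent/event-parse.h>
#include "util/machine.h"

static int setup_kernel_resolver(struct pevent *pevent, struct machine *machine)
{
	/* 'machine' is handed back to the callback as its void * argument */
	return pevent_set_function_resolver(pevent,
					    machine__resolve_kernel_addr,
					    machine);
}
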
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index b5a5e9c02437..b1c475d9b240 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c | |||
@@ -224,6 +224,20 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) | |||
224 | return map; | 224 | return map; |
225 | } | 225 | } |
226 | 226 | ||
227 | /* | ||
228 | * Use this and __map__is_kmodule() for map instances that are in | ||
229 | * machine->kmaps, and thus have map->groups->machine all properly set, to | ||
230 | * disambiguate between the kernel and modules. | ||
231 | * | ||
232 | * When the need arises, introduce map__is_{kernel,kmodule}() that | ||
233 | * checks (map->groups != NULL && map->groups->machine != NULL && | ||
234 | * map->dso->kernel) before calling __map__is_{kernel,kmodule}(). | ||
235 | */ | ||
236 | bool __map__is_kernel(const struct map *map) | ||
237 | { | ||
238 | return map->groups->machine->vmlinux_maps[map->type] == map; | ||
239 | } | ||
240 | |||
227 | static void map__exit(struct map *map) | 241 | static void map__exit(struct map *map) |
228 | { | 242 | { |
229 | BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); | 243 | BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); |
@@ -334,9 +348,18 @@ struct symbol *map__find_symbol_by_name(struct map *map, const char *name, | |||
334 | return dso__find_symbol_by_name(map->dso, map->type, name); | 348 | return dso__find_symbol_by_name(map->dso, map->type, name); |
335 | } | 349 | } |
336 | 350 | ||
337 | struct map *map__clone(struct map *map) | 351 | struct map *map__clone(struct map *from) |
338 | { | 352 | { |
339 | return memdup(map, sizeof(*map)); | 353 | struct map *map = memdup(from, sizeof(*map)); |
354 | |||
355 | if (map != NULL) { | ||
356 | atomic_set(&map->refcnt, 1); | ||
357 | RB_CLEAR_NODE(&map->rb_node); | ||
358 | dso__get(map->dso); | ||
359 | map->groups = NULL; | ||
360 | } | ||
361 | |||
362 | return map; | ||
340 | } | 363 | } |
341 | 364 | ||
342 | int map__overlap(struct map *l, struct map *r) | 365 | int map__overlap(struct map *l, struct map *r) |
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index d73e687b224e..57829e89b78b 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h | |||
@@ -256,4 +256,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, | |||
256 | struct map *map_groups__find_by_name(struct map_groups *mg, | 256 | struct map *map_groups__find_by_name(struct map_groups *mg, |
257 | enum map_type type, const char *name); | 257 | enum map_type type, const char *name); |
258 | 258 | ||
259 | bool __map__is_kernel(const struct map *map); | ||
260 | |||
261 | static inline bool __map__is_kmodule(const struct map *map) | ||
262 | { | ||
263 | return !__map__is_kernel(map); | ||
264 | } | ||
265 | |||
259 | #endif /* __PERF_MAP_H */ | 266 | #endif /* __PERF_MAP_H */ |
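
Note: __map__is_kernel()/__map__is_kmodule() only make sense for maps already living in machine->kmaps, as the comment in map.c stresses. A small hedged sketch of using them, assuming the map was obtained from kmaps so map->groups->machine is valid:

/* Sketch: classify a kmaps-resident map, mirroring the way
 * machine__resolve_kernel_addr() decides whether to report a module name.
 */
#include <stdio.h>
#include "util/map.h"

static void describe_kmap(const struct map *map)
{
	if (__map__is_kernel(map))
		printf("%s: main kernel map\n", map->dso->short_name);
	else	/* i.e. __map__is_kmodule(map) */
		printf("%s: module map\n", map->dso->short_name);
}
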
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 52be201b9b25..b1b9e2385f4b 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c | |||
@@ -220,6 +220,9 @@ static int __ordered_events__flush(struct ordered_events *oe) | |||
220 | else if (last_ts <= limit) | 220 | else if (last_ts <= limit) |
221 | oe->last = list_entry(head->prev, struct ordered_event, list); | 221 | oe->last = list_entry(head->prev, struct ordered_event, list); |
222 | 222 | ||
223 | if (show_progress) | ||
224 | ui_progress__finish(); | ||
225 | |||
223 | return 0; | 226 | return 0; |
224 | } | 227 | } |
225 | 228 | ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 09f8d2357108..21ed6ee63da9 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c | |||
@@ -276,7 +276,8 @@ const char *event_type(int type) | |||
276 | static struct perf_evsel * | 276 | static struct perf_evsel * |
277 | __add_event(struct list_head *list, int *idx, | 277 | __add_event(struct list_head *list, int *idx, |
278 | struct perf_event_attr *attr, | 278 | struct perf_event_attr *attr, |
279 | char *name, struct cpu_map *cpus) | 279 | char *name, struct cpu_map *cpus, |
280 | struct list_head *config_terms) | ||
280 | { | 281 | { |
281 | struct perf_evsel *evsel; | 282 | struct perf_evsel *evsel; |
282 | 283 | ||
@@ -286,19 +287,24 @@ __add_event(struct list_head *list, int *idx, | |||
286 | if (!evsel) | 287 | if (!evsel) |
287 | return NULL; | 288 | return NULL; |
288 | 289 | ||
289 | if (cpus) | 290 | evsel->cpus = cpu_map__get(cpus); |
290 | evsel->cpus = cpu_map__get(cpus); | 291 | evsel->own_cpus = cpu_map__get(cpus); |
291 | 292 | ||
292 | if (name) | 293 | if (name) |
293 | evsel->name = strdup(name); | 294 | evsel->name = strdup(name); |
295 | |||
296 | if (config_terms) | ||
297 | list_splice(config_terms, &evsel->config_terms); | ||
298 | |||
294 | list_add_tail(&evsel->node, list); | 299 | list_add_tail(&evsel->node, list); |
295 | return evsel; | 300 | return evsel; |
296 | } | 301 | } |
297 | 302 | ||
298 | static int add_event(struct list_head *list, int *idx, | 303 | static int add_event(struct list_head *list, int *idx, |
299 | struct perf_event_attr *attr, char *name) | 304 | struct perf_event_attr *attr, char *name, |
305 | struct list_head *config_terms) | ||
300 | { | 306 | { |
301 | return __add_event(list, idx, attr, name, NULL) ? 0 : -ENOMEM; | 307 | return __add_event(list, idx, attr, name, NULL, config_terms) ? 0 : -ENOMEM; |
302 | } | 308 | } |
303 | 309 | ||
304 | static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) | 310 | static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size) |
@@ -377,7 +383,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, | |||
377 | memset(&attr, 0, sizeof(attr)); | 383 | memset(&attr, 0, sizeof(attr)); |
378 | attr.config = cache_type | (cache_op << 8) | (cache_result << 16); | 384 | attr.config = cache_type | (cache_op << 8) | (cache_result << 16); |
379 | attr.type = PERF_TYPE_HW_CACHE; | 385 | attr.type = PERF_TYPE_HW_CACHE; |
380 | return add_event(list, idx, &attr, name); | 386 | return add_event(list, idx, &attr, name, NULL); |
381 | } | 387 | } |
382 | 388 | ||
383 | static int add_tracepoint(struct list_head *list, int *idx, | 389 | static int add_tracepoint(struct list_head *list, int *idx, |
@@ -539,7 +545,7 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, | |||
539 | attr.type = PERF_TYPE_BREAKPOINT; | 545 | attr.type = PERF_TYPE_BREAKPOINT; |
540 | attr.sample_period = 1; | 546 | attr.sample_period = 1; |
541 | 547 | ||
542 | return add_event(list, idx, &attr, NULL); | 548 | return add_event(list, idx, &attr, NULL, NULL); |
543 | } | 549 | } |
544 | 550 | ||
545 | static int check_type_val(struct parse_events_term *term, | 551 | static int check_type_val(struct parse_events_term *term, |
@@ -590,7 +596,9 @@ do { \ | |||
590 | break; | 596 | break; |
591 | case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: | 597 | case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: |
592 | CHECK_TYPE_VAL(NUM); | 598 | CHECK_TYPE_VAL(NUM); |
593 | attr->sample_period = term->val.num; | 599 | break; |
600 | case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: | ||
601 | CHECK_TYPE_VAL(NUM); | ||
594 | break; | 602 | break; |
595 | case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: | 603 | case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE: |
596 | /* | 604 | /* |
@@ -598,6 +606,20 @@ do { \ | |||
598 | * attr->branch_sample_type = term->val.num; | 606 | * attr->branch_sample_type = term->val.num; |
599 | */ | 607 | */ |
600 | break; | 608 | break; |
609 | case PARSE_EVENTS__TERM_TYPE_TIME: | ||
610 | CHECK_TYPE_VAL(NUM); | ||
611 | if (term->val.num > 1) { | ||
612 | err->str = strdup("expected 0 or 1"); | ||
613 | err->idx = term->err_val; | ||
614 | return -EINVAL; | ||
615 | } | ||
616 | break; | ||
617 | case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: | ||
618 | CHECK_TYPE_VAL(STR); | ||
619 | break; | ||
620 | case PARSE_EVENTS__TERM_TYPE_STACKSIZE: | ||
621 | CHECK_TYPE_VAL(NUM); | ||
622 | break; | ||
601 | case PARSE_EVENTS__TERM_TYPE_NAME: | 623 | case PARSE_EVENTS__TERM_TYPE_NAME: |
602 | CHECK_TYPE_VAL(STR); | 624 | CHECK_TYPE_VAL(STR); |
603 | break; | 625 | break; |
@@ -622,22 +644,71 @@ static int config_attr(struct perf_event_attr *attr, | |||
622 | return 0; | 644 | return 0; |
623 | } | 645 | } |
624 | 646 | ||
647 | static int get_config_terms(struct list_head *head_config, | ||
648 | struct list_head *head_terms) | ||
649 | { | ||
650 | #define ADD_CONFIG_TERM(__type, __name, __val) \ | ||
651 | do { \ | ||
652 | struct perf_evsel_config_term *__t; \ | ||
653 | \ | ||
654 | __t = zalloc(sizeof(*__t)); \ | ||
655 | if (!__t) \ | ||
656 | return -ENOMEM; \ | ||
657 | \ | ||
658 | INIT_LIST_HEAD(&__t->list); \ | ||
659 | __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ | ||
660 | __t->val.__name = __val; \ | ||
661 | list_add_tail(&__t->list, head_terms); \ | ||
662 | } while (0) | ||
663 | |||
664 | struct parse_events_term *term; | ||
665 | |||
666 | list_for_each_entry(term, head_config, list) { | ||
667 | switch (term->type_term) { | ||
668 | case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: | ||
669 | ADD_CONFIG_TERM(PERIOD, period, term->val.num); | ||
670 | break; | ||
671 | case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: | ||
672 | ADD_CONFIG_TERM(FREQ, freq, term->val.num); | ||
673 | break; | ||
674 | case PARSE_EVENTS__TERM_TYPE_TIME: | ||
675 | ADD_CONFIG_TERM(TIME, time, term->val.num); | ||
676 | break; | ||
677 | case PARSE_EVENTS__TERM_TYPE_CALLGRAPH: | ||
678 | ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str); | ||
679 | break; | ||
680 | case PARSE_EVENTS__TERM_TYPE_STACKSIZE: | ||
681 | ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num); | ||
682 | break; | ||
683 | default: | ||
684 | break; | ||
685 | } | ||
686 | } | ||
687 | #undef ADD_CONFIG_TERM | ||
688 | return 0; | ||
689 | } | ||
690 | |||
625 | int parse_events_add_numeric(struct parse_events_evlist *data, | 691 | int parse_events_add_numeric(struct parse_events_evlist *data, |
626 | struct list_head *list, | 692 | struct list_head *list, |
627 | u32 type, u64 config, | 693 | u32 type, u64 config, |
628 | struct list_head *head_config) | 694 | struct list_head *head_config) |
629 | { | 695 | { |
630 | struct perf_event_attr attr; | 696 | struct perf_event_attr attr; |
697 | LIST_HEAD(config_terms); | ||
631 | 698 | ||
632 | memset(&attr, 0, sizeof(attr)); | 699 | memset(&attr, 0, sizeof(attr)); |
633 | attr.type = type; | 700 | attr.type = type; |
634 | attr.config = config; | 701 | attr.config = config; |
635 | 702 | ||
636 | if (head_config && | 703 | if (head_config) { |
637 | config_attr(&attr, head_config, data->error)) | 704 | if (config_attr(&attr, head_config, data->error)) |
638 | return -EINVAL; | 705 | return -EINVAL; |
706 | |||
707 | if (get_config_terms(head_config, &config_terms)) | ||
708 | return -ENOMEM; | ||
709 | } | ||
639 | 710 | ||
640 | return add_event(list, &data->idx, &attr, NULL); | 711 | return add_event(list, &data->idx, &attr, NULL, &config_terms); |
641 | } | 712 | } |
642 | 713 | ||
643 | static int parse_events__is_name_term(struct parse_events_term *term) | 714 | static int parse_events__is_name_term(struct parse_events_term *term) |
@@ -664,6 +735,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, | |||
664 | struct perf_pmu_info info; | 735 | struct perf_pmu_info info; |
665 | struct perf_pmu *pmu; | 736 | struct perf_pmu *pmu; |
666 | struct perf_evsel *evsel; | 737 | struct perf_evsel *evsel; |
738 | LIST_HEAD(config_terms); | ||
667 | 739 | ||
668 | pmu = perf_pmu__find(name); | 740 | pmu = perf_pmu__find(name); |
669 | if (!pmu) | 741 | if (!pmu) |
@@ -678,7 +750,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, | |||
678 | 750 | ||
679 | if (!head_config) { | 751 | if (!head_config) { |
680 | attr.type = pmu->type; | 752 | attr.type = pmu->type; |
681 | evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus); | 753 | evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus, NULL); |
682 | return evsel ? 0 : -ENOMEM; | 754 | return evsel ? 0 : -ENOMEM; |
683 | } | 755 | } |
684 | 756 | ||
@@ -692,11 +764,15 @@ int parse_events_add_pmu(struct parse_events_evlist *data, | |||
692 | if (config_attr(&attr, head_config, data->error)) | 764 | if (config_attr(&attr, head_config, data->error)) |
693 | return -EINVAL; | 765 | return -EINVAL; |
694 | 766 | ||
767 | if (get_config_terms(head_config, &config_terms)) | ||
768 | return -ENOMEM; | ||
769 | |||
695 | if (perf_pmu__config(pmu, &attr, head_config, data->error)) | 770 | if (perf_pmu__config(pmu, &attr, head_config, data->error)) |
696 | return -EINVAL; | 771 | return -EINVAL; |
697 | 772 | ||
698 | evsel = __add_event(list, &data->idx, &attr, | 773 | evsel = __add_event(list, &data->idx, &attr, |
699 | pmu_event_name(head_config), pmu->cpus); | 774 | pmu_event_name(head_config), pmu->cpus, |
775 | &config_terms); | ||
700 | if (evsel) { | 776 | if (evsel) { |
701 | evsel->unit = info.unit; | 777 | evsel->unit = info.unit; |
702 | evsel->scale = info.scale; | 778 | evsel->scale = info.scale; |
@@ -1064,9 +1140,13 @@ int parse_events(struct perf_evlist *evlist, const char *str, | |||
1064 | ret = parse_events__scanner(str, &data, PE_START_EVENTS); | 1140 | ret = parse_events__scanner(str, &data, PE_START_EVENTS); |
1065 | perf_pmu__parse_cleanup(); | 1141 | perf_pmu__parse_cleanup(); |
1066 | if (!ret) { | 1142 | if (!ret) { |
1067 | int entries = data.idx - evlist->nr_entries; | 1143 | struct perf_evsel *last; |
1068 | perf_evlist__splice_list_tail(evlist, &data.list, entries); | 1144 | |
1145 | perf_evlist__splice_list_tail(evlist, &data.list); | ||
1069 | evlist->nr_groups += data.nr_groups; | 1146 | evlist->nr_groups += data.nr_groups; |
1147 | last = perf_evlist__last(evlist); | ||
1148 | last->cmdline_group_boundary = true; | ||
1149 | |||
1070 | return 0; | 1150 | return 0; |
1071 | } | 1151 | } |
1072 | 1152 | ||
@@ -1105,7 +1185,7 @@ static void parse_events_print_error(struct parse_events_error *err, | |||
1105 | * Maximum error index indent, we will cut | 1185 | * Maximum error index indent, we will cut |
1106 | * the event string if it's bigger. | 1186 | * the event string if it's bigger. |
1107 | */ | 1187 | */ |
1108 | int max_err_idx = 10; | 1188 | int max_err_idx = 13; |
1109 | 1189 | ||
1110 | /* | 1190 | /* |
1111 | * Let's be specific with the message when | 1191 | * Let's be specific with the message when |
@@ -1162,30 +1242,93 @@ int parse_events_option(const struct option *opt, const char *str, | |||
1162 | return ret; | 1242 | return ret; |
1163 | } | 1243 | } |
1164 | 1244 | ||
1165 | int parse_filter(const struct option *opt, const char *str, | 1245 | static int |
1166 | int unset __maybe_unused) | 1246 | foreach_evsel_in_last_glob(struct perf_evlist *evlist, |
1247 | int (*func)(struct perf_evsel *evsel, | ||
1248 | const void *arg), | ||
1249 | const void *arg) | ||
1167 | { | 1250 | { |
1168 | struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; | ||
1169 | struct perf_evsel *last = NULL; | 1251 | struct perf_evsel *last = NULL; |
1252 | int err; | ||
1170 | 1253 | ||
1171 | if (evlist->nr_entries > 0) | 1254 | if (evlist->nr_entries > 0) |
1172 | last = perf_evlist__last(evlist); | 1255 | last = perf_evlist__last(evlist); |
1173 | 1256 | ||
1174 | if (last == NULL || last->attr.type != PERF_TYPE_TRACEPOINT) { | 1257 | do { |
1258 | err = (*func)(last, arg); | ||
1259 | if (err) | ||
1260 | return -1; | ||
1261 | if (!last) | ||
1262 | return 0; | ||
1263 | |||
1264 | if (last->node.prev == &evlist->entries) | ||
1265 | return 0; | ||
1266 | last = list_entry(last->node.prev, struct perf_evsel, node); | ||
1267 | } while (!last->cmdline_group_boundary); | ||
1268 | |||
1269 | return 0; | ||
1270 | } | ||
1271 | |||
1272 | static int set_filter(struct perf_evsel *evsel, const void *arg) | ||
1273 | { | ||
1274 | const char *str = arg; | ||
1275 | |||
1276 | if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { | ||
1175 | fprintf(stderr, | 1277 | fprintf(stderr, |
1176 | "--filter option should follow a -e tracepoint option\n"); | 1278 | "--filter option should follow a -e tracepoint option\n"); |
1177 | return -1; | 1279 | return -1; |
1178 | } | 1280 | } |
1179 | 1281 | ||
1180 | last->filter = strdup(str); | 1282 | if (perf_evsel__append_filter(evsel, "&&", str) < 0) { |
1181 | if (last->filter == NULL) { | 1283 | fprintf(stderr, |
1182 | fprintf(stderr, "not enough memory to hold filter string\n"); | 1284 | "not enough memory to hold filter string\n"); |
1183 | return -1; | 1285 | return -1; |
1184 | } | 1286 | } |
1185 | 1287 | ||
1186 | return 0; | 1288 | return 0; |
1187 | } | 1289 | } |
1188 | 1290 | ||
1291 | int parse_filter(const struct option *opt, const char *str, | ||
1292 | int unset __maybe_unused) | ||
1293 | { | ||
1294 | struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; | ||
1295 | |||
1296 | return foreach_evsel_in_last_glob(evlist, set_filter, | ||
1297 | (const void *)str); | ||
1298 | } | ||
1299 | |||
1300 | static int add_exclude_perf_filter(struct perf_evsel *evsel, | ||
1301 | const void *arg __maybe_unused) | ||
1302 | { | ||
1303 | char new_filter[64]; | ||
1304 | |||
1305 | if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) { | ||
1306 | fprintf(stderr, | ||
1307 | "--exclude-perf option should follow a -e tracepoint option\n"); | ||
1308 | return -1; | ||
1309 | } | ||
1310 | |||
1311 | snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid()); | ||
1312 | |||
1313 | if (perf_evsel__append_filter(evsel, "&&", new_filter) < 0) { | ||
1314 | fprintf(stderr, | ||
1315 | "not enough memory to hold filter string\n"); | ||
1316 | return -1; | ||
1317 | } | ||
1318 | |||
1319 | return 0; | ||
1320 | } | ||
1321 | |||
1322 | int exclude_perf(const struct option *opt, | ||
1323 | const char *arg __maybe_unused, | ||
1324 | int unset __maybe_unused) | ||
1325 | { | ||
1326 | struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; | ||
1327 | |||
1328 | return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter, | ||
1329 | NULL); | ||
1330 | } | ||
1331 | |||
1189 | static const char * const event_type_descriptors[] = { | 1332 | static const char * const event_type_descriptors[] = { |
1190 | "Hardware event", | 1333 | "Hardware event", |
1191 | "Software event", | 1334 | "Software event", |
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 131f29b2f132..a09b0e210997 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h | |||
@@ -34,6 +34,7 @@ extern int parse_events(struct perf_evlist *evlist, const char *str, | |||
34 | struct parse_events_error *error); | 34 | struct parse_events_error *error); |
35 | extern int parse_events_terms(struct list_head *terms, const char *str); | 35 | extern int parse_events_terms(struct list_head *terms, const char *str); |
36 | extern int parse_filter(const struct option *opt, const char *str, int unset); | 36 | extern int parse_filter(const struct option *opt, const char *str, int unset); |
37 | extern int exclude_perf(const struct option *opt, const char *arg, int unset); | ||
37 | 38 | ||
38 | #define EVENTS_HELP_MAX (128*1024) | 39 | #define EVENTS_HELP_MAX (128*1024) |
39 | 40 | ||
@@ -61,7 +62,11 @@ enum { | |||
61 | PARSE_EVENTS__TERM_TYPE_CONFIG2, | 62 | PARSE_EVENTS__TERM_TYPE_CONFIG2, |
62 | PARSE_EVENTS__TERM_TYPE_NAME, | 63 | PARSE_EVENTS__TERM_TYPE_NAME, |
63 | PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, | 64 | PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD, |
65 | PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ, | ||
64 | PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, | 66 | PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE, |
67 | PARSE_EVENTS__TERM_TYPE_TIME, | ||
68 | PARSE_EVENTS__TERM_TYPE_CALLGRAPH, | ||
69 | PARSE_EVENTS__TERM_TYPE_STACKSIZE, | ||
65 | }; | 70 | }; |
66 | 71 | ||
67 | struct parse_events_term { | 72 | struct parse_events_term { |
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 13cef3c65565..936d566f48d8 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l | |||
@@ -182,7 +182,11 @@ config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } | |||
182 | config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } | 182 | config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } |
183 | name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } | 183 | name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); } |
184 | period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } | 184 | period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); } |
185 | freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); } | ||
185 | branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } | 186 | branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); } |
187 | time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); } | ||
188 | call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); } | ||
189 | stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); } | ||
186 | , { return ','; } | 190 | , { return ','; } |
187 | "/" { BEGIN(INITIAL); return '/'; } | 191 | "/" { BEGIN(INITIAL); return '/'; } |
188 | {name_minus} { return str(yyscanner, PE_NAME); } | 192 | {name_minus} { return str(yyscanner, PE_NAME); } |
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 591905a02b92..9cd70819c795 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y | |||
@@ -255,7 +255,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc | |||
255 | list_add_tail(&term->list, head); | 255 | list_add_tail(&term->list, head); |
256 | 256 | ||
257 | ALLOC_LIST(list); | 257 | ALLOC_LIST(list); |
258 | ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); | 258 | ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); |
259 | parse_events__free_terms(head); | 259 | parse_events__free_terms(head); |
260 | $$ = list; | 260 | $$ = list; |
261 | } | 261 | } |
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c new file mode 100644 index 000000000000..4f2c1c255d81 --- /dev/null +++ b/tools/perf/util/parse-regs-options.c | |||
@@ -0,0 +1,71 @@ | |||
1 | #include "perf.h" | ||
2 | #include "util/util.h" | ||
3 | #include "util/debug.h" | ||
4 | #include "util/parse-options.h" | ||
5 | #include "util/parse-regs-options.h" | ||
6 | |||
7 | int | ||
8 | parse_regs(const struct option *opt, const char *str, int unset) | ||
9 | { | ||
10 | uint64_t *mode = (uint64_t *)opt->value; | ||
11 | const struct sample_reg *r; | ||
12 | char *s, *os = NULL, *p; | ||
13 | int ret = -1; | ||
14 | |||
15 | if (unset) | ||
16 | return 0; | ||
17 | |||
18 | /* | ||
19 | * cannot set it twice | ||
20 | */ | ||
21 | if (*mode) | ||
22 | return -1; | ||
23 | |||
24 | /* str may be NULL in case no arg is passed to -I */ | ||
25 | if (str) { | ||
26 | /* because str is read-only */ | ||
27 | s = os = strdup(str); | ||
28 | if (!s) | ||
29 | return -1; | ||
30 | |||
31 | for (;;) { | ||
32 | p = strchr(s, ','); | ||
33 | if (p) | ||
34 | *p = '\0'; | ||
35 | |||
36 | if (!strcmp(s, "?")) { | ||
37 | fprintf(stderr, "available registers: "); | ||
38 | for (r = sample_reg_masks; r->name; r++) { | ||
39 | fprintf(stderr, "%s ", r->name); | ||
40 | } | ||
41 | fputc('\n', stderr); | ||
42 | /* just printing available regs */ | ||
43 | return -1; | ||
44 | } | ||
45 | for (r = sample_reg_masks; r->name; r++) { | ||
46 | if (!strcasecmp(s, r->name)) | ||
47 | break; | ||
48 | } | ||
49 | if (!r->name) { | ||
50 | ui__warning("unknown register %s," | ||
51 | " check man page\n", s); | ||
52 | goto error; | ||
53 | } | ||
54 | |||
55 | *mode |= r->mask; | ||
56 | |||
57 | if (!p) | ||
58 | break; | ||
59 | |||
60 | s = p + 1; | ||
61 | } | ||
62 | } | ||
63 | ret = 0; | ||
64 | |||
65 | /* default to all possible regs */ | ||
66 | if (*mode == 0) | ||
67 | *mode = PERF_REGS_MASK; | ||
68 | error: | ||
69 | free(os); | ||
70 | return ret; | ||
71 | } | ||
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h new file mode 100644 index 000000000000..7d762b188007 --- /dev/null +++ b/tools/perf/util/parse-regs-options.h | |||
@@ -0,0 +1,5 @@ | |||
1 | #ifndef _PERF_PARSE_REGS_OPTIONS_H | ||
2 | #define _PERF_PARSE_REGS_OPTIONS_H 1 | ||
3 | struct option; | ||
4 | int parse_regs(const struct option *opt, const char *str, int unset); | ||
5 | #endif /* _PERF_PARSE_REGS_OPTIONS_H */ | ||
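
Note: parse_regs() is an option callback that ORs register masks from sample_reg_masks[] into a uint64_t, lists the available names when given "?", and falls back to PERF_REGS_MASK when the option is passed with no argument. A hedged sketch of wiring it into an option table; OPT_CALLBACK_OPTARG and the surrounding struct are assumptions modeled on perf's parse-options style, not copied from a specific builtin:

/* Sketch: hook parse_regs() up as the handler of an -I/--intr-regs style
 * option.  The option table and example_opts struct are illustrative.
 */
#include <stdint.h>
#include "util/parse-options.h"
#include "util/parse-regs-options.h"

struct example_opts {
	uint64_t sample_intr_regs;	/* bitmask accumulated by parse_regs() */
};

static struct example_opts opts;

static const struct option example_options[] = {
	OPT_CALLBACK_OPTARG('I', "intr-regs", &opts.sample_intr_regs, NULL,
			    "register",
			    "sample selected machine registers on interrupt,"
			    " use -I? to list register names", parse_regs),
	OPT_END()
};
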
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 43168fb0d9a2..6b8eb13e14e4 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c | |||
@@ -2,6 +2,11 @@ | |||
2 | #include "perf_regs.h" | 2 | #include "perf_regs.h" |
3 | #include "event.h" | 3 | #include "event.h" |
4 | 4 | ||
5 | const struct sample_reg __weak sample_reg_masks[] = { | ||
6 | SMPL_REG_END | ||
7 | }; | ||
8 | |||
9 | #ifdef HAVE_PERF_REGS_SUPPORT | ||
5 | int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) | 10 | int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) |
6 | { | 11 | { |
7 | int i, idx = 0; | 12 | int i, idx = 0; |
@@ -25,3 +30,4 @@ out: | |||
25 | *valp = regs->cache_regs[id]; | 30 | *valp = regs->cache_regs[id]; |
26 | return 0; | 31 | return 0; |
27 | } | 32 | } |
33 | #endif | ||
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index 980dbf76bc98..679d6e493962 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h | |||
@@ -2,9 +2,19 @@ | |||
2 | #define __PERF_REGS_H | 2 | #define __PERF_REGS_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <linux/compiler.h> | ||
5 | 6 | ||
6 | struct regs_dump; | 7 | struct regs_dump; |
7 | 8 | ||
9 | struct sample_reg { | ||
10 | const char *name; | ||
11 | uint64_t mask; | ||
12 | }; | ||
13 | #define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } | ||
14 | #define SMPL_REG_END { .name = NULL } | ||
15 | |||
16 | extern const struct sample_reg sample_reg_masks[]; | ||
17 | |||
8 | #ifdef HAVE_PERF_REGS_SUPPORT | 18 | #ifdef HAVE_PERF_REGS_SUPPORT |
9 | #include <perf_regs.h> | 19 | #include <perf_regs.h> |
10 | 20 | ||
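
Note: the weak, empty sample_reg_masks[] above is the fallback for architectures that do not support register sampling; each supported arch is expected to override it with a table built from SMPL_REG()/SMPL_REG_END. A hedged sketch of such an override, using x86 register enum names for illustration; treat the exact list as an example, not the authoritative arch table:

/* Sketch: an arch override of the weak sample_reg_masks[] table.  The
 * PERF_REG_X86_* values come from the x86 uapi perf_regs.h; the include
 * path is as seen from an arch util directory.
 */
#include "../../util/perf_regs.h"

const struct sample_reg sample_reg_masks[] = {
	SMPL_REG(AX, PERF_REG_X86_AX),
	SMPL_REG(BX, PERF_REG_X86_BX),
	SMPL_REG(CX, PERF_REG_X86_CX),
	SMPL_REG(DX, PERF_REG_X86_DX),
	SMPL_REG(SI, PERF_REG_X86_SI),
	SMPL_REG(DI, PERF_REG_X86_DI),
	SMPL_REG(BP, PERF_REG_X86_BP),
	SMPL_REG(SP, PERF_REG_X86_SP),
	SMPL_REG(IP, PERF_REG_X86_IP),
	SMPL_REG_END
};
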
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 7bcb8c315615..89c91a1a67e7 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c | |||
@@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name) | |||
462 | LIST_HEAD(aliases); | 462 | LIST_HEAD(aliases); |
463 | __u32 type; | 463 | __u32 type; |
464 | 464 | ||
465 | /* No support for intel_bts or intel_pt so disallow them */ | ||
466 | if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) | ||
467 | return NULL; | ||
468 | |||
469 | /* | 465 | /* |
470 | * The pmu data we store & need consists of the pmu | 466 | * The pmu data we store & need consists of the pmu |
471 | * type value and format definitions. Load both right | 467 | * type value and format definitions. Load both right |
@@ -542,7 +538,7 @@ struct perf_pmu *perf_pmu__find(const char *name) | |||
542 | } | 538 | } |
543 | 539 | ||
544 | static struct perf_pmu_format * | 540 | static struct perf_pmu_format * |
545 | pmu_find_format(struct list_head *formats, char *name) | 541 | pmu_find_format(struct list_head *formats, const char *name) |
546 | { | 542 | { |
547 | struct perf_pmu_format *format; | 543 | struct perf_pmu_format *format; |
548 | 544 | ||
@@ -553,6 +549,21 @@ pmu_find_format(struct list_head *formats, char *name) | |||
553 | return NULL; | 549 | return NULL; |
554 | } | 550 | } |
555 | 551 | ||
552 | __u64 perf_pmu__format_bits(struct list_head *formats, const char *name) | ||
553 | { | ||
554 | struct perf_pmu_format *format = pmu_find_format(formats, name); | ||
555 | __u64 bits = 0; | ||
556 | int fbit; | ||
557 | |||
558 | if (!format) | ||
559 | return 0; | ||
560 | |||
561 | for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS) | ||
562 | bits |= 1ULL << fbit; | ||
563 | |||
564 | return bits; | ||
565 | } | ||
566 | |||
556 | /* | 567 | /* |
557 | * Sets value based on the format definition (format parameter) | 568 | * Sets value based on the format definition (format parameter) |
558 | * and unformated value (value parameter). | 569 | * and unformated value (value parameter). |
@@ -574,6 +585,18 @@ static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v, | |||
574 | } | 585 | } |
575 | } | 586 | } |
576 | 587 | ||
588 | static __u64 pmu_format_max_value(const unsigned long *format) | ||
589 | { | ||
590 | int w; | ||
591 | |||
592 | w = bitmap_weight(format, PERF_PMU_FORMAT_BITS); | ||
593 | if (!w) | ||
594 | return 0; | ||
595 | if (w < 64) | ||
596 | return (1ULL << w) - 1; | ||
597 | return -1; | ||
598 | } | ||
599 | |||
577 | /* | 600 | /* |
578 | * Term is a string term, and might be a param-term. Try to look up it's value | 601 | * Term is a string term, and might be a param-term. Try to look up it's value |
579 | * in the remaining terms. | 602 | * in the remaining terms. |
@@ -607,7 +630,9 @@ static char *formats_error_string(struct list_head *formats) | |||
607 | { | 630 | { |
608 | struct perf_pmu_format *format; | 631 | struct perf_pmu_format *format; |
609 | char *err, *str; | 632 | char *err, *str; |
610 | static const char *static_terms = "config,config1,config2,name,period,branch_type\n"; | 633 | static const char *static_terms = "config,config1,config2,name," |
634 | "period,freq,branch_type,time," | ||
635 | "call-graph,stack-size\n"; | ||
611 | unsigned i = 0; | 636 | unsigned i = 0; |
612 | 637 | ||
613 | if (!asprintf(&str, "valid terms:")) | 638 | if (!asprintf(&str, "valid terms:")) |
@@ -647,7 +672,7 @@ static int pmu_config_term(struct list_head *formats, | |||
647 | { | 672 | { |
648 | struct perf_pmu_format *format; | 673 | struct perf_pmu_format *format; |
649 | __u64 *vp; | 674 | __u64 *vp; |
650 | __u64 val; | 675 | __u64 val, max_val; |
651 | 676 | ||
652 | /* | 677 | /* |
653 | * If this is a parameter we've already used for parameterized-eval, | 678 | * If this is a parameter we've already used for parameterized-eval, |
@@ -713,6 +738,22 @@ static int pmu_config_term(struct list_head *formats, | |||
713 | } else | 738 | } else |
714 | return -EINVAL; | 739 | return -EINVAL; |
715 | 740 | ||
741 | max_val = pmu_format_max_value(format->bits); | ||
742 | if (val > max_val) { | ||
743 | if (err) { | ||
744 | err->idx = term->err_val; | ||
745 | if (asprintf(&err->str, | ||
746 | "value too big for format, maximum is %llu", | ||
747 | (unsigned long long)max_val) < 0) | ||
748 | err->str = strdup("value too big for format"); | ||
749 | return -EINVAL; | ||
750 | } | ||
751 | /* | ||
752 | * Assume we don't care if !err, in which case the value will be | ||
753 | * silently truncated. | ||
754 | */ | ||
755 | } | ||
756 | |||
716 | pmu_format_value(format->bits, val, vp, zero); | 757 | pmu_format_value(format->bits, val, vp, zero); |
717 | return 0; | 758 | return 0; |
718 | } | 759 | } |
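
Note: pmu_format_max_value() bounds a term by the width of its format field: a field spanning w bits accepts at most 2^w - 1, so a sysfs format spec like "config:0-7" (weight 8) caps the value at 255, and perf_pmu__format_bits() reports 0xff for the same field. A standalone sketch of that arithmetic so the numbers can be checked:

/* Sketch: recompute the width/max/mask arithmetic used above for a
 * "config:0-7" style format field (bits 0..7 set).
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int w = 8;				/* bitmap_weight() of bits 0-7 */
	uint64_t max_val = (w < 64) ? (1ULL << w) - 1 : (uint64_t)-1;
	uint64_t mask = 0;

	for (int bit = 0; bit < w; bit++)	/* for_each_set_bit() equivalent */
		mask |= 1ULL << bit;

	printf("max_val=%llu mask=%#llx\n",
	       (unsigned long long)max_val, (unsigned long long)mask);
	/* prints: max_val=255 mask=0xff */
	return 0;
}
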
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 7b9c8cf8ae3e..5d7e84466bee 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h | |||
@@ -54,6 +54,7 @@ int perf_pmu__config_terms(struct list_head *formats, | |||
54 | struct perf_event_attr *attr, | 54 | struct perf_event_attr *attr, |
55 | struct list_head *head_terms, | 55 | struct list_head *head_terms, |
56 | bool zero, struct parse_events_error *error); | 56 | bool zero, struct parse_events_error *error); |
57 | __u64 perf_pmu__format_bits(struct list_head *formats, const char *name); | ||
57 | int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, | 58 | int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, |
58 | struct perf_pmu_info *info); | 59 | struct perf_pmu_info *info); |
59 | struct list_head *perf_pmu__alias(struct perf_pmu *pmu, | 60 | struct list_head *perf_pmu__alias(struct perf_pmu *pmu, |
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 381f23a443c7..c6f9af78f6f5 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "trace-event.h" /* For __maybe_unused */ | 45 | #include "trace-event.h" /* For __maybe_unused */ |
46 | #include "probe-event.h" | 46 | #include "probe-event.h" |
47 | #include "probe-finder.h" | 47 | #include "probe-finder.h" |
48 | #include "probe-file.h" | ||
48 | #include "session.h" | 49 | #include "session.h" |
49 | 50 | ||
50 | #define MAX_CMDLEN 256 | 51 | #define MAX_CMDLEN 256 |
@@ -55,11 +56,7 @@ struct probe_conf probe_conf; | |||
55 | 56 | ||
56 | #define semantic_error(msg ...) pr_err("Semantic error :" msg) | 57 | #define semantic_error(msg ...) pr_err("Semantic error :" msg) |
57 | 58 | ||
58 | /* If there is no space to write, returns -E2BIG. */ | 59 | int e_snprintf(char *str, size_t size, const char *format, ...) |
59 | static int e_snprintf(char *str, size_t size, const char *format, ...) | ||
60 | __attribute__((format(printf, 3, 4))); | ||
61 | |||
62 | static int e_snprintf(char *str, size_t size, const char *format, ...) | ||
63 | { | 60 | { |
64 | int ret; | 61 | int ret; |
65 | va_list ap; | 62 | va_list ap; |
@@ -72,7 +69,6 @@ static int e_snprintf(char *str, size_t size, const char *format, ...) | |||
72 | } | 69 | } |
73 | 70 | ||
74 | static char *synthesize_perf_probe_point(struct perf_probe_point *pp); | 71 | static char *synthesize_perf_probe_point(struct perf_probe_point *pp); |
75 | static void clear_probe_trace_event(struct probe_trace_event *tev); | ||
76 | static struct machine *host_machine; | 72 | static struct machine *host_machine; |
77 | 73 | ||
78 | /* Initialize symbol maps and path of vmlinux/modules */ | 74 | /* Initialize symbol maps and path of vmlinux/modules */ |
@@ -274,12 +270,13 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso) | |||
274 | int ret = 0; | 270 | int ret = 0; |
275 | 271 | ||
276 | if (module) { | 272 | if (module) { |
277 | list_for_each_entry(dso, &host_machine->dsos.head, node) { | 273 | char module_name[128]; |
278 | if (!dso->kernel) | 274 | |
279 | continue; | 275 | snprintf(module_name, sizeof(module_name), "[%s]", module); |
280 | if (strncmp(dso->short_name + 1, module, | 276 | map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name); |
281 | dso->short_name_len - 2) == 0) | 277 | if (map) { |
282 | goto found; | 278 | dso = map->dso; |
279 | goto found; | ||
283 | } | 280 | } |
284 | pr_debug("Failed to find module %s.\n", module); | 281 | pr_debug("Failed to find module %s.\n", module); |
285 | return -ENOENT; | 282 | return -ENOENT; |
@@ -519,7 +516,7 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, | |||
519 | if (ret < 0) | 516 | if (ret < 0) |
520 | goto error; | 517 | goto error; |
521 | addr += stext; | 518 | addr += stext; |
522 | } else { | 519 | } else if (tp->symbol) { |
523 | addr = kernel_get_symbol_address_by_name(tp->symbol, false); | 520 | addr = kernel_get_symbol_address_by_name(tp->symbol, false); |
524 | if (addr == 0) | 521 | if (addr == 0) |
525 | goto error; | 522 | goto error; |
@@ -709,9 +706,10 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, | |||
709 | } | 706 | } |
710 | /* Error path : ntevs < 0 */ | 707 | /* Error path : ntevs < 0 */ |
711 | pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); | 708 | pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); |
712 | if (ntevs == -EBADF) { | 709 | if (ntevs < 0) { |
713 | pr_warning("Warning: No dwarf info found in the vmlinux - " | 710 | if (ntevs == -EBADF) |
714 | "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); | 711 | pr_warning("Warning: No dwarf info found in the vmlinux - " |
712 | "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n"); | ||
715 | if (!need_dwarf) { | 713 | if (!need_dwarf) { |
716 | pr_debug("Trying to use symbols.\n"); | 714 | pr_debug("Trying to use symbols.\n"); |
717 | return 0; | 715 | return 0; |
@@ -1197,15 +1195,37 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) | |||
1197 | *ptr++ = '\0'; | 1195 | *ptr++ = '\0'; |
1198 | } | 1196 | } |
1199 | 1197 | ||
1200 | tmp = strdup(arg); | 1198 | if (arg[0] == '\0') |
1201 | if (tmp == NULL) | 1199 | tmp = NULL; |
1202 | return -ENOMEM; | 1200 | else { |
1201 | tmp = strdup(arg); | ||
1202 | if (tmp == NULL) | ||
1203 | return -ENOMEM; | ||
1204 | } | ||
1203 | 1205 | ||
1204 | if (file_spec) | 1206 | if (file_spec) |
1205 | pp->file = tmp; | 1207 | pp->file = tmp; |
1206 | else | 1208 | else { |
1207 | pp->function = tmp; | 1209 | pp->function = tmp; |
1208 | 1210 | ||
1211 | /* | ||
1212 | * Keep pp->function even if this is an absolute address, | ||
1213 | * so it can mark whether abs_address is valid. | ||
1214 | * This makes 'perf probe lib.bin 0x0' possible. | ||
1215 | * | ||
1216 | * Note that checking the length of tmp is not needed | ||
1217 | * because when we access tmp[1] we know tmp[0] is '0', | ||
1218 | * so tmp[1] is always valid (but could be '\0'). | ||
1219 | */ | ||
1220 | if (tmp && !strncmp(tmp, "0x", 2)) { | ||
1221 | pp->abs_address = strtoul(pp->function, &tmp, 0); | ||
1222 | if (*tmp != '\0') { | ||
1223 | semantic_error("Invalid absolute address.\n"); | ||
1224 | return -EINVAL; | ||
1225 | } | ||
1226 | } | ||
1227 | } | ||
1228 | |||
1209 | /* Parse other options */ | 1229 | /* Parse other options */ |
1210 | while (ptr) { | 1230 | while (ptr) { |
1211 | arg = ptr; | 1231 | arg = ptr; |
@@ -1467,8 +1487,7 @@ bool perf_probe_event_need_dwarf(struct perf_probe_event *pev) | |||
1467 | } | 1487 | } |
1468 | 1488 | ||
1469 | /* Parse probe_events event into struct probe_point */ | 1489 | /* Parse probe_events event into struct probe_point */ |
1470 | static int parse_probe_trace_command(const char *cmd, | 1490 | int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev) |
1471 | struct probe_trace_event *tev) | ||
1472 | { | 1491 | { |
1473 | struct probe_trace_point *tp = &tev->point; | 1492 | struct probe_trace_point *tp = &tev->point; |
1474 | char pr; | 1493 | char pr; |
@@ -1523,9 +1542,31 @@ static int parse_probe_trace_command(const char *cmd, | |||
1523 | } else | 1542 | } else |
1524 | p = argv[1]; | 1543 | p = argv[1]; |
1525 | fmt1_str = strtok_r(p, "+", &fmt); | 1544 | fmt1_str = strtok_r(p, "+", &fmt); |
1526 | if (fmt1_str[0] == '0') /* only the address started with 0x */ | 1545 | /* only the address started with 0x */ |
1527 | tp->address = strtoul(fmt1_str, NULL, 0); | 1546 | if (fmt1_str[0] == '0') { |
1528 | else { | 1547 | /* |
1548 | * Fix a special case: | ||
1549 | * if address == 0, the kernel reports something like: | ||
1550 | * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax | ||
1551 | * Newer kernels may fix that, but we want to | ||
1552 | * support old kernels as well. | ||
1553 | */ | ||
1554 | if (strcmp(fmt1_str, "0x") == 0) { | ||
1555 | if (!argv[2] || strcmp(argv[2], "(null)")) { | ||
1556 | ret = -EINVAL; | ||
1557 | goto out; | ||
1558 | } | ||
1559 | tp->address = 0; | ||
1560 | |||
1561 | free(argv[2]); | ||
1562 | for (i = 2; argv[i + 1] != NULL; i++) | ||
1563 | argv[i] = argv[i + 1]; | ||
1564 | |||
1565 | argv[i] = NULL; | ||
1566 | argc -= 1; | ||
1567 | } else | ||
1568 | tp->address = strtoul(fmt1_str, NULL, 0); | ||
1569 | } else { | ||
1529 | /* Only the symbol-based probe has offset */ | 1570 | /* Only the symbol-based probe has offset */ |
1530 | tp->symbol = strdup(fmt1_str); | 1571 | tp->symbol = strdup(fmt1_str); |
1531 | if (tp->symbol == NULL) { | 1572 | if (tp->symbol == NULL) { |
@@ -1782,14 +1823,29 @@ char *synthesize_probe_trace_command(struct probe_trace_event *tev) | |||
1782 | if (len <= 0) | 1823 | if (len <= 0) |
1783 | goto error; | 1824 | goto error; |
1784 | 1825 | ||
1785 | /* Uprobes must have tp->address and tp->module */ | 1826 | /* Uprobes must have tp->module */ |
1786 | if (tev->uprobes && (!tp->address || !tp->module)) | 1827 | if (tev->uprobes && !tp->module) |
1787 | goto error; | 1828 | goto error; |
1829 | /* | ||
1830 | * If tp->address == 0, then this point must be an | ||
1831 | * absolute address uprobe. | ||
1832 | * try_to_find_absolute_address() should have set | ||
1833 | * tp->symbol to "0x0". | ||
1834 | */ | ||
1835 | if (tev->uprobes && !tp->address) { | ||
1836 | if (!tp->symbol || strcmp(tp->symbol, "0x0")) | ||
1837 | goto error; | ||
1838 | } | ||
1788 | 1839 | ||
1789 | /* Use the tp->address for uprobes */ | 1840 | /* Use the tp->address for uprobes */ |
1790 | if (tev->uprobes) | 1841 | if (tev->uprobes) |
1791 | ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", | 1842 | ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s:0x%lx", |
1792 | tp->module, tp->address); | 1843 | tp->module, tp->address); |
1844 | else if (!strncmp(tp->symbol, "0x", 2)) | ||
1845 | /* Absolute address. See try_to_find_absolute_address() */ | ||
1846 | ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s0x%lx", | ||
1847 | tp->module ?: "", tp->module ? ":" : "", | ||
1848 | tp->address); | ||
1793 | else | 1849 | else |
1794 | ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", | 1850 | ret = e_snprintf(buf + len, MAX_CMDLEN - len, "%s%s%s+%lu", |
1795 | tp->module ?: "", tp->module ? ":" : "", | 1851 | tp->module ?: "", tp->module ? ":" : "", |
@@ -1819,17 +1875,17 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, | |||
1819 | { | 1875 | { |
1820 | struct symbol *sym = NULL; | 1876 | struct symbol *sym = NULL; |
1821 | struct map *map; | 1877 | struct map *map; |
1822 | u64 addr; | 1878 | u64 addr = tp->address; |
1823 | int ret = -ENOENT; | 1879 | int ret = -ENOENT; |
1824 | 1880 | ||
1825 | if (!is_kprobe) { | 1881 | if (!is_kprobe) { |
1826 | map = dso__new_map(tp->module); | 1882 | map = dso__new_map(tp->module); |
1827 | if (!map) | 1883 | if (!map) |
1828 | goto out; | 1884 | goto out; |
1829 | addr = tp->address; | ||
1830 | sym = map__find_symbol(map, addr, NULL); | 1885 | sym = map__find_symbol(map, addr, NULL); |
1831 | } else { | 1886 | } else { |
1832 | addr = kernel_get_symbol_address_by_name(tp->symbol, true); | 1887 | if (tp->symbol) |
1888 | addr = kernel_get_symbol_address_by_name(tp->symbol, true); | ||
1833 | if (addr) { | 1889 | if (addr) { |
1834 | addr += tp->offset; | 1890 | addr += tp->offset; |
1835 | sym = __find_kernel_function(addr, &map); | 1891 | sym = __find_kernel_function(addr, &map); |
@@ -1852,8 +1908,8 @@ out: | |||
1852 | } | 1908 | } |
1853 | 1909 | ||
1854 | static int convert_to_perf_probe_point(struct probe_trace_point *tp, | 1910 | static int convert_to_perf_probe_point(struct probe_trace_point *tp, |
1855 | struct perf_probe_point *pp, | 1911 | struct perf_probe_point *pp, |
1856 | bool is_kprobe) | 1912 | bool is_kprobe) |
1857 | { | 1913 | { |
1858 | char buf[128]; | 1914 | char buf[128]; |
1859 | int ret; | 1915 | int ret; |
@@ -1870,7 +1926,7 @@ static int convert_to_perf_probe_point(struct probe_trace_point *tp, | |||
1870 | if (tp->symbol) { | 1926 | if (tp->symbol) { |
1871 | pp->function = strdup(tp->symbol); | 1927 | pp->function = strdup(tp->symbol); |
1872 | pp->offset = tp->offset; | 1928 | pp->offset = tp->offset; |
1873 | } else if (!tp->module && !is_kprobe) { | 1929 | } else { |
1874 | ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); | 1930 | ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address); |
1875 | if (ret < 0) | 1931 | if (ret < 0) |
1876 | return ret; | 1932 | return ret; |
@@ -1951,7 +2007,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev) | |||
1951 | memset(pev, 0, sizeof(*pev)); | 2007 | memset(pev, 0, sizeof(*pev)); |
1952 | } | 2008 | } |
1953 | 2009 | ||
1954 | static void clear_probe_trace_event(struct probe_trace_event *tev) | 2010 | void clear_probe_trace_event(struct probe_trace_event *tev) |
1955 | { | 2011 | { |
1956 | struct probe_trace_arg_ref *ref, *next; | 2012 | struct probe_trace_arg_ref *ref, *next; |
1957 | int i; | 2013 | int i; |
@@ -1976,119 +2032,6 @@ static void clear_probe_trace_event(struct probe_trace_event *tev) | |||
1976 | memset(tev, 0, sizeof(*tev)); | 2032 | memset(tev, 0, sizeof(*tev)); |
1977 | } | 2033 | } |
1978 | 2034 | ||
1979 | static void print_open_warning(int err, bool is_kprobe) | ||
1980 | { | ||
1981 | char sbuf[STRERR_BUFSIZE]; | ||
1982 | |||
1983 | if (err == -ENOENT) { | ||
1984 | const char *config; | ||
1985 | |||
1986 | if (!is_kprobe) | ||
1987 | config = "CONFIG_UPROBE_EVENTS"; | ||
1988 | else | ||
1989 | config = "CONFIG_KPROBE_EVENTS"; | ||
1990 | |||
1991 | pr_warning("%cprobe_events file does not exist" | ||
1992 | " - please rebuild kernel with %s.\n", | ||
1993 | is_kprobe ? 'k' : 'u', config); | ||
1994 | } else if (err == -ENOTSUP) | ||
1995 | pr_warning("Tracefs or debugfs is not mounted.\n"); | ||
1996 | else | ||
1997 | pr_warning("Failed to open %cprobe_events: %s\n", | ||
1998 | is_kprobe ? 'k' : 'u', | ||
1999 | strerror_r(-err, sbuf, sizeof(sbuf))); | ||
2000 | } | ||
2001 | |||
2002 | static void print_both_open_warning(int kerr, int uerr) | ||
2003 | { | ||
2004 | /* Both kprobes and uprobes are disabled, warn it. */ | ||
2005 | if (kerr == -ENOTSUP && uerr == -ENOTSUP) | ||
2006 | pr_warning("Tracefs or debugfs is not mounted.\n"); | ||
2007 | else if (kerr == -ENOENT && uerr == -ENOENT) | ||
2008 | pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " | ||
2009 | "or/and CONFIG_UPROBE_EVENTS.\n"); | ||
2010 | else { | ||
2011 | char sbuf[STRERR_BUFSIZE]; | ||
2012 | pr_warning("Failed to open kprobe events: %s.\n", | ||
2013 | strerror_r(-kerr, sbuf, sizeof(sbuf))); | ||
2014 | pr_warning("Failed to open uprobe events: %s.\n", | ||
2015 | strerror_r(-uerr, sbuf, sizeof(sbuf))); | ||
2016 | } | ||
2017 | } | ||
2018 | |||
2019 | static int open_probe_events(const char *trace_file, bool readwrite) | ||
2020 | { | ||
2021 | char buf[PATH_MAX]; | ||
2022 | const char *__debugfs; | ||
2023 | const char *tracing_dir = ""; | ||
2024 | int ret; | ||
2025 | |||
2026 | __debugfs = tracefs_find_mountpoint(); | ||
2027 | if (__debugfs == NULL) { | ||
2028 | tracing_dir = "tracing/"; | ||
2029 | |||
2030 | __debugfs = debugfs_find_mountpoint(); | ||
2031 | if (__debugfs == NULL) | ||
2032 | return -ENOTSUP; | ||
2033 | } | ||
2034 | |||
2035 | ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", | ||
2036 | __debugfs, tracing_dir, trace_file); | ||
2037 | if (ret >= 0) { | ||
2038 | pr_debug("Opening %s write=%d\n", buf, readwrite); | ||
2039 | if (readwrite && !probe_event_dry_run) | ||
2040 | ret = open(buf, O_RDWR | O_APPEND, 0); | ||
2041 | else | ||
2042 | ret = open(buf, O_RDONLY, 0); | ||
2043 | |||
2044 | if (ret < 0) | ||
2045 | ret = -errno; | ||
2046 | } | ||
2047 | return ret; | ||
2048 | } | ||
2049 | |||
2050 | static int open_kprobe_events(bool readwrite) | ||
2051 | { | ||
2052 | return open_probe_events("kprobe_events", readwrite); | ||
2053 | } | ||
2054 | |||
2055 | static int open_uprobe_events(bool readwrite) | ||
2056 | { | ||
2057 | return open_probe_events("uprobe_events", readwrite); | ||
2058 | } | ||
2059 | |||
2060 | /* Get raw string list of current kprobe_events or uprobe_events */ | ||
2061 | static struct strlist *get_probe_trace_command_rawlist(int fd) | ||
2062 | { | ||
2063 | int ret, idx; | ||
2064 | FILE *fp; | ||
2065 | char buf[MAX_CMDLEN]; | ||
2066 | char *p; | ||
2067 | struct strlist *sl; | ||
2068 | |||
2069 | sl = strlist__new(true, NULL); | ||
2070 | |||
2071 | fp = fdopen(dup(fd), "r"); | ||
2072 | while (!feof(fp)) { | ||
2073 | p = fgets(buf, MAX_CMDLEN, fp); | ||
2074 | if (!p) | ||
2075 | break; | ||
2076 | |||
2077 | idx = strlen(p) - 1; | ||
2078 | if (p[idx] == '\n') | ||
2079 | p[idx] = '\0'; | ||
2080 | ret = strlist__add(sl, buf); | ||
2081 | if (ret < 0) { | ||
2082 | pr_debug("strlist__add failed (%d)\n", ret); | ||
2083 | strlist__delete(sl); | ||
2084 | return NULL; | ||
2085 | } | ||
2086 | } | ||
2087 | fclose(fp); | ||
2088 | |||
2089 | return sl; | ||
2090 | } | ||
2091 | |||
2092 | struct kprobe_blacklist_node { | 2035 | struct kprobe_blacklist_node { |
2093 | struct list_head list; | 2036 | struct list_head list; |
2094 | unsigned long start; | 2037 | unsigned long start; |
@@ -2284,7 +2227,7 @@ static int __show_perf_probe_events(int fd, bool is_kprobe, | |||
2284 | memset(&tev, 0, sizeof(tev)); | 2227 | memset(&tev, 0, sizeof(tev)); |
2285 | memset(&pev, 0, sizeof(pev)); | 2228 | memset(&pev, 0, sizeof(pev)); |
2286 | 2229 | ||
2287 | rawlist = get_probe_trace_command_rawlist(fd); | 2230 | rawlist = probe_file__get_rawlist(fd); |
2288 | if (!rawlist) | 2231 | if (!rawlist) |
2289 | return -ENOMEM; | 2232 | return -ENOMEM; |
2290 | 2233 | ||
@@ -2325,89 +2268,20 @@ int show_perf_probe_events(struct strfilter *filter) | |||
2325 | if (ret < 0) | 2268 | if (ret < 0) |
2326 | return ret; | 2269 | return ret; |
2327 | 2270 | ||
2328 | kp_fd = open_kprobe_events(false); | 2271 | ret = probe_file__open_both(&kp_fd, &up_fd, 0); |
2329 | if (kp_fd >= 0) { | 2272 | if (ret < 0) |
2330 | ret = __show_perf_probe_events(kp_fd, true, filter); | 2273 | return ret; |
2331 | close(kp_fd); | ||
2332 | if (ret < 0) | ||
2333 | goto out; | ||
2334 | } | ||
2335 | |||
2336 | up_fd = open_uprobe_events(false); | ||
2337 | if (kp_fd < 0 && up_fd < 0) { | ||
2338 | print_both_open_warning(kp_fd, up_fd); | ||
2339 | ret = kp_fd; | ||
2340 | goto out; | ||
2341 | } | ||
2342 | 2274 | ||
2343 | if (up_fd >= 0) { | 2275 | if (kp_fd >= 0) |
2276 | ret = __show_perf_probe_events(kp_fd, true, filter); | ||
2277 | if (up_fd >= 0 && ret >= 0) | ||
2344 | ret = __show_perf_probe_events(up_fd, false, filter); | 2278 | ret = __show_perf_probe_events(up_fd, false, filter); |
2279 | if (kp_fd > 0) | ||
2280 | close(kp_fd); | ||
2281 | if (up_fd > 0) | ||
2345 | close(up_fd); | 2282 | close(up_fd); |
2346 | } | ||
2347 | out: | ||
2348 | exit_symbol_maps(); | 2283 | exit_symbol_maps(); |
2349 | return ret; | ||
2350 | } | ||
2351 | |||
2352 | /* Get current perf-probe event names */ | ||
2353 | static struct strlist *get_probe_trace_event_names(int fd, bool include_group) | ||
2354 | { | ||
2355 | char buf[128]; | ||
2356 | struct strlist *sl, *rawlist; | ||
2357 | struct str_node *ent; | ||
2358 | struct probe_trace_event tev; | ||
2359 | int ret = 0; | ||
2360 | |||
2361 | memset(&tev, 0, sizeof(tev)); | ||
2362 | rawlist = get_probe_trace_command_rawlist(fd); | ||
2363 | if (!rawlist) | ||
2364 | return NULL; | ||
2365 | sl = strlist__new(true, NULL); | ||
2366 | strlist__for_each(ent, rawlist) { | ||
2367 | ret = parse_probe_trace_command(ent->s, &tev); | ||
2368 | if (ret < 0) | ||
2369 | break; | ||
2370 | if (include_group) { | ||
2371 | ret = e_snprintf(buf, 128, "%s:%s", tev.group, | ||
2372 | tev.event); | ||
2373 | if (ret >= 0) | ||
2374 | ret = strlist__add(sl, buf); | ||
2375 | } else | ||
2376 | ret = strlist__add(sl, tev.event); | ||
2377 | clear_probe_trace_event(&tev); | ||
2378 | if (ret < 0) | ||
2379 | break; | ||
2380 | } | ||
2381 | strlist__delete(rawlist); | ||
2382 | |||
2383 | if (ret < 0) { | ||
2384 | strlist__delete(sl); | ||
2385 | return NULL; | ||
2386 | } | ||
2387 | return sl; | ||
2388 | } | ||
2389 | |||
2390 | static int write_probe_trace_event(int fd, struct probe_trace_event *tev) | ||
2391 | { | ||
2392 | int ret = 0; | ||
2393 | char *buf = synthesize_probe_trace_command(tev); | ||
2394 | char sbuf[STRERR_BUFSIZE]; | ||
2395 | |||
2396 | if (!buf) { | ||
2397 | pr_debug("Failed to synthesize probe trace event.\n"); | ||
2398 | return -EINVAL; | ||
2399 | } | ||
2400 | 2284 | ||
2401 | pr_debug("Writing event: %s\n", buf); | ||
2402 | if (!probe_event_dry_run) { | ||
2403 | ret = write(fd, buf, strlen(buf)); | ||
2404 | if (ret <= 0) { | ||
2405 | ret = -errno; | ||
2406 | pr_warning("Failed to write event: %s\n", | ||
2407 | strerror_r(errno, sbuf, sizeof(sbuf))); | ||
2408 | } | ||
2409 | } | ||
2410 | free(buf); | ||
2411 | return ret; | 2285 | return ret; |
2412 | } | 2286 | } |
2413 | 2287 | ||
@@ -2478,36 +2352,69 @@ out: | |||
2478 | free(buf); | 2352 | free(buf); |
2479 | } | 2353 | } |
2480 | 2354 | ||
2355 | /* Set new name from original perf_probe_event and namelist */ | ||
2356 | static int probe_trace_event__set_name(struct probe_trace_event *tev, | ||
2357 | struct perf_probe_event *pev, | ||
2358 | struct strlist *namelist, | ||
2359 | bool allow_suffix) | ||
2360 | { | ||
2361 | const char *event, *group; | ||
2362 | char buf[64]; | ||
2363 | int ret; | ||
2364 | |||
2365 | if (pev->event) | ||
2366 | event = pev->event; | ||
2367 | else | ||
2368 | if (pev->point.function && | ||
2369 | (strncmp(pev->point.function, "0x", 2) != 0) && | ||
2370 | !strisglob(pev->point.function)) | ||
2371 | event = pev->point.function; | ||
2372 | else | ||
2373 | event = tev->point.realname; | ||
2374 | if (pev->group) | ||
2375 | group = pev->group; | ||
2376 | else | ||
2377 | group = PERFPROBE_GROUP; | ||
2378 | |||
2379 | /* Get an unused new event name */ | ||
2380 | ret = get_new_event_name(buf, 64, event, | ||
2381 | namelist, allow_suffix); | ||
2382 | if (ret < 0) | ||
2383 | return ret; | ||
2384 | |||
2385 | event = buf; | ||
2386 | |||
2387 | tev->event = strdup(event); | ||
2388 | tev->group = strdup(group); | ||
2389 | if (tev->event == NULL || tev->group == NULL) | ||
2390 | return -ENOMEM; | ||
2391 | |||
2392 | /* Add added event name to namelist */ | ||
2393 | strlist__add(namelist, event); | ||
2394 | return 0; | ||
2395 | } | ||
2396 | |||
2481 | static int __add_probe_trace_events(struct perf_probe_event *pev, | 2397 | static int __add_probe_trace_events(struct perf_probe_event *pev, |
2482 | struct probe_trace_event *tevs, | 2398 | struct probe_trace_event *tevs, |
2483 | int ntevs, bool allow_suffix) | 2399 | int ntevs, bool allow_suffix) |
2484 | { | 2400 | { |
2485 | int i, fd, ret; | 2401 | int i, fd, ret; |
2486 | struct probe_trace_event *tev = NULL; | 2402 | struct probe_trace_event *tev = NULL; |
2487 | char buf[64]; | ||
2488 | const char *event = NULL, *group = NULL; | 2403 | const char *event = NULL, *group = NULL; |
2489 | struct strlist *namelist; | 2404 | struct strlist *namelist; |
2490 | bool safename; | ||
2491 | 2405 | ||
2492 | if (pev->uprobes) | 2406 | fd = probe_file__open(PF_FL_RW | (pev->uprobes ? PF_FL_UPROBE : 0)); |
2493 | fd = open_uprobe_events(true); | 2407 | if (fd < 0) |
2494 | else | ||
2495 | fd = open_kprobe_events(true); | ||
2496 | |||
2497 | if (fd < 0) { | ||
2498 | print_open_warning(fd, !pev->uprobes); | ||
2499 | return fd; | 2408 | return fd; |
2500 | } | ||
2501 | 2409 | ||
2502 | /* Get current event names */ | 2410 | /* Get current event names */ |
2503 | namelist = get_probe_trace_event_names(fd, false); | 2411 | namelist = probe_file__get_namelist(fd); |
2504 | if (!namelist) { | 2412 | if (!namelist) { |
2505 | pr_debug("Failed to get current event list.\n"); | 2413 | pr_debug("Failed to get current event list.\n"); |
2506 | ret = -ENOMEM; | 2414 | ret = -ENOMEM; |
2507 | goto close_out; | 2415 | goto close_out; |
2508 | } | 2416 | } |
2509 | 2417 | ||
2510 | safename = (pev->point.function && !strisglob(pev->point.function)); | ||
2511 | ret = 0; | 2418 | ret = 0; |
2512 | pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); | 2419 | pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":"); |
2513 | for (i = 0; i < ntevs; i++) { | 2420 | for (i = 0; i < ntevs; i++) { |
@@ -2516,36 +2423,15 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, | |||
2516 | if (!tev->point.symbol) | 2423 | if (!tev->point.symbol) |
2517 | continue; | 2424 | continue; |
2518 | 2425 | ||
2519 | if (pev->event) | 2426 | /* Set new name for tev (and update namelist) */ |
2520 | event = pev->event; | 2427 | ret = probe_trace_event__set_name(tev, pev, namelist, |
2521 | else | 2428 | allow_suffix); |
2522 | if (safename) | ||
2523 | event = pev->point.function; | ||
2524 | else | ||
2525 | event = tev->point.realname; | ||
2526 | if (pev->group) | ||
2527 | group = pev->group; | ||
2528 | else | ||
2529 | group = PERFPROBE_GROUP; | ||
2530 | |||
2531 | /* Get an unused new event name */ | ||
2532 | ret = get_new_event_name(buf, 64, event, | ||
2533 | namelist, allow_suffix); | ||
2534 | if (ret < 0) | 2429 | if (ret < 0) |
2535 | break; | 2430 | break; |
2536 | event = buf; | ||
2537 | 2431 | ||
2538 | tev->event = strdup(event); | 2432 | ret = probe_file__add_event(fd, tev); |
2539 | tev->group = strdup(group); | ||
2540 | if (tev->event == NULL || tev->group == NULL) { | ||
2541 | ret = -ENOMEM; | ||
2542 | break; | ||
2543 | } | ||
2544 | ret = write_probe_trace_event(fd, tev); | ||
2545 | if (ret < 0) | 2433 | if (ret < 0) |
2546 | break; | 2434 | break; |
2547 | /* Add added event name to namelist */ | ||
2548 | strlist__add(namelist, event); | ||
2549 | 2435 | ||
2550 | /* We use tev's name for showing new events */ | 2436 | /* We use tev's name for showing new events */ |
2551 | show_perf_probe_event(tev->group, tev->event, pev, | 2437 | show_perf_probe_event(tev->group, tev->event, pev, |
@@ -2748,6 +2634,98 @@ err_out: | |||
2748 | goto out; | 2634 | goto out; |
2749 | } | 2635 | } |
2750 | 2636 | ||
2637 | static int try_to_find_absolute_address(struct perf_probe_event *pev, | ||
2638 | struct probe_trace_event **tevs) | ||
2639 | { | ||
2640 | struct perf_probe_point *pp = &pev->point; | ||
2641 | struct probe_trace_event *tev; | ||
2642 | struct probe_trace_point *tp; | ||
2643 | int i, err; | ||
2644 | |||
2645 | if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2))) | ||
2646 | return -EINVAL; | ||
2647 | if (perf_probe_event_need_dwarf(pev)) | ||
2648 | return -EINVAL; | ||
2649 | |||
2650 | /* | ||
2651 | * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at | ||
2652 | * absolute address. | ||
2653 | * | ||
2654 | * Only one tev can be generated by this. | ||
2655 | */ | ||
2656 | *tevs = zalloc(sizeof(*tev)); | ||
2657 | if (!*tevs) | ||
2658 | return -ENOMEM; | ||
2659 | |||
2660 | tev = *tevs; | ||
2661 | tp = &tev->point; | ||
2662 | |||
2663 | /* | ||
2664 | * Don't use tp->offset, use address directly, because | ||
2665 | * in synthesize_probe_trace_command() address cannot be | ||
2666 | * zero. | ||
2667 | */ | ||
2668 | tp->address = pev->point.abs_address; | ||
2669 | tp->retprobe = pp->retprobe; | ||
2670 | tev->uprobes = pev->uprobes; | ||
2671 | |||
2672 | err = -ENOMEM; | ||
2673 | /* | ||
2674 | * Give it a '0x' leading symbol name. | ||
2675 | * In __add_probe_trace_events, a NULL symbol is interpreted as | ||
2676 | * invalid. | ||
2677 | */ | ||
2678 | if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0) | ||
2679 | goto errout; | ||
2680 | |||
2681 | /* For kprobe, check range */ | ||
2682 | if ((!tev->uprobes) && | ||
2683 | (kprobe_warn_out_range(tev->point.symbol, | ||
2684 | tev->point.address))) { | ||
2685 | err = -EACCES; | ||
2686 | goto errout; | ||
2687 | } | ||
2688 | |||
2689 | if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0) | ||
2690 | goto errout; | ||
2691 | |||
2692 | if (pev->target) { | ||
2693 | tp->module = strdup(pev->target); | ||
2694 | if (!tp->module) | ||
2695 | goto errout; | ||
2696 | } | ||
2697 | |||
2698 | if (tev->group) { | ||
2699 | tev->group = strdup(pev->group); | ||
2700 | if (!tev->group) | ||
2701 | goto errout; | ||
2702 | } | ||
2703 | |||
2704 | if (pev->event) { | ||
2705 | tev->event = strdup(pev->event); | ||
2706 | if (!tev->event) | ||
2707 | goto errout; | ||
2708 | } | ||
2709 | |||
2710 | tev->nargs = pev->nargs; | ||
2711 | tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs); | ||
2712 | if (!tev->args) { | ||
2713 | err = -ENOMEM; | ||
2714 | goto errout; | ||
2715 | } | ||
2716 | for (i = 0; i < tev->nargs; i++) | ||
2717 | copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]); | ||
2718 | |||
2719 | return 1; | ||
2720 | |||
2721 | errout: | ||
2722 | if (*tevs) { | ||
2723 | clear_probe_trace_events(*tevs, 1); | ||
2724 | *tevs = NULL; | ||
2725 | } | ||
2726 | return err; | ||
2727 | } | ||
2728 | |||
2751 | bool __weak arch__prefers_symtab(void) { return false; } | 2729 | bool __weak arch__prefers_symtab(void) { return false; } |
2752 | 2730 | ||
2753 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, | 2731 | static int convert_to_probe_trace_events(struct perf_probe_event *pev, |
@@ -2764,6 +2742,10 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev, | |||
2764 | } | 2742 | } |
2765 | } | 2743 | } |
2766 | 2744 | ||
2745 | ret = try_to_find_absolute_address(pev, tevs); | ||
2746 | if (ret > 0) | ||
2747 | return ret; | ||
2748 | |||
2767 | if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { | 2749 | if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { |
2768 | ret = find_probe_trace_events_from_map(pev, tevs); | 2750 | ret = find_probe_trace_events_from_map(pev, tevs); |
2769 | if (ret > 0) | 2751 | if (ret > 0) |
@@ -2838,68 +2820,9 @@ end: | |||
2838 | return ret; | 2820 | return ret; |
2839 | } | 2821 | } |
2840 | 2822 | ||
2841 | static int __del_trace_probe_event(int fd, struct str_node *ent) | ||
2842 | { | ||
2843 | char *p; | ||
2844 | char buf[128]; | ||
2845 | int ret; | ||
2846 | |||
2847 | /* Convert from perf-probe event to trace-probe event */ | ||
2848 | ret = e_snprintf(buf, 128, "-:%s", ent->s); | ||
2849 | if (ret < 0) | ||
2850 | goto error; | ||
2851 | |||
2852 | p = strchr(buf + 2, ':'); | ||
2853 | if (!p) { | ||
2854 | pr_debug("Internal error: %s should have ':' but not.\n", | ||
2855 | ent->s); | ||
2856 | ret = -ENOTSUP; | ||
2857 | goto error; | ||
2858 | } | ||
2859 | *p = '/'; | ||
2860 | |||
2861 | pr_debug("Writing event: %s\n", buf); | ||
2862 | ret = write(fd, buf, strlen(buf)); | ||
2863 | if (ret < 0) { | ||
2864 | ret = -errno; | ||
2865 | goto error; | ||
2866 | } | ||
2867 | |||
2868 | pr_info("Removed event: %s\n", ent->s); | ||
2869 | return 0; | ||
2870 | error: | ||
2871 | pr_warning("Failed to delete event: %s\n", | ||
2872 | strerror_r(-ret, buf, sizeof(buf))); | ||
2873 | return ret; | ||
2874 | } | ||
2875 | |||
2876 | static int del_trace_probe_events(int fd, struct strfilter *filter, | ||
2877 | struct strlist *namelist) | ||
2878 | { | ||
2879 | struct str_node *ent; | ||
2880 | const char *p; | ||
2881 | int ret = -ENOENT; | ||
2882 | |||
2883 | if (!namelist) | ||
2884 | return -ENOENT; | ||
2885 | |||
2886 | strlist__for_each(ent, namelist) { | ||
2887 | p = strchr(ent->s, ':'); | ||
2888 | if ((p && strfilter__compare(filter, p + 1)) || | ||
2889 | strfilter__compare(filter, ent->s)) { | ||
2890 | ret = __del_trace_probe_event(fd, ent); | ||
2891 | if (ret < 0) | ||
2892 | break; | ||
2893 | } | ||
2894 | } | ||
2895 | |||
2896 | return ret; | ||
2897 | } | ||
2898 | |||
2899 | int del_perf_probe_events(struct strfilter *filter) | 2823 | int del_perf_probe_events(struct strfilter *filter) |
2900 | { | 2824 | { |
2901 | int ret, ret2, ufd = -1, kfd = -1; | 2825 | int ret, ret2, ufd = -1, kfd = -1; |
2902 | struct strlist *namelist = NULL, *unamelist = NULL; | ||
2903 | char *str = strfilter__string(filter); | 2826 | char *str = strfilter__string(filter); |
2904 | 2827 | ||
2905 | if (!str) | 2828 | if (!str) |
@@ -2908,25 +2831,15 @@ int del_perf_probe_events(struct strfilter *filter) | |||
2908 | pr_debug("Delete filter: \'%s\'\n", str); | 2831 | pr_debug("Delete filter: \'%s\'\n", str); |
2909 | 2832 | ||
2910 | /* Get current event names */ | 2833 | /* Get current event names */ |
2911 | kfd = open_kprobe_events(true); | 2834 | ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); |
2912 | if (kfd >= 0) | 2835 | if (ret < 0) |
2913 | namelist = get_probe_trace_event_names(kfd, true); | 2836 | goto out; |
2914 | |||
2915 | ufd = open_uprobe_events(true); | ||
2916 | if (ufd >= 0) | ||
2917 | unamelist = get_probe_trace_event_names(ufd, true); | ||
2918 | |||
2919 | if (kfd < 0 && ufd < 0) { | ||
2920 | print_both_open_warning(kfd, ufd); | ||
2921 | ret = kfd; | ||
2922 | goto error; | ||
2923 | } | ||
2924 | 2837 | ||
2925 | ret = del_trace_probe_events(kfd, filter, namelist); | 2838 | ret = probe_file__del_events(kfd, filter); |
2926 | if (ret < 0 && ret != -ENOENT) | 2839 | if (ret < 0 && ret != -ENOENT) |
2927 | goto error; | 2840 | goto error; |
2928 | 2841 | ||
2929 | ret2 = del_trace_probe_events(ufd, filter, unamelist); | 2842 | ret2 = probe_file__del_events(ufd, filter); |
2930 | if (ret2 < 0 && ret2 != -ENOENT) { | 2843 | if (ret2 < 0 && ret2 != -ENOENT) { |
2931 | ret = ret2; | 2844 | ret = ret2; |
2932 | goto error; | 2845 | goto error; |
@@ -2937,15 +2850,11 @@ int del_perf_probe_events(struct strfilter *filter) | |||
2937 | ret = 0; | 2850 | ret = 0; |
2938 | 2851 | ||
2939 | error: | 2852 | error: |
2940 | if (kfd >= 0) { | 2853 | if (kfd >= 0) |
2941 | strlist__delete(namelist); | ||
2942 | close(kfd); | 2854 | close(kfd); |
2943 | } | 2855 | if (ufd >= 0) |
2944 | |||
2945 | if (ufd >= 0) { | ||
2946 | strlist__delete(unamelist); | ||
2947 | close(ufd); | 2856 | close(ufd); |
2948 | } | 2857 | out: |
2949 | free(str); | 2858 | free(str); |
2950 | 2859 | ||
2951 | return ret; | 2860 | return ret; |
@@ -3007,3 +2916,22 @@ end: | |||
3007 | return ret; | 2916 | return ret; |
3008 | } | 2917 | } |
3009 | 2918 | ||
2919 | int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, | ||
2920 | struct perf_probe_arg *pvar) | ||
2921 | { | ||
2922 | tvar->value = strdup(pvar->var); | ||
2923 | if (tvar->value == NULL) | ||
2924 | return -ENOMEM; | ||
2925 | if (pvar->type) { | ||
2926 | tvar->type = strdup(pvar->type); | ||
2927 | if (tvar->type == NULL) | ||
2928 | return -ENOMEM; | ||
2929 | } | ||
2930 | if (pvar->name) { | ||
2931 | tvar->name = strdup(pvar->name); | ||
2932 | if (tvar->name == NULL) | ||
2933 | return -ENOMEM; | ||
2934 | } else | ||
2935 | tvar->name = NULL; | ||
2936 | return 0; | ||
2937 | } | ||
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 31db6ee7db54..6e7ec68a4aa8 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h | |||
@@ -59,6 +59,7 @@ struct perf_probe_point { | |||
59 | bool retprobe; /* Return probe flag */ | 59 | bool retprobe; /* Return probe flag */ |
60 | char *lazy_line; /* Lazy matching pattern */ | 60 | char *lazy_line; /* Lazy matching pattern */ |
61 | unsigned long offset; /* Offset from function entry */ | 61 | unsigned long offset; /* Offset from function entry */ |
62 | unsigned long abs_address; /* Absolute address of the point */ | ||
62 | }; | 63 | }; |
63 | 64 | ||
64 | /* Perf probe probing argument field chain */ | 65 | /* Perf probe probing argument field chain */ |
@@ -106,9 +107,13 @@ struct variable_list { | |||
106 | struct strlist *vars; /* Available variables */ | 107 | struct strlist *vars; /* Available variables */ |
107 | }; | 108 | }; |
108 | 109 | ||
110 | struct map; | ||
111 | |||
109 | /* Command string to events */ | 112 | /* Command string to events */ |
110 | extern int parse_perf_probe_command(const char *cmd, | 113 | extern int parse_perf_probe_command(const char *cmd, |
111 | struct perf_probe_event *pev); | 114 | struct perf_probe_event *pev); |
115 | extern int parse_probe_trace_command(const char *cmd, | ||
116 | struct probe_trace_event *tev); | ||
112 | 117 | ||
113 | /* Events to command string */ | 118 | /* Events to command string */ |
114 | extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); | 119 | extern char *synthesize_perf_probe_command(struct perf_probe_event *pev); |
@@ -121,6 +126,7 @@ extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev); | |||
121 | 126 | ||
122 | /* Release event contents */ | 127 | /* Release event contents */ |
123 | extern void clear_perf_probe_event(struct perf_probe_event *pev); | 128 | extern void clear_perf_probe_event(struct perf_probe_event *pev); |
129 | extern void clear_probe_trace_event(struct probe_trace_event *tev); | ||
124 | 130 | ||
125 | /* Command string to line-range */ | 131 | /* Command string to line-range */ |
126 | extern int parse_line_range_desc(const char *cmd, struct line_range *lr); | 132 | extern int parse_line_range_desc(const char *cmd, struct line_range *lr); |
@@ -144,7 +150,14 @@ bool arch__prefers_symtab(void); | |||
144 | void arch__fix_tev_from_maps(struct perf_probe_event *pev, | 150 | void arch__fix_tev_from_maps(struct perf_probe_event *pev, |
145 | struct probe_trace_event *tev, struct map *map); | 151 | struct probe_trace_event *tev, struct map *map); |
146 | 152 | ||
153 | /* If there is no space to write, returns -E2BIG. */ | ||
154 | int e_snprintf(char *str, size_t size, const char *format, ...) | ||
155 | __attribute__((format(printf, 3, 4))); | ||
156 | |||
147 | /* Maximum index number of event-name postfix */ | 157 | /* Maximum index number of event-name postfix */ |
148 | #define MAX_EVENT_INDEX 1024 | 158 | #define MAX_EVENT_INDEX 1024 |
149 | 159 | ||
160 | int copy_to_probe_trace_arg(struct probe_trace_arg *tvar, | ||
161 | struct perf_probe_arg *pvar); | ||
162 | |||
150 | #endif /*_PROBE_EVENT_H */ | 163 | #endif /*_PROBE_EVENT_H */ |
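
The e_snprintf() declaration exported above carries the one contract callers in these files rely on: instead of reporting the would-be length on truncation like snprintf(), it returns -E2BIG when the output does not fit. A minimal hedged sketch of that contract (the "group:event" formatting mirrors its use elsewhere in this diff; the function and parameter names here are illustrative, not identifiers from the tree):

        #include "probe-event.h"

        /* Sketch only: build "group:event" with perf's e_snprintf() helper.
         * A negative return means the formatted string would not fit (-E2BIG),
         * so callers can simply propagate it as an error. */
        static int format_probe_name(char *buf, size_t size,
                                     const char *group, const char *event)
        {
                int ret = e_snprintf(buf, size, "%s:%s", group, event);

                return ret < 0 ? ret : 0;
        }
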
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c new file mode 100644 index 000000000000..bbb243717ec8 --- /dev/null +++ b/tools/perf/util/probe-file.c | |||
@@ -0,0 +1,301 @@ | |||
1 | /* | ||
2 | * probe-file.c : operate ftrace k/uprobe events files | ||
3 | * | ||
4 | * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | */ | ||
17 | #include "util.h" | ||
18 | #include "event.h" | ||
19 | #include "strlist.h" | ||
20 | #include "debug.h" | ||
21 | #include "cache.h" | ||
22 | #include "color.h" | ||
23 | #include "symbol.h" | ||
24 | #include "thread.h" | ||
25 | #include <api/fs/debugfs.h> | ||
26 | #include <api/fs/tracefs.h> | ||
27 | #include "probe-event.h" | ||
28 | #include "probe-file.h" | ||
29 | #include "session.h" | ||
30 | |||
31 | #define MAX_CMDLEN 256 | ||
32 | |||
33 | static void print_open_warning(int err, bool uprobe) | ||
34 | { | ||
35 | char sbuf[STRERR_BUFSIZE]; | ||
36 | |||
37 | if (err == -ENOENT) { | ||
38 | const char *config; | ||
39 | |||
40 | if (uprobe) | ||
41 | config = "CONFIG_UPROBE_EVENTS"; | ||
42 | else | ||
43 | config = "CONFIG_KPROBE_EVENTS"; | ||
44 | |||
45 | pr_warning("%cprobe_events file does not exist" | ||
46 | " - please rebuild kernel with %s.\n", | ||
47 | uprobe ? 'u' : 'k', config); | ||
48 | } else if (err == -ENOTSUP) | ||
49 | pr_warning("Tracefs or debugfs is not mounted.\n"); | ||
50 | else | ||
51 | pr_warning("Failed to open %cprobe_events: %s\n", | ||
52 | uprobe ? 'u' : 'k', | ||
53 | strerror_r(-err, sbuf, sizeof(sbuf))); | ||
54 | } | ||
55 | |||
56 | static void print_both_open_warning(int kerr, int uerr) | ||
57 | { | ||
58 | /* Both kprobes and uprobes are disabled, warn it. */ | ||
59 | if (kerr == -ENOTSUP && uerr == -ENOTSUP) | ||
60 | pr_warning("Tracefs or debugfs is not mounted.\n"); | ||
61 | else if (kerr == -ENOENT && uerr == -ENOENT) | ||
62 | pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " | ||
63 | "or/and CONFIG_UPROBE_EVENTS.\n"); | ||
64 | else { | ||
65 | char sbuf[STRERR_BUFSIZE]; | ||
66 | pr_warning("Failed to open kprobe events: %s.\n", | ||
67 | strerror_r(-kerr, sbuf, sizeof(sbuf))); | ||
68 | pr_warning("Failed to open uprobe events: %s.\n", | ||
69 | strerror_r(-uerr, sbuf, sizeof(sbuf))); | ||
70 | } | ||
71 | } | ||
72 | |||
73 | static int open_probe_events(const char *trace_file, bool readwrite) | ||
74 | { | ||
75 | char buf[PATH_MAX]; | ||
76 | const char *__debugfs; | ||
77 | const char *tracing_dir = ""; | ||
78 | int ret; | ||
79 | |||
80 | __debugfs = tracefs_find_mountpoint(); | ||
81 | if (__debugfs == NULL) { | ||
82 | tracing_dir = "tracing/"; | ||
83 | |||
84 | __debugfs = debugfs_find_mountpoint(); | ||
85 | if (__debugfs == NULL) | ||
86 | return -ENOTSUP; | ||
87 | } | ||
88 | |||
89 | ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", | ||
90 | __debugfs, tracing_dir, trace_file); | ||
91 | if (ret >= 0) { | ||
92 | pr_debug("Opening %s write=%d\n", buf, readwrite); | ||
93 | if (readwrite && !probe_event_dry_run) | ||
94 | ret = open(buf, O_RDWR | O_APPEND, 0); | ||
95 | else | ||
96 | ret = open(buf, O_RDONLY, 0); | ||
97 | |||
98 | if (ret < 0) | ||
99 | ret = -errno; | ||
100 | } | ||
101 | return ret; | ||
102 | } | ||
103 | |||
104 | static int open_kprobe_events(bool readwrite) | ||
105 | { | ||
106 | return open_probe_events("kprobe_events", readwrite); | ||
107 | } | ||
108 | |||
109 | static int open_uprobe_events(bool readwrite) | ||
110 | { | ||
111 | return open_probe_events("uprobe_events", readwrite); | ||
112 | } | ||
113 | |||
114 | int probe_file__open(int flag) | ||
115 | { | ||
116 | int fd; | ||
117 | |||
118 | if (flag & PF_FL_UPROBE) | ||
119 | fd = open_uprobe_events(flag & PF_FL_RW); | ||
120 | else | ||
121 | fd = open_kprobe_events(flag & PF_FL_RW); | ||
122 | if (fd < 0) | ||
123 | print_open_warning(fd, flag & PF_FL_UPROBE); | ||
124 | |||
125 | return fd; | ||
126 | } | ||
127 | |||
128 | int probe_file__open_both(int *kfd, int *ufd, int flag) | ||
129 | { | ||
130 | if (!kfd || !ufd) | ||
131 | return -EINVAL; | ||
132 | |||
133 | *kfd = open_kprobe_events(flag & PF_FL_RW); | ||
134 | *ufd = open_uprobe_events(flag & PF_FL_RW); | ||
135 | if (*kfd < 0 && *ufd < 0) { | ||
136 | print_both_open_warning(*kfd, *ufd); | ||
137 | return *kfd; | ||
138 | } | ||
139 | |||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | /* Get raw string list of current kprobe_events or uprobe_events */ | ||
144 | struct strlist *probe_file__get_rawlist(int fd) | ||
145 | { | ||
146 | int ret, idx; | ||
147 | FILE *fp; | ||
148 | char buf[MAX_CMDLEN]; | ||
149 | char *p; | ||
150 | struct strlist *sl; | ||
151 | |||
152 | sl = strlist__new(NULL, NULL); | ||
153 | |||
154 | fp = fdopen(dup(fd), "r"); | ||
155 | while (!feof(fp)) { | ||
156 | p = fgets(buf, MAX_CMDLEN, fp); | ||
157 | if (!p) | ||
158 | break; | ||
159 | |||
160 | idx = strlen(p) - 1; | ||
161 | if (p[idx] == '\n') | ||
162 | p[idx] = '\0'; | ||
163 | ret = strlist__add(sl, buf); | ||
164 | if (ret < 0) { | ||
165 | pr_debug("strlist__add failed (%d)\n", ret); | ||
166 | strlist__delete(sl); | ||
167 | return NULL; | ||
168 | } | ||
169 | } | ||
170 | fclose(fp); | ||
171 | |||
172 | return sl; | ||
173 | } | ||
174 | |||
175 | static struct strlist *__probe_file__get_namelist(int fd, bool include_group) | ||
176 | { | ||
177 | char buf[128]; | ||
178 | struct strlist *sl, *rawlist; | ||
179 | struct str_node *ent; | ||
180 | struct probe_trace_event tev; | ||
181 | int ret = 0; | ||
182 | |||
183 | memset(&tev, 0, sizeof(tev)); | ||
184 | rawlist = probe_file__get_rawlist(fd); | ||
185 | if (!rawlist) | ||
186 | return NULL; | ||
187 | sl = strlist__new(NULL, NULL); | ||
188 | strlist__for_each(ent, rawlist) { | ||
189 | ret = parse_probe_trace_command(ent->s, &tev); | ||
190 | if (ret < 0) | ||
191 | break; | ||
192 | if (include_group) { | ||
193 | ret = e_snprintf(buf, 128, "%s:%s", tev.group, | ||
194 | tev.event); | ||
195 | if (ret >= 0) | ||
196 | ret = strlist__add(sl, buf); | ||
197 | } else | ||
198 | ret = strlist__add(sl, tev.event); | ||
199 | clear_probe_trace_event(&tev); | ||
200 | if (ret < 0) | ||
201 | break; | ||
202 | } | ||
203 | strlist__delete(rawlist); | ||
204 | |||
205 | if (ret < 0) { | ||
206 | strlist__delete(sl); | ||
207 | return NULL; | ||
208 | } | ||
209 | return sl; | ||
210 | } | ||
211 | |||
212 | /* Get current perf-probe event names */ | ||
213 | struct strlist *probe_file__get_namelist(int fd) | ||
214 | { | ||
215 | return __probe_file__get_namelist(fd, false); | ||
216 | } | ||
217 | |||
218 | int probe_file__add_event(int fd, struct probe_trace_event *tev) | ||
219 | { | ||
220 | int ret = 0; | ||
221 | char *buf = synthesize_probe_trace_command(tev); | ||
222 | char sbuf[STRERR_BUFSIZE]; | ||
223 | |||
224 | if (!buf) { | ||
225 | pr_debug("Failed to synthesize probe trace event.\n"); | ||
226 | return -EINVAL; | ||
227 | } | ||
228 | |||
229 | pr_debug("Writing event: %s\n", buf); | ||
230 | if (!probe_event_dry_run) { | ||
231 | ret = write(fd, buf, strlen(buf)); | ||
232 | if (ret <= 0) { | ||
233 | ret = -errno; | ||
234 | pr_warning("Failed to write event: %s\n", | ||
235 | strerror_r(errno, sbuf, sizeof(sbuf))); | ||
236 | } | ||
237 | } | ||
238 | free(buf); | ||
239 | |||
240 | return ret; | ||
241 | } | ||
242 | |||
243 | static int __del_trace_probe_event(int fd, struct str_node *ent) | ||
244 | { | ||
245 | char *p; | ||
246 | char buf[128]; | ||
247 | int ret; | ||
248 | |||
249 | /* Convert from perf-probe event to trace-probe event */ | ||
250 | ret = e_snprintf(buf, 128, "-:%s", ent->s); | ||
251 | if (ret < 0) | ||
252 | goto error; | ||
253 | |||
254 | p = strchr(buf + 2, ':'); | ||
255 | if (!p) { | ||
256 | pr_debug("Internal error: %s should have ':' but not.\n", | ||
257 | ent->s); | ||
258 | ret = -ENOTSUP; | ||
259 | goto error; | ||
260 | } | ||
261 | *p = '/'; | ||
262 | |||
263 | pr_debug("Writing event: %s\n", buf); | ||
264 | ret = write(fd, buf, strlen(buf)); | ||
265 | if (ret < 0) { | ||
266 | ret = -errno; | ||
267 | goto error; | ||
268 | } | ||
269 | |||
270 | pr_info("Removed event: %s\n", ent->s); | ||
271 | return 0; | ||
272 | error: | ||
273 | pr_warning("Failed to delete event: %s\n", | ||
274 | strerror_r(-ret, buf, sizeof(buf))); | ||
275 | return ret; | ||
276 | } | ||
277 | |||
278 | int probe_file__del_events(int fd, struct strfilter *filter) | ||
279 | { | ||
280 | struct strlist *namelist; | ||
281 | struct str_node *ent; | ||
282 | const char *p; | ||
283 | int ret = -ENOENT; | ||
284 | |||
285 | namelist = __probe_file__get_namelist(fd, true); | ||
286 | if (!namelist) | ||
287 | return -ENOENT; | ||
288 | |||
289 | strlist__for_each(ent, namelist) { | ||
290 | p = strchr(ent->s, ':'); | ||
291 | if ((p && strfilter__compare(filter, p + 1)) || | ||
292 | strfilter__compare(filter, ent->s)) { | ||
293 | ret = __del_trace_probe_event(fd, ent); | ||
294 | if (ret < 0) | ||
295 | break; | ||
296 | } | ||
297 | } | ||
298 | strlist__delete(namelist); | ||
299 | |||
300 | return ret; | ||
301 | } | ||
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h new file mode 100644 index 000000000000..ada94a242a17 --- /dev/null +++ b/tools/perf/util/probe-file.h | |||
@@ -0,0 +1,18 @@ | |||
1 | #ifndef __PROBE_FILE_H | ||
2 | #define __PROBE_FILE_H | ||
3 | |||
4 | #include "strlist.h" | ||
5 | #include "strfilter.h" | ||
6 | #include "probe-event.h" | ||
7 | |||
8 | #define PF_FL_UPROBE 1 | ||
9 | #define PF_FL_RW 2 | ||
10 | |||
11 | int probe_file__open(int flag); | ||
12 | int probe_file__open_both(int *kfd, int *ufd, int flag); | ||
13 | struct strlist *probe_file__get_namelist(int fd); | ||
14 | struct strlist *probe_file__get_rawlist(int fd); | ||
15 | int probe_file__add_event(int fd, struct probe_trace_event *tev); | ||
16 | int probe_file__del_events(int fd, struct strfilter *filter); | ||
17 | |||
18 | #endif | ||
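
probe-file.h above is the whole interface that probe-event.c now goes through. As a rough sketch of the intended call pattern, mirroring the reworked del_perf_probe_events() earlier in this diff (error handling trimmed; it assumes perf's util/ headers are on the include path, and the wrapper function name is made up for illustration):

        #include <unistd.h>
        #include <errno.h>
        #include "probe-file.h"
        #include "strfilter.h"

        /* Sketch only: open kprobe_events and uprobe_events read-write, delete
         * every event matching 'filter' from whichever file could be opened,
         * then close the descriptors. */
        static int delete_matching_probes(struct strfilter *filter)
        {
                int kfd = -1, ufd = -1, ret;

                ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
                if (ret < 0)
                        return ret;     /* neither file was available */

                if (kfd >= 0)
                        ret = probe_file__del_events(kfd, filter);
                if (ufd >= 0 && (ret >= 0 || ret == -ENOENT))
                        ret = probe_file__del_events(ufd, filter);

                if (kfd >= 0)
                        close(kfd);
                if (ufd >= 0)
                        close(ufd);

                return ret;
        }
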
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 2da65a710893..29c43c0680a8 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -553,24 +553,9 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) | |||
553 | char buf[32], *ptr; | 553 | char buf[32], *ptr; |
554 | int ret = 0; | 554 | int ret = 0; |
555 | 555 | ||
556 | if (!is_c_varname(pf->pvar->var)) { | 556 | /* Copy raw parameters */ |
557 | /* Copy raw parameters */ | 557 | if (!is_c_varname(pf->pvar->var)) |
558 | pf->tvar->value = strdup(pf->pvar->var); | 558 | return copy_to_probe_trace_arg(pf->tvar, pf->pvar); |
559 | if (pf->tvar->value == NULL) | ||
560 | return -ENOMEM; | ||
561 | if (pf->pvar->type) { | ||
562 | pf->tvar->type = strdup(pf->pvar->type); | ||
563 | if (pf->tvar->type == NULL) | ||
564 | return -ENOMEM; | ||
565 | } | ||
566 | if (pf->pvar->name) { | ||
567 | pf->tvar->name = strdup(pf->pvar->name); | ||
568 | if (pf->tvar->name == NULL) | ||
569 | return -ENOMEM; | ||
570 | } else | ||
571 | pf->tvar->name = NULL; | ||
572 | return 0; | ||
573 | } | ||
574 | 559 | ||
575 | if (pf->pvar->name) | 560 | if (pf->pvar->name) |
576 | pf->tvar->name = strdup(pf->pvar->name); | 561 | pf->tvar->name = strdup(pf->pvar->name); |
@@ -1355,7 +1340,7 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) | |||
1355 | vl->point.offset); | 1340 | vl->point.offset); |
1356 | 1341 | ||
1357 | /* Find local variables */ | 1342 | /* Find local variables */ |
1358 | vl->vars = strlist__new(true, NULL); | 1343 | vl->vars = strlist__new(NULL, NULL); |
1359 | if (vl->vars == NULL) | 1344 | if (vl->vars == NULL) |
1360 | return -ENOMEM; | 1345 | return -ENOMEM; |
1361 | af->child = true; | 1346 | af->child = true; |
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 0766d98c5da5..51be28b1bca2 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources | |||
@@ -16,7 +16,7 @@ util/util.c | |||
16 | util/xyarray.c | 16 | util/xyarray.c |
17 | util/cgroup.c | 17 | util/cgroup.c |
18 | util/rblist.c | 18 | util/rblist.c |
19 | util/stat.c | 19 | util/counts.c |
20 | util/strlist.c | 20 | util/strlist.c |
21 | util/trace-event.c | 21 | util/trace-event.c |
22 | ../lib/rbtree.c | 22 | ../lib/rbtree.c |
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 626422eda727..6324fe6b161e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c | |||
@@ -941,76 +941,84 @@ static int pyrf_evlist__setup_types(void) | |||
941 | return PyType_Ready(&pyrf_evlist__type); | 941 | return PyType_Ready(&pyrf_evlist__type); |
942 | } | 942 | } |
943 | 943 | ||
944 | #define PERF_CONST(name) { #name, PERF_##name } | ||
945 | |||
944 | static struct { | 946 | static struct { |
945 | const char *name; | 947 | const char *name; |
946 | int value; | 948 | int value; |
947 | } perf__constants[] = { | 949 | } perf__constants[] = { |
948 | { "TYPE_HARDWARE", PERF_TYPE_HARDWARE }, | 950 | PERF_CONST(TYPE_HARDWARE), |
949 | { "TYPE_SOFTWARE", PERF_TYPE_SOFTWARE }, | 951 | PERF_CONST(TYPE_SOFTWARE), |
950 | { "TYPE_TRACEPOINT", PERF_TYPE_TRACEPOINT }, | 952 | PERF_CONST(TYPE_TRACEPOINT), |
951 | { "TYPE_HW_CACHE", PERF_TYPE_HW_CACHE }, | 953 | PERF_CONST(TYPE_HW_CACHE), |
952 | { "TYPE_RAW", PERF_TYPE_RAW }, | 954 | PERF_CONST(TYPE_RAW), |
953 | { "TYPE_BREAKPOINT", PERF_TYPE_BREAKPOINT }, | 955 | PERF_CONST(TYPE_BREAKPOINT), |
954 | 956 | ||
955 | { "COUNT_HW_CPU_CYCLES", PERF_COUNT_HW_CPU_CYCLES }, | 957 | PERF_CONST(COUNT_HW_CPU_CYCLES), |
956 | { "COUNT_HW_INSTRUCTIONS", PERF_COUNT_HW_INSTRUCTIONS }, | 958 | PERF_CONST(COUNT_HW_INSTRUCTIONS), |
957 | { "COUNT_HW_CACHE_REFERENCES", PERF_COUNT_HW_CACHE_REFERENCES }, | 959 | PERF_CONST(COUNT_HW_CACHE_REFERENCES), |
958 | { "COUNT_HW_CACHE_MISSES", PERF_COUNT_HW_CACHE_MISSES }, | 960 | PERF_CONST(COUNT_HW_CACHE_MISSES), |
959 | { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, | 961 | PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS), |
960 | { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, | 962 | PERF_CONST(COUNT_HW_BRANCH_MISSES), |
961 | { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, | 963 | PERF_CONST(COUNT_HW_BUS_CYCLES), |
962 | { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, | 964 | PERF_CONST(COUNT_HW_CACHE_L1D), |
963 | { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, | 965 | PERF_CONST(COUNT_HW_CACHE_L1I), |
964 | { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, | 966 | PERF_CONST(COUNT_HW_CACHE_LL), |
965 | { "COUNT_HW_CACHE_DTLB", PERF_COUNT_HW_CACHE_DTLB }, | 967 | PERF_CONST(COUNT_HW_CACHE_DTLB), |
966 | { "COUNT_HW_CACHE_ITLB", PERF_COUNT_HW_CACHE_ITLB }, | 968 | PERF_CONST(COUNT_HW_CACHE_ITLB), |
967 | { "COUNT_HW_CACHE_BPU", PERF_COUNT_HW_CACHE_BPU }, | 969 | PERF_CONST(COUNT_HW_CACHE_BPU), |
968 | { "COUNT_HW_CACHE_OP_READ", PERF_COUNT_HW_CACHE_OP_READ }, | 970 | PERF_CONST(COUNT_HW_CACHE_OP_READ), |
969 | { "COUNT_HW_CACHE_OP_WRITE", PERF_COUNT_HW_CACHE_OP_WRITE }, | 971 | PERF_CONST(COUNT_HW_CACHE_OP_WRITE), |
970 | { "COUNT_HW_CACHE_OP_PREFETCH", PERF_COUNT_HW_CACHE_OP_PREFETCH }, | 972 | PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH), |
971 | { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, | 973 | PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS), |
972 | { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, | 974 | PERF_CONST(COUNT_HW_CACHE_RESULT_MISS), |
973 | 975 | ||
974 | { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, | 976 | PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND), |
975 | { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, | 977 | PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND), |
976 | 978 | ||
977 | { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, | 979 | PERF_CONST(COUNT_SW_CPU_CLOCK), |
978 | { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, | 980 | PERF_CONST(COUNT_SW_TASK_CLOCK), |
979 | { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, | 981 | PERF_CONST(COUNT_SW_PAGE_FAULTS), |
980 | { "COUNT_SW_CONTEXT_SWITCHES", PERF_COUNT_SW_CONTEXT_SWITCHES }, | 982 | PERF_CONST(COUNT_SW_CONTEXT_SWITCHES), |
981 | { "COUNT_SW_CPU_MIGRATIONS", PERF_COUNT_SW_CPU_MIGRATIONS }, | 983 | PERF_CONST(COUNT_SW_CPU_MIGRATIONS), |
982 | { "COUNT_SW_PAGE_FAULTS_MIN", PERF_COUNT_SW_PAGE_FAULTS_MIN }, | 984 | PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN), |
983 | { "COUNT_SW_PAGE_FAULTS_MAJ", PERF_COUNT_SW_PAGE_FAULTS_MAJ }, | 985 | PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ), |
984 | { "COUNT_SW_ALIGNMENT_FAULTS", PERF_COUNT_SW_ALIGNMENT_FAULTS }, | 986 | PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS), |
985 | { "COUNT_SW_EMULATION_FAULTS", PERF_COUNT_SW_EMULATION_FAULTS }, | 987 | PERF_CONST(COUNT_SW_EMULATION_FAULTS), |
986 | { "COUNT_SW_DUMMY", PERF_COUNT_SW_DUMMY }, | 988 | PERF_CONST(COUNT_SW_DUMMY), |
987 | 989 | ||
988 | { "SAMPLE_IP", PERF_SAMPLE_IP }, | 990 | PERF_CONST(SAMPLE_IP), |
989 | { "SAMPLE_TID", PERF_SAMPLE_TID }, | 991 | PERF_CONST(SAMPLE_TID), |
990 | { "SAMPLE_TIME", PERF_SAMPLE_TIME }, | 992 | PERF_CONST(SAMPLE_TIME), |
991 | { "SAMPLE_ADDR", PERF_SAMPLE_ADDR }, | 993 | PERF_CONST(SAMPLE_ADDR), |
992 | { "SAMPLE_READ", PERF_SAMPLE_READ }, | 994 | PERF_CONST(SAMPLE_READ), |
993 | { "SAMPLE_CALLCHAIN", PERF_SAMPLE_CALLCHAIN }, | 995 | PERF_CONST(SAMPLE_CALLCHAIN), |
994 | { "SAMPLE_ID", PERF_SAMPLE_ID }, | 996 | PERF_CONST(SAMPLE_ID), |
995 | { "SAMPLE_CPU", PERF_SAMPLE_CPU }, | 997 | PERF_CONST(SAMPLE_CPU), |
996 | { "SAMPLE_PERIOD", PERF_SAMPLE_PERIOD }, | 998 | PERF_CONST(SAMPLE_PERIOD), |
997 | { "SAMPLE_STREAM_ID", PERF_SAMPLE_STREAM_ID }, | 999 | PERF_CONST(SAMPLE_STREAM_ID), |
998 | { "SAMPLE_RAW", PERF_SAMPLE_RAW }, | 1000 | PERF_CONST(SAMPLE_RAW), |
999 | 1001 | ||
1000 | { "FORMAT_TOTAL_TIME_ENABLED", PERF_FORMAT_TOTAL_TIME_ENABLED }, | 1002 | PERF_CONST(FORMAT_TOTAL_TIME_ENABLED), |
1001 | { "FORMAT_TOTAL_TIME_RUNNING", PERF_FORMAT_TOTAL_TIME_RUNNING }, | 1003 | PERF_CONST(FORMAT_TOTAL_TIME_RUNNING), |
1002 | { "FORMAT_ID", PERF_FORMAT_ID }, | 1004 | PERF_CONST(FORMAT_ID), |
1003 | { "FORMAT_GROUP", PERF_FORMAT_GROUP }, | 1005 | PERF_CONST(FORMAT_GROUP), |
1004 | 1006 | ||
1005 | { "RECORD_MMAP", PERF_RECORD_MMAP }, | 1007 | PERF_CONST(RECORD_MMAP), |
1006 | { "RECORD_LOST", PERF_RECORD_LOST }, | 1008 | PERF_CONST(RECORD_LOST), |
1007 | { "RECORD_COMM", PERF_RECORD_COMM }, | 1009 | PERF_CONST(RECORD_COMM), |
1008 | { "RECORD_EXIT", PERF_RECORD_EXIT }, | 1010 | PERF_CONST(RECORD_EXIT), |
1009 | { "RECORD_THROTTLE", PERF_RECORD_THROTTLE }, | 1011 | PERF_CONST(RECORD_THROTTLE), |
1010 | { "RECORD_UNTHROTTLE", PERF_RECORD_UNTHROTTLE }, | 1012 | PERF_CONST(RECORD_UNTHROTTLE), |
1011 | { "RECORD_FORK", PERF_RECORD_FORK }, | 1013 | PERF_CONST(RECORD_FORK), |
1012 | { "RECORD_READ", PERF_RECORD_READ }, | 1014 | PERF_CONST(RECORD_READ), |
1013 | { "RECORD_SAMPLE", PERF_RECORD_SAMPLE }, | 1015 | PERF_CONST(RECORD_SAMPLE), |
1016 | PERF_CONST(RECORD_MMAP2), | ||
1017 | PERF_CONST(RECORD_AUX), | ||
1018 | PERF_CONST(RECORD_ITRACE_START), | ||
1019 | PERF_CONST(RECORD_LOST_SAMPLES), | ||
1020 | PERF_CONST(RECORD_SWITCH), | ||
1021 | PERF_CONST(RECORD_SWITCH_CPU_WIDE), | ||
1014 | { .name = NULL, }, | 1022 | { .name = NULL, }, |
1015 | }; | 1023 | }; |
1016 | 1024 | ||
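
The python.c hunk above is purely mechanical: PERF_CONST() uses the preprocessor's stringizing (#) and token-pasting (##) operators so each constant is spelled once instead of twice. A tiny standalone illustration of the same pattern, using made-up names rather than perf's:

        #include <stdio.h>

        enum { DEMO_FOO = 1, DEMO_BAR = 2 };

        /* #name produces the string "FOO"; DEMO_##name pastes into the
         * enumerator DEMO_FOO, so one macro argument fills both fields. */
        #define DEMO_CONST(name) { #name, DEMO_##name }

        static struct {
                const char *name;
                int value;
        } demo_constants[] = {
                DEMO_CONST(FOO),        /* expands to { "FOO", DEMO_FOO } */
                DEMO_CONST(BAR),
                { .name = NULL, },
        };

        int main(void)
        {
                int i;

                for (i = 0; demo_constants[i].name; i++)
                        printf("%s = %d\n", demo_constants[i].name,
                               demo_constants[i].value);
                return 0;
        }
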
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 1f7becbe5e18..0467367dc315 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c | |||
@@ -85,6 +85,11 @@ static void perf_probe_comm_exec(struct perf_evsel *evsel) | |||
85 | evsel->attr.comm_exec = 1; | 85 | evsel->attr.comm_exec = 1; |
86 | } | 86 | } |
87 | 87 | ||
88 | static void perf_probe_context_switch(struct perf_evsel *evsel) | ||
89 | { | ||
90 | evsel->attr.context_switch = 1; | ||
91 | } | ||
92 | |||
88 | bool perf_can_sample_identifier(void) | 93 | bool perf_can_sample_identifier(void) |
89 | { | 94 | { |
90 | return perf_probe_api(perf_probe_sample_identifier); | 95 | return perf_probe_api(perf_probe_sample_identifier); |
@@ -95,6 +100,35 @@ static bool perf_can_comm_exec(void) | |||
95 | return perf_probe_api(perf_probe_comm_exec); | 100 | return perf_probe_api(perf_probe_comm_exec); |
96 | } | 101 | } |
97 | 102 | ||
103 | bool perf_can_record_switch_events(void) | ||
104 | { | ||
105 | return perf_probe_api(perf_probe_context_switch); | ||
106 | } | ||
107 | |||
108 | bool perf_can_record_cpu_wide(void) | ||
109 | { | ||
110 | struct perf_event_attr attr = { | ||
111 | .type = PERF_TYPE_SOFTWARE, | ||
112 | .config = PERF_COUNT_SW_CPU_CLOCK, | ||
113 | .exclude_kernel = 1, | ||
114 | }; | ||
115 | struct cpu_map *cpus; | ||
116 | int cpu, fd; | ||
117 | |||
118 | cpus = cpu_map__new(NULL); | ||
119 | if (!cpus) | ||
120 | return false; | ||
121 | cpu = cpus->map[0]; | ||
122 | cpu_map__put(cpus); | ||
123 | |||
124 | fd = sys_perf_event_open(&attr, -1, cpu, -1, 0); | ||
125 | if (fd < 0) | ||
126 | return false; | ||
127 | close(fd); | ||
128 | |||
129 | return true; | ||
130 | } | ||
131 | |||
98 | void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) | 132 | void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) |
99 | { | 133 | { |
100 | struct perf_evsel *evsel; | 134 | struct perf_evsel *evsel; |
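
perf_can_record_cpu_wide() above answers "may this user open a CPU-wide event?" simply by trying: it opens a harmless software counter with pid == -1 on one CPU and checks whether that succeeds. A standalone sketch of the same probe, calling the perf_event_open syscall directly instead of perf's internal sys_perf_event_open() wrapper:

        #include <linux/perf_event.h>
        #include <sys/syscall.h>
        #include <string.h>
        #include <unistd.h>
        #include <stdio.h>

        /* Sketch only: report whether a CPU-wide (pid == -1, cpu >= 0)
         * software event can be opened by the current user. */
        static int can_open_cpu_wide(int cpu)
        {
                struct perf_event_attr attr;
                int fd;

                memset(&attr, 0, sizeof(attr));
                attr.size = sizeof(attr);
                attr.type = PERF_TYPE_SOFTWARE;
                attr.config = PERF_COUNT_SW_CPU_CLOCK;
                attr.exclude_kernel = 1;

                fd = syscall(SYS_perf_event_open, &attr, -1 /* pid */, cpu,
                             -1 /* group_fd */, 0 /* flags */);
                if (fd < 0)
                        return 0;
                close(fd);
                return 1;
        }

        int main(void)
        {
                printf("cpu-wide events: %s\n",
                       can_open_cpu_wide(0) ? "usable" : "not permitted");
                return 0;
        }
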
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index ed9dc2555ec7..fc3f7c922f99 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -170,7 +170,7 @@ static void perf_session__delete_threads(struct perf_session *session) | |||
170 | machine__delete_threads(&session->machines.host); | 170 | machine__delete_threads(&session->machines.host); |
171 | } | 171 | } |
172 | 172 | ||
173 | static void perf_session_env__delete(struct perf_session_env *env) | 173 | static void perf_session_env__exit(struct perf_env *env) |
174 | { | 174 | { |
175 | zfree(&env->hostname); | 175 | zfree(&env->hostname); |
176 | zfree(&env->os_release); | 176 | zfree(&env->os_release); |
@@ -180,6 +180,7 @@ static void perf_session_env__delete(struct perf_session_env *env) | |||
180 | zfree(&env->cpuid); | 180 | zfree(&env->cpuid); |
181 | 181 | ||
182 | zfree(&env->cmdline); | 182 | zfree(&env->cmdline); |
183 | zfree(&env->cmdline_argv); | ||
183 | zfree(&env->sibling_cores); | 184 | zfree(&env->sibling_cores); |
184 | zfree(&env->sibling_threads); | 185 | zfree(&env->sibling_threads); |
185 | zfree(&env->numa_nodes); | 186 | zfree(&env->numa_nodes); |
@@ -192,7 +193,7 @@ void perf_session__delete(struct perf_session *session) | |||
192 | auxtrace_index__free(&session->auxtrace_index); | 193 | auxtrace_index__free(&session->auxtrace_index); |
193 | perf_session__destroy_kernel_maps(session); | 194 | perf_session__destroy_kernel_maps(session); |
194 | perf_session__delete_threads(session); | 195 | perf_session__delete_threads(session); |
195 | perf_session_env__delete(&session->header.env); | 196 | perf_session_env__exit(&session->header.env); |
196 | machines__exit(&session->machines); | 197 | machines__exit(&session->machines); |
197 | if (session->file) | 198 | if (session->file) |
198 | perf_data_file__close(session->file); | 199 | perf_data_file__close(session->file); |
@@ -332,6 +333,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool) | |||
332 | tool->aux = perf_event__process_aux; | 333 | tool->aux = perf_event__process_aux; |
333 | if (tool->itrace_start == NULL) | 334 | if (tool->itrace_start == NULL) |
334 | tool->itrace_start = perf_event__process_itrace_start; | 335 | tool->itrace_start = perf_event__process_itrace_start; |
336 | if (tool->context_switch == NULL) | ||
337 | tool->context_switch = perf_event__process_switch; | ||
335 | if (tool->read == NULL) | 338 | if (tool->read == NULL) |
336 | tool->read = process_event_sample_stub; | 339 | tool->read = process_event_sample_stub; |
337 | if (tool->throttle == NULL) | 340 | if (tool->throttle == NULL) |
@@ -470,6 +473,19 @@ static void perf_event__itrace_start_swap(union perf_event *event, | |||
470 | swap_sample_id_all(event, &event->itrace_start + 1); | 473 | swap_sample_id_all(event, &event->itrace_start + 1); |
471 | } | 474 | } |
472 | 475 | ||
476 | static void perf_event__switch_swap(union perf_event *event, bool sample_id_all) | ||
477 | { | ||
478 | if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) { | ||
479 | event->context_switch.next_prev_pid = | ||
480 | bswap_32(event->context_switch.next_prev_pid); | ||
481 | event->context_switch.next_prev_tid = | ||
482 | bswap_32(event->context_switch.next_prev_tid); | ||
483 | } | ||
484 | |||
485 | if (sample_id_all) | ||
486 | swap_sample_id_all(event, &event->context_switch + 1); | ||
487 | } | ||
488 | |||
473 | static void perf_event__throttle_swap(union perf_event *event, | 489 | static void perf_event__throttle_swap(union perf_event *event, |
474 | bool sample_id_all) | 490 | bool sample_id_all) |
475 | { | 491 | { |
@@ -632,6 +648,8 @@ static perf_event__swap_op perf_event__swap_ops[] = { | |||
632 | [PERF_RECORD_AUX] = perf_event__aux_swap, | 648 | [PERF_RECORD_AUX] = perf_event__aux_swap, |
633 | [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, | 649 | [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, |
634 | [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, | 650 | [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, |
651 | [PERF_RECORD_SWITCH] = perf_event__switch_swap, | ||
652 | [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, | ||
635 | [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, | 653 | [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, |
636 | [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, | 654 | [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, |
637 | [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, | 655 | [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, |
@@ -766,10 +784,18 @@ static void branch_stack__printf(struct perf_sample *sample) | |||
766 | 784 | ||
767 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); | 785 | printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr); |
768 | 786 | ||
769 | for (i = 0; i < sample->branch_stack->nr; i++) | 787 | for (i = 0; i < sample->branch_stack->nr; i++) { |
770 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 "\n", | 788 | struct branch_entry *e = &sample->branch_stack->entries[i]; |
771 | i, sample->branch_stack->entries[i].from, | 789 | |
772 | sample->branch_stack->entries[i].to); | 790 | printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n", |
791 | i, e->from, e->to, | ||
792 | e->flags.cycles, | ||
793 | e->flags.mispred ? "M" : " ", | ||
794 | e->flags.predicted ? "P" : " ", | ||
795 | e->flags.abort ? "A" : " ", | ||
796 | e->flags.in_tx ? "T" : " ", | ||
797 | (unsigned)e->flags.reserved); | ||
798 | } | ||
773 | } | 799 | } |
774 | 800 | ||
775 | static void regs_dump__printf(u64 mask, u64 *regs) | 801 | static void regs_dump__printf(u64 mask, u64 *regs) |
@@ -1093,6 +1119,9 @@ static int machines__deliver_event(struct machines *machines, | |||
1093 | return tool->aux(tool, event, sample, machine); | 1119 | return tool->aux(tool, event, sample, machine); |
1094 | case PERF_RECORD_ITRACE_START: | 1120 | case PERF_RECORD_ITRACE_START: |
1095 | return tool->itrace_start(tool, event, sample, machine); | 1121 | return tool->itrace_start(tool, event, sample, machine); |
1122 | case PERF_RECORD_SWITCH: | ||
1123 | case PERF_RECORD_SWITCH_CPU_WIDE: | ||
1124 | return tool->context_switch(tool, event, sample, machine); | ||
1096 | default: | 1125 | default: |
1097 | ++evlist->stats.nr_unknown_events; | 1126 | ++evlist->stats.nr_unknown_events; |
1098 | return -1; | 1127 | return -1; |
@@ -1551,7 +1580,10 @@ static int __perf_session__process_events(struct perf_session *session, | |||
1551 | file_offset = page_offset; | 1580 | file_offset = page_offset; |
1552 | head = data_offset - page_offset; | 1581 | head = data_offset - page_offset; |
1553 | 1582 | ||
1554 | if (data_size && (data_offset + data_size < file_size)) | 1583 | if (data_size == 0) |
1584 | goto out; | ||
1585 | |||
1586 | if (data_offset + data_size < file_size) | ||
1555 | file_size = data_offset + data_size; | 1587 | file_size = data_offset + data_size; |
1556 | 1588 | ||
1557 | ui_progress__init(&prog, file_size, "Processing events..."); | 1589 | ui_progress__init(&prog, file_size, "Processing events..."); |
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 4c65a143a34c..7e3871606df3 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c | |||
@@ -9,7 +9,7 @@ regex_t parent_regex; | |||
9 | const char default_parent_pattern[] = "^sys_|^do_page_fault"; | 9 | const char default_parent_pattern[] = "^sys_|^do_page_fault"; |
10 | const char *parent_pattern = default_parent_pattern; | 10 | const char *parent_pattern = default_parent_pattern; |
11 | const char default_sort_order[] = "comm,dso,symbol"; | 11 | const char default_sort_order[] = "comm,dso,symbol"; |
12 | const char default_branch_sort_order[] = "comm,dso_from,symbol_from,dso_to,symbol_to"; | 12 | const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles"; |
13 | const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; | 13 | const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked"; |
14 | const char default_top_sort_order[] = "dso,symbol"; | 14 | const char default_top_sort_order[] = "dso,symbol"; |
15 | const char default_diff_sort_order[] = "dso,symbol"; | 15 | const char default_diff_sort_order[] = "dso,symbol"; |
@@ -319,6 +319,59 @@ struct sort_entry sort_srcline = { | |||
319 | .se_width_idx = HISTC_SRCLINE, | 319 | .se_width_idx = HISTC_SRCLINE, |
320 | }; | 320 | }; |
321 | 321 | ||
322 | /* --sort srcfile */ | ||
323 | |||
324 | static char no_srcfile[1]; | ||
325 | |||
326 | static char *get_srcfile(struct hist_entry *e) | ||
327 | { | ||
328 | char *sf, *p; | ||
329 | struct map *map = e->ms.map; | ||
330 | |||
331 | sf = get_srcline(map->dso, map__rip_2objdump(map, e->ip), | ||
332 | e->ms.sym, true); | ||
333 | if (!strcmp(sf, SRCLINE_UNKNOWN)) | ||
334 | return no_srcfile; | ||
335 | p = strchr(sf, ':'); | ||
336 | if (p && *sf) { | ||
337 | *p = 0; | ||
338 | return sf; | ||
339 | } | ||
340 | free(sf); | ||
341 | return no_srcfile; | ||
342 | } | ||
343 | |||
344 | static int64_t | ||
345 | sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right) | ||
346 | { | ||
347 | if (!left->srcfile) { | ||
348 | if (!left->ms.map) | ||
349 | left->srcfile = no_srcfile; | ||
350 | else | ||
351 | left->srcfile = get_srcfile(left); | ||
352 | } | ||
353 | if (!right->srcfile) { | ||
354 | if (!right->ms.map) | ||
355 | right->srcfile = no_srcfile; | ||
356 | else | ||
357 | right->srcfile = get_srcfile(right); | ||
358 | } | ||
359 | return strcmp(right->srcfile, left->srcfile); | ||
360 | } | ||
361 | |||
362 | static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf, | ||
363 | size_t size, unsigned int width) | ||
364 | { | ||
365 | return repsep_snprintf(bf, size, "%-*.*s", width, width, he->srcfile); | ||
366 | } | ||
367 | |||
368 | struct sort_entry sort_srcfile = { | ||
369 | .se_header = "Source File", | ||
370 | .se_cmp = sort__srcfile_cmp, | ||
371 | .se_snprintf = hist_entry__srcfile_snprintf, | ||
372 | .se_width_idx = HISTC_SRCFILE, | ||
373 | }; | ||
374 | |||
322 | /* --sort parent */ | 375 | /* --sort parent */ |
323 | 376 | ||
324 | static int64_t | 377 | static int64_t |
@@ -526,6 +579,29 @@ static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf, | |||
526 | return repsep_snprintf(bf, size, "%-*.*s", width, width, out); | 579 | return repsep_snprintf(bf, size, "%-*.*s", width, width, out); |
527 | } | 580 | } |
528 | 581 | ||
582 | static int64_t | ||
583 | sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right) | ||
584 | { | ||
585 | return left->branch_info->flags.cycles - | ||
586 | right->branch_info->flags.cycles; | ||
587 | } | ||
588 | |||
589 | static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf, | ||
590 | size_t size, unsigned int width) | ||
591 | { | ||
592 | if (he->branch_info->flags.cycles == 0) | ||
593 | return repsep_snprintf(bf, size, "%-*s", width, "-"); | ||
594 | return repsep_snprintf(bf, size, "%-*hd", width, | ||
595 | he->branch_info->flags.cycles); | ||
596 | } | ||
597 | |||
598 | struct sort_entry sort_cycles = { | ||
599 | .se_header = "Basic Block Cycles", | ||
600 | .se_cmp = sort__cycles_cmp, | ||
601 | .se_snprintf = hist_entry__cycles_snprintf, | ||
602 | .se_width_idx = HISTC_CYCLES, | ||
603 | }; | ||
604 | |||
529 | /* --sort daddr_sym */ | 605 | /* --sort daddr_sym */ |
530 | static int64_t | 606 | static int64_t |
531 | sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) | 607 | sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right) |
@@ -1173,6 +1249,7 @@ static struct sort_dimension common_sort_dimensions[] = { | |||
1173 | DIM(SORT_PARENT, "parent", sort_parent), | 1249 | DIM(SORT_PARENT, "parent", sort_parent), |
1174 | DIM(SORT_CPU, "cpu", sort_cpu), | 1250 | DIM(SORT_CPU, "cpu", sort_cpu), |
1175 | DIM(SORT_SRCLINE, "srcline", sort_srcline), | 1251 | DIM(SORT_SRCLINE, "srcline", sort_srcline), |
1252 | DIM(SORT_SRCFILE, "srcfile", sort_srcfile), | ||
1176 | DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), | 1253 | DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight), |
1177 | DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), | 1254 | DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight), |
1178 | DIM(SORT_TRANSACTION, "transaction", sort_transaction), | 1255 | DIM(SORT_TRANSACTION, "transaction", sort_transaction), |
@@ -1190,6 +1267,7 @@ static struct sort_dimension bstack_sort_dimensions[] = { | |||
1190 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), | 1267 | DIM(SORT_MISPREDICT, "mispredict", sort_mispredict), |
1191 | DIM(SORT_IN_TX, "in_tx", sort_in_tx), | 1268 | DIM(SORT_IN_TX, "in_tx", sort_in_tx), |
1192 | DIM(SORT_ABORT, "abort", sort_abort), | 1269 | DIM(SORT_ABORT, "abort", sort_abort), |
1270 | DIM(SORT_CYCLES, "cycles", sort_cycles), | ||
1193 | }; | 1271 | }; |
1194 | 1272 | ||
1195 | #undef DIM | 1273 | #undef DIM |
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index e97cd476d336..3c2a399f8f5b 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h | |||
@@ -114,6 +114,7 @@ struct hist_entry { | |||
114 | }; | 114 | }; |
115 | }; | 115 | }; |
116 | char *srcline; | 116 | char *srcline; |
117 | char *srcfile; | ||
117 | struct symbol *parent; | 118 | struct symbol *parent; |
118 | struct rb_root sorted_chain; | 119 | struct rb_root sorted_chain; |
119 | struct branch_info *branch_info; | 120 | struct branch_info *branch_info; |
@@ -172,6 +173,7 @@ enum sort_type { | |||
172 | SORT_PARENT, | 173 | SORT_PARENT, |
173 | SORT_CPU, | 174 | SORT_CPU, |
174 | SORT_SRCLINE, | 175 | SORT_SRCLINE, |
176 | SORT_SRCFILE, | ||
175 | SORT_LOCAL_WEIGHT, | 177 | SORT_LOCAL_WEIGHT, |
176 | SORT_GLOBAL_WEIGHT, | 178 | SORT_GLOBAL_WEIGHT, |
177 | SORT_TRANSACTION, | 179 | SORT_TRANSACTION, |
@@ -185,6 +187,7 @@ enum sort_type { | |||
185 | SORT_MISPREDICT, | 187 | SORT_MISPREDICT, |
186 | SORT_ABORT, | 188 | SORT_ABORT, |
187 | SORT_IN_TX, | 189 | SORT_IN_TX, |
190 | SORT_CYCLES, | ||
188 | 191 | ||
189 | /* memory mode specific sort keys */ | 192 | /* memory mode specific sort keys */ |
190 | __SORT_MEMORY_MODE, | 193 | __SORT_MEMORY_MODE, |
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index c93fb0c5bd0b..fc08248f08ca 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c | |||
@@ -10,6 +10,8 @@ | |||
10 | 10 | ||
11 | #include "symbol.h" | 11 | #include "symbol.h" |
12 | 12 | ||
13 | bool srcline_full_filename; | ||
14 | |||
13 | #ifdef HAVE_LIBBFD_SUPPORT | 15 | #ifdef HAVE_LIBBFD_SUPPORT |
14 | 16 | ||
15 | /* | 17 | /* |
@@ -277,7 +279,9 @@ char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, | |||
277 | if (!addr2line(dso_name, addr, &file, &line, dso)) | 279 | if (!addr2line(dso_name, addr, &file, &line, dso)) |
278 | goto out; | 280 | goto out; |
279 | 281 | ||
280 | if (asprintf(&srcline, "%s:%u", basename(file), line) < 0) { | 282 | if (asprintf(&srcline, "%s:%u", |
283 | srcline_full_filename ? file : basename(file), | ||
284 | line) < 0) { | ||
281 | free(file); | 285 | free(file); |
282 | goto out; | 286 | goto out; |
283 | } | 287 | } |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 53e8bb7bc852..2a5d8d7698ae 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c | |||
@@ -85,7 +85,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |||
85 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) | 85 | else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) |
86 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); | 86 | update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); |
87 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) | 87 | else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) |
88 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | 88 | update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); |
89 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) | 89 | else if (perf_stat_evsel__is(counter, TRANSACTION_START)) |
90 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); | 90 | update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); |
91 | else if (perf_stat_evsel__is(counter, ELISION_START)) | 91 | else if (perf_stat_evsel__is(counter, ELISION_START)) |
@@ -398,20 +398,18 @@ void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, | |||
398 | " # %5.2f%% aborted cycles ", | 398 | " # %5.2f%% aborted cycles ", |
399 | 100.0 * ((total2-avg) / total)); | 399 | 100.0 * ((total2-avg) / total)); |
400 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && | 400 | } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && |
401 | avg > 0 && | ||
402 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | 401 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { |
403 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | 402 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
404 | 403 | ||
405 | if (total) | 404 | if (avg) |
406 | ratio = total / avg; | 405 | ratio = total / avg; |
407 | 406 | ||
408 | fprintf(out, " # %8.0f cycles / transaction ", ratio); | 407 | fprintf(out, " # %8.0f cycles / transaction ", ratio); |
409 | } else if (perf_stat_evsel__is(evsel, ELISION_START) && | 408 | } else if (perf_stat_evsel__is(evsel, ELISION_START) && |
410 | avg > 0 && | ||
411 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { | 409 | runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { |
412 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); | 410 | total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); |
413 | 411 | ||
414 | if (total) | 412 | if (avg) |
415 | ratio = total / avg; | 413 | ratio = total / avg; |
416 | 414 | ||
417 | fprintf(out, " # %8.0f cycles / elision ", ratio); | 415 | fprintf(out, " # %8.0f cycles / elision ", ratio); |
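
The stat-shadow.c hunk above routes CYCLES_IN_TX samples into runtime_cycles_in_tx_stats and changes the divide-by-zero guard from the dividend (total) to the divisor (avg), since the printed ratio is total / avg. A small arithmetic sketch of that guard, with made-up values:

	#include <stdio.h>

	int main(void)
	{
		double total = 123456.0;	/* avg cycles spent inside transactions */
		double avg   = 0.0;		/* avg number of transaction starts */
		double ratio = 0.0;

		/* Guard on the divisor before computing the ratio. */
		if (avg)
			ratio = total / avg;

		printf(" # %8.0f cycles / transaction\n", ratio);
		return 0;
	}
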
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index f2a0d1521e26..2d065d065b67 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c | |||
@@ -97,55 +97,6 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel) | |||
97 | } | 97 | } |
98 | } | 98 | } |
99 | 99 | ||
100 | struct perf_counts *perf_counts__new(int ncpus, int nthreads) | ||
101 | { | ||
102 | struct perf_counts *counts = zalloc(sizeof(*counts)); | ||
103 | |||
104 | if (counts) { | ||
105 | struct xyarray *values; | ||
106 | |||
107 | values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); | ||
108 | if (!values) { | ||
109 | free(counts); | ||
110 | return NULL; | ||
111 | } | ||
112 | |||
113 | counts->values = values; | ||
114 | } | ||
115 | |||
116 | return counts; | ||
117 | } | ||
118 | |||
119 | void perf_counts__delete(struct perf_counts *counts) | ||
120 | { | ||
121 | if (counts) { | ||
122 | xyarray__delete(counts->values); | ||
123 | free(counts); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | static void perf_counts__reset(struct perf_counts *counts) | ||
128 | { | ||
129 | xyarray__reset(counts->values); | ||
130 | } | ||
131 | |||
132 | void perf_evsel__reset_counts(struct perf_evsel *evsel) | ||
133 | { | ||
134 | perf_counts__reset(evsel->counts); | ||
135 | } | ||
136 | |||
137 | int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) | ||
138 | { | ||
139 | evsel->counts = perf_counts__new(ncpus, nthreads); | ||
140 | return evsel->counts != NULL ? 0 : -ENOMEM; | ||
141 | } | ||
142 | |||
143 | void perf_evsel__free_counts(struct perf_evsel *evsel) | ||
144 | { | ||
145 | perf_counts__delete(evsel->counts); | ||
146 | evsel->counts = NULL; | ||
147 | } | ||
148 | |||
149 | void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) | 100 | void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) |
150 | { | 101 | { |
151 | int i; | 102 | int i; |
@@ -238,3 +189,154 @@ void perf_evlist__reset_stats(struct perf_evlist *evlist) | |||
238 | perf_evsel__reset_counts(evsel); | 189 | perf_evsel__reset_counts(evsel); |
239 | } | 190 | } |
240 | } | 191 | } |
192 | |||
193 | static void zero_per_pkg(struct perf_evsel *counter) | ||
194 | { | ||
195 | if (counter->per_pkg_mask) | ||
196 | memset(counter->per_pkg_mask, 0, MAX_NR_CPUS); | ||
197 | } | ||
198 | |||
199 | static int check_per_pkg(struct perf_evsel *counter, | ||
200 | struct perf_counts_values *vals, int cpu, bool *skip) | ||
201 | { | ||
202 | unsigned long *mask = counter->per_pkg_mask; | ||
203 | struct cpu_map *cpus = perf_evsel__cpus(counter); | ||
204 | int s; | ||
205 | |||
206 | *skip = false; | ||
207 | |||
208 | if (!counter->per_pkg) | ||
209 | return 0; | ||
210 | |||
211 | if (cpu_map__empty(cpus)) | ||
212 | return 0; | ||
213 | |||
214 | if (!mask) { | ||
215 | mask = zalloc(MAX_NR_CPUS); | ||
216 | if (!mask) | ||
217 | return -ENOMEM; | ||
218 | |||
219 | counter->per_pkg_mask = mask; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * we do not consider an event that has not run as a good | ||
224 | * instance to mark a package as used (skip=1). Otherwise | ||
225 | * we may run into a situation where the first CPU in a package | ||
226 | * is not running anything, yet the second is, and this function | ||
227 | * would mark the package as used after the first CPU and would | ||
228 | * not read the values from the second CPU. | ||
229 | */ | ||
230 | if (!(vals->run && vals->ena)) | ||
231 | return 0; | ||
232 | |||
233 | s = cpu_map__get_socket(cpus, cpu); | ||
234 | if (s < 0) | ||
235 | return -1; | ||
236 | |||
237 | *skip = test_and_set_bit(s, mask) == 1; | ||
238 | return 0; | ||
239 | } | ||
240 | |||
241 | static int | ||
242 | process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel, | ||
243 | int cpu, int thread, | ||
244 | struct perf_counts_values *count) | ||
245 | { | ||
246 | struct perf_counts_values *aggr = &evsel->counts->aggr; | ||
247 | static struct perf_counts_values zero; | ||
248 | bool skip = false; | ||
249 | |||
250 | if (check_per_pkg(evsel, count, cpu, &skip)) { | ||
251 | pr_err("failed to read per-pkg counter\n"); | ||
252 | return -1; | ||
253 | } | ||
254 | |||
255 | if (skip) | ||
256 | count = &zero; | ||
257 | |||
258 | switch (config->aggr_mode) { | ||
259 | case AGGR_THREAD: | ||
260 | case AGGR_CORE: | ||
261 | case AGGR_SOCKET: | ||
262 | case AGGR_NONE: | ||
263 | if (!evsel->snapshot) | ||
264 | perf_evsel__compute_deltas(evsel, cpu, thread, count); | ||
265 | perf_counts_values__scale(count, config->scale, NULL); | ||
266 | if (config->aggr_mode == AGGR_NONE) | ||
267 | perf_stat__update_shadow_stats(evsel, count->values, cpu); | ||
268 | break; | ||
269 | case AGGR_GLOBAL: | ||
270 | aggr->val += count->val; | ||
271 | if (config->scale) { | ||
272 | aggr->ena += count->ena; | ||
273 | aggr->run += count->run; | ||
274 | } | ||
275 | default: | ||
276 | break; | ||
277 | } | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | static int process_counter_maps(struct perf_stat_config *config, | ||
283 | struct perf_evsel *counter) | ||
284 | { | ||
285 | int nthreads = thread_map__nr(counter->threads); | ||
286 | int ncpus = perf_evsel__nr_cpus(counter); | ||
287 | int cpu, thread; | ||
288 | |||
289 | if (counter->system_wide) | ||
290 | nthreads = 1; | ||
291 | |||
292 | for (thread = 0; thread < nthreads; thread++) { | ||
293 | for (cpu = 0; cpu < ncpus; cpu++) { | ||
294 | if (process_counter_values(config, counter, cpu, thread, | ||
295 | perf_counts(counter->counts, cpu, thread))) | ||
296 | return -1; | ||
297 | } | ||
298 | } | ||
299 | |||
300 | return 0; | ||
301 | } | ||
302 | |||
303 | int perf_stat_process_counter(struct perf_stat_config *config, | ||
304 | struct perf_evsel *counter) | ||
305 | { | ||
306 | struct perf_counts_values *aggr = &counter->counts->aggr; | ||
307 | struct perf_stat *ps = counter->priv; | ||
308 | u64 *count = counter->counts->aggr.values; | ||
309 | int i, ret; | ||
310 | |||
311 | aggr->val = aggr->ena = aggr->run = 0; | ||
312 | init_stats(ps->res_stats); | ||
313 | |||
314 | if (counter->per_pkg) | ||
315 | zero_per_pkg(counter); | ||
316 | |||
317 | ret = process_counter_maps(config, counter); | ||
318 | if (ret) | ||
319 | return ret; | ||
320 | |||
321 | if (config->aggr_mode != AGGR_GLOBAL) | ||
322 | return 0; | ||
323 | |||
324 | if (!counter->snapshot) | ||
325 | perf_evsel__compute_deltas(counter, -1, -1, aggr); | ||
326 | perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); | ||
327 | |||
328 | for (i = 0; i < 3; i++) | ||
329 | update_stats(&ps->res_stats[i], count[i]); | ||
330 | |||
331 | if (verbose) { | ||
332 | fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n", | ||
333 | perf_evsel__name(counter), count[0], count[1], count[2]); | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Save the full runtime - to allow normalization during printout: | ||
338 | */ | ||
339 | perf_stat__update_shadow_stats(counter, count, 0); | ||
340 | |||
341 | return 0; | ||
342 | } | ||
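
check_per_pkg() above keeps one bit per socket in per_pkg_mask and uses test_and_set_bit() so that only the first CPU read for each package contributes to a per-pkg event; CPUs whose counter did not run (run/ena == 0) are not allowed to claim the package, per the comment in the hunk. The sketch below shows that dedup idea in user space with a stand-in for test_and_set_bit(); the topology arrays are made up for illustration.

	#include <stdio.h>
	#include <string.h>
	#include <stdbool.h>

	#define MAX_SOCKETS 64

	/* User-space stand-in for test_and_set_bit(): returns the previous bit value. */
	static bool test_and_set(unsigned char *mask, int bit)
	{
		bool old = mask[bit / 8] & (1u << (bit % 8));

		mask[bit / 8] |= 1u << (bit % 8);
		return old;
	}

	int main(void)
	{
		unsigned char mask[MAX_SOCKETS / 8];
		/* cpu -> socket topology, and whether the event actually ran there */
		int socket_of[4] = { 0, 0, 1, 1 };
		int ran_on[4]    = { 0, 1, 1, 1 };
		int cpu;

		memset(mask, 0, sizeof(mask));

		for (cpu = 0; cpu < 4; cpu++) {
			if (!ran_on[cpu]) {
				printf("cpu%d: did not run, cannot claim socket %d\n",
				       cpu, socket_of[cpu]);
				continue;
			}
			if (test_and_set(mask, socket_of[cpu]))
				printf("cpu%d: socket %d already counted, skip\n",
				       cpu, socket_of[cpu]);
			else
				printf("cpu%d: first reader for socket %d, count it\n",
				       cpu, socket_of[cpu]);
		}
		return 0;
	}

With the sample topology, cpu0 is idle so cpu1 becomes the counting CPU for socket 0, and cpu3 is skipped because cpu2 already claimed socket 1.
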
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 1cfbe0a980ac..62448c8175d3 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h | |||
@@ -33,29 +33,13 @@ enum aggr_mode { | |||
33 | AGGR_THREAD, | 33 | AGGR_THREAD, |
34 | }; | 34 | }; |
35 | 35 | ||
36 | struct perf_counts_values { | 36 | struct perf_stat_config { |
37 | union { | 37 | enum aggr_mode aggr_mode; |
38 | struct { | 38 | bool scale; |
39 | u64 val; | 39 | FILE *output; |
40 | u64 ena; | 40 | unsigned int interval; |
41 | u64 run; | ||
42 | }; | ||
43 | u64 values[3]; | ||
44 | }; | ||
45 | }; | 41 | }; |
46 | 42 | ||
47 | struct perf_counts { | ||
48 | s8 scaled; | ||
49 | struct perf_counts_values aggr; | ||
50 | struct xyarray *values; | ||
51 | }; | ||
52 | |||
53 | static inline struct perf_counts_values* | ||
54 | perf_counts(struct perf_counts *counts, int cpu, int thread) | ||
55 | { | ||
56 | return xyarray__entry(counts->values, cpu, thread); | ||
57 | } | ||
58 | |||
59 | void update_stats(struct stats *stats, u64 val); | 43 | void update_stats(struct stats *stats, u64 val); |
60 | double avg_stats(struct stats *stats); | 44 | double avg_stats(struct stats *stats); |
61 | double stddev_stats(struct stats *stats); | 45 | double stddev_stats(struct stats *stats); |
@@ -89,13 +73,6 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, | |||
89 | void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, | 73 | void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, |
90 | double avg, int cpu, enum aggr_mode aggr); | 74 | double avg, int cpu, enum aggr_mode aggr); |
91 | 75 | ||
92 | struct perf_counts *perf_counts__new(int ncpus, int nthreads); | ||
93 | void perf_counts__delete(struct perf_counts *counts); | ||
94 | |||
95 | void perf_evsel__reset_counts(struct perf_evsel *evsel); | ||
96 | int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); | ||
97 | void perf_evsel__free_counts(struct perf_evsel *evsel); | ||
98 | |||
99 | void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); | 76 | void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); |
100 | int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); | 77 | int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); |
101 | void perf_evsel__free_stat_priv(struct perf_evsel *evsel); | 78 | void perf_evsel__free_stat_priv(struct perf_evsel *evsel); |
@@ -109,4 +86,7 @@ int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); | |||
109 | int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); | 86 | int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); |
110 | void perf_evlist__free_stats(struct perf_evlist *evlist); | 87 | void perf_evlist__free_stats(struct perf_evlist *evlist); |
111 | void perf_evlist__reset_stats(struct perf_evlist *evlist); | 88 | void perf_evlist__reset_stats(struct perf_evlist *evlist); |
89 | |||
90 | int perf_stat_process_counter(struct perf_stat_config *config, | ||
91 | struct perf_evsel *counter); | ||
112 | #endif | 92 | #endif |
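
The stat.h change above bundles the aggregation mode, scaling flag, output stream and interval into struct perf_stat_config, which callers fill once and pass to perf_stat_process_counter(). A hedged sketch of how a caller might populate it; the struct and enum are re-declared locally so the snippet compiles on its own, and the enum ordering is only for illustration.

	#include <stdio.h>
	#include <stdbool.h>

	/* Local mirror of the definitions in util/stat.h, for illustration only. */
	enum aggr_mode { AGGR_NONE, AGGR_GLOBAL, AGGR_SOCKET, AGGR_CORE, AGGR_THREAD };

	struct perf_stat_config {
		enum aggr_mode	aggr_mode;
		bool		scale;
		FILE		*output;
		unsigned int	interval;
	};

	int main(void)
	{
		struct perf_stat_config config = {
			.aggr_mode = AGGR_GLOBAL,
			.scale	   = true,
			.output	   = stderr,
			.interval  = 0,		/* no interval printing */
		};

		/*
		 * In perf this struct would now be passed around instead of
		 * loose flags, e.g. perf_stat_process_counter(&config, counter).
		 */
		fprintf(config.output, "aggr=%d scale=%d interval=%u\n",
			config.aggr_mode, config.scale, config.interval);
		return 0;
	}
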
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 6afd6106ceb5..fc8781de62db 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c | |||
@@ -357,3 +357,42 @@ void *memdup(const void *src, size_t len) | |||
357 | 357 | ||
358 | return p; | 358 | return p; |
359 | } | 359 | } |
360 | |||
361 | char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints) | ||
362 | { | ||
363 | /* | ||
364 | * FIXME: replace this with an expression using log10() when we | ||
365 | * find a suitable implementation, maybe the one in the dvb drivers... | ||
366 | * | ||
367 | * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators | ||
368 | */ | ||
369 | size_t size = nints * 28 + 1; /* \0 */ | ||
370 | size_t i, printed = 0; | ||
371 | char *expr = malloc(size); | ||
372 | |||
373 | if (expr) { | ||
374 | const char *or_and = "||", *eq_neq = "=="; | ||
375 | char *e = expr; | ||
376 | |||
377 | if (!in) { | ||
378 | or_and = "&&"; | ||
379 | eq_neq = "!="; | ||
380 | } | ||
381 | |||
382 | for (i = 0; i < nints; ++i) { | ||
383 | if (printed == size) | ||
384 | goto out_err_overflow; | ||
385 | |||
386 | if (i > 0) | ||
387 | printed += snprintf(e + printed, size - printed, " %s ", or_and); | ||
388 | printed += scnprintf(e + printed, size - printed, | ||
389 | "%s %s %d", var, eq_neq, ints[i]); | ||
390 | } | ||
391 | } | ||
392 | |||
393 | return expr; | ||
394 | |||
395 | out_err_overflow: | ||
396 | free(expr); | ||
397 | return NULL; | ||
398 | } | ||
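
asprintf_expr_inout_ints() above builds a filter expression such as "pid == 1 || pid == 2", or the negated "pid != 1 && pid != 2" form when in is false. The standalone sketch below reproduces the same idea with explicit overflow checks; it is an illustration sized like the perf helper, not the perf function itself, and the "common_pid" variable name is only an example.

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdbool.h>

	static char *expr_inout_ints(const char *var, bool in, size_t nints, const int *ints)
	{
		/* ~28 bytes per "<var> == <int> || " term, as in the perf helper */
		size_t size = nints * 28 + 1, printed = 0, i;
		const char *or_and = in ? "||" : "&&";
		const char *eq_neq = in ? "==" : "!=";
		char *expr = malloc(size);

		if (!expr)
			return NULL;

		for (i = 0; i < nints; ++i) {
			int n;

			if (i > 0) {
				n = snprintf(expr + printed, size - printed, " %s ", or_and);
				if (n < 0 || (size_t)n >= size - printed)
					goto overflow;
				printed += n;
			}
			n = snprintf(expr + printed, size - printed, "%s %s %d", var, eq_neq, ints[i]);
			if (n < 0 || (size_t)n >= size - printed)
				goto overflow;
			printed += n;
		}
		return expr;

	overflow:
		free(expr);
		return NULL;
	}

	int main(void)
	{
		int pids[] = { 1, 1862, 31809 };
		char *in  = expr_inout_ints("common_pid", true, 3, pids);
		char *out = expr_inout_ints("common_pid", false, 3, pids);

		printf("%s\n", in);	/* common_pid == 1 || common_pid == 1862 || ... */
		printf("%s\n", out);	/* common_pid != 1 && common_pid != 1862 && ... */
		free(in);
		free(out);
		return 0;
	}
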
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c index 71f9d102b96f..bdf98f6f27bb 100644 --- a/tools/perf/util/strlist.c +++ b/tools/perf/util/strlist.c | |||
@@ -72,7 +72,7 @@ int strlist__load(struct strlist *slist, const char *filename) | |||
72 | FILE *fp = fopen(filename, "r"); | 72 | FILE *fp = fopen(filename, "r"); |
73 | 73 | ||
74 | if (fp == NULL) | 74 | if (fp == NULL) |
75 | return errno; | 75 | return -errno; |
76 | 76 | ||
77 | while (fgets(entry, sizeof(entry), fp) != NULL) { | 77 | while (fgets(entry, sizeof(entry), fp) != NULL) { |
78 | const size_t len = strlen(entry); | 78 | const size_t len = strlen(entry); |
@@ -108,43 +108,70 @@ struct str_node *strlist__find(struct strlist *slist, const char *entry) | |||
108 | return snode; | 108 | return snode; |
109 | } | 109 | } |
110 | 110 | ||
111 | static int strlist__parse_list_entry(struct strlist *slist, const char *s) | 111 | static int strlist__parse_list_entry(struct strlist *slist, const char *s, |
112 | const char *subst_dir) | ||
112 | { | 113 | { |
114 | int err; | ||
115 | char *subst = NULL; | ||
116 | |||
113 | if (strncmp(s, "file://", 7) == 0) | 117 | if (strncmp(s, "file://", 7) == 0) |
114 | return strlist__load(slist, s + 7); | 118 | return strlist__load(slist, s + 7); |
115 | 119 | ||
116 | return strlist__add(slist, s); | 120 | if (subst_dir) { |
121 | err = -ENOMEM; | ||
122 | if (asprintf(&subst, "%s/%s", subst_dir, s) < 0) | ||
123 | goto out; | ||
124 | |||
125 | if (access(subst, F_OK) == 0) { | ||
126 | err = strlist__load(slist, subst); | ||
127 | goto out; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | err = strlist__add(slist, s); | ||
132 | out: | ||
133 | free(subst); | ||
134 | return err; | ||
117 | } | 135 | } |
118 | 136 | ||
119 | int strlist__parse_list(struct strlist *slist, const char *s) | 137 | static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir) |
120 | { | 138 | { |
121 | char *sep; | 139 | char *sep; |
122 | int err; | 140 | int err; |
123 | 141 | ||
124 | while ((sep = strchr(s, ',')) != NULL) { | 142 | while ((sep = strchr(s, ',')) != NULL) { |
125 | *sep = '\0'; | 143 | *sep = '\0'; |
126 | err = strlist__parse_list_entry(slist, s); | 144 | err = strlist__parse_list_entry(slist, s, subst_dir); |
127 | *sep = ','; | 145 | *sep = ','; |
128 | if (err != 0) | 146 | if (err != 0) |
129 | return err; | 147 | return err; |
130 | s = sep + 1; | 148 | s = sep + 1; |
131 | } | 149 | } |
132 | 150 | ||
133 | return *s ? strlist__parse_list_entry(slist, s) : 0; | 151 | return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0; |
134 | } | 152 | } |
135 | 153 | ||
136 | struct strlist *strlist__new(bool dupstr, const char *list) | 154 | struct strlist *strlist__new(const char *list, const struct strlist_config *config) |
137 | { | 155 | { |
138 | struct strlist *slist = malloc(sizeof(*slist)); | 156 | struct strlist *slist = malloc(sizeof(*slist)); |
139 | 157 | ||
140 | if (slist != NULL) { | 158 | if (slist != NULL) { |
159 | bool dupstr = true; | ||
160 | const char *dirname = NULL; | ||
161 | |||
162 | if (config) { | ||
163 | dupstr = !config->dont_dupstr; | ||
164 | dirname = config->dirname; | ||
165 | } | ||
166 | |||
141 | rblist__init(&slist->rblist); | 167 | rblist__init(&slist->rblist); |
142 | slist->rblist.node_cmp = strlist__node_cmp; | 168 | slist->rblist.node_cmp = strlist__node_cmp; |
143 | slist->rblist.node_new = strlist__node_new; | 169 | slist->rblist.node_new = strlist__node_new; |
144 | slist->rblist.node_delete = strlist__node_delete; | 170 | slist->rblist.node_delete = strlist__node_delete; |
145 | 171 | ||
146 | slist->dupstr = dupstr; | 172 | slist->dupstr = dupstr; |
147 | if (list && strlist__parse_list(slist, list) != 0) | 173 | |
174 | if (list && strlist__parse_list(slist, list, dirname) != 0) | ||
148 | goto out_error; | 175 | goto out_error; |
149 | } | 176 | } |
150 | 177 | ||
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h index 5c7f87069d9c..297565aa7535 100644 --- a/tools/perf/util/strlist.h +++ b/tools/perf/util/strlist.h | |||
@@ -16,7 +16,12 @@ struct strlist { | |||
16 | bool dupstr; | 16 | bool dupstr; |
17 | }; | 17 | }; |
18 | 18 | ||
19 | struct strlist *strlist__new(bool dupstr, const char *slist); | 19 | struct strlist_config { |
20 | bool dont_dupstr; | ||
21 | const char *dirname; | ||
22 | }; | ||
23 | |||
24 | struct strlist *strlist__new(const char *slist, const struct strlist_config *config); | ||
20 | void strlist__delete(struct strlist *slist); | 25 | void strlist__delete(struct strlist *slist); |
21 | 26 | ||
22 | void strlist__remove(struct strlist *slist, struct str_node *sn); | 27 | void strlist__remove(struct strlist *slist, struct str_node *sn); |
@@ -74,6 +79,4 @@ static inline struct str_node *strlist__next(struct str_node *sn) | |||
74 | #define strlist__for_each_safe(pos, n, slist) \ | 79 | #define strlist__for_each_safe(pos, n, slist) \ |
75 | for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ | 80 | for (pos = strlist__first(slist), n = strlist__next(pos); pos;\ |
76 | pos = n, n = strlist__next(n)) | 81 | pos = n, n = strlist__next(n)) |
77 | |||
78 | int strlist__parse_list(struct strlist *slist, const char *s); | ||
79 | #endif /* __PERF_STRLIST_H */ | 82 | #endif /* __PERF_STRLIST_H */ |
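
With the strlist changes above, strlist__new() takes the list string first plus an optional strlist_config, which lets callers request non-duplicated strings and name a directory in which list entries may exist as files; strlist__parse_list_entry() then tries "<dirname>/<entry>" before falling back to a literal entry. Below is a standalone sketch of that lookup decision, with stubs standing in for strlist__load()/strlist__add() and a hypothetical groups directory.

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	/* Stubs standing in for strlist__load() and strlist__add(). */
	static int load_file(const char *path) { printf("load file %s\n", path); return 0; }
	static int add_literal(const char *s)  { printf("add literal %s\n", s);  return 0; }

	static int parse_entry(const char *s, const char *subst_dir)
	{
		char *subst = NULL;
		int err;

		if (strncmp(s, "file://", 7) == 0)
			return load_file(s + 7);

		if (subst_dir) {
			if (asprintf(&subst, "%s/%s", subst_dir, s) < 0)
				return -1;
			if (access(subst, F_OK) == 0) {	/* entry names an existing file? */
				err = load_file(subst);
				free(subst);
				return err;
			}
			free(subst);
		}
		return add_literal(s);
	}

	int main(void)
	{
		/* "raw" is added literally unless /usr/share/groups/raw exists (path is made up). */
		parse_entry("raw", "/usr/share/groups");
		parse_entry("file:///tmp/syscalls.txt", NULL);
		return 0;
	}
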
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 65f7e389ae09..475d88d0a1c9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c | |||
@@ -875,6 +875,17 @@ int dso__load_sym(struct dso *dso, struct map *map, | |||
875 | } | 875 | } |
876 | } | 876 | } |
877 | 877 | ||
878 | /* | ||
879 | * Handle any relocation of vdso necessary because older kernels | ||
880 | * attempted to prelink vdso to its virtual address. | ||
881 | */ | ||
882 | if (dso__is_vdso(dso)) { | ||
883 | GElf_Shdr tshdr; | ||
884 | |||
885 | if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) | ||
886 | map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset; | ||
887 | } | ||
888 | |||
878 | dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); | 889 | dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap); |
879 | /* | 890 | /* |
880 | * Initial kernel and module mappings do not map to the dso. For | 891 | * Initial kernel and module mappings do not map to the dso. For |
@@ -1260,8 +1271,6 @@ out_close: | |||
1260 | static int kcore__init(struct kcore *kcore, char *filename, int elfclass, | 1271 | static int kcore__init(struct kcore *kcore, char *filename, int elfclass, |
1261 | bool temp) | 1272 | bool temp) |
1262 | { | 1273 | { |
1263 | GElf_Ehdr *ehdr; | ||
1264 | |||
1265 | kcore->elfclass = elfclass; | 1274 | kcore->elfclass = elfclass; |
1266 | 1275 | ||
1267 | if (temp) | 1276 | if (temp) |
@@ -1278,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass, | |||
1278 | if (!gelf_newehdr(kcore->elf, elfclass)) | 1287 | if (!gelf_newehdr(kcore->elf, elfclass)) |
1279 | goto out_end; | 1288 | goto out_end; |
1280 | 1289 | ||
1281 | ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr); | 1290 | memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr)); |
1282 | if (!ehdr) | ||
1283 | goto out_end; | ||
1284 | 1291 | ||
1285 | return 0; | 1292 | return 0; |
1286 | 1293 | ||
@@ -1337,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count) | |||
1337 | static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, | 1344 | static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset, |
1338 | u64 addr, u64 len) | 1345 | u64 addr, u64 len) |
1339 | { | 1346 | { |
1340 | GElf_Phdr gphdr; | 1347 | GElf_Phdr phdr = { |
1341 | GElf_Phdr *phdr; | 1348 | .p_type = PT_LOAD, |
1342 | 1349 | .p_flags = PF_R | PF_W | PF_X, | |
1343 | phdr = gelf_getphdr(kcore->elf, idx, &gphdr); | 1350 | .p_offset = offset, |
1344 | if (!phdr) | 1351 | .p_vaddr = addr, |
1345 | return -1; | 1352 | .p_paddr = 0, |
1346 | 1353 | .p_filesz = len, | |
1347 | phdr->p_type = PT_LOAD; | 1354 | .p_memsz = len, |
1348 | phdr->p_flags = PF_R | PF_W | PF_X; | 1355 | .p_align = page_size, |
1349 | phdr->p_offset = offset; | 1356 | }; |
1350 | phdr->p_vaddr = addr; | 1357 | |
1351 | phdr->p_paddr = 0; | 1358 | if (!gelf_update_phdr(kcore->elf, idx, &phdr)) |
1352 | phdr->p_filesz = len; | ||
1353 | phdr->p_memsz = len; | ||
1354 | phdr->p_align = page_size; | ||
1355 | |||
1356 | if (!gelf_update_phdr(kcore->elf, idx, phdr)) | ||
1357 | return -1; | 1359 | return -1; |
1358 | 1360 | ||
1359 | return 0; | 1361 | return 0; |
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 60f11414bb5c..1f97ffb158a6 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c | |||
@@ -441,10 +441,25 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, | |||
441 | return &s->sym; | 441 | return &s->sym; |
442 | } | 442 | } |
443 | 443 | ||
444 | void dso__reset_find_symbol_cache(struct dso *dso) | ||
445 | { | ||
446 | enum map_type type; | ||
447 | |||
448 | for (type = MAP__FUNCTION; type <= MAP__VARIABLE; ++type) { | ||
449 | dso->last_find_result[type].addr = 0; | ||
450 | dso->last_find_result[type].symbol = NULL; | ||
451 | } | ||
452 | } | ||
453 | |||
444 | struct symbol *dso__find_symbol(struct dso *dso, | 454 | struct symbol *dso__find_symbol(struct dso *dso, |
445 | enum map_type type, u64 addr) | 455 | enum map_type type, u64 addr) |
446 | { | 456 | { |
447 | return symbols__find(&dso->symbols[type], addr); | 457 | if (dso->last_find_result[type].addr != addr) { |
458 | dso->last_find_result[type].addr = addr; | ||
459 | dso->last_find_result[type].symbol = symbols__find(&dso->symbols[type], addr); | ||
460 | } | ||
461 | |||
462 | return dso->last_find_result[type].symbol; | ||
448 | } | 463 | } |
449 | 464 | ||
450 | struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) | 465 | struct symbol *dso__first_symbol(struct dso *dso, enum map_type type) |
@@ -1133,8 +1148,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, | |||
1133 | 1148 | ||
1134 | fd = open(kcore_filename, O_RDONLY); | 1149 | fd = open(kcore_filename, O_RDONLY); |
1135 | if (fd < 0) { | 1150 | if (fd < 0) { |
1136 | pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", | 1151 | pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n", |
1137 | kcore_filename); | 1152 | kcore_filename); |
1138 | return -EINVAL; | 1153 | return -EINVAL; |
1139 | } | 1154 | } |
1140 | 1155 | ||
@@ -1838,7 +1853,7 @@ static void vmlinux_path__exit(void) | |||
1838 | zfree(&vmlinux_path); | 1853 | zfree(&vmlinux_path); |
1839 | } | 1854 | } |
1840 | 1855 | ||
1841 | static int vmlinux_path__init(struct perf_session_env *env) | 1856 | static int vmlinux_path__init(struct perf_env *env) |
1842 | { | 1857 | { |
1843 | struct utsname uts; | 1858 | struct utsname uts; |
1844 | char bf[PATH_MAX]; | 1859 | char bf[PATH_MAX]; |
@@ -1906,7 +1921,7 @@ int setup_list(struct strlist **list, const char *list_str, | |||
1906 | if (list_str == NULL) | 1921 | if (list_str == NULL) |
1907 | return 0; | 1922 | return 0; |
1908 | 1923 | ||
1909 | *list = strlist__new(true, list_str); | 1924 | *list = strlist__new(list_str, NULL); |
1910 | if (!*list) { | 1925 | if (!*list) { |
1911 | pr_err("problems parsing %s list\n", list_name); | 1926 | pr_err("problems parsing %s list\n", list_name); |
1912 | return -1; | 1927 | return -1; |
@@ -1949,7 +1964,7 @@ static bool symbol__read_kptr_restrict(void) | |||
1949 | return value; | 1964 | return value; |
1950 | } | 1965 | } |
1951 | 1966 | ||
1952 | int symbol__init(struct perf_session_env *env) | 1967 | int symbol__init(struct perf_env *env) |
1953 | { | 1968 | { |
1954 | const char *symfs; | 1969 | const char *symfs; |
1955 | 1970 | ||
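
dso__find_symbol() above now remembers the last (addr, symbol) pair per map type, so repeated lookups for the same address skip the rb-tree walk, and dso__reset_find_symbol_cache() clears the cache when the symbol table changes. A minimal sketch of that single-entry memoisation pattern; the expensive lookup and the symbol table are faked here.

	#include <stdio.h>

	struct sym { const char *name; };

	static struct sym symtab[] = { { "start" }, { "main" }, { "exit" } };

	/* Stand-in for the rb-tree walk: pretend it is expensive. */
	static struct sym *slow_lookup(unsigned long addr)
	{
		printf("  (slow lookup for %#lx)\n", addr);
		return &symtab[addr % 3];
	}

	static struct {
		unsigned long addr;
		struct sym *symbol;
	} last_find_result;

	static struct sym *find_symbol(unsigned long addr)
	{
		if (last_find_result.addr != addr) {
			last_find_result.addr = addr;
			last_find_result.symbol = slow_lookup(addr);
		}
		return last_find_result.symbol;
	}

	int main(void)
	{
		printf("%s\n", find_symbol(0x401000)->name);	/* slow path */
		printf("%s\n", find_symbol(0x401000)->name);	/* served from the cache */
		printf("%s\n", find_symbol(0x402001)->name);	/* slow path again */
		return 0;
	}
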
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index b98ce51af142..440ba8ae888f 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h | |||
@@ -106,7 +106,8 @@ struct symbol_conf { | |||
106 | filter_relative, | 106 | filter_relative, |
107 | show_hist_headers, | 107 | show_hist_headers, |
108 | branch_callstack, | 108 | branch_callstack, |
109 | has_filter; | 109 | has_filter, |
110 | show_ref_callgraph; | ||
110 | const char *vmlinux_name, | 111 | const char *vmlinux_name, |
111 | *kallsyms_name, | 112 | *kallsyms_name, |
112 | *source_prefix, | 113 | *source_prefix, |
@@ -251,8 +252,8 @@ int modules__parse(const char *filename, void *arg, | |||
251 | int filename__read_debuglink(const char *filename, char *debuglink, | 252 | int filename__read_debuglink(const char *filename, char *debuglink, |
252 | size_t size); | 253 | size_t size); |
253 | 254 | ||
254 | struct perf_session_env; | 255 | struct perf_env; |
255 | int symbol__init(struct perf_session_env *env); | 256 | int symbol__init(struct perf_env *env); |
256 | void symbol__exit(void); | 257 | void symbol__exit(void); |
257 | void symbol__elf_init(void); | 258 | void symbol__elf_init(void); |
258 | struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); | 259 | struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name); |
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 28c4b746baa1..0a9ae8014729 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c | |||
@@ -191,6 +191,12 @@ static int thread__clone_map_groups(struct thread *thread, | |||
191 | if (thread->pid_ == parent->pid_) | 191 | if (thread->pid_ == parent->pid_) |
192 | return 0; | 192 | return 0; |
193 | 193 | ||
194 | if (thread->mg == parent->mg) { | ||
195 | pr_debug("broken map groups on thread %d/%d parent %d/%d\n", | ||
196 | thread->pid_, thread->tid, parent->pid_, parent->tid); | ||
197 | return 0; | ||
198 | } | ||
199 | |||
194 | /* But this one is new process, copy maps. */ | 200 | /* But this one is new process, copy maps. */ |
195 | for (i = 0; i < MAP__NR_TYPES; ++i) | 201 | for (i = 0; i < MAP__NR_TYPES; ++i) |
196 | if (map_groups__clone(thread->mg, parent->mg, i) < 0) | 202 | if (map_groups__clone(thread->mg, parent->mg, i) < 0) |
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 292ae2c90e06..6ec3c5ca438f 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c | |||
@@ -195,7 +195,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) | |||
195 | pid_t pid, prev_pid = INT_MAX; | 195 | pid_t pid, prev_pid = INT_MAX; |
196 | char *end_ptr; | 196 | char *end_ptr; |
197 | struct str_node *pos; | 197 | struct str_node *pos; |
198 | struct strlist *slist = strlist__new(false, pid_str); | 198 | struct strlist_config slist_config = { .dont_dupstr = true, }; |
199 | struct strlist *slist = strlist__new(pid_str, &slist_config); | ||
199 | 200 | ||
200 | if (!slist) | 201 | if (!slist) |
201 | return NULL; | 202 | return NULL; |
@@ -265,13 +266,14 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) | |||
265 | pid_t tid, prev_tid = INT_MAX; | 266 | pid_t tid, prev_tid = INT_MAX; |
266 | char *end_ptr; | 267 | char *end_ptr; |
267 | struct str_node *pos; | 268 | struct str_node *pos; |
269 | struct strlist_config slist_config = { .dont_dupstr = true, }; | ||
268 | struct strlist *slist; | 270 | struct strlist *slist; |
269 | 271 | ||
270 | /* perf-stat expects threads to be generated even if tid not given */ | 272 | /* perf-stat expects threads to be generated even if tid not given */ |
271 | if (!tid_str) | 273 | if (!tid_str) |
272 | return thread_map__new_dummy(); | 274 | return thread_map__new_dummy(); |
273 | 275 | ||
274 | slist = strlist__new(false, tid_str); | 276 | slist = strlist__new(tid_str, &slist_config); |
275 | if (!slist) | 277 | if (!slist) |
276 | return NULL; | 278 | return NULL; |
277 | 279 | ||
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index c307dd438286..cab8cc24831b 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h | |||
@@ -46,6 +46,7 @@ struct perf_tool { | |||
46 | lost_samples, | 46 | lost_samples, |
47 | aux, | 47 | aux, |
48 | itrace_start, | 48 | itrace_start, |
49 | context_switch, | ||
49 | throttle, | 50 | throttle, |
50 | unthrottle; | 51 | unthrottle; |
51 | event_attr_op attr; | 52 | event_attr_op attr; |
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c index eb72716017ac..22245986e59e 100644 --- a/tools/perf/util/trace-event-info.c +++ b/tools/perf/util/trace-event-info.c | |||
@@ -341,20 +341,14 @@ out: | |||
341 | 341 | ||
342 | static int record_proc_kallsyms(void) | 342 | static int record_proc_kallsyms(void) |
343 | { | 343 | { |
344 | unsigned int size; | 344 | unsigned long long size = 0; |
345 | const char *path = "/proc/kallsyms"; | 345 | /* |
346 | struct stat st; | 346 | * Just to keep older perf.data file parsers happy, record a zero |
347 | int ret, err = 0; | 347 | * sized kallsyms file, i.e. do the same thing that was done when |
348 | 348 | * /proc/kallsyms (or something specified via --kallsyms, in a | |
349 | ret = stat(path, &st); | 349 | * different path) couldn't be read. |
350 | if (ret < 0) { | 350 | */ |
351 | /* not found */ | 351 | return write(output_fd, &size, 4) != 4 ? -EIO : 0; |
352 | size = 0; | ||
353 | if (write(output_fd, &size, 4) != 4) | ||
354 | err = -EIO; | ||
355 | return err; | ||
356 | } | ||
357 | return record_file(path, 4); | ||
358 | } | 352 | } |
359 | 353 | ||
360 | static int record_ftrace_printk(void) | 354 | static int record_ftrace_printk(void) |
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index d4957418657e..8ff7d620d942 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
@@ -135,36 +135,6 @@ void event_format__print(struct event_format *event, | |||
135 | return event_format__fprintf(event, cpu, data, size, stdout); | 135 | return event_format__fprintf(event, cpu, data, size, stdout); |
136 | } | 136 | } |
137 | 137 | ||
138 | void parse_proc_kallsyms(struct pevent *pevent, | ||
139 | char *file, unsigned int size __maybe_unused) | ||
140 | { | ||
141 | unsigned long long addr; | ||
142 | char *func; | ||
143 | char *line; | ||
144 | char *next = NULL; | ||
145 | char *addr_str; | ||
146 | char *mod; | ||
147 | char *fmt = NULL; | ||
148 | |||
149 | line = strtok_r(file, "\n", &next); | ||
150 | while (line) { | ||
151 | mod = NULL; | ||
152 | addr_str = strtok_r(line, " ", &fmt); | ||
153 | addr = strtoull(addr_str, NULL, 16); | ||
154 | /* skip character */ | ||
155 | strtok_r(NULL, " ", &fmt); | ||
156 | func = strtok_r(NULL, "\t", &fmt); | ||
157 | mod = strtok_r(NULL, "]", &fmt); | ||
158 | /* truncate the extra '[' */ | ||
159 | if (mod) | ||
160 | mod = mod + 1; | ||
161 | |||
162 | pevent_register_function(pevent, func, addr, mod); | ||
163 | |||
164 | line = strtok_r(NULL, "\n", &next); | ||
165 | } | ||
166 | } | ||
167 | |||
168 | void parse_ftrace_printk(struct pevent *pevent, | 138 | void parse_ftrace_printk(struct pevent *pevent, |
169 | char *file, unsigned int size __maybe_unused) | 139 | char *file, unsigned int size __maybe_unused) |
170 | { | 140 | { |
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 54d9e9b548a8..b67a0ccf5ab9 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c | |||
@@ -162,25 +162,23 @@ out: | |||
162 | static int read_proc_kallsyms(struct pevent *pevent) | 162 | static int read_proc_kallsyms(struct pevent *pevent) |
163 | { | 163 | { |
164 | unsigned int size; | 164 | unsigned int size; |
165 | char *buf; | ||
166 | 165 | ||
167 | size = read4(pevent); | 166 | size = read4(pevent); |
168 | if (!size) | 167 | if (!size) |
169 | return 0; | 168 | return 0; |
170 | 169 | /* | |
171 | buf = malloc(size + 1); | 170 | * Just skip it, now that we configure libtraceevent to use the |
172 | if (buf == NULL) | 171 | * tools/perf/ symbol resolver. |
173 | return -1; | 172 | * |
174 | 173 | * We need to skip it so that we can continue parsing old perf.data | |
175 | if (do_read(buf, size) < 0) { | 174 | * files, that contains this /proc/kallsyms payload. |
176 | free(buf); | 175 | * |
177 | return -1; | 176 | * Newer perf.data files will have just the 4-bytes zeros "kallsyms |
178 | } | 177 | * payload", so that older tools can continue reading it and interpret |
179 | buf[size] = '\0'; | 178 | * it as "no kallsyms payload is present". |
180 | 179 | */ | |
181 | parse_proc_kallsyms(pevent, buf, size); | 180 | lseek(input_fd, size, SEEK_CUR); |
182 | 181 | trace_data_size += size; | |
183 | free(buf); | ||
184 | return 0; | 182 | return 0; |
185 | } | 183 | } |
186 | 184 | ||
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 6322d37164c5..b90e646c7a91 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <traceevent/event-parse.h> | 10 | #include <traceevent/event-parse.h> |
11 | #include "trace-event.h" | 11 | #include "trace-event.h" |
12 | #include "machine.h" | ||
12 | #include "util.h" | 13 | #include "util.h" |
13 | 14 | ||
14 | /* | 15 | /* |
@@ -19,6 +20,7 @@ | |||
19 | * there. | 20 | * there. |
20 | */ | 21 | */ |
21 | static struct trace_event tevent; | 22 | static struct trace_event tevent; |
23 | static bool tevent_initialized; | ||
22 | 24 | ||
23 | int trace_event__init(struct trace_event *t) | 25 | int trace_event__init(struct trace_event *t) |
24 | { | 26 | { |
@@ -32,6 +34,31 @@ int trace_event__init(struct trace_event *t) | |||
32 | return pevent ? 0 : -1; | 34 | return pevent ? 0 : -1; |
33 | } | 35 | } |
34 | 36 | ||
37 | static int trace_event__init2(void) | ||
38 | { | ||
39 | int be = traceevent_host_bigendian(); | ||
40 | struct pevent *pevent; | ||
41 | |||
42 | if (trace_event__init(&tevent)) | ||
43 | return -1; | ||
44 | |||
45 | pevent = tevent.pevent; | ||
46 | pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); | ||
47 | pevent_set_file_bigendian(pevent, be); | ||
48 | pevent_set_host_bigendian(pevent, be); | ||
49 | tevent_initialized = true; | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | int trace_event__register_resolver(struct machine *machine, | ||
54 | pevent_func_resolver_t *func) | ||
55 | { | ||
56 | if (!tevent_initialized && trace_event__init2()) | ||
57 | return -1; | ||
58 | |||
59 | return pevent_set_function_resolver(tevent.pevent, func, machine); | ||
60 | } | ||
61 | |||
35 | void trace_event__cleanup(struct trace_event *t) | 62 | void trace_event__cleanup(struct trace_event *t) |
36 | { | 63 | { |
37 | traceevent_unload_plugins(t->plugin_list, t->pevent); | 64 | traceevent_unload_plugins(t->plugin_list, t->pevent); |
@@ -62,21 +89,8 @@ tp_format(const char *sys, const char *name) | |||
62 | struct event_format* | 89 | struct event_format* |
63 | trace_event__tp_format(const char *sys, const char *name) | 90 | trace_event__tp_format(const char *sys, const char *name) |
64 | { | 91 | { |
65 | static bool initialized; | 92 | if (!tevent_initialized && trace_event__init2()) |
66 | 93 | return NULL; | |
67 | if (!initialized) { | ||
68 | int be = traceevent_host_bigendian(); | ||
69 | struct pevent *pevent; | ||
70 | |||
71 | if (trace_event__init(&tevent)) | ||
72 | return NULL; | ||
73 | |||
74 | pevent = tevent.pevent; | ||
75 | pevent_set_flag(pevent, PEVENT_NSEC_OUTPUT); | ||
76 | pevent_set_file_bigendian(pevent, be); | ||
77 | pevent_set_host_bigendian(pevent, be); | ||
78 | initialized = true; | ||
79 | } | ||
80 | 94 | ||
81 | return tp_format(sys, name); | 95 | return tp_format(sys, name); |
82 | } | 96 | } |
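
The trace-event.c change above hoists the one-time pevent setup out of trace_event__tp_format() into trace_event__init2(), guarded by a tevent_initialized flag, so that the new trace_event__register_resolver() entry point can trigger the same lazy initialisation. The toy sketch below shows only that shared lazy-init pattern; the real setup work is faked.

	#include <stdio.h>
	#include <stdbool.h>

	static bool tevent_initialized;

	static int init2(void)
	{
		/* one-time setup shared by every entry point */
		printf("initialising trace event state\n");
		tevent_initialized = true;
		return 0;
	}

	static int register_resolver(void)
	{
		if (!tevent_initialized && init2())
			return -1;
		printf("resolver registered\n");
		return 0;
	}

	static int tp_format(void)
	{
		if (!tevent_initialized && init2())
			return -1;
		printf("tracepoint format looked up\n");
		return 0;
	}

	int main(void)
	{
		register_resolver();	/* triggers the one-time init */
		tp_format();		/* already initialised, skips it */
		return 0;
	}
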
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index d5168f0be4ec..da6cc4cc2a4f 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h | |||
@@ -18,6 +18,8 @@ struct trace_event { | |||
18 | 18 | ||
19 | int trace_event__init(struct trace_event *t); | 19 | int trace_event__init(struct trace_event *t); |
20 | void trace_event__cleanup(struct trace_event *t); | 20 | void trace_event__cleanup(struct trace_event *t); |
21 | int trace_event__register_resolver(struct machine *machine, | ||
22 | pevent_func_resolver_t *func); | ||
21 | struct event_format* | 23 | struct event_format* |
22 | trace_event__tp_format(const char *sys, const char *name); | 24 | trace_event__tp_format(const char *sys, const char *name); |
23 | 25 | ||
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index edc2d633b332..c2cd9bf2348b 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c | |||
@@ -34,6 +34,7 @@ bool test_attr__enabled; | |||
34 | bool perf_host = true; | 34 | bool perf_host = true; |
35 | bool perf_guest = false; | 35 | bool perf_guest = false; |
36 | 36 | ||
37 | char tracing_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing"; | ||
37 | char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; | 38 | char tracing_events_path[PATH_MAX + 1] = "/sys/kernel/debug/tracing/events"; |
38 | 39 | ||
39 | void event_attr_init(struct perf_event_attr *attr) | 40 | void event_attr_init(struct perf_event_attr *attr) |
@@ -391,6 +392,8 @@ void set_term_quiet_input(struct termios *old) | |||
391 | 392 | ||
392 | static void set_tracing_events_path(const char *tracing, const char *mountpoint) | 393 | static void set_tracing_events_path(const char *tracing, const char *mountpoint) |
393 | { | 394 | { |
395 | snprintf(tracing_path, sizeof(tracing_path), "%s/%s", | ||
396 | mountpoint, tracing); | ||
394 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", | 397 | snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", |
395 | mountpoint, tracing, "events"); | 398 | mountpoint, tracing, "events"); |
396 | } | 399 | } |
@@ -436,66 +439,14 @@ const char *perf_debugfs_mount(const char *mountpoint) | |||
436 | 439 | ||
437 | void perf_debugfs_set_path(const char *mntpt) | 440 | void perf_debugfs_set_path(const char *mntpt) |
438 | { | 441 | { |
439 | snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); | ||
440 | set_tracing_events_path("tracing/", mntpt); | 442 | set_tracing_events_path("tracing/", mntpt); |
441 | } | 443 | } |
442 | 444 | ||
443 | static const char *find_tracefs(void) | ||
444 | { | ||
445 | const char *path = __perf_tracefs_mount(NULL); | ||
446 | |||
447 | return path; | ||
448 | } | ||
449 | |||
450 | static const char *find_debugfs(void) | ||
451 | { | ||
452 | const char *path = __perf_debugfs_mount(NULL); | ||
453 | |||
454 | if (!path) | ||
455 | fprintf(stderr, "Your kernel does not support the debugfs filesystem"); | ||
456 | |||
457 | return path; | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * Finds the path to the debugfs/tracing | ||
462 | * Allocates the string and stores it. | ||
463 | */ | ||
464 | const char *find_tracing_dir(void) | ||
465 | { | ||
466 | const char *tracing_dir = ""; | ||
467 | static char *tracing; | ||
468 | static int tracing_found; | ||
469 | const char *debugfs; | ||
470 | |||
471 | if (tracing_found) | ||
472 | return tracing; | ||
473 | |||
474 | debugfs = find_tracefs(); | ||
475 | if (!debugfs) { | ||
476 | tracing_dir = "/tracing"; | ||
477 | debugfs = find_debugfs(); | ||
478 | if (!debugfs) | ||
479 | return NULL; | ||
480 | } | ||
481 | |||
482 | if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) | ||
483 | return NULL; | ||
484 | |||
485 | tracing_found = 1; | ||
486 | return tracing; | ||
487 | } | ||
488 | |||
489 | char *get_tracing_file(const char *name) | 445 | char *get_tracing_file(const char *name) |
490 | { | 446 | { |
491 | const char *tracing; | ||
492 | char *file; | 447 | char *file; |
493 | 448 | ||
494 | tracing = find_tracing_dir(); | 449 | if (asprintf(&file, "%s/%s", tracing_path, name) < 0) |
495 | if (!tracing) | ||
496 | return NULL; | ||
497 | |||
498 | if (asprintf(&file, "%s/%s", tracing, name) < 0) | ||
499 | return NULL; | 450 | return NULL; |
500 | 451 | ||
501 | return file; | 452 | return file; |
@@ -566,6 +517,96 @@ unsigned long parse_tag_value(const char *str, struct parse_tag *tags) | |||
566 | return (unsigned long) -1; | 517 | return (unsigned long) -1; |
567 | } | 518 | } |
568 | 519 | ||
520 | int get_stack_size(const char *str, unsigned long *_size) | ||
521 | { | ||
522 | char *endptr; | ||
523 | unsigned long size; | ||
524 | unsigned long max_size = round_down(USHRT_MAX, sizeof(u64)); | ||
525 | |||
526 | size = strtoul(str, &endptr, 0); | ||
527 | |||
528 | do { | ||
529 | if (*endptr) | ||
530 | break; | ||
531 | |||
532 | size = round_up(size, sizeof(u64)); | ||
533 | if (!size || size > max_size) | ||
534 | break; | ||
535 | |||
536 | *_size = size; | ||
537 | return 0; | ||
538 | |||
539 | } while (0); | ||
540 | |||
541 | pr_err("callchain: Incorrect stack dump size (max %ld): %s\n", | ||
542 | max_size, str); | ||
543 | return -1; | ||
544 | } | ||
545 | |||
546 | int parse_callchain_record(const char *arg, struct callchain_param *param) | ||
547 | { | ||
548 | char *tok, *name, *saveptr = NULL; | ||
549 | char *buf; | ||
550 | int ret = -1; | ||
551 | |||
552 | /* We need buffer that we know we can write to. */ | ||
553 | buf = malloc(strlen(arg) + 1); | ||
554 | if (!buf) | ||
555 | return -ENOMEM; | ||
556 | |||
557 | strcpy(buf, arg); | ||
558 | |||
559 | tok = strtok_r((char *)buf, ",", &saveptr); | ||
560 | name = tok ? : (char *)buf; | ||
561 | |||
562 | do { | ||
563 | /* Framepointer style */ | ||
564 | if (!strncmp(name, "fp", sizeof("fp"))) { | ||
565 | if (!strtok_r(NULL, ",", &saveptr)) { | ||
566 | param->record_mode = CALLCHAIN_FP; | ||
567 | ret = 0; | ||
568 | } else | ||
569 | pr_err("callchain: No more arguments " | ||
570 | "needed for --call-graph fp\n"); | ||
571 | break; | ||
572 | |||
573 | #ifdef HAVE_DWARF_UNWIND_SUPPORT | ||
574 | /* Dwarf style */ | ||
575 | } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) { | ||
576 | const unsigned long default_stack_dump_size = 8192; | ||
577 | |||
578 | ret = 0; | ||
579 | param->record_mode = CALLCHAIN_DWARF; | ||
580 | param->dump_size = default_stack_dump_size; | ||
581 | |||
582 | tok = strtok_r(NULL, ",", &saveptr); | ||
583 | if (tok) { | ||
584 | unsigned long size = 0; | ||
585 | |||
586 | ret = get_stack_size(tok, &size); | ||
587 | param->dump_size = size; | ||
588 | } | ||
589 | #endif /* HAVE_DWARF_UNWIND_SUPPORT */ | ||
590 | } else if (!strncmp(name, "lbr", sizeof("lbr"))) { | ||
591 | if (!strtok_r(NULL, ",", &saveptr)) { | ||
592 | param->record_mode = CALLCHAIN_LBR; | ||
593 | ret = 0; | ||
594 | } else | ||
595 | pr_err("callchain: No more arguments " | ||
596 | "needed for --call-graph lbr\n"); | ||
597 | break; | ||
598 | } else { | ||
599 | pr_err("callchain: Unknown --call-graph option " | ||
600 | "value: %s\n", arg); | ||
601 | break; | ||
602 | } | ||
603 | |||
604 | } while (0); | ||
605 | |||
606 | free(buf); | ||
607 | return ret; | ||
608 | } | ||
609 | |||
569 | int filename__read_str(const char *filename, char **buf, size_t *sizep) | 610 | int filename__read_str(const char *filename, char **buf, size_t *sizep) |
570 | { | 611 | { |
571 | size_t size = 0, alloc_size = 0; | 612 | size_t size = 0, alloc_size = 0; |
@@ -668,7 +709,7 @@ bool find_process(const char *name) | |||
668 | 709 | ||
669 | dir = opendir(procfs__mountpoint()); | 710 | dir = opendir(procfs__mountpoint()); |
670 | if (!dir) | 711 | if (!dir) |
671 | return -1; | 712 | return false; |
672 | 713 | ||
673 | /* Walk through the directory. */ | 714 | /* Walk through the directory. */ |
674 | while (ret && (d = readdir(dir)) != NULL) { | 715 | while (ret && (d = readdir(dir)) != NULL) { |
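
get_stack_size(), added to util.c above, accepts a user-supplied stack dump size, rounds it up to a multiple of sizeof(u64) and rejects zero, trailing junk, or anything above USHRT_MAX rounded down to that multiple. Below is a standalone sketch of the same validation; round_up/round_down are spelled out because the kernel macros are not available here, and the sample inputs are made up.

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <limits.h>

	#define ALIGN	sizeof(uint64_t)

	static unsigned long round_up_to(unsigned long v)   { return (v + ALIGN - 1) / ALIGN * ALIGN; }
	static unsigned long round_down_to(unsigned long v) { return v / ALIGN * ALIGN; }

	static int get_stack_size(const char *str, unsigned long *out)
	{
		unsigned long max_size = round_down_to(USHRT_MAX);
		char *endptr;
		unsigned long size = strtoul(str, &endptr, 0);

		if (*endptr)			/* trailing junk, e.g. "12kb" */
			return -1;
		size = round_up_to(size);
		if (!size || size > max_size)	/* zero or larger than the sample can hold */
			return -1;
		*out = size;
		return 0;
	}

	int main(void)
	{
		const char *inputs[] = { "8192", "100", "0", "70000", "12kb" };
		unsigned long size;

		for (int i = 0; i < 5; i++) {
			if (get_stack_size(inputs[i], &size) == 0)
				printf("%-6s -> %lu\n", inputs[i], size);	/* "100" becomes 104 */
			else
				printf("%-6s -> rejected\n", inputs[i]);
		}
		return 0;
	}
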
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 8bce58b47a82..291be1d84bc3 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h | |||
@@ -83,10 +83,10 @@ | |||
83 | extern const char *graph_line; | 83 | extern const char *graph_line; |
84 | extern const char *graph_dotted_line; | 84 | extern const char *graph_dotted_line; |
85 | extern char buildid_dir[]; | 85 | extern char buildid_dir[]; |
86 | extern char tracing_path[]; | ||
86 | extern char tracing_events_path[]; | 87 | extern char tracing_events_path[]; |
87 | extern void perf_debugfs_set_path(const char *mountpoint); | 88 | extern void perf_debugfs_set_path(const char *mountpoint); |
88 | const char *perf_debugfs_mount(const char *mountpoint); | 89 | const char *perf_debugfs_mount(const char *mountpoint); |
89 | const char *find_tracing_dir(void); | ||
90 | char *get_tracing_file(const char *name); | 90 | char *get_tracing_file(const char *name); |
91 | void put_tracing_file(char *file); | 91 | void put_tracing_file(char *file); |
92 | 92 | ||
@@ -318,6 +318,7 @@ static inline int path__join3(char *bf, size_t size, | |||
318 | struct dso; | 318 | struct dso; |
319 | struct symbol; | 319 | struct symbol; |
320 | 320 | ||
321 | extern bool srcline_full_filename; | ||
321 | char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, | 322 | char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, |
322 | bool show_sym); | 323 | bool show_sym); |
323 | void free_srcline(char *srcline); | 324 | void free_srcline(char *srcline); |
@@ -339,4 +340,18 @@ int gzip_decompress_to_file(const char *input, int output_fd); | |||
339 | int lzma_decompress_to_file(const char *input, int output_fd); | 340 | int lzma_decompress_to_file(const char *input, int output_fd); |
340 | #endif | 341 | #endif |
341 | 342 | ||
343 | char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints); | ||
344 | |||
345 | static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints) | ||
346 | { | ||
347 | return asprintf_expr_inout_ints(var, true, nints, ints); | ||
348 | } | ||
349 | |||
350 | static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints) | ||
351 | { | ||
352 | return asprintf_expr_inout_ints(var, false, nints, ints); | ||
353 | } | ||
354 | |||
355 | int get_stack_size(const char *str, unsigned long *_size); | ||
356 | |||
342 | #endif /* GIT_COMPAT_UTIL_H */ | 357 | #endif /* GIT_COMPAT_UTIL_H */ |