diff options
author | Andi Kleen <ak@linux.intel.com> | 2016-06-24 16:41:25 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2016-06-29 09:07:23 -0400 |
commit | d4897e1935552663030fe7681a53eccc58d6aebd (patch) | |
tree | 6407ab455513e18de408f5b00da0790b3de370f2 | |
parent | d905768c9e1addfa35d9731dbaa9242e8991f6ac (diff) |
perf tools: Add documentation for perf.data on disk format
Add some documentation for the on disk format of perf.data. This is not
documenting the actual perf events -- which are documented in
perf_event.h -- but just the additional headers that perf record adds
around them when writing the data to disk.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/1466800885-12974-1-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/Documentation/perf.data-file-format.txt | 442 |
1 files changed, 442 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt new file mode 100644 index 000000000000..fdc99fe6bbc3 --- /dev/null +++ b/tools/perf/Documentation/perf.data-file-format.txt | |||
@@ -0,0 +1,442 @@ | |||
1 | perf.data format | ||
2 | |||
3 | Up to date as of v4.7 | ||
4 | |||
5 | This document describes the on-disk perf.data format, generated by perf record | ||
6 | or perf inject and consumed by the other perf tools. | ||
7 | |||
8 | On a high level perf.data contains the events generated by the PMUs, plus metadata. | ||
9 | |||
10 | All fields are in the native endianness of the machine that generated the perf.data. | ||
11 | |||
12 | When perf is writing to a pipe it uses a special version of the file | ||
13 | format that does not rely on seeking to adjust data offsets. This | ||
14 | format is not described here. The pipe version can be converted to | ||
15 | normal perf.data with perf inject. | ||
16 | |||
17 | The file starts with a perf_header: | ||
18 | |||
19 | struct perf_header { | ||
20 | char magic[8]; /* PERFILE2 */ | ||
21 | uint64_t size; /* size of the header */ | ||
22 | uint64_t attr_size; /* size of an attribute in attrs */ | ||
23 | struct perf_file_section attrs; | ||
24 | struct perf_file_section data; | ||
25 | struct perf_file_section event_types; | ||
26 | uint64_t flags; | ||
27 | uint64_t flags1[3]; | ||
28 | }; | ||
29 | |||
30 | The magic number identifies the perf file and the version. Current perf versions | ||
31 | use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1 | ||
32 | is not described here. The magic number also identifies the endianness. When the | ||
33 | magic value is 64bit byte swapped compared to the expected value, the file is in | ||
34 | non-native endian. | ||
35 | |||
36 | A perf_file_section contains a pointer to another section of the perf file. | ||
37 | The header contains three such pointers: for attributes, data and event types. | ||
38 | |||
39 | struct perf_file_section { | ||
40 | uint64_t offset; /* offset from start of file */ | ||
41 | uint64_t size; /* size of the section */ | ||
42 | }; | ||
43 | |||
44 | Flags section: | ||
45 | |||
46 | The header is followed by different optional headers, described by the bits set | ||
47 | in flags. Only headers for which the bit is set are included. Each header | ||
48 | consists of a perf_file_section located after the initial header. | ||
49 | The respective perf_file_section points to the data of the additional | ||
50 | header and defines its size. | ||
51 | |||
52 | Some headers consist of strings, which are defined like this: | ||
53 | |||
54 | struct perf_header_string { | ||
55 | uint32_t len; | ||
56 | char string[len]; /* zero terminated */ | ||
57 | }; | ||
58 | |||
59 | Some headers consist of a sequence of strings, which start with a count: | ||
60 | |||
61 | struct perf_header_string_list { | ||
62 | uint32_t nr; | ||
63 | struct perf_header_string strings[nr]; /* variable length records */ | ||
64 | }; | ||
65 | |||
66 | The bits are the flags bits in a 256 bit bitmap starting with | ||
67 | flags. These define the valid bits: | ||
68 | |||
69 | HEADER_RESERVED = 0, /* always cleared */ | ||
70 | HEADER_FIRST_FEATURE = 1, | ||
71 | HEADER_TRACING_DATA = 1, | ||
72 | |||
73 | Describe me. | ||
74 | |||
75 | HEADER_BUILD_ID = 2, | ||
76 | |||
77 | The header consists of a sequence of build_id_event. The size of each record | ||
78 | is defined by header.size (see perf_event.h). Each event defines an ELF build id | ||
79 | for an executable file name for a pid. An ELF build id is a unique identifier | ||
80 | assigned by the linker to an executable. | ||
81 | |||
82 | struct build_id_event { | ||
83 | struct perf_event_header header; | ||
84 | pid_t pid; | ||
85 | uint8_t build_id[24]; | ||
86 | char filename[header.size - offsetof(struct build_id_event, filename)]; | ||
87 | }; | ||
88 | |||
89 | HEADER_HOSTNAME = 3, | ||
90 | |||
91 | A perf_header_string with the hostname where the data was collected | ||
92 | (uname -n) | ||
93 | |||
94 | HEADER_OSRELEASE = 4, | ||
95 | |||
96 | A perf_header_string with the os release where the data was collected | ||
97 | (uname -r) | ||
98 | |||
99 | HEADER_VERSION = 5, | ||
100 | |||
101 | A perf_header_string with the perf user tool version where the | ||
102 | data was collected. This is the same as the version of the source tree | ||
103 | the perf tool was built from. | ||
104 | |||
105 | HEADER_ARCH = 6, | ||
106 | |||
107 | A perf_header_string with the CPU architecture (uname -m) | ||
108 | |||
109 | HEADER_NRCPUS = 7, | ||
110 | |||
111 | A structure defining the number of CPUs. | ||
112 | |||
113 | struct nr_cpus { | ||
114 | uint32_t nr_cpus_online; | ||
115 | uint32_t nr_cpus_available; /* CPUs not yet onlined */ | ||
116 | }; | ||
117 | |||
118 | HEADER_CPUDESC = 8, | ||
119 | |||
120 | A perf_header_string with description of the CPU. On x86 this is the model name | ||
121 | in /proc/cpuinfo | ||
122 | |||
123 | HEADER_CPUID = 9, | ||
124 | |||
125 | A perf_header_string with the exact CPU type. On x86 this is | ||
126 | vendor,family,model,stepping. For example: GenuineIntel,6,69,1 | ||
127 | |||
128 | HEADER_TOTAL_MEM = 10, | ||
129 | |||
130 | A uint64_t with the total memory in bytes. | ||
131 | |||
132 | HEADER_CMDLINE = 11, | ||
133 | |||
134 | A perf_header_string with the perf command line used to collect the data. | ||
135 | |||
136 | HEADER_EVENT_DESC = 12, | ||
137 | |||
138 | Another description of the perf_event_attrs, more detailed than header.attrs | ||
139 | including IDs and names. See perf_event.h or the man page for a description | ||
140 | of a struct perf_event_attr. | ||
141 | |||
142 | struct { | ||
143 | uint32_t nr; /* number of events */ | ||
144 | uint32_t attr_size; /* size of each perf_event_attr */ | ||
145 | struct { | ||
146 | struct perf_event_attr attr; /* size of attr_size */ | ||
147 | uint32_t nr_ids; | ||
148 | struct perf_header_string event_string; | ||
149 | uint64_t ids[nr_ids]; | ||
150 | } events[nr]; /* Variable length records */ | ||
151 | }; | ||
152 | |||
153 | HEADER_CPU_TOPOLOGY = 13, | ||
154 | |||
155 | String lists defining the core and CPU threads topology. | ||
156 | |||
157 | struct { | ||
158 | struct perf_header_string_list cores; /* Variable length */ | ||
159 | struct perf_header_string_list threads; /* Variable length */ | ||
160 | }; | ||
161 | |||
162 | Example: | ||
163 | sibling cores : 0-3 | ||
164 | sibling threads : 0-1 | ||
165 | sibling threads : 2-3 | ||
166 | |||
167 | HEADER_NUMA_TOPOLOGY = 14, | ||
168 | |||
169 | A list of NUMA node descriptions | ||
170 | |||
171 | struct { | ||
172 | uint32_t nr; | ||
173 | struct { | ||
174 | uint32_t nodenr; | ||
175 | uint64_t mem_total; | ||
176 | uint64_t mem_free; | ||
177 | struct perf_header_string cpus; | ||
178 | } nodes[nr]; /* Variable length records */ | ||
179 | }; | ||
180 | |||
181 | HEADER_BRANCH_STACK = 15, | ||
182 | |||
183 | Not implemented in perf. | ||
184 | |||
185 | HEADER_PMU_MAPPINGS = 16, | ||
186 | |||
187 | A list of PMU structures, defining the different PMUs supported by perf. | ||
188 | |||
189 | struct { | ||
190 | uint32_t nr; | ||
191 | struct pmu { | ||
192 | uint32_t pmu_type; | ||
193 | struct perf_header_string pmu_name; | ||
194 | } [nr]; /* Variable length records */ | ||
195 | }; | ||
196 | |||
197 | HEADER_GROUP_DESC = 17, | ||
198 | |||
199 | Description of counter groups ({...} in perf syntax) | ||
200 | |||
201 | struct { | ||
202 | uint32_t nr; | ||
203 | struct { | ||
204 | struct perf_header_string string; | ||
205 | uint32_t leader_idx; | ||
206 | uint32_t nr_members; | ||
207 | } [nr]; /* Variable length records */ | ||
208 | }; | ||
209 | |||
210 | HEADER_AUXTRACE = 18, | ||
211 | |||
212 | Define additional auxtrace areas in the perf.data. auxtrace is used to store | ||
213 | undecoded hardware tracing information, such as Intel Processor Trace data. | ||
214 | |||
215 | /** | ||
216 | * struct auxtrace_index_entry - indexes an AUX area tracing event within a | ||
217 | * perf.data file. | ||
218 | * @file_offset: offset within the perf.data file | ||
219 | * @sz: size of the event | ||
220 | */ | ||
221 | struct auxtrace_index_entry { | ||
222 | u64 file_offset; | ||
223 | u64 sz; | ||
224 | }; | ||
225 | |||
226 | #define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256 | ||
227 | |||
228 | /** | ||
229 | * struct auxtrace_index - index of AUX area tracing events within a perf.data | ||
230 | * file. | ||
231 | * @list: linking a number of arrays of entries | ||
232 | * @nr: number of entries | ||
233 | * @entries: array of entries | ||
234 | */ | ||
235 | struct auxtrace_index { | ||
236 | struct list_head list; | ||
237 | size_t nr; | ||
238 | struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; | ||
239 | }; | ||
240 | |||
241 | other bits are reserved and should be ignored for now | ||
242 | HEADER_FEAT_BITS = 256, | ||
243 | |||
244 | Attributes | ||
245 | |||
246 | This is an array of perf_event_attrs, each attr_size bytes long, which defines | ||
247 | each event collected. See perf_event.h or the man page for a detailed | ||
248 | description. | ||
249 | |||
250 | Data | ||
251 | |||
252 | This section is the bulk of the file. It consists of a stream of perf_events | ||
253 | describing events. This matches the format generated by the kernel. | ||
254 | See perf_event.h or the manpage for a detailed description. | ||
255 | |||
256 | Some notes on parsing: | ||
257 | |||
258 | Ordering | ||
259 | |||
260 | The events are not necessarily in time stamp order, as they can be | ||
261 | collected in parallel on different CPUs. If the events should be | ||
262 | processed in time order they need to be sorted first. It is possible | ||
263 | to only do a partial sort using the FINISHED_ROUND event header (see | ||
264 | below). perf record guarantees that there is no reordering over a | ||
265 | FINISHED_ROUND. | ||
266 | |||
267 | ID vs IDENTIFIER | ||
268 | |||
269 | When the event stream contains multiple events each event is identified | ||
270 | by an ID. This can be either through the PERF_SAMPLE_ID or the | ||
271 | PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is | ||
272 | at a fixed offset from the event header, which allows reliable | ||
273 | parsing of the header. Relying on ID may be ambiguous. | ||
274 | IDENTIFIER is only supported by newer Linux kernels. | ||
275 | |||
276 | Perf record specific events: | ||
277 | |||
278 | In addition to the kernel generated event types perf record adds its | ||
279 | own event types (in addition it also synthesizes some kernel events, | ||
280 | for example MMAP events) | ||
281 | |||
282 | PERF_RECORD_USER_TYPE_START = 64, | ||
283 | PERF_RECORD_HEADER_ATTR = 64, | ||
284 | |||
285 | struct attr_event { | ||
286 | struct perf_event_header header; | ||
287 | struct perf_event_attr attr; | ||
288 | uint64_t id[]; | ||
289 | }; | ||
290 | |||
291 | PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */ | ||
292 | |||
293 | #define MAX_EVENT_NAME 64 | ||
294 | |||
295 | struct perf_trace_event_type { | ||
296 | uint64_t event_id; | ||
297 | char name[MAX_EVENT_NAME]; | ||
298 | }; | ||
299 | |||
300 | struct event_type_event { | ||
301 | struct perf_event_header header; | ||
302 | struct perf_trace_event_type event_type; | ||
303 | }; | ||
304 | |||
305 | |||
306 | PERF_RECORD_HEADER_TRACING_DATA = 66, | ||
307 | |||
308 | Describe me | ||
309 | |||
310 | struct tracing_data_event { | ||
311 | struct perf_event_header header; | ||
312 | uint32_t size; | ||
313 | }; | ||
314 | |||
315 | PERF_RECORD_HEADER_BUILD_ID = 67, | ||
316 | |||
317 | Define an ELF build ID for a referenced executable. | ||
318 | |||
319 | struct build_id_event; /* See above */ | ||
320 | |||
321 | PERF_RECORD_FINISHED_ROUND = 68, | ||
322 | |||
323 | No event reordering over this header. No payload. | ||
324 | |||
325 | PERF_RECORD_ID_INDEX = 69, | ||
326 | |||
327 | Map event ids to CPUs and TIDs. | ||
328 | |||
329 | struct id_index_entry { | ||
330 | uint64_t id; | ||
331 | uint64_t idx; | ||
332 | uint64_t cpu; | ||
333 | uint64_t tid; | ||
334 | }; | ||
335 | |||
336 | struct id_index_event { | ||
337 | struct perf_event_header header; | ||
338 | uint64_t nr; | ||
339 | struct id_index_entry entries[nr]; | ||
340 | }; | ||
341 | |||
342 | PERF_RECORD_AUXTRACE_INFO = 70, | ||
343 | |||
344 | Auxtrace type specific information. Describe me | ||
345 | |||
346 | struct auxtrace_info_event { | ||
347 | struct perf_event_header header; | ||
348 | uint32_t type; | ||
349 | uint32_t reserved__; /* For alignment */ | ||
350 | uint64_t priv[]; | ||
351 | }; | ||
352 | |||
353 | PERF_RECORD_AUXTRACE = 71, | ||
354 | |||
355 | Defines auxtrace data. Followed by the actual data. The contents of | ||
356 | the auxtrace data are dependent on the event and the CPU. For example | ||
357 | for Intel Processor Trace it contains Processor Trace data generated | ||
358 | by the CPU. | ||
359 | |||
360 | struct auxtrace_event { | ||
361 | struct perf_event_header header; | ||
362 | uint64_t size; | ||
363 | uint64_t offset; | ||
364 | uint64_t reference; | ||
365 | uint32_t idx; | ||
366 | uint32_t tid; | ||
367 | uint32_t cpu; | ||
368 | uint32_t reserved__; /* For alignment */ | ||
369 | }; | ||
370 | |||
371 | struct aux_event { | ||
372 | struct perf_event_header header; | ||
373 | uint64_t aux_offset; | ||
374 | uint64_t aux_size; | ||
375 | uint64_t flags; | ||
376 | }; | ||
377 | |||
378 | PERF_RECORD_AUXTRACE_ERROR = 72, | ||
379 | |||
380 | Describes an error in hardware tracing | ||
381 | |||
382 | enum auxtrace_error_type { | ||
383 | PERF_AUXTRACE_ERROR_ITRACE = 1, | ||
384 | PERF_AUXTRACE_ERROR_MAX | ||
385 | }; | ||
386 | |||
387 | #define MAX_AUXTRACE_ERROR_MSG 64 | ||
388 | |||
389 | struct auxtrace_error_event { | ||
390 | struct perf_event_header header; | ||
391 | uint32_t type; | ||
392 | uint32_t code; | ||
393 | uint32_t cpu; | ||
394 | uint32_t pid; | ||
395 | uint32_t tid; | ||
396 | uint32_t reserved__; /* For alignment */ | ||
397 | uint64_t ip; | ||
398 | char msg[MAX_AUXTRACE_ERROR_MSG]; | ||
399 | }; | ||
400 | |||
401 | Event types | ||
402 | |||
403 | Define the event attributes with their IDs. | ||
404 | |||
405 | An array bound by the perf_file_section size. | ||
406 | |||
407 | struct { | ||
408 | struct perf_event_attr attr; /* Size defined by header.attr_size */ | ||
409 | struct perf_file_section ids; | ||
410 | } | ||
411 | |||
412 | ids points to an array of uint64_t defining the ids for event attr attr. | ||
413 | |||
414 | References: | ||
415 | |||
416 | include/uapi/linux/perf_event.h | ||
417 | |||
418 | This is the canonical description of the kernel generated perf_events | ||
419 | and the perf_event_attrs. | ||
420 | |||
421 | perf_events manpage | ||
422 | |||
423 | A manpage describing perf_event and perf_event_attr is here: | ||
424 | http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html | ||
425 | This tends to be slightly behind the kernel include, but has better | ||
426 | descriptions. A (typically older) version of the man page may be | ||
427 | included with the standard Linux man pages, available with "man | ||
428 | perf_events" | ||
429 | |||
430 | pmu-tools | ||
431 | |||
432 | https://github.com/andikleen/pmu-tools/tree/master/parser | ||
433 | |||
434 | A definition of the perf.data format in python "construct" format is available | ||
435 | in pmu-tools parser. This allows reading perf.data from python and dumping it. | ||
436 | |||
437 | quipper | ||
438 | |||
439 | The quipper C++ parser is available at | ||
440 | https://chromium.googlesource.com/chromiumos/platform/chromiumos-wide-profiling/ | ||
441 | Unfortunately this parser tends to be many versions behind and may not be able | ||
442 | to parse data files generated by recent perf. | ||