author    Peter Zijlstra <peterz@infradead.org>  2012-03-22 12:26:36 -0400
committer Ingo Molnar <mingo@kernel.org>  2012-03-23 04:52:16 -0400
commit    c7206205d00ab375839bd6c7ddb247d600693c09 (patch)
tree      24b9d66a5eaef77c1fc40bc8f6f28acfd167bf5b /include/linux/perf_event.h
parent    c5bc437702b24817cabd65a6a57971ff91a7712c (diff)
perf: Fix mmap_page capabilities and docs
Complete the syscall-less self-profiling feature and address
all complaints, namely:

 - capabilities, so we can detect what is actually available at runtime

     Add a capabilities field to perf_event_mmap_page to indicate
     what is actually available for use.

 - on x86: RDPMC weirdness due to being 40/48 bits and not sign-extending
   properly.

 - ABI documentation as to how all this stuff works.

Also improve the documentation for the new features.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vince Weaver <vweaver1@eecs.utk.edu>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1332433596.2487.33.camel@twins
Signed-off-by: Ingo Molnar <mingo@kernel.org>
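For orientation, here is a minimal sketch of how a user-space consumer might
probe the new capability bits at runtime. The event choice, the bare-bones
error handling, and the probe_mmap_caps() helper are illustrative assumptions,
not part of this patch:

	#include <linux/perf_event.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Hypothetical probe: count cycles on the calling thread and inspect
	 * the capability bits this patch adds to the mmap control page. */
	static int probe_mmap_caps(void)
	{
		struct perf_event_attr attr;
		struct perf_event_mmap_page *pc;
		long page_size = sysconf(_SC_PAGESIZE);
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size   = sizeof(attr);
		attr.type   = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;

		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			return -1;

		/* One page suffices: only the control page is needed here,
		 * not a ring-buffer data area. */
		pc = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
		if (pc == MAP_FAILED) {
			close(fd);
			return -1;
		}

		printf("cap_usr_rdpmc: %u\n", (unsigned int)pc->cap_usr_rdpmc);
		printf("cap_usr_time : %u\n", (unsigned int)pc->cap_usr_time);

		munmap(pc, page_size);
		close(fd);
		return 0;
	}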
Diffstat (limited to 'include/linux/perf_event.h')
-rw-r--r--  include/linux/perf_event.h  |  83
1 file changed, 73 insertions(+), 10 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 57ae485e80fc..ca9ed4e6a286 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -299,18 +299,31 @@ struct perf_event_mmap_page {
 	/*
 	 * Bits needed to read the hw events in user-space.
 	 *
-	 *   u32 seq;
-	 *   s64 count;
+	 *   u32 seq, time_mult, time_shift, idx, width;
+	 *   u64 count, enabled, running;
+	 *   u64 cyc, time_offset;
+	 *   s64 pmc = 0;
 	 *
 	 *   do {
 	 *     seq = pc->lock;
-	 *
 	 *     barrier()
-	 *     if (pc->index) {
-	 *       count = pmc_read(pc->index - 1);
-	 *       count += pc->offset;
-	 *     } else
-	 *       goto regular_read;
+	 *
+	 *     enabled = pc->time_enabled;
+	 *     running = pc->time_running;
+	 *
+	 *     if (pc->cap_usr_time && enabled != running) {
+	 *       cyc = rdtsc();
+	 *       time_offset = pc->time_offset;
+	 *       time_mult   = pc->time_mult;
+	 *       time_shift  = pc->time_shift;
+	 *     }
+	 *
+	 *     idx = pc->index;
+	 *     count = pc->offset;
+	 *     if (pc->cap_usr_rdpmc && idx) {
+	 *       width = pc->pmc_width;
+	 *       pmc = rdpmc(idx - 1);
+	 *     }
 	 *
 	 *     barrier();
 	 *   } while (pc->lock != seq);
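Rendered as real code rather than pseudo-code, and assuming x86-64 with
GNU-style inline asm, the fast path might look like the sketch below. The
rdpmc()/barrier() helpers and read_count() are illustrative; RDPMC also
faults unless the kernel has enabled user-space counter access
(cap_usr_rdpmc). The time scaling documented in the next hunk is omitted:

	#include <linux/perf_event.h>
	#include <stdint.h>

	/* Illustrative x86-64 helper; a compiler intrinsic works too. */
	static inline uint64_t rdpmc(uint32_t counter)
	{
		uint32_t lo, hi;
		asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
		return (uint64_t)lo | ((uint64_t)hi << 32);
	}

	#define barrier() asm volatile("" ::: "memory")

	/* One possible rendering of the documented seqlock read loop;
	 * pc points at the mmap()ed perf_event_mmap_page. */
	static uint64_t read_count(volatile struct perf_event_mmap_page *pc)
	{
		uint32_t seq, idx, width;
		uint64_t count;
		int64_t pmc;

		do {
			seq = pc->lock;
			barrier();

			idx = pc->index;
			count = pc->offset;
			pmc = 0;
			if (pc->cap_usr_rdpmc && idx) {
				/* counters are 40/48 bits: sign-extend */
				width = pc->pmc_width;
				pmc = rdpmc(idx - 1);
				pmc <<= 64 - width;
				pmc >>= 64 - width;
			}

			barrier();
		} while (pc->lock != seq);

		return count + pmc;
	}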
@@ -323,14 +336,57 @@ struct perf_event_mmap_page {
 	__s64	offset;			/* add to hardware event value */
 	__u64	time_enabled;		/* time event active */
 	__u64	time_running;		/* time event on cpu */
-	__u32	time_mult, time_shift;
+	union {
+		__u64	capabilities;
+		__u64	cap_usr_time  : 1,
+			cap_usr_rdpmc : 1,
+			cap_____res   : 62;
+	};
+
+	/*
+	 * If cap_usr_rdpmc this field provides the bit-width of the value
+	 * read using the rdpmc() or equivalent instruction. This can be used
+	 * to sign extend the result like:
+	 *
+	 *   pmc <<= 64 - width;
+	 *   pmc >>= 64 - width; // signed shift right
+	 *   count += pmc;
+	 */
+	__u16	pmc_width;
+
+	/*
+	 * If cap_usr_time the below fields can be used to compute the time
+	 * delta since time_enabled (in ns) using rdtsc or similar.
+	 *
+	 *   u64 quot, rem;
+	 *   u64 delta;
+	 *
+	 *   quot = (cyc >> time_shift);
+	 *   rem = cyc & ((1 << time_shift) - 1);
+	 *   delta = time_offset + quot * time_mult +
+	 *              ((rem * time_mult) >> time_shift);
+	 *
+	 * Where time_offset, time_mult, time_shift and cyc are read in the
+	 * seqcount loop described above. This delta can then be added to
+	 * enabled and possibly running (if idx), improving the scaling:
+	 *
+	 *   enabled += delta;
+	 *   if (idx)
+	 *     running += delta;
+	 *
+	 *   quot  = count / running;
+	 *   rem   = count % running;
+	 *   count = quot * enabled + (rem * enabled) / running;
+	 */
+	__u16	time_shift;
+	__u32	time_mult;
 	__u64	time_offset;
 
 	/*
 	 * Hole for extension of the self monitor capabilities
 	 */
 
-	__u64	__reserved[121];	/* align to 1k */
+	__u64	__reserved[120];	/* align to 1k */
 
 	/*
 	 * Control data for the mmap() data buffer.
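The time/scaling recipe above, written out as a hypothetical helper. Every
input is assumed to have been captured inside the seqlock loop from the first
hunk (cyc via rdtsc), and running must be non-zero:

	#include <stdint.h>

	/* Scale a raw count up by enabled/running, folding in the ns elapsed
	 * since the kernel last updated the page. Assumes running != 0. */
	static uint64_t scale_count(uint64_t count, uint64_t enabled,
				    uint64_t running, uint64_t cyc,
				    uint64_t time_offset, uint32_t time_mult,
				    uint16_t time_shift, uint32_t idx)
	{
		uint64_t quot, rem, delta;

		/* ns since the last kernel update of the control page */
		quot  = cyc >> time_shift;
		rem   = cyc & (((uint64_t)1 << time_shift) - 1);
		delta = time_offset + quot * time_mult +
			((rem * time_mult) >> time_shift);

		enabled += delta;
		if (idx)		/* event is currently on the PMU */
			running += delta;

		/* count * enabled / running, split to avoid 64-bit overflow */
		quot = count / running;
		rem  = count % running;
		return quot * enabled + (rem * enabled) / running;
	}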
@@ -347,6 +403,13 @@ struct perf_event_mmap_page {
 	__u64   data_tail;		/* user-space written tail */
 };
 
+/*
+ * Build time assertion that we keep the data_head at the intended location.
+ * IOW, validation we got the __reserved[] size right.
+ */
+extern char __assert_mmap_data_head_offset
+	[1 - 2*!!(offsetof(struct perf_event_mmap_page, data_head) != 1024)];
+
 #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_RECORD_MISC_KERNEL			(1 << 0)
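The extern declaration added above is the classic negative-array-size build
assertion: the array has size 1 when the offset is right and size -1, a
compile-time error, when it is not. The same idiom as a generic macro (the
macro and symbol names here are made up for illustration):

	#include <stddef.h>

	/* Fails to compile iff member is not at offset off within type. */
	#define ASSERT_MEMBER_OFFSET(type, member, off)			\
		extern char assert_##member##_offset			\
			[1 - 2 * !!(offsetof(type, member) != (off))]

	/* e.g.: ASSERT_MEMBER_OFFSET(struct perf_event_mmap_page,
	 *                            data_head, 1024); */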