From c7206205d00ab375839bd6c7ddb247d600693c09 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 22 Mar 2012 17:26:36 +0100 Subject: perf: Fix mmap_page capabilities and docs Complete the syscall-less self-profiling feature and address all complaints, namely: - capabilities, so we can detect what is actually available at runtime Add a capabilities field to perf_event_mmap_page to indicate what is actually available for use. - on x86: RDPMC weirdness due to being 40/48 bits and not sign-extending properly. - ABI documentation as to how all this stuff works. Also improve the documentation for the new features. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian Cc: Vince Weaver Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1332433596.2487.33.camel@twins Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 83 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 10 deletions(-) (limited to 'include/linux/perf_event.h') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 57ae485e80fc..ca9ed4e6a286 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -299,18 +299,31 @@ struct perf_event_mmap_page { /* * Bits needed to read the hw events in user-space. * - * u32 seq; - * s64 count; + * u32 seq, time_mult, time_shift, idx, width; + * u64 count, enabled, running; + * u64 cyc, time_offset; + * s64 pmc = 0; * * do { * seq = pc->lock; - * * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; + * + * enabled = pc->time_enabled; + * running = pc->time_running; + * + * if (pc->cap_usr_time && enabled != running) { + * cyc = rdtsc(); + * time_offset = pc->time_offset; + * time_mult = pc->time_mult; + * time_shift = pc->time_shift; + * } + * + * idx = pc->index; + * count = pc->offset; + * if (pc->cap_usr_rdpmc && idx) { + * width = pc->pmc_width; + * pmc = rdpmc(idx - 1); + * } * * barrier(); * } while (pc->lock != seq); @@ -323,14 +336,57 @@ struct perf_event_mmap_page { __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ __u64 time_running; /* time event on cpu */ - __u32 time_mult, time_shift; + union { + __u64 capabilities; + __u64 cap_usr_time : 1, + cap_usr_rdpmc : 1, + cap_____res : 62; + }; + + /* + * If cap_usr_rdpmc this field provides the bit-width of the value + * read using the rdpmc() or equivalent instruction. This can be used + * to sign extend the result like: + * + * pmc <<= 64 - width; + * pmc >>= 64 - width; // signed shift right + * count += pmc; + */ + __u16 pmc_width; + + /* + * If cap_usr_time the below fields can be used to compute the time + * delta since time_enabled (in ns) using rdtsc or similar. + * + * u64 quot, rem; + * u64 delta; + * + * quot = (cyc >> time_shift); + * rem = cyc & ((1 << time_shift) - 1); + * delta = time_offset + quot * time_mult + + * ((rem * time_mult) >> time_shift); + * + * Where time_offset,time_mult,time_shift and cyc are read in the + * seqcount loop described above. This delta can then be added to + * enabled and possible running (if idx), improving the scaling: + * + * enabled += delta; + * if (idx) + * running += delta; + * + * quot = count / running; + * rem = count % running; + * count = quot * enabled + (rem * enabled) / running; + */ + __u16 time_shift; + __u32 time_mult; __u64 time_offset; /* * Hole for extension of the self monitor capabilities */ - __u64 __reserved[121]; /* align to 1k */ + __u64 __reserved[120]; /* align to 1k */ /* * Control data for the mmap() data buffer. @@ -347,6 +403,13 @@ struct perf_event_mmap_page { __u64 data_tail; /* user-space written tail */ }; +/* + * Build time assertion that we keep the data_head at the intended location. + * IOW, validation we got the __reserved[] size right. + */ +extern char __assert_mmap_data_head_offset + [1 - 2*!!(offsetof(struct perf_event_mmap_page, data_head) != 1024)]; + #define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) #define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) #define PERF_RECORD_MISC_KERNEL (1 << 0) -- cgit v1.2.2