diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-03-25 14:39:37 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-06-18 08:46:11 -0400 |
commit | 43a21ea81a2400992561146327c4785ce7f7be38 (patch) | |
tree | d4974c0ff9d7f40291515c5c0cf7e0d51abccb66 /include | |
parent | d3a9262e59f7fb83c6d44df3b2b1460ed57d3ea1 (diff) |
perf_counter: Add event overflow handling
Alternative method of mmap() data output handling that provides
better overflow management and a more reliable data stream.
Unlike the previous method, that didn't have any user->kernel
feedback and relied on userspace keeping up, this method relies on
userspace writing its last read position into the control page.
It will ensure new output doesn't overwrite not-yet-read events;
new events for which there is no space left are lost, and the
overflow counter is incremented, providing exact event-loss
numbers.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/perf_counter.h | 40 |
1 file changed, 28 insertions, 12 deletions
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a7d3a61a59b7..0765e8e69843 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h | |||
@@ -236,10 +236,16 @@ struct perf_counter_mmap_page { | |||
236 | /* | 236 | /* |
237 | * Control data for the mmap() data buffer. | 237 | * Control data for the mmap() data buffer. |
238 | * | 238 | * |
239 | * User-space reading this value should issue an rmb(), on SMP capable | 239 | * User-space reading the @data_head value should issue an rmb(), on |
240 | * platforms, after reading this value -- see perf_counter_wakeup(). | 240 | * SMP capable platforms, after reading this value -- see |
241 | * perf_counter_wakeup(). | ||
242 | * | ||
243 | * When the mapping is PROT_WRITE the @data_tail value should be | ||
244 | * written by userspace to reflect the last read data. In this case | ||
245 | * the kernel will not over-write unread data. | ||
241 | */ | 246 | */ |
242 | __u64 data_head; /* head in the data section */ | 247 | __u64 data_head; /* head in the data section */ |
248 | __u64 data_tail; /* user-space written tail */ | ||
243 | }; | 249 | }; |
244 | 250 | ||
245 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) | 251 | #define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) |
@@ -275,6 +281,15 @@ enum perf_event_type { | |||
275 | 281 | ||
276 | /* | 282 | /* |
277 | * struct { | 283 | * struct { |
284 | * struct perf_event_header header; | ||
285 | * u64 id; | ||
286 | * u64 lost; | ||
287 | * }; | ||
288 | */ | ||
289 | PERF_EVENT_LOST = 2, | ||
290 | |||
291 | /* | ||
292 | * struct { | ||
278 | * struct perf_event_header header; | 293 | * struct perf_event_header header; |
279 | * | 294 | * |
280 | * u32 pid, tid; | 295 | * u32 pid, tid; |
@@ -313,26 +328,26 @@ enum perf_event_type { | |||
313 | 328 | ||
314 | /* | 329 | /* |
315 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field | 330 | * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field |
316 | * will be PERF_RECORD_* | 331 | * will be PERF_SAMPLE_* |
317 | * | 332 | * |
318 | * struct { | 333 | * struct { |
319 | * struct perf_event_header header; | 334 | * struct perf_event_header header; |
320 | * | 335 | * |
321 | * { u64 ip; } && PERF_RECORD_IP | 336 | * { u64 ip; } && PERF_SAMPLE_IP |
322 | * { u32 pid, tid; } && PERF_RECORD_TID | 337 | * { u32 pid, tid; } && PERF_SAMPLE_TID |
323 | * { u64 time; } && PERF_RECORD_TIME | 338 | * { u64 time; } && PERF_SAMPLE_TIME |
324 | * { u64 addr; } && PERF_RECORD_ADDR | 339 | * { u64 addr; } && PERF_SAMPLE_ADDR |
325 | * { u64 config; } && PERF_RECORD_CONFIG | 340 | * { u64 config; } && PERF_SAMPLE_CONFIG |
326 | * { u32 cpu, res; } && PERF_RECORD_CPU | 341 | * { u32 cpu, res; } && PERF_SAMPLE_CPU |
327 | * | 342 | * |
328 | * { u64 nr; | 343 | * { u64 nr; |
329 | * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP | 344 | * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP |
330 | * | 345 | * |
331 | * { u16 nr, | 346 | * { u16 nr, |
332 | * hv, | 347 | * hv, |
333 | * kernel, | 348 | * kernel, |
334 | * user; | 349 | * user; |
335 | * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN | 350 | * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN |
336 | * }; | 351 | * }; |
337 | */ | 352 | */ |
338 | }; | 353 | }; |
@@ -424,6 +439,7 @@ struct file; | |||
424 | struct perf_mmap_data { | 439 | struct perf_mmap_data { |
425 | struct rcu_head rcu_head; | 440 | struct rcu_head rcu_head; |
426 | int nr_pages; /* nr of data pages */ | 441 | int nr_pages; /* nr of data pages */ |
442 | int writable; /* are we writable */ | ||
427 | int nr_locked; /* nr pages mlocked */ | 443 | int nr_locked; /* nr pages mlocked */ |
428 | 444 | ||
429 | atomic_t poll; /* POLL_ for wakeups */ | 445 | atomic_t poll; /* POLL_ for wakeups */ |
@@ -433,8 +449,8 @@ struct perf_mmap_data { | |||
433 | atomic_long_t done_head; /* completed head */ | 449 | atomic_long_t done_head; /* completed head */ |
434 | 450 | ||
435 | atomic_t lock; /* concurrent writes */ | 451 | atomic_t lock; /* concurrent writes */ |
436 | |||
437 | atomic_t wakeup; /* needs a wakeup */ | 452 | atomic_t wakeup; /* needs a wakeup */ |
453 | atomic_t lost; /* nr records lost */ | ||
438 | 454 | ||
439 | struct perf_counter_mmap_page *user_page; | 455 | struct perf_counter_mmap_page *user_page; |
440 | void *data_pages[0]; | 456 | void *data_pages[0]; |