diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2010-10-20 10:50:11 -0400 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2010-11-30 16:19:04 -0500 |
commit | c320c7b7d380e630f595de1236d9d085b035d5b4 (patch) | |
tree | e1544fa8cc259a1d300fac9104dd3c46458572fe | |
parent | 068ffaa8bfb67c2ddb3ecaf38cc90f94a1a92fe3 (diff) |
perf events: Precalculate the header space for PERF_SAMPLE_ fields
PERF_SAMPLE_{CALLCHAIN,RAW} have variable lengths per sample, but the others
can be precalculated, reducing the per-sample cost a bit.
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: Ian Munsie <imunsie@au1.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | include/linux/perf_event.h | 2 | ||||
-rw-r--r-- | kernel/perf_event.c | 150 |
2 files changed, 93 insertions, 59 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index cbf04cc1e630..adf6d9931643 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h | |||
@@ -758,6 +758,8 @@ struct perf_event { | |||
758 | u64 shadow_ctx_time; | 758 | u64 shadow_ctx_time; |
759 | 759 | ||
760 | struct perf_event_attr attr; | 760 | struct perf_event_attr attr; |
761 | u16 header_size; | ||
762 | u16 read_size; | ||
761 | struct hw_perf_event hw; | 763 | struct hw_perf_event hw; |
762 | 764 | ||
763 | struct perf_event_context *ctx; | 765 | struct perf_event_context *ctx; |
diff --git a/kernel/perf_event.c b/kernel/perf_event.c index af1e63f249f3..aede71245e9f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c | |||
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
312 | ctx->nr_stat++; | 312 | ctx->nr_stat++; |
313 | } | 313 | } |
314 | 314 | ||
315 | /* | ||
316 | * Called at perf_event creation and when events are attached/detached from a | ||
317 | * group. | ||
318 | */ | ||
319 | static void perf_event__read_size(struct perf_event *event) | ||
320 | { | ||
321 | int entry = sizeof(u64); /* value */ | ||
322 | int size = 0; | ||
323 | int nr = 1; | ||
324 | |||
325 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
326 | size += sizeof(u64); | ||
327 | |||
328 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
329 | size += sizeof(u64); | ||
330 | |||
331 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
332 | entry += sizeof(u64); | ||
333 | |||
334 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
335 | nr += event->group_leader->nr_siblings; | ||
336 | size += sizeof(u64); | ||
337 | } | ||
338 | |||
339 | size += entry * nr; | ||
340 | event->read_size = size; | ||
341 | } | ||
342 | |||
343 | static void perf_event__header_size(struct perf_event *event) | ||
344 | { | ||
345 | struct perf_sample_data *data; | ||
346 | u64 sample_type = event->attr.sample_type; | ||
347 | u16 size = 0; | ||
348 | |||
349 | perf_event__read_size(event); | ||
350 | |||
351 | if (sample_type & PERF_SAMPLE_IP) | ||
352 | size += sizeof(data->ip); | ||
353 | |||
354 | if (sample_type & PERF_SAMPLE_TID) | ||
355 | size += sizeof(data->tid_entry); | ||
356 | |||
357 | if (sample_type & PERF_SAMPLE_TIME) | ||
358 | size += sizeof(data->time); | ||
359 | |||
360 | if (sample_type & PERF_SAMPLE_ADDR) | ||
361 | size += sizeof(data->addr); | ||
362 | |||
363 | if (sample_type & PERF_SAMPLE_ID) | ||
364 | size += sizeof(data->id); | ||
365 | |||
366 | if (sample_type & PERF_SAMPLE_STREAM_ID) | ||
367 | size += sizeof(data->stream_id); | ||
368 | |||
369 | if (sample_type & PERF_SAMPLE_CPU) | ||
370 | size += sizeof(data->cpu_entry); | ||
371 | |||
372 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
373 | size += sizeof(data->period); | ||
374 | |||
375 | if (sample_type & PERF_SAMPLE_READ) | ||
376 | size += event->read_size; | ||
377 | |||
378 | event->header_size = size; | ||
379 | } | ||
380 | |||
315 | static void perf_group_attach(struct perf_event *event) | 381 | static void perf_group_attach(struct perf_event *event) |
316 | { | 382 | { |
317 | struct perf_event *group_leader = event->group_leader; | 383 | struct perf_event *group_leader = event->group_leader, *pos; |
318 | 384 | ||
319 | /* | 385 | /* |
320 | * We can have double attach due to group movement in perf_event_open. | 386 | * We can have double attach due to group movement in perf_event_open. |
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event) | |||
333 | 399 | ||
334 | list_add_tail(&event->group_entry, &group_leader->sibling_list); | 400 | list_add_tail(&event->group_entry, &group_leader->sibling_list); |
335 | group_leader->nr_siblings++; | 401 | group_leader->nr_siblings++; |
402 | |||
403 | perf_event__header_size(group_leader); | ||
404 | |||
405 | list_for_each_entry(pos, &group_leader->sibling_list, group_entry) | ||
406 | perf_event__header_size(pos); | ||
336 | } | 407 | } |
337 | 408 | ||
338 | /* | 409 | /* |
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event) | |||
391 | if (event->group_leader != event) { | 462 | if (event->group_leader != event) { |
392 | list_del_init(&event->group_entry); | 463 | list_del_init(&event->group_entry); |
393 | event->group_leader->nr_siblings--; | 464 | event->group_leader->nr_siblings--; |
394 | return; | 465 | goto out; |
395 | } | 466 | } |
396 | 467 | ||
397 | if (!list_empty(&event->group_entry)) | 468 | if (!list_empty(&event->group_entry)) |
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event) | |||
410 | /* Inherit group flags from the previous leader */ | 481 | /* Inherit group flags from the previous leader */ |
411 | sibling->group_flags = event->group_flags; | 482 | sibling->group_flags = event->group_flags; |
412 | } | 483 | } |
484 | |||
485 | out: | ||
486 | perf_event__header_size(event->group_leader); | ||
487 | |||
488 | list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry) | ||
489 | perf_event__header_size(tmp); | ||
413 | } | 490 | } |
414 | 491 | ||
415 | static inline int | 492 | static inline int |
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file) | |||
2289 | return perf_event_release_kernel(event); | 2366 | return perf_event_release_kernel(event); |
2290 | } | 2367 | } |
2291 | 2368 | ||
2292 | static int perf_event_read_size(struct perf_event *event) | ||
2293 | { | ||
2294 | int entry = sizeof(u64); /* value */ | ||
2295 | int size = 0; | ||
2296 | int nr = 1; | ||
2297 | |||
2298 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) | ||
2299 | size += sizeof(u64); | ||
2300 | |||
2301 | if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) | ||
2302 | size += sizeof(u64); | ||
2303 | |||
2304 | if (event->attr.read_format & PERF_FORMAT_ID) | ||
2305 | entry += sizeof(u64); | ||
2306 | |||
2307 | if (event->attr.read_format & PERF_FORMAT_GROUP) { | ||
2308 | nr += event->group_leader->nr_siblings; | ||
2309 | size += sizeof(u64); | ||
2310 | } | ||
2311 | |||
2312 | size += entry * nr; | ||
2313 | |||
2314 | return size; | ||
2315 | } | ||
2316 | |||
2317 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) | 2369 | u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) |
2318 | { | 2370 | { |
2319 | struct perf_event *child; | 2371 | struct perf_event *child; |
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) | |||
2428 | if (event->state == PERF_EVENT_STATE_ERROR) | 2480 | if (event->state == PERF_EVENT_STATE_ERROR) |
2429 | return 0; | 2481 | return 0; |
2430 | 2482 | ||
2431 | if (count < perf_event_read_size(event)) | 2483 | if (count < event->read_size) |
2432 | return -ENOSPC; | 2484 | return -ENOSPC; |
2433 | 2485 | ||
2434 | WARN_ON_ONCE(event->ctx->parent_ctx); | 2486 | WARN_ON_ONCE(event->ctx->parent_ctx); |
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header, | |||
3606 | data->type = sample_type; | 3658 | data->type = sample_type; |
3607 | 3659 | ||
3608 | header->type = PERF_RECORD_SAMPLE; | 3660 | header->type = PERF_RECORD_SAMPLE; |
3609 | header->size = sizeof(*header); | 3661 | header->size = sizeof(*header) + event->header_size; |
3610 | 3662 | ||
3611 | header->misc = 0; | 3663 | header->misc = 0; |
3612 | header->misc |= perf_misc_flags(regs); | 3664 | header->misc |= perf_misc_flags(regs); |
3613 | 3665 | ||
3614 | if (sample_type & PERF_SAMPLE_IP) { | 3666 | if (sample_type & PERF_SAMPLE_IP) |
3615 | data->ip = perf_instruction_pointer(regs); | 3667 | data->ip = perf_instruction_pointer(regs); |
3616 | 3668 | ||
3617 | header->size += sizeof(data->ip); | ||
3618 | } | ||
3619 | |||
3620 | if (sample_type & PERF_SAMPLE_TID) { | 3669 | if (sample_type & PERF_SAMPLE_TID) { |
3621 | /* namespace issues */ | 3670 | /* namespace issues */ |
3622 | data->tid_entry.pid = perf_event_pid(event, current); | 3671 | data->tid_entry.pid = perf_event_pid(event, current); |
3623 | data->tid_entry.tid = perf_event_tid(event, current); | 3672 | data->tid_entry.tid = perf_event_tid(event, current); |
3624 | |||
3625 | header->size += sizeof(data->tid_entry); | ||
3626 | } | 3673 | } |
3627 | 3674 | ||
3628 | if (sample_type & PERF_SAMPLE_TIME) { | 3675 | if (sample_type & PERF_SAMPLE_TIME) |
3629 | data->time = perf_clock(); | 3676 | data->time = perf_clock(); |
3630 | 3677 | ||
3631 | header->size += sizeof(data->time); | 3678 | if (sample_type & PERF_SAMPLE_ID) |
3632 | } | ||
3633 | |||
3634 | if (sample_type & PERF_SAMPLE_ADDR) | ||
3635 | header->size += sizeof(data->addr); | ||
3636 | |||
3637 | if (sample_type & PERF_SAMPLE_ID) { | ||
3638 | data->id = primary_event_id(event); | 3679 | data->id = primary_event_id(event); |
3639 | 3680 | ||
3640 | header->size += sizeof(data->id); | 3681 | if (sample_type & PERF_SAMPLE_STREAM_ID) |
3641 | } | ||
3642 | |||
3643 | if (sample_type & PERF_SAMPLE_STREAM_ID) { | ||
3644 | data->stream_id = event->id; | 3682 | data->stream_id = event->id; |
3645 | 3683 | ||
3646 | header->size += sizeof(data->stream_id); | ||
3647 | } | ||
3648 | |||
3649 | if (sample_type & PERF_SAMPLE_CPU) { | 3684 | if (sample_type & PERF_SAMPLE_CPU) { |
3650 | data->cpu_entry.cpu = raw_smp_processor_id(); | 3685 | data->cpu_entry.cpu = raw_smp_processor_id(); |
3651 | data->cpu_entry.reserved = 0; | 3686 | data->cpu_entry.reserved = 0; |
3652 | |||
3653 | header->size += sizeof(data->cpu_entry); | ||
3654 | } | 3687 | } |
3655 | 3688 | ||
3656 | if (sample_type & PERF_SAMPLE_PERIOD) | ||
3657 | header->size += sizeof(data->period); | ||
3658 | |||
3659 | if (sample_type & PERF_SAMPLE_READ) | ||
3660 | header->size += perf_event_read_size(event); | ||
3661 | |||
3662 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { | 3689 | if (sample_type & PERF_SAMPLE_CALLCHAIN) { |
3663 | int size = 1; | 3690 | int size = 1; |
3664 | 3691 | ||
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event, | |||
3726 | .header = { | 3753 | .header = { |
3727 | .type = PERF_RECORD_READ, | 3754 | .type = PERF_RECORD_READ, |
3728 | .misc = 0, | 3755 | .misc = 0, |
3729 | .size = sizeof(read_event) + perf_event_read_size(event), | 3756 | .size = sizeof(read_event) + event->read_size, |
3730 | }, | 3757 | }, |
3731 | .pid = perf_event_pid(event, task), | 3758 | .pid = perf_event_pid(event, task), |
3732 | .tid = perf_event_tid(event, task), | 3759 | .tid = perf_event_tid(event, task), |
@@ -5715,6 +5742,11 @@ SYSCALL_DEFINE5(perf_event_open, | |||
5715 | mutex_unlock(¤t->perf_event_mutex); | 5742 | mutex_unlock(¤t->perf_event_mutex); |
5716 | 5743 | ||
5717 | /* | 5744 | /* |
5745 | * Precalculate sample_data sizes | ||
5746 | */ | ||
5747 | perf_event__header_size(event); | ||
5748 | |||
5749 | /* | ||
5718 | * Drop the reference on the group_event after placing the | 5750 | * Drop the reference on the group_event after placing the |
5719 | * new event on the sibling_list. This ensures destruction | 5751 | * new event on the sibling_list. This ensures destruction |
5720 | * of the group leader will find the pointer to itself in | 5752 | * of the group leader will find the pointer to itself in |