aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2010-10-20 10:50:11 -0400
committerArnaldo Carvalho de Melo <acme@redhat.com>2010-11-30 16:19:04 -0500
commitc320c7b7d380e630f595de1236d9d085b035d5b4 (patch)
treee1544fa8cc259a1d300fac9104dd3c46458572fe
parent068ffaa8bfb67c2ddb3ecaf38cc90f94a1a92fe3 (diff)
perf events: Precalculate the header space for PERF_SAMPLE_ fields
PERF_SAMPLE_{CALLCHAIN,RAW} have variable lengths per sample, but the others can be precalculated, reducing a bit the per sample cost. Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Frédéric Weisbecker <fweisbec@gmail.com> Cc: Ian Munsie <imunsie@au1.ibm.com> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Paul Mackerras <paulus@samba.org> Cc: Stephane Eranian <eranian@google.com> LKML-Reference: <new-submission> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r--include/linux/perf_event.h2
-rw-r--r--kernel/perf_event.c150
2 files changed, 93 insertions, 59 deletions
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index cbf04cc1e630..adf6d9931643 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -758,6 +758,8 @@ struct perf_event {
758 u64 shadow_ctx_time; 758 u64 shadow_ctx_time;
759 759
760 struct perf_event_attr attr; 760 struct perf_event_attr attr;
761 u16 header_size;
762 u16 read_size;
761 struct hw_perf_event hw; 763 struct hw_perf_event hw;
762 764
763 struct perf_event_context *ctx; 765 struct perf_event_context *ctx;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index af1e63f249f3..aede71245e9f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -312,9 +312,75 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
312 ctx->nr_stat++; 312 ctx->nr_stat++;
313} 313}
314 314
315/*
316 * Called at perf_event creation and when events are attached/detached from a
317 * group.
318 */
319static void perf_event__read_size(struct perf_event *event)
320{
321 int entry = sizeof(u64); /* value */
322 int size = 0;
323 int nr = 1;
324
325 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
326 size += sizeof(u64);
327
328 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
329 size += sizeof(u64);
330
331 if (event->attr.read_format & PERF_FORMAT_ID)
332 entry += sizeof(u64);
333
334 if (event->attr.read_format & PERF_FORMAT_GROUP) {
335 nr += event->group_leader->nr_siblings;
336 size += sizeof(u64);
337 }
338
339 size += entry * nr;
340 event->read_size = size;
341}
342
343static void perf_event__header_size(struct perf_event *event)
344{
345 struct perf_sample_data *data;
346 u64 sample_type = event->attr.sample_type;
347 u16 size = 0;
348
349 perf_event__read_size(event);
350
351 if (sample_type & PERF_SAMPLE_IP)
352 size += sizeof(data->ip);
353
354 if (sample_type & PERF_SAMPLE_TID)
355 size += sizeof(data->tid_entry);
356
357 if (sample_type & PERF_SAMPLE_TIME)
358 size += sizeof(data->time);
359
360 if (sample_type & PERF_SAMPLE_ADDR)
361 size += sizeof(data->addr);
362
363 if (sample_type & PERF_SAMPLE_ID)
364 size += sizeof(data->id);
365
366 if (sample_type & PERF_SAMPLE_STREAM_ID)
367 size += sizeof(data->stream_id);
368
369 if (sample_type & PERF_SAMPLE_CPU)
370 size += sizeof(data->cpu_entry);
371
372 if (sample_type & PERF_SAMPLE_PERIOD)
373 size += sizeof(data->period);
374
375 if (sample_type & PERF_SAMPLE_READ)
376 size += event->read_size;
377
378 event->header_size = size;
379}
380
315static void perf_group_attach(struct perf_event *event) 381static void perf_group_attach(struct perf_event *event)
316{ 382{
317 struct perf_event *group_leader = event->group_leader; 383 struct perf_event *group_leader = event->group_leader, *pos;
318 384
319 /* 385 /*
320 * We can have double attach due to group movement in perf_event_open. 386 * We can have double attach due to group movement in perf_event_open.
@@ -333,6 +399,11 @@ static void perf_group_attach(struct perf_event *event)
333 399
334 list_add_tail(&event->group_entry, &group_leader->sibling_list); 400 list_add_tail(&event->group_entry, &group_leader->sibling_list);
335 group_leader->nr_siblings++; 401 group_leader->nr_siblings++;
402
403 perf_event__header_size(group_leader);
404
405 list_for_each_entry(pos, &group_leader->sibling_list, group_entry)
406 perf_event__header_size(pos);
336} 407}
337 408
338/* 409/*
@@ -391,7 +462,7 @@ static void perf_group_detach(struct perf_event *event)
391 if (event->group_leader != event) { 462 if (event->group_leader != event) {
392 list_del_init(&event->group_entry); 463 list_del_init(&event->group_entry);
393 event->group_leader->nr_siblings--; 464 event->group_leader->nr_siblings--;
394 return; 465 goto out;
395 } 466 }
396 467
397 if (!list_empty(&event->group_entry)) 468 if (!list_empty(&event->group_entry))
@@ -410,6 +481,12 @@ static void perf_group_detach(struct perf_event *event)
410 /* Inherit group flags from the previous leader */ 481 /* Inherit group flags from the previous leader */
411 sibling->group_flags = event->group_flags; 482 sibling->group_flags = event->group_flags;
412 } 483 }
484
485out:
486 perf_event__header_size(event->group_leader);
487
488 list_for_each_entry(tmp, &event->group_leader->sibling_list, group_entry)
489 perf_event__header_size(tmp);
413} 490}
414 491
415static inline int 492static inline int
@@ -2289,31 +2366,6 @@ static int perf_release(struct inode *inode, struct file *file)
2289 return perf_event_release_kernel(event); 2366 return perf_event_release_kernel(event);
2290} 2367}
2291 2368
2292static int perf_event_read_size(struct perf_event *event)
2293{
2294 int entry = sizeof(u64); /* value */
2295 int size = 0;
2296 int nr = 1;
2297
2298 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2299 size += sizeof(u64);
2300
2301 if (event->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2302 size += sizeof(u64);
2303
2304 if (event->attr.read_format & PERF_FORMAT_ID)
2305 entry += sizeof(u64);
2306
2307 if (event->attr.read_format & PERF_FORMAT_GROUP) {
2308 nr += event->group_leader->nr_siblings;
2309 size += sizeof(u64);
2310 }
2311
2312 size += entry * nr;
2313
2314 return size;
2315}
2316
2317u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) 2369u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
2318{ 2370{
2319 struct perf_event *child; 2371 struct perf_event *child;
@@ -2428,7 +2480,7 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
2428 if (event->state == PERF_EVENT_STATE_ERROR) 2480 if (event->state == PERF_EVENT_STATE_ERROR)
2429 return 0; 2481 return 0;
2430 2482
2431 if (count < perf_event_read_size(event)) 2483 if (count < event->read_size)
2432 return -ENOSPC; 2484 return -ENOSPC;
2433 2485
2434 WARN_ON_ONCE(event->ctx->parent_ctx); 2486 WARN_ON_ONCE(event->ctx->parent_ctx);
@@ -3606,59 +3658,34 @@ void perf_prepare_sample(struct perf_event_header *header,
3606 data->type = sample_type; 3658 data->type = sample_type;
3607 3659
3608 header->type = PERF_RECORD_SAMPLE; 3660 header->type = PERF_RECORD_SAMPLE;
3609 header->size = sizeof(*header); 3661 header->size = sizeof(*header) + event->header_size;
3610 3662
3611 header->misc = 0; 3663 header->misc = 0;
3612 header->misc |= perf_misc_flags(regs); 3664 header->misc |= perf_misc_flags(regs);
3613 3665
3614 if (sample_type & PERF_SAMPLE_IP) { 3666 if (sample_type & PERF_SAMPLE_IP)
3615 data->ip = perf_instruction_pointer(regs); 3667 data->ip = perf_instruction_pointer(regs);
3616 3668
3617 header->size += sizeof(data->ip);
3618 }
3619
3620 if (sample_type & PERF_SAMPLE_TID) { 3669 if (sample_type & PERF_SAMPLE_TID) {
3621 /* namespace issues */ 3670 /* namespace issues */
3622 data->tid_entry.pid = perf_event_pid(event, current); 3671 data->tid_entry.pid = perf_event_pid(event, current);
3623 data->tid_entry.tid = perf_event_tid(event, current); 3672 data->tid_entry.tid = perf_event_tid(event, current);
3624
3625 header->size += sizeof(data->tid_entry);
3626 } 3673 }
3627 3674
3628 if (sample_type & PERF_SAMPLE_TIME) { 3675 if (sample_type & PERF_SAMPLE_TIME)
3629 data->time = perf_clock(); 3676 data->time = perf_clock();
3630 3677
3631 header->size += sizeof(data->time); 3678 if (sample_type & PERF_SAMPLE_ID)
3632 }
3633
3634 if (sample_type & PERF_SAMPLE_ADDR)
3635 header->size += sizeof(data->addr);
3636
3637 if (sample_type & PERF_SAMPLE_ID) {
3638 data->id = primary_event_id(event); 3679 data->id = primary_event_id(event);
3639 3680
3640 header->size += sizeof(data->id); 3681 if (sample_type & PERF_SAMPLE_STREAM_ID)
3641 }
3642
3643 if (sample_type & PERF_SAMPLE_STREAM_ID) {
3644 data->stream_id = event->id; 3682 data->stream_id = event->id;
3645 3683
3646 header->size += sizeof(data->stream_id);
3647 }
3648
3649 if (sample_type & PERF_SAMPLE_CPU) { 3684 if (sample_type & PERF_SAMPLE_CPU) {
3650 data->cpu_entry.cpu = raw_smp_processor_id(); 3685 data->cpu_entry.cpu = raw_smp_processor_id();
3651 data->cpu_entry.reserved = 0; 3686 data->cpu_entry.reserved = 0;
3652
3653 header->size += sizeof(data->cpu_entry);
3654 } 3687 }
3655 3688
3656 if (sample_type & PERF_SAMPLE_PERIOD)
3657 header->size += sizeof(data->period);
3658
3659 if (sample_type & PERF_SAMPLE_READ)
3660 header->size += perf_event_read_size(event);
3661
3662 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 3689 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
3663 int size = 1; 3690 int size = 1;
3664 3691
@@ -3726,7 +3753,7 @@ perf_event_read_event(struct perf_event *event,
3726 .header = { 3753 .header = {
3727 .type = PERF_RECORD_READ, 3754 .type = PERF_RECORD_READ,
3728 .misc = 0, 3755 .misc = 0,
3729 .size = sizeof(read_event) + perf_event_read_size(event), 3756 .size = sizeof(read_event) + event->read_size,
3730 }, 3757 },
3731 .pid = perf_event_pid(event, task), 3758 .pid = perf_event_pid(event, task),
3732 .tid = perf_event_tid(event, task), 3759 .tid = perf_event_tid(event, task),
@@ -5715,6 +5742,11 @@ SYSCALL_DEFINE5(perf_event_open,
5715 mutex_unlock(&current->perf_event_mutex); 5742 mutex_unlock(&current->perf_event_mutex);
5716 5743
5717 /* 5744 /*
5745 * Precalculate sample_data sizes
5746 */
5747 perf_event__header_size(event);
5748
5749 /*
5718 * Drop the reference on the group_event after placing the 5750 * Drop the reference on the group_event after placing the
5719 * new event on the sibling_list. This ensures destruction 5751 * new event on the sibling_list. This ensures destruction
5720 * of the group leader will find the pointer to itself in 5752 * of the group leader will find the pointer to itself in