author	Frederic Weisbecker <fweisbec@gmail.com>	2011-05-19 13:55:04 -0400
committer	Ingo Molnar <mingo@elte.hu>	2011-06-09 06:57:54 -0400
commit	76369139ceb955deefc509e6e12ce9d6ce50ccab (patch)
tree	5ba2732188d2fecd4fd49b93f73e51e17fb69082 /kernel/events/core.c
parent	b58f6b0dd3d677338b9065388cc2cc942b86338e (diff)
perf: Split up buffer handling from core code
And create the internal perf events header.

v2: Keep an internal inlined perf_output_copy()

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Stephane Eranian <eranian@google.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1305827704-5607-1-git-send-email-fweisbec@gmail.com
[ v3: use clearer 'ring_buffer' and 'rb' naming ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--	kernel/events/core.c	568
1 file changed, 71 insertions(+), 497 deletions(-)
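Note on the split: the changelog above introduces a new kernel/events/internal.h that core.c now includes (see the first hunk below), with the buffer code itself moving out of core.c into kernel/events/ring_buffer.c. As a rough, hypothetical sketch only, inferred from the calls that remain in core.c in this diff (rb_alloc, rb_free, perf_mmap_to_page, perf_data_size, __output_copy, RING_BUFFER_WRITABLE), the internal header has to provide roughly the declarations below; the real header also defines struct ring_buffer itself, carries the output-handle machinery, and per the v2 note keeps the copy routine inlined rather than as a bare prototype.

/*
 * Hypothetical sketch, not the actual kernel/events/internal.h:
 * only the interface core.c visibly relies on after this patch.
 * Some of these are static inlines in the real header.
 */
#ifndef _KERNEL_EVENTS_INTERNAL_H
#define _KERNEL_EVENTS_INTERNAL_H

struct ring_buffer;
struct perf_output_handle;
struct page;

/* replaces PERF_BUFFER_WRITABLE in perf_mmap() */
#define RING_BUFFER_WRITABLE		0x01

/* allocation and teardown, now implemented in kernel/events/ring_buffer.c */
extern struct ring_buffer *
rb_alloc(int nr_pages, long watermark, int cpu, int flags);
extern void rb_free(struct ring_buffer *rb);

/* helpers core.c keeps using for mmap() and the fault handler */
extern struct page *
perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
extern unsigned long perf_data_size(struct ring_buffer *rb);

/* copy into the buffer; inlined in the real header (v2 note), prototype here */
extern void __output_copy(struct perf_output_handle *handle,
			  const void *buf, unsigned int len);

#endif /* _KERNEL_EVENTS_INTERNAL_H */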
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5e8c7b1389bc..5e70f62752a2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,6 +36,8 @@
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
 
+#include "internal.h"
+
 #include <asm/irq_regs.h>
 
 struct remote_function_call {
@@ -2886,7 +2888,7 @@ static void free_event_rcu(struct rcu_head *head)
 	kfree(event);
 }
 
-static void perf_buffer_put(struct perf_buffer *buffer);
+static void ring_buffer_put(struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -2909,9 +2911,9 @@ static void free_event(struct perf_event *event)
 		}
 	}
 
-	if (event->buffer) {
-		perf_buffer_put(event->buffer);
-		event->buffer = NULL;
+	if (event->rb) {
+		ring_buffer_put(event->rb);
+		event->rb = NULL;
 	}
 
 	if (is_cgroup_event(event))
@@ -3139,13 +3141,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 static unsigned int perf_poll(struct file *file, poll_table *wait)
 {
 	struct perf_event *event = file->private_data;
-	struct perf_buffer *buffer;
+	struct ring_buffer *rb;
 	unsigned int events = POLL_HUP;
 
 	rcu_read_lock();
-	buffer = rcu_dereference(event->buffer);
-	if (buffer)
-		events = atomic_xchg(&buffer->poll, 0);
+	rb = rcu_dereference(event->rb);
+	if (rb)
+		events = atomic_xchg(&rb->poll, 0);
 	rcu_read_unlock();
 
 	poll_wait(file, &event->waitq, wait);
@@ -3356,14 +3358,14 @@ static int perf_event_index(struct perf_event *event)
 void perf_event_update_userpage(struct perf_event *event)
 {
 	struct perf_event_mmap_page *userpg;
-	struct perf_buffer *buffer;
+	struct ring_buffer *rb;
 
 	rcu_read_lock();
-	buffer = rcu_dereference(event->buffer);
-	if (!buffer)
+	rb = rcu_dereference(event->rb);
+	if (!rb)
 		goto unlock;
 
-	userpg = buffer->user_page;
+	userpg = rb->user_page;
 
 	/*
 	 * Disable preemption so as to not let the corresponding user-space
@@ -3390,220 +3392,10 @@ unlock:
 	rcu_read_unlock();
 }
 
-static unsigned long perf_data_size(struct perf_buffer *buffer);
-
-static void
-perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
-{
-	long max_size = perf_data_size(buffer);
-
-	if (watermark)
-		buffer->watermark = min(max_size, watermark);
-
-	if (!buffer->watermark)
-		buffer->watermark = max_size / 2;
-
-	if (flags & PERF_BUFFER_WRITABLE)
-		buffer->writable = 1;
-
-	atomic_set(&buffer->refcount, 1);
-}
-
-#ifndef CONFIG_PERF_USE_VMALLOC
-
-/*
- * Back perf_mmap() with regular GFP_KERNEL-0 pages.
- */
-
-static struct page *
-perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
-{
-	if (pgoff > buffer->nr_pages)
-		return NULL;
-
-	if (pgoff == 0)
-		return virt_to_page(buffer->user_page);
-
-	return virt_to_page(buffer->data_pages[pgoff - 1]);
-}
-
-static void *perf_mmap_alloc_page(int cpu)
-{
-	struct page *page;
-	int node;
-
-	node = (cpu == -1) ? cpu : cpu_to_node(cpu);
-	page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
-	if (!page)
-		return NULL;
-
-	return page_address(page);
-}
-
-static struct perf_buffer *
-perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
-{
-	struct perf_buffer *buffer;
-	unsigned long size;
-	int i;
-
-	size = sizeof(struct perf_buffer);
-	size += nr_pages * sizeof(void *);
-
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		goto fail;
-
-	buffer->user_page = perf_mmap_alloc_page(cpu);
-	if (!buffer->user_page)
-		goto fail_user_page;
-
-	for (i = 0; i < nr_pages; i++) {
-		buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
-		if (!buffer->data_pages[i])
-			goto fail_data_pages;
-	}
-
-	buffer->nr_pages = nr_pages;
-
-	perf_buffer_init(buffer, watermark, flags);
-
-	return buffer;
-
-fail_data_pages:
-	for (i--; i >= 0; i--)
-		free_page((unsigned long)buffer->data_pages[i]);
-
-	free_page((unsigned long)buffer->user_page);
-
-fail_user_page:
-	kfree(buffer);
-
-fail:
-	return NULL;
-}
-
-static void perf_mmap_free_page(unsigned long addr)
-{
-	struct page *page = virt_to_page((void *)addr);
-
-	page->mapping = NULL;
-	__free_page(page);
-}
-
-static void perf_buffer_free(struct perf_buffer *buffer)
-{
-	int i;
-
-	perf_mmap_free_page((unsigned long)buffer->user_page);
-	for (i = 0; i < buffer->nr_pages; i++)
-		perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
-	kfree(buffer);
-}
-
-static inline int page_order(struct perf_buffer *buffer)
-{
-	return 0;
-}
-
-#else
-
-/*
- * Back perf_mmap() with vmalloc memory.
- *
- * Required for architectures that have d-cache aliasing issues.
- */
-
-static inline int page_order(struct perf_buffer *buffer)
-{
-	return buffer->page_order;
-}
-
-static struct page *
-perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
-{
-	if (pgoff > (1UL << page_order(buffer)))
-		return NULL;
-
-	return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
-}
-
-static void perf_mmap_unmark_page(void *addr)
-{
-	struct page *page = vmalloc_to_page(addr);
-
-	page->mapping = NULL;
-}
-
-static void perf_buffer_free_work(struct work_struct *work)
-{
-	struct perf_buffer *buffer;
-	void *base;
-	int i, nr;
-
-	buffer = container_of(work, struct perf_buffer, work);
-	nr = 1 << page_order(buffer);
-
-	base = buffer->user_page;
-	for (i = 0; i < nr + 1; i++)
-		perf_mmap_unmark_page(base + (i * PAGE_SIZE));
-
-	vfree(base);
-	kfree(buffer);
-}
-
-static void perf_buffer_free(struct perf_buffer *buffer)
-{
-	schedule_work(&buffer->work);
-}
-
-static struct perf_buffer *
-perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
-{
-	struct perf_buffer *buffer;
-	unsigned long size;
-	void *all_buf;
-
-	size = sizeof(struct perf_buffer);
-	size += sizeof(void *);
-
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		goto fail;
-
-	INIT_WORK(&buffer->work, perf_buffer_free_work);
-
-	all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
-	if (!all_buf)
-		goto fail_all_buf;
-
-	buffer->user_page = all_buf;
-	buffer->data_pages[0] = all_buf + PAGE_SIZE;
-	buffer->page_order = ilog2(nr_pages);
-	buffer->nr_pages = 1;
-
-	perf_buffer_init(buffer, watermark, flags);
-
-	return buffer;
-
-fail_all_buf:
-	kfree(buffer);
-
-fail:
-	return NULL;
-}
-
-#endif
-
-static unsigned long perf_data_size(struct perf_buffer *buffer)
-{
-	return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
-}
-
 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
 	struct perf_event *event = vma->vm_file->private_data;
-	struct perf_buffer *buffer;
+	struct ring_buffer *rb;
 	int ret = VM_FAULT_SIGBUS;
 
 	if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -3613,14 +3405,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	}
 
 	rcu_read_lock();
-	buffer = rcu_dereference(event->buffer);
-	if (!buffer)
+	rb = rcu_dereference(event->rb);
+	if (!rb)
 		goto unlock;
 
 	if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
 		goto unlock;
 
-	vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
+	vmf->page = perf_mmap_to_page(rb, vmf->pgoff);
 	if (!vmf->page)
 		goto unlock;
 
@@ -3635,35 +3427,35 @@ unlock:
 	return ret;
 }
 
-static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
+static void rb_free_rcu(struct rcu_head *rcu_head)
 {
-	struct perf_buffer *buffer;
+	struct ring_buffer *rb;
 
-	buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
-	perf_buffer_free(buffer);
+	rb = container_of(rcu_head, struct ring_buffer, rcu_head);
+	rb_free(rb);
 }
 
-static struct perf_buffer *perf_buffer_get(struct perf_event *event)
+static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 {
-	struct perf_buffer *buffer;
+	struct ring_buffer *rb;
 
 	rcu_read_lock();
-	buffer = rcu_dereference(event->buffer);
-	if (buffer) {
-		if (!atomic_inc_not_zero(&buffer->refcount))
-			buffer = NULL;
+	rb = rcu_dereference(event->rb);
+	if (rb) {
+		if (!atomic_inc_not_zero(&rb->refcount))
+			rb = NULL;
 	}
 	rcu_read_unlock();
 
-	return buffer;
+	return rb;
 }
 
-static void perf_buffer_put(struct perf_buffer *buffer)
+static void ring_buffer_put(struct ring_buffer *rb)
 {
-	if (!atomic_dec_and_test(&buffer->refcount))
+	if (!atomic_dec_and_test(&rb->refcount))
 		return;
 
-	call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
+	call_rcu(&rb->rcu_head, rb_free_rcu);
 }
 
 static void perf_mmap_open(struct vm_area_struct *vma)
@@ -3678,16 +3470,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	struct perf_event *event = vma->vm_file->private_data;
 
 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-		unsigned long size = perf_data_size(event->buffer);
+		unsigned long size = perf_data_size(event->rb);
 		struct user_struct *user = event->mmap_user;
-		struct perf_buffer *buffer = event->buffer;
+		struct ring_buffer *rb = event->rb;
 
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= event->mmap_locked;
-		rcu_assign_pointer(event->buffer, NULL);
+		rcu_assign_pointer(event->rb, NULL);
 		mutex_unlock(&event->mmap_mutex);
 
-		perf_buffer_put(buffer);
+		ring_buffer_put(rb);
 		free_uid(user);
 	}
 }
@@ -3705,7 +3497,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
-	struct perf_buffer *buffer;
+	struct ring_buffer *rb;
 	unsigned long vma_size;
 	unsigned long nr_pages;
 	long user_extra, extra;
@@ -3714,7 +3506,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	/*
 	 * Don't allow mmap() of inherited per-task counters. This would
 	 * create a performance issue due to all children writing to the
-	 * same buffer.
+	 * same rb.
 	 */
 	if (event->cpu == -1 && event->attr.inherit)
 		return -EINVAL;
@@ -3726,7 +3518,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	nr_pages = (vma_size / PAGE_SIZE) - 1;
 
 	/*
-	 * If we have buffer pages ensure they're a power-of-two number, so we
+	 * If we have rb pages ensure they're a power-of-two number, so we
 	 * can do bitmasks instead of modulo.
 	 */
 	if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -3740,9 +3532,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 	mutex_lock(&event->mmap_mutex);
-	if (event->buffer) {
-		if (event->buffer->nr_pages == nr_pages)
-			atomic_inc(&event->buffer->refcount);
+	if (event->rb) {
+		if (event->rb->nr_pages == nr_pages)
+			atomic_inc(&event->rb->refcount);
 		else
 			ret = -EINVAL;
 		goto unlock;
@@ -3772,18 +3564,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	WARN_ON(event->buffer);
+	WARN_ON(event->rb);
 
 	if (vma->vm_flags & VM_WRITE)
-		flags |= PERF_BUFFER_WRITABLE;
+		flags |= RING_BUFFER_WRITABLE;
 
-	buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
-				   event->cpu, flags);
-	if (!buffer) {
+	rb = rb_alloc(nr_pages, event->attr.wakeup_watermark,
+		      event->cpu, flags);
+	if (!rb) {
 		ret = -ENOMEM;
 		goto unlock;
 	}
-	rcu_assign_pointer(event->buffer, buffer);
+	rcu_assign_pointer(event->rb, rb);
 
 	atomic_long_add(user_extra, &user->locked_vm);
 	event->mmap_locked = extra;
@@ -3882,117 +3674,6 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 
-/*
- * Output
- */
-static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
-			      unsigned long offset, unsigned long head)
-{
-	unsigned long mask;
-
-	if (!buffer->writable)
-		return true;
-
-	mask = perf_data_size(buffer) - 1;
-
-	offset = (offset - tail) & mask;
-	head = (head - tail) & mask;
-
-	if ((int)(head - offset) < 0)
-		return false;
-
-	return true;
-}
-
-static void perf_output_wakeup(struct perf_output_handle *handle)
-{
-	atomic_set(&handle->buffer->poll, POLL_IN);
-
-	if (handle->nmi) {
-		handle->event->pending_wakeup = 1;
-		irq_work_queue(&handle->event->pending);
-	} else
-		perf_event_wakeup(handle->event);
-}
-
-/*
- * We need to ensure a later event_id doesn't publish a head when a former
- * event isn't done writing. However since we need to deal with NMIs we
- * cannot fully serialize things.
- *
- * We only publish the head (and generate a wakeup) when the outer-most
- * event completes.
- */
-static void perf_output_get_handle(struct perf_output_handle *handle)
-{
-	struct perf_buffer *buffer = handle->buffer;
-
-	preempt_disable();
-	local_inc(&buffer->nest);
-	handle->wakeup = local_read(&buffer->wakeup);
-}
-
-static void perf_output_put_handle(struct perf_output_handle *handle)
-{
-	struct perf_buffer *buffer = handle->buffer;
-	unsigned long head;
-
-again:
-	head = local_read(&buffer->head);
-
-	/*
-	 * IRQ/NMI can happen here, which means we can miss a head update.
-	 */
-
-	if (!local_dec_and_test(&buffer->nest))
-		goto out;
-
-	/*
-	 * Publish the known good head. Rely on the full barrier implied
-	 * by atomic_dec_and_test() order the buffer->head read and this
-	 * write.
-	 */
-	buffer->user_page->data_head = head;
-
-	/*
-	 * Now check if we missed an update, rely on the (compiler)
-	 * barrier in atomic_dec_and_test() to re-read buffer->head.
-	 */
-	if (unlikely(head != local_read(&buffer->head))) {
-		local_inc(&buffer->nest);
-		goto again;
-	}
-
-	if (handle->wakeup != local_read(&buffer->wakeup))
-		perf_output_wakeup(handle);
-
-out:
-	preempt_enable();
-}
-
-__always_inline void perf_output_copy(struct perf_output_handle *handle,
-		      const void *buf, unsigned int len)
-{
-	do {
-		unsigned long size = min_t(unsigned long, handle->size, len);
-
-		memcpy(handle->addr, buf, size);
-
-		len -= size;
-		handle->addr += size;
-		buf += size;
-		handle->size -= size;
-		if (!handle->size) {
-			struct perf_buffer *buffer = handle->buffer;
-
-			handle->page++;
-			handle->page &= buffer->nr_pages - 1;
-			handle->addr = buffer->data_pages[handle->page];
-			handle->size = PAGE_SIZE << page_order(buffer);
-		}
-	} while (len);
-}
-
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
@@ -4023,9 +3704,9 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 	}
 }
 
-static void perf_event_header__init_id(struct perf_event_header *header,
-				       struct perf_sample_data *data,
-				       struct perf_event *event)
+void perf_event_header__init_id(struct perf_event_header *header,
+				struct perf_sample_data *data,
+				struct perf_event *event)
 {
 	if (event->attr.sample_id_all)
 		__perf_event_header__init_id(header, data, event);
@@ -4052,121 +3733,14 @@ static void __perf_event__output_id_sample(struct perf_output_handle *handle,
 	perf_output_put(handle, data->cpu_entry);
 }
 
-static void perf_event__output_id_sample(struct perf_event *event,
-					 struct perf_output_handle *handle,
-					 struct perf_sample_data *sample)
+void perf_event__output_id_sample(struct perf_event *event,
+				  struct perf_output_handle *handle,
+				  struct perf_sample_data *sample)
 {
 	if (event->attr.sample_id_all)
 		__perf_event__output_id_sample(handle, sample);
 }
 
-int perf_output_begin(struct perf_output_handle *handle,
-		      struct perf_event *event, unsigned int size,
-		      int nmi, int sample)
-{
-	struct perf_buffer *buffer;
-	unsigned long tail, offset, head;
-	int have_lost;
-	struct perf_sample_data sample_data;
-	struct {
-		struct perf_event_header header;
-		u64 id;
-		u64 lost;
-	} lost_event;
-
-	rcu_read_lock();
-	/*
-	 * For inherited events we send all the output towards the parent.
-	 */
-	if (event->parent)
-		event = event->parent;
-
-	buffer = rcu_dereference(event->buffer);
-	if (!buffer)
-		goto out;
-
-	handle->buffer = buffer;
-	handle->event = event;
-	handle->nmi = nmi;
-	handle->sample = sample;
-
-	if (!buffer->nr_pages)
-		goto out;
-
-	have_lost = local_read(&buffer->lost);
-	if (have_lost) {
-		lost_event.header.size = sizeof(lost_event);
-		perf_event_header__init_id(&lost_event.header, &sample_data,
-					   event);
-		size += lost_event.header.size;
-	}
-
-	perf_output_get_handle(handle);
-
-	do {
-		/*
-		 * Userspace could choose to issue a mb() before updating the
-		 * tail pointer. So that all reads will be completed before the
-		 * write is issued.
-		 */
-		tail = ACCESS_ONCE(buffer->user_page->data_tail);
-		smp_rmb();
-		offset = head = local_read(&buffer->head);
-		head += size;
-		if (unlikely(!perf_output_space(buffer, tail, offset, head)))
-			goto fail;
-	} while (local_cmpxchg(&buffer->head, offset, head) != offset);
-
-	if (head - local_read(&buffer->wakeup) > buffer->watermark)
-		local_add(buffer->watermark, &buffer->wakeup);
-
-	handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
-	handle->page &= buffer->nr_pages - 1;
-	handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
-	handle->addr = buffer->data_pages[handle->page];
-	handle->addr += handle->size;
-	handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
-
-	if (have_lost) {
-		lost_event.header.type = PERF_RECORD_LOST;
-		lost_event.header.misc = 0;
-		lost_event.id = event->id;
-		lost_event.lost = local_xchg(&buffer->lost, 0);
-
-		perf_output_put(handle, lost_event);
-		perf_event__output_id_sample(event, handle, &sample_data);
-	}
-
-	return 0;
-
-fail:
-	local_inc(&buffer->lost);
-	perf_output_put_handle(handle);
-out:
-	rcu_read_unlock();
-
-	return -ENOSPC;
-}
-
-void perf_output_end(struct perf_output_handle *handle)
-{
-	struct perf_event *event = handle->event;
-	struct perf_buffer *buffer = handle->buffer;
-
-	int wakeup_events = event->attr.wakeup_events;
-
-	if (handle->sample && wakeup_events) {
-		int events = local_inc_return(&buffer->events);
-		if (events >= wakeup_events) {
-			local_sub(wakeup_events, &buffer->events);
-			local_inc(&buffer->wakeup);
-		}
-	}
-
-	perf_output_put_handle(handle);
-	rcu_read_unlock();
-}
-
 static void perf_output_read_one(struct perf_output_handle *handle,
 				 struct perf_event *event,
 				 u64 enabled, u64 running)
@@ -4187,7 +3761,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(event);
 
-	perf_output_copy(handle, values, n * sizeof(u64));
+	__output_copy(handle, values, n * sizeof(u64));
 }
 
 /*
@@ -4217,7 +3791,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 	if (read_format & PERF_FORMAT_ID)
 		values[n++] = primary_event_id(leader);
 
-	perf_output_copy(handle, values, n * sizeof(u64));
+	__output_copy(handle, values, n * sizeof(u64));
 
 	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
 		n = 0;
@@ -4229,7 +3803,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
 		if (read_format & PERF_FORMAT_ID)
 			values[n++] = primary_event_id(sub);
 
-		perf_output_copy(handle, values, n * sizeof(u64));
+		__output_copy(handle, values, n * sizeof(u64));
 	}
 }
 
@@ -4309,7 +3883,7 @@ void perf_output_sample(struct perf_output_handle *handle,
 
 			size *= sizeof(u64);
 
-			perf_output_copy(handle, data->callchain, size);
+			__output_copy(handle, data->callchain, size);
 		} else {
 			u64 nr = 0;
 			perf_output_put(handle, nr);
@@ -4319,8 +3893,8 @@ void perf_output_sample(struct perf_output_handle *handle,
 	if (sample_type & PERF_SAMPLE_RAW) {
 		if (data->raw) {
 			perf_output_put(handle, data->raw->size);
-			perf_output_copy(handle, data->raw->data,
-					 data->raw->size);
+			__output_copy(handle, data->raw->data,
+					 data->raw->size);
 		} else {
 			struct {
 				u32 size;
@@ -4617,7 +4191,7 @@ static void perf_event_comm_output(struct perf_event *event,
 	comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
 
 	perf_output_put(&handle, comm_event->event_id);
-	perf_output_copy(&handle, comm_event->comm,
+	__output_copy(&handle, comm_event->comm,
 				   comm_event->comm_size);
 
 	perf_event__output_id_sample(event, &handle, &sample);
@@ -4763,7 +4337,7 @@ static void perf_event_mmap_output(struct perf_event *event,
 	mmap_event->event_id.tid = perf_event_tid(event, current);
 
 	perf_output_put(&handle, mmap_event->event_id);
-	perf_output_copy(&handle, mmap_event->file_name,
+	__output_copy(&handle, mmap_event->file_name,
 				   mmap_event->file_size);
 
 	perf_event__output_id_sample(event, &handle, &sample);
@@ -4819,7 +4393,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
 	if (file) {
 		/*
-		 * d_path works from the end of the buffer backwards, so we
+		 * d_path works from the end of the rb backwards, so we
 		 * need to add enough zero bytes after the string to handle
 		 * the 64bit alignment we do later.
 		 */
@@ -6346,7 +5920,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct perf_buffer *buffer = NULL, *old_buffer = NULL;
+	struct ring_buffer *rb = NULL, *old_rb = NULL;
 	int ret = -EINVAL;
 
 	if (!output_event)
@@ -6363,7 +5937,7 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 		goto out;
 
 	/*
-	 * If its not a per-cpu buffer, it must be the same task.
+	 * If its not a per-cpu rb, it must be the same task.
 	 */
 	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
 		goto out;
@@ -6375,20 +5949,20 @@ set:
 		goto unlock;
 
 	if (output_event) {
-		/* get the buffer we want to redirect to */
-		buffer = perf_buffer_get(output_event);
-		if (!buffer)
+		/* get the rb we want to redirect to */
+		rb = ring_buffer_get(output_event);
+		if (!rb)
 			goto unlock;
 	}
 
-	old_buffer = event->buffer;
-	rcu_assign_pointer(event->buffer, buffer);
+	old_rb = event->rb;
+	rcu_assign_pointer(event->rb, rb);
 	ret = 0;
 unlock:
 	mutex_unlock(&event->mmap_mutex);
 
-	if (old_buffer)
-		perf_buffer_put(old_buffer);
+	if (old_rb)
+		ring_buffer_put(old_rb);
 out:
 	return ret;
 }