author      Ingo Molnar <mingo@kernel.org>    2013-06-19 06:44:41 -0400
committer   Ingo Molnar <mingo@kernel.org>    2013-06-19 06:44:41 -0400
commit      eff2108f020f30eb90462205ecf3ce10a420938b (patch)
tree        27d1fc8ab23cd81e4863c29cddc0fbe8cfe68431 /kernel/events
parent      afb71193a4d8e4a3c4c52a80a8cbee76582f0e90 (diff)
parent      f1a527899ef0a8a241eb3bea619eb2e29d797f44 (diff)
Merge branch 'perf/urgent' into perf/core
Merge in the latest fixes, to avoid conflicts with ongoing work.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r--   kernel/events/core.c       233
-rw-r--r--   kernel/events/internal.h     4
2 files changed, 165 insertions(+), 72 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index a0780b3a3d50..d0e0d0d2025f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -198,9 +198,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);
 
-static void ring_buffer_attach(struct perf_event *event,
-                               struct ring_buffer *rb);
-
 void __weak perf_event_print_debug(void) { }
 
 extern __weak const char *perf_pmu_name(void)
@@ -3023,6 +3020,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
 
 static void free_event(struct perf_event *event)
 {
@@ -3047,15 +3045,30 @@ static void free_event(struct perf_event *event)
                 if (has_branch_stack(event)) {
                         static_key_slow_dec_deferred(&perf_sched_events);
                         /* is system-wide event */
-                        if (!(event->attach_state & PERF_ATTACH_TASK))
+                        if (!(event->attach_state & PERF_ATTACH_TASK)) {
                                 atomic_dec(&per_cpu(perf_branch_stack_events,
                                                     event->cpu));
+                        }
                 }
         }
 
         if (event->rb) {
-                ring_buffer_put(event->rb);
-                event->rb = NULL;
+                struct ring_buffer *rb;
+
+                /*
+                 * Can happen when we close an event with re-directed output.
+                 *
+                 * Since we have a 0 refcount, perf_mmap_close() will skip
+                 * over us; possibly making our ring_buffer_put() the last.
+                 */
+                mutex_lock(&event->mmap_mutex);
+                rb = event->rb;
+                if (rb) {
+                        rcu_assign_pointer(event->rb, NULL);
+                        ring_buffer_detach(event, rb);
+                        ring_buffer_put(rb); /* could be last */
+                }
+                mutex_unlock(&event->mmap_mutex);
         }
 
         if (is_cgroup_event(event))
@@ -3293,30 +3306,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
         unsigned int events = POLL_HUP;
 
         /*
-         * Race between perf_event_set_output() and perf_poll(): perf_poll()
-         * grabs the rb reference but perf_event_set_output() overrides it.
-         * Here is the timeline for two threads T1, T2:
-         * t0: T1, rb = rcu_dereference(event->rb)
-         * t1: T2, old_rb = event->rb
-         * t2: T2, event->rb = new rb
-         * t3: T2, ring_buffer_detach(old_rb)
-         * t4: T1, ring_buffer_attach(rb1)
-         * t5: T1, poll_wait(event->waitq)
-         *
-         * To avoid this problem, we grab mmap_mutex in perf_poll()
-         * thereby ensuring that the assignment of the new ring buffer
-         * and the detachment of the old buffer appear atomic to perf_poll()
+         * Pin the event->rb by taking event->mmap_mutex; otherwise
+         * perf_event_set_output() can swizzle our rb and make us miss wakeups.
          */
         mutex_lock(&event->mmap_mutex);
-
-        rcu_read_lock();
-        rb = rcu_dereference(event->rb);
-        if (rb) {
-                ring_buffer_attach(event, rb);
+        rb = event->rb;
+        if (rb)
                 events = atomic_xchg(&rb->poll, 0);
-        }
-        rcu_read_unlock();
-
         mutex_unlock(&event->mmap_mutex);
 
         poll_wait(file, &event->waitq, wait);
@@ -3626,16 +3622,12 @@ static void ring_buffer_attach(struct perf_event *event,
                 return;
 
         spin_lock_irqsave(&rb->event_lock, flags);
-        if (!list_empty(&event->rb_entry))
-                goto unlock;
-
-        list_add(&event->rb_entry, &rb->event_list);
-unlock:
+        if (list_empty(&event->rb_entry))
+                list_add(&event->rb_entry, &rb->event_list);
         spin_unlock_irqrestore(&rb->event_lock, flags);
 }
 
-static void ring_buffer_detach(struct perf_event *event,
-                               struct ring_buffer *rb)
+static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
 {
         unsigned long flags;
 
@@ -3654,13 +3646,10 @@ static void ring_buffer_wakeup(struct perf_event *event)
 
         rcu_read_lock();
         rb = rcu_dereference(event->rb);
-        if (!rb)
-                goto unlock;
-
-        list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
-                wake_up_all(&event->waitq);
-
-unlock:
+        if (rb) {
+                list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
+                        wake_up_all(&event->waitq);
+        }
         rcu_read_unlock();
 }
 
@@ -3689,18 +3678,10 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
 
 static void ring_buffer_put(struct ring_buffer *rb)
 {
-        struct perf_event *event, *n;
-        unsigned long flags;
-
         if (!atomic_dec_and_test(&rb->refcount))
                 return;
 
-        spin_lock_irqsave(&rb->event_lock, flags);
-        list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
-                list_del_init(&event->rb_entry);
-                wake_up_all(&event->waitq);
-        }
-        spin_unlock_irqrestore(&rb->event_lock, flags);
+        WARN_ON_ONCE(!list_empty(&rb->event_list));
 
         call_rcu(&rb->rcu_head, rb_free_rcu);
 }
@@ -3710,26 +3691,100 @@ static void perf_mmap_open(struct vm_area_struct *vma)
         struct perf_event *event = vma->vm_file->private_data;
 
         atomic_inc(&event->mmap_count);
+        atomic_inc(&event->rb->mmap_count);
 }
 
+/*
+ * A buffer can be mmap()ed multiple times; either directly through the same
+ * event, or through other events by use of perf_event_set_output().
+ *
+ * In order to undo the VM accounting done by perf_mmap() we need to destroy
+ * the buffer here, where we still have a VM context. This means we need
+ * to detach all events redirecting to us.
+ */
 static void perf_mmap_close(struct vm_area_struct *vma)
 {
         struct perf_event *event = vma->vm_file->private_data;
 
-        if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
-                unsigned long size = perf_data_size(event->rb);
-                struct user_struct *user = event->mmap_user;
-                struct ring_buffer *rb = event->rb;
+        struct ring_buffer *rb = event->rb;
+        struct user_struct *mmap_user = rb->mmap_user;
+        int mmap_locked = rb->mmap_locked;
+        unsigned long size = perf_data_size(rb);
+
+        atomic_dec(&rb->mmap_count);
+
+        if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+                return;
 
-                atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-                vma->vm_mm->pinned_vm -= event->mmap_locked;
-                rcu_assign_pointer(event->rb, NULL);
-                ring_buffer_detach(event, rb);
+        /* Detach current event from the buffer. */
+        rcu_assign_pointer(event->rb, NULL);
+        ring_buffer_detach(event, rb);
+        mutex_unlock(&event->mmap_mutex);
+
+        /* If there's still other mmap()s of this buffer, we're done. */
+        if (atomic_read(&rb->mmap_count)) {
+                ring_buffer_put(rb); /* can't be last */
+                return;
+        }
+
+        /*
+         * No other mmap()s, detach from all other events that might redirect
+         * into the now unreachable buffer. Somewhat complicated by the
+         * fact that rb::event_lock otherwise nests inside mmap_mutex.
+         */
+again:
+        rcu_read_lock();
+        list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+                if (!atomic_long_inc_not_zero(&event->refcount)) {
+                        /*
+                         * This event is en-route to free_event() which will
+                         * detach it and remove it from the list.
+                         */
+                        continue;
+                }
+                rcu_read_unlock();
+
+                mutex_lock(&event->mmap_mutex);
+                /*
+                 * Check we didn't race with perf_event_set_output() which can
+                 * swizzle the rb from under us while we were waiting to
+                 * acquire mmap_mutex.
+                 *
+                 * If we find a different rb; ignore this event, a next
+                 * iteration will no longer find it on the list. We have to
+                 * still restart the iteration to make sure we're not now
+                 * iterating the wrong list.
+                 */
+                if (event->rb == rb) {
+                        rcu_assign_pointer(event->rb, NULL);
+                        ring_buffer_detach(event, rb);
+                        ring_buffer_put(rb); /* can't be last, we still have one */
+                }
                 mutex_unlock(&event->mmap_mutex);
+                put_event(event);
 
-                ring_buffer_put(rb);
-                free_uid(user);
+                /*
+                 * Restart the iteration; either we're on the wrong list or
+                 * destroyed its integrity by doing a deletion.
+                 */
+                goto again;
         }
+        rcu_read_unlock();
+
+        /*
+         * It could be there's still a few 0-ref events on the list; they'll
+         * get cleaned up by free_event() -- they'll also still have their
+         * ref on the rb and will free it whenever they are done with it.
+         *
+         * Aside from that, this buffer is 'fully' detached and unmapped,
+         * undo the VM accounting.
+         */
+
+        atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
+        vma->vm_mm->pinned_vm -= mmap_locked;
+        free_uid(mmap_user);
+
+        ring_buffer_put(rb); /* could be last */
 }
 
 static const struct vm_operations_struct perf_mmap_vmops = {
@@ -3779,12 +3834,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                 return -EINVAL;
 
         WARN_ON_ONCE(event->ctx->parent_ctx);
+again:
         mutex_lock(&event->mmap_mutex);
         if (event->rb) {
-                if (event->rb->nr_pages == nr_pages)
-                        atomic_inc(&event->rb->refcount);
-                else
+                if (event->rb->nr_pages != nr_pages) {
                         ret = -EINVAL;
+                        goto unlock;
+                }
+
+                if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
+                        /*
+                         * Raced against perf_mmap_close() through
+                         * perf_event_set_output(). Try again, hope for better
+                         * luck.
+                         */
+                        mutex_unlock(&event->mmap_mutex);
+                        goto again;
+                }
+
                 goto unlock;
         }
 
@@ -3825,12 +3892,16 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
                 ret = -ENOMEM;
                 goto unlock;
         }
-        rcu_assign_pointer(event->rb, rb);
+
+        atomic_set(&rb->mmap_count, 1);
+        rb->mmap_locked = extra;
+        rb->mmap_user = get_current_user();
 
         atomic_long_add(user_extra, &user->locked_vm);
-        event->mmap_locked = extra;
-        event->mmap_user = get_current_user();
-        vma->vm_mm->pinned_vm += event->mmap_locked;
+        vma->vm_mm->pinned_vm += extra;
+
+        ring_buffer_attach(event, rb);
+        rcu_assign_pointer(event->rb, rb);
 
         perf_event_update_userpage(event);
 
@@ -3839,7 +3910,11 @@ unlock:
                 atomic_inc(&event->mmap_count);
         mutex_unlock(&event->mmap_mutex);
 
-        vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+        /*
+         * Since pinned accounting is per vm we cannot allow fork() to copy our
+         * vma.
+         */
+        vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
         vma->vm_ops = &perf_mmap_vmops;
 
         return ret;
@@ -6565,6 +6640,8 @@ set:
         if (atomic_read(&event->mmap_count))
                 goto unlock;
 
+        old_rb = event->rb;
+
         if (output_event) {
                 /* get the rb we want to redirect to */
                 rb = ring_buffer_get(output_event);
@@ -6572,16 +6649,28 @@ set:
                         goto unlock;
         }
 
-        old_rb = event->rb;
-        rcu_assign_pointer(event->rb, rb);
         if (old_rb)
                 ring_buffer_detach(event, old_rb);
+
+        if (rb)
+                ring_buffer_attach(event, rb);
+
+        rcu_assign_pointer(event->rb, rb);
+
+        if (old_rb) {
+                ring_buffer_put(old_rb);
+                /*
+                 * Since we detached before setting the new rb, so that we
+                 * could attach the new rb, we could have missed a wakeup.
+                 * Provide it now.
+                 */
+                wake_up_all(&event->waitq);
+        }
+
         ret = 0;
 unlock:
         mutex_unlock(&event->mmap_mutex);
 
-        if (old_rb)
-                ring_buffer_put(old_rb);
 out:
         return ret;
 }
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index eb675c4d59df..ca6599723be5 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -31,6 +31,10 @@ struct ring_buffer {
         spinlock_t                      event_lock;
         struct list_head                event_list;
 
+        atomic_t                        mmap_count;
+        unsigned long                   mmap_locked;
+        struct user_struct              *mmap_user;
+
         struct perf_event_mmap_page     *user_page;
         void                            *data_pages[0];
 };