author	Peter Zijlstra <peterz@infradead.org>	2014-03-14 05:50:33 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2014-05-19 08:44:56 -0400
commit	b69cf53640da2b86439596118cfa95233154ee76 (patch)
tree	fcb809f430c17449edebf179ec4dd330950af94a /kernel/events
parent	39af6b1678afa5880dda7e375cf3f9d395087f6d (diff)
perf: Fix a race between ring_buffer_detach() and ring_buffer_attach()
Alexander noticed that we use RCU iteration on rb->event_list but do
not use list_{add,del}_rcu() to add/remove entries to that list, nor
do we observe proper grace periods when re-using the entries.

Merge ring_buffer_detach() into ring_buffer_attach() such that
attaching to the NULL buffer is detaching.

Furthermore, ensure that between any 'detach' and 'attach' of the same
event we observe the required grace period, but only when strictly
required. In effect this means that only
ioctl(.request = PERF_EVENT_IOC_SET_OUTPUT) will wait for a grace
period, while the normal initial attach and final detach will not be
delayed.

This patch should, I think, do the right thing under all
circumstances; the 'normal' cases should never see the extra grace
period. The two exceptions are:

 1) PERF_EVENT_IOC_SET_OUTPUT on an event which already has a
    ring_buffer set will now observe the required grace period between
    removing itself from the old buffer and attaching itself to the
    new buffer. This case is 'simple' in that both buffers are present
    in perf_event_set_output(); one could think an unconditional
    synchronize_rcu() would be sufficient. However...

 2) an event that has a buffer attached, the buffer is destroyed
    (munmap) and then the event is attached to a new/different buffer
    using PERF_EVENT_IOC_SET_OUTPUT. This case is more complex because
    the buffer destruction does:

      ring_buffer_attach(.rb = NULL)

    followed by the ioctl() doing:

      ring_buffer_attach(.rb = foo);

    and we still need to observe the grace period between these two
    calls due to us reusing the event->rb_entry list_head.

In order to make 2) happen we use Paul's latest cond_synchronize_rcu()
call.

Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Reported-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20140507123526.GD13658@twins.programming.kicks-ass.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
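For reference, the commit relies on the (then new) get_state_synchronize_rcu()/cond_synchronize_rcu() pair to make the grace-period wait conditional. A minimal sketch of that idiom follows; the names deferred_entry, entry_detach() and entry_attach() are invented for illustration and are not part of this patch, but the cookie/flag pair mirrors the rcu_batches/rcu_pending fields added to the event in the hunks below.

	#include <linux/rculist.h>
	#include <linux/rcupdate.h>

	struct deferred_entry {
		struct list_head node;		/* linked into an RCU-iterated list */
		unsigned long rcu_batches;	/* cookie from get_state_synchronize_rcu() */
		int rcu_pending;		/* set while a grace period may still be needed */
	};

	/* Caller holds the lock protecting updates to the list. */
	static void entry_detach(struct deferred_entry *e)
	{
		/* Snapshot the grace-period state before unlinking. */
		e->rcu_batches = get_state_synchronize_rcu();
		e->rcu_pending = 1;
		list_del_rcu(&e->node);		/* RCU readers may still see the node */
	}

	static void entry_attach(struct deferred_entry *e, struct list_head *head)
	{
		if (e->rcu_pending) {
			/* Waits only if a grace period has not already elapsed. */
			cond_synchronize_rcu(e->rcu_batches);
			e->rcu_pending = 0;
		}
		list_add_rcu(&e->node, head);	/* now safe to reuse e->node */
	}

In the common paths (a first attach with no prior detach, or a final detach never followed by a re-attach) the conditional wait is either skipped or returns immediately, so only the detach-then-reattach case pays for a grace period; this matches the behaviour described above for PERF_EVENT_IOC_SET_OUTPUT.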
Diffstat (limited to 'kernel/events')
-rw-r--r--	kernel/events/core.c	109
1 file changed, 49 insertions(+), 60 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index feb1329ca331..440eefc67397 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3192,7 +3192,8 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void ring_buffer_put(struct ring_buffer *rb);
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);
+static void ring_buffer_attach(struct perf_event *event,
+			       struct ring_buffer *rb);
 
 static void unaccount_event_cpu(struct perf_event *event, int cpu)
 {
@@ -3252,8 +3253,6 @@ static void free_event(struct perf_event *event)
 	unaccount_event(event);
 
 	if (event->rb) {
-		struct ring_buffer *rb;
-
 		/*
 		 * Can happen when we close an event with re-directed output.
 		 *
@@ -3261,12 +3260,7 @@ static void free_event(struct perf_event *event)
 		 * over us; possibly making our ring_buffer_put() the last.
 		 */
 		mutex_lock(&event->mmap_mutex);
-		rb = event->rb;
-		if (rb) {
-			rcu_assign_pointer(event->rb, NULL);
-			ring_buffer_detach(event, rb);
-			ring_buffer_put(rb); /* could be last */
-		}
+		ring_buffer_attach(event, NULL);
 		mutex_unlock(&event->mmap_mutex);
 	}
 
@@ -3850,28 +3844,47 @@ unlock:
 static void ring_buffer_attach(struct perf_event *event,
 			       struct ring_buffer *rb)
 {
+	struct ring_buffer *old_rb = NULL;
 	unsigned long flags;
 
-	if (!list_empty(&event->rb_entry))
-		return;
+	if (event->rb) {
+		/*
+		 * Should be impossible, we set this when removing
+		 * event->rb_entry and wait/clear when adding event->rb_entry.
+		 */
+		WARN_ON_ONCE(event->rcu_pending);
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	if (list_empty(&event->rb_entry))
-		list_add(&event->rb_entry, &rb->event_list);
-	spin_unlock_irqrestore(&rb->event_lock, flags);
-}
+		old_rb = event->rb;
+		event->rcu_batches = get_state_synchronize_rcu();
+		event->rcu_pending = 1;
 
-static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
-{
-	unsigned long flags;
+		spin_lock_irqsave(&old_rb->event_lock, flags);
+		list_del_rcu(&event->rb_entry);
+		spin_unlock_irqrestore(&old_rb->event_lock, flags);
+	}
 
-	if (list_empty(&event->rb_entry))
-		return;
+	if (event->rcu_pending && rb) {
+		cond_synchronize_rcu(event->rcu_batches);
+		event->rcu_pending = 0;
+	}
 
-	spin_lock_irqsave(&rb->event_lock, flags);
-	list_del_init(&event->rb_entry);
-	wake_up_all(&event->waitq);
-	spin_unlock_irqrestore(&rb->event_lock, flags);
+	if (rb) {
+		spin_lock_irqsave(&rb->event_lock, flags);
+		list_add_rcu(&event->rb_entry, &rb->event_list);
+		spin_unlock_irqrestore(&rb->event_lock, flags);
+	}
+
+	rcu_assign_pointer(event->rb, rb);
+
+	if (old_rb) {
+		ring_buffer_put(old_rb);
+		/*
+		 * Since we detached before setting the new rb, so that we
+		 * could attach the new rb, we could have missed a wakeup.
+		 * Provide it now.
+		 */
+		wake_up_all(&event->waitq);
+	}
 }
 
 static void ring_buffer_wakeup(struct perf_event *event)
@@ -3940,7 +3953,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	struct ring_buffer *rb = event->rb;
+	struct ring_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
 	int mmap_locked = rb->mmap_locked;
 	unsigned long size = perf_data_size(rb);
@@ -3948,18 +3961,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	atomic_dec(&rb->mmap_count);
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
-		return;
+		goto out_put;
 
-	/* Detach current event from the buffer. */
-	rcu_assign_pointer(event->rb, NULL);
-	ring_buffer_detach(event, rb);
+	ring_buffer_attach(event, NULL);
 	mutex_unlock(&event->mmap_mutex);
 
 	/* If there's still other mmap()s of this buffer, we're done. */
-	if (atomic_read(&rb->mmap_count)) {
-		ring_buffer_put(rb); /* can't be last */
-		return;
-	}
+	if (atomic_read(&rb->mmap_count))
+		goto out_put;
 
 	/*
 	 * No other mmap()s, detach from all other events that might redirect
@@ -3989,11 +3998,9 @@ again:
 		 * still restart the iteration to make sure we're not now
 		 * iterating the wrong list.
 		 */
-		if (event->rb == rb) {
-			rcu_assign_pointer(event->rb, NULL);
-			ring_buffer_detach(event, rb);
-			ring_buffer_put(rb); /* can't be last, we still have one */
-		}
+		if (event->rb == rb)
+			ring_buffer_attach(event, NULL);
+
 		mutex_unlock(&event->mmap_mutex);
 		put_event(event);
 
@@ -4018,6 +4025,7 @@ again:
 	vma->vm_mm->pinned_vm -= mmap_locked;
 	free_uid(mmap_user);
 
+out_put:
 	ring_buffer_put(rb); /* could be last */
 }
 
@@ -4135,7 +4143,6 @@ again:
 	vma->vm_mm->pinned_vm += extra;
 
 	ring_buffer_attach(event, rb);
-	rcu_assign_pointer(event->rb, rb);
 
 	perf_event_init_userpage(event);
 	perf_event_update_userpage(event);
@@ -6934,7 +6941,7 @@ err_size:
 static int
 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct ring_buffer *rb = NULL, *old_rb = NULL;
+	struct ring_buffer *rb = NULL;
 	int ret = -EINVAL;
 
 	if (!output_event)
@@ -6962,8 +6969,6 @@ set:
 	if (atomic_read(&event->mmap_count))
 		goto unlock;
 
-	old_rb = event->rb;
-
 	if (output_event) {
 		/* get the rb we want to redirect to */
 		rb = ring_buffer_get(output_event);
@@ -6971,23 +6976,7 @@ set:
 			goto unlock;
 	}
 
-	if (old_rb)
-		ring_buffer_detach(event, old_rb);
-
-	if (rb)
-		ring_buffer_attach(event, rb);
-
-	rcu_assign_pointer(event->rb, rb);
-
-	if (old_rb) {
-		ring_buffer_put(old_rb);
-		/*
-		 * Since we detached before setting the new rb, so that we
-		 * could attach the new rb, we could have missed a wakeup.
-		 * Provide it now.
-		 */
-		wake_up_all(&event->waitq);
-	}
+	ring_buffer_attach(event, rb);
 
 	ret = 0;
 unlock: