 include/linux/perf_event.h  |  1
 kernel/events/core.c        | 86
 kernel/events/internal.h    |  3
 kernel/events/ring_buffer.c |  3
 4 files changed, 91 insertions(+), 2 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e9ebe5e0091..b1f89122bf6a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -822,6 +822,7 @@ struct perf_event {
         int mmap_locked;
         struct user_struct *mmap_user;
         struct ring_buffer *rb;
+        struct list_head rb_entry;

         /* poll related */
         wait_queue_head_t waitq;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b0c1186fd97b..600c1629b64d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -185,6 +185,9 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
 static void update_context_time(struct perf_event_context *ctx);
 static u64 perf_event_time(struct perf_event *event);

+static void ring_buffer_attach(struct perf_event *event,
+                               struct ring_buffer *rb);
+
 void __weak perf_event_print_debug(void) { }

 extern __weak const char *perf_pmu_name(void)
@@ -3191,12 +3194,33 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
         struct ring_buffer *rb;
         unsigned int events = POLL_HUP;

+        /*
+         * Race between perf_event_set_output() and perf_poll(): perf_poll()
+         * grabs the rb reference but perf_event_set_output() overrides it.
+         * Here is the timeline for two threads T1, T2:
+         * t0: T1, rb = rcu_dereference(event->rb)
+         * t1: T2, old_rb = event->rb
+         * t2: T2, event->rb = new rb
+         * t3: T2, ring_buffer_detach(old_rb)
+         * t4: T1, ring_buffer_attach(rb1)
+         * t5: T1, poll_wait(event->waitq)
+         *
+         * To avoid this problem, we grab mmap_mutex in perf_poll()
+         * thereby ensuring that the assignment of the new ring buffer
+         * and the detachment of the old buffer appear atomic to perf_poll()
+         */
+        mutex_lock(&event->mmap_mutex);
+
         rcu_read_lock();
         rb = rcu_dereference(event->rb);
-        if (rb)
+        if (rb) {
+                ring_buffer_attach(event, rb);
                 events = atomic_xchg(&rb->poll, 0);
+        }
         rcu_read_unlock();

+        mutex_unlock(&event->mmap_mutex);
+
         poll_wait(file, &event->waitq, wait);

         return events;
@@ -3497,6 +3521,49 @@ unlock:
         return ret;
 }

+static void ring_buffer_attach(struct perf_event *event,
+                               struct ring_buffer *rb)
+{
+        unsigned long flags;
+
+        if (!list_empty(&event->rb_entry))
+                return;
+
+        spin_lock_irqsave(&rb->event_lock, flags);
+        if (!list_empty(&event->rb_entry))
+                goto unlock;
+
+        list_add(&event->rb_entry, &rb->event_list);
+unlock:
+        spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_detach(struct perf_event *event,
+                               struct ring_buffer *rb)
+{
+        unsigned long flags;
+
+        if (list_empty(&event->rb_entry))
+                return;
+
+        spin_lock_irqsave(&rb->event_lock, flags);
+        list_del_init(&event->rb_entry);
+        wake_up_all(&event->waitq);
+        spin_unlock_irqrestore(&rb->event_lock, flags);
+}
+
+static void ring_buffer_wakeup(struct perf_event *event)
+{
+        struct ring_buffer *rb;
+
+        rcu_read_lock();
+        rb = rcu_dereference(event->rb);
+        list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
+                wake_up_all(&event->waitq);
+        }
+        rcu_read_unlock();
+}
+
 static void rb_free_rcu(struct rcu_head *rcu_head)
 {
         struct ring_buffer *rb;
@@ -3522,9 +3589,19 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)

 static void ring_buffer_put(struct ring_buffer *rb)
 {
+        struct perf_event *event, *n;
+        unsigned long flags;
+
         if (!atomic_dec_and_test(&rb->refcount))
                 return;

+        spin_lock_irqsave(&rb->event_lock, flags);
+        list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
+                list_del_init(&event->rb_entry);
+                wake_up_all(&event->waitq);
+        }
+        spin_unlock_irqrestore(&rb->event_lock, flags);
+
         call_rcu(&rb->rcu_head, rb_free_rcu);
 }

@@ -3547,6 +3624,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
                 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
                 vma->vm_mm->pinned_vm -= event->mmap_locked;
                 rcu_assign_pointer(event->rb, NULL);
+                ring_buffer_detach(event, rb);
                 mutex_unlock(&event->mmap_mutex);

                 ring_buffer_put(rb);
@@ -3701,7 +3779,7 @@ static const struct file_operations perf_fops = {

 void perf_event_wakeup(struct perf_event *event)
 {
-        wake_up_all(&event->waitq);
+        ring_buffer_wakeup(event);

         if (event->pending_kill) {
                 kill_fasync(&event->fasync, SIGIO, event->pending_kill);
@@ -5823,6 +5901,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
         INIT_LIST_HEAD(&event->group_entry);
         INIT_LIST_HEAD(&event->event_entry);
         INIT_LIST_HEAD(&event->sibling_list);
+        INIT_LIST_HEAD(&event->rb_entry);
+
         init_waitqueue_head(&event->waitq);
         init_irq_work(&event->pending, perf_pending_event);

@@ -6029,6 +6109,8 @@ set:

         old_rb = event->rb;
         rcu_assign_pointer(event->rb, rb);
+        if (old_rb)
+                ring_buffer_detach(event, old_rb);
         ret = 0;
 unlock:
         mutex_unlock(&event->mmap_mutex);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 09097dd8116c..64568a699375 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -22,6 +22,9 @@ struct ring_buffer {
         local_t lost; /* nr records lost */

         long watermark; /* wakeup watermark */
+        /* poll crap */
+        spinlock_t event_lock;
+        struct list_head event_list;

         struct perf_event_mmap_page *user_page;
         void *data_pages[0];
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index a2a29205cc0f..7f3011c6b57f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -209,6 +209,9 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
                 rb->writable = 1;

         atomic_set(&rb->refcount, 1);
+
+        INIT_LIST_HEAD(&rb->event_list);
+        spin_lock_init(&rb->event_lock);
 }

 #ifndef CONFIG_PERF_USE_VMALLOC
