path: root/kernel/perf_event.c
author	Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-05-27 06:54:41 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-05-31 02:46:08 -0400
commit	ac9721f3f54b27a16c7e1afb2481e7ee95a70318 (patch)
tree	a9f21d60c7c4c1910696553a6f8273edcca03c64 /kernel/perf_event.c
parent	67a3e12b05e055c0415c556a315a3d3eb637e29e (diff)
perf_events: Fix races and clean up perf_event and perf_mmap_data interaction
In order to move toward separate buffer objects, rework the whole
perf_mmap_data construct to be a more self-sufficient entity, one with
its own lifetime rules. This greatly sanitizes the whole output
redirection code, which was riddled with bugs and races.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <stable@kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
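[Editorial note] The heart of the change is that perf_mmap_data now carries its own reference count, instead of events chaining to one another through event->output. A rough sketch of the resulting ownership model, using only fields that appear in this patch:

        struct perf_mmap_data {
                atomic_t        refcount;   /* set to 1 by perf_mmap_data_init();
                                               +1 per event redirected here */
                struct rcu_head rcu_head;   /* freed via call_rcu() on last put */
                int             nr_pages;   /* a second mmap() must match this */
                /* ... page pointers, head/tail, watermark ... */
        };

The buffer then lives exactly as long as someone holds a reference: the mmap() that created it, or any event whose output was redirected into it.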
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--	kernel/perf_event.c	224
1 file changed, 126 insertions(+), 98 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bd7ce8ca5bb9..848d49a043e9 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1841,6 +1841,7 @@ static void free_event_rcu(struct rcu_head *head)
 }

 static void perf_pending_sync(struct perf_event *event);
+static void perf_mmap_data_put(struct perf_mmap_data *data);

 static void free_event(struct perf_event *event)
 {
@@ -1856,9 +1857,9 @@ static void free_event(struct perf_event *event)
                atomic_dec(&nr_task_events);
        }

-       if (event->output) {
-               fput(event->output->filp);
-               event->output = NULL;
+       if (event->data) {
+               perf_mmap_data_put(event->data);
+               event->data = NULL;
        }

        if (event->destroy)
@@ -2175,7 +2176,27 @@ unlock:
        return ret;
 }

-static int perf_event_set_output(struct perf_event *event, int output_fd);
+static const struct file_operations perf_fops;
+
+static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+{
+       struct file *file;
+
+       file = fget_light(fd, fput_needed);
+       if (!file)
+               return ERR_PTR(-EBADF);
+
+       if (file->f_op != &perf_fops) {
+               fput_light(file, *fput_needed);
+               *fput_needed = 0;
+               return ERR_PTR(-EBADF);
+       }
+
+       return file->private_data;
+}
+
+static int perf_event_set_output(struct perf_event *event,
+                                struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);

 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
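[Editorial note] The new perf_fget_light() centralizes the "is this fd really a perf event?" check that was previously open-coded in both perf_event_set_output() and the syscall. A minimal caller sketch (mirroring the ioctl hunk below), showing the ERR_PTR()/IS_ERR() error convention and the fput_needed protocol of fget_light():

        int fput_needed = 0;
        struct perf_event *ev;

        ev = perf_fget_light(fd, &fput_needed);
        if (IS_ERR(ev))
                return PTR_ERR(ev);   /* -EBADF: bad fd, or not a perf fd */
        /* ... use ev; its file stays pinned ... */
        fput_light(ev->filp, fput_needed);   /* drop the fd ref, if one was taken */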
@@ -2202,7 +2223,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                return perf_event_period(event, (u64 __user *)arg);

        case PERF_EVENT_IOC_SET_OUTPUT:
-               return perf_event_set_output(event, arg);
+       {
+               struct perf_event *output_event = NULL;
+               int fput_needed = 0;
+               int ret;
+
+               if (arg != -1) {
+                       output_event = perf_fget_light(arg, &fput_needed);
+                       if (IS_ERR(output_event))
+                               return PTR_ERR(output_event);
+               }
+
+               ret = perf_event_set_output(event, output_event);
+               if (output_event)
+                       fput_light(output_event->filp, fput_needed);
+
+               return ret;
+       }

        case PERF_EVENT_IOC_SET_FILTER:
                return perf_event_set_filter(event, (void __user *)arg);
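[Editorial note] From userspace the reworked ioctl keeps its old contract. A minimal usage sketch, assuming fd1 and fd2 were both returned by perf_event_open(2) and fd1 owns an mmap()ed buffer:

        #include <linux/perf_event.h>
        #include <sys/ioctl.h>

        /* route fd2's samples into fd1's ring buffer */
        if (ioctl(fd2, PERF_EVENT_IOC_SET_OUTPUT, fd1) < 0)
                perror("PERF_EVENT_IOC_SET_OUTPUT");

        /* an argument of -1 undoes the redirection (the 'arg != -1' case above) */
        ioctl(fd2, PERF_EVENT_IOC_SET_OUTPUT, -1);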
@@ -2335,8 +2372,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
        unsigned long size;
        int i;

-       WARN_ON(atomic_read(&event->mmap_count));
-
        size = sizeof(struct perf_mmap_data);
        size += nr_pages * sizeof(void *);

@@ -2452,8 +2487,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
        unsigned long size;
        void *all_buf;

-       WARN_ON(atomic_read(&event->mmap_count));
-
        size = sizeof(struct perf_mmap_data);
        size += sizeof(void *);

@@ -2536,7 +2569,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
        if (!data->watermark)
                data->watermark = max_size / 2;

-
+       atomic_set(&data->refcount, 1);
        rcu_assign_pointer(event->data, data);
 }

@@ -2548,13 +2581,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
        perf_mmap_data_free(data);
 }

-static void perf_mmap_data_release(struct perf_event *event)
+static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
 {
-       struct perf_mmap_data *data = event->data;
+       struct perf_mmap_data *data;
+
+       rcu_read_lock();
+       data = rcu_dereference(event->data);
+       if (data) {
+               if (!atomic_inc_not_zero(&data->refcount))
+                       data = NULL;
+       }
+       rcu_read_unlock();
+
+       return data;
+}

-       WARN_ON(atomic_read(&event->mmap_count));
+static void perf_mmap_data_put(struct perf_mmap_data *data)
+{
+       if (!atomic_dec_and_test(&data->refcount))
+               return;

-       rcu_assign_pointer(event->data, NULL);
        call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
 }

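[Editorial note] perf_mmap_data_get() is the classic RCU "tryget": a reader that found the buffer through rcu_dereference() may be racing with the final put, so it must refuse to resurrect a count that has already hit zero — hence atomic_inc_not_zero() rather than a plain increment. A standalone userspace analogue with C11 atomics:

        #include <stdatomic.h>
        #include <stdbool.h>

        /* Increment *refcount unless it is zero; the CAS loop retries
         * whenever another thread changes the count underneath us. */
        static bool refcount_inc_not_zero(atomic_int *refcount)
        {
                int old = atomic_load(refcount);

                while (old != 0) {
                        if (atomic_compare_exchange_weak(refcount, &old, old + 1))
                                return true;    /* reference taken */
                }
                return false;   /* object already dying: caller must treat as NULL */
        }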
@@ -2569,15 +2615,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
        struct perf_event *event = vma->vm_file->private_data;

-       WARN_ON_ONCE(event->ctx->parent_ctx);
        if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
                unsigned long size = perf_data_size(event->data);
-               struct user_struct *user = current_user();
+               struct user_struct *user = event->mmap_user;
+               struct perf_mmap_data *data = event->data;

                atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-               vma->vm_mm->locked_vm -= event->data->nr_locked;
-               perf_mmap_data_release(event);
+               vma->vm_mm->locked_vm -= event->mmap_locked;
+               rcu_assign_pointer(event->data, NULL);
                mutex_unlock(&event->mmap_mutex);
+
+               perf_mmap_data_put(data);
+               free_uid(user);
        }
 }

@@ -2629,13 +2678,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)

        WARN_ON_ONCE(event->ctx->parent_ctx);
        mutex_lock(&event->mmap_mutex);
-       if (event->output) {
-               ret = -EINVAL;
-               goto unlock;
-       }
-
-       if (atomic_inc_not_zero(&event->mmap_count)) {
-               if (nr_pages != event->data->nr_pages)
+       if (event->data) {
+               if (event->data->nr_pages == nr_pages)
+                       atomic_inc(&event->data->refcount);
+               else
                        ret = -EINVAL;
                goto unlock;
        }
@@ -2667,21 +2713,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
        WARN_ON(event->data);

        data = perf_mmap_data_alloc(event, nr_pages);
-       ret = -ENOMEM;
-       if (!data)
+       if (!data) {
+               ret = -ENOMEM;
                goto unlock;
+       }

-       ret = 0;
        perf_mmap_data_init(event, data);
-
-       atomic_set(&event->mmap_count, 1);
-       atomic_long_add(user_extra, &user->locked_vm);
-       vma->vm_mm->locked_vm += extra;
-       event->data->nr_locked = extra;
        if (vma->vm_flags & VM_WRITE)
                event->data->writable = 1;

+       atomic_long_add(user_extra, &user->locked_vm);
+       event->mmap_locked = extra;
+       event->mmap_user = get_current_user();
+       vma->vm_mm->locked_vm += event->mmap_locked;
+
 unlock:
+       if (!ret)
+               atomic_inc(&event->mmap_count);
        mutex_unlock(&event->mmap_mutex);

        vma->vm_flags |= VM_RESERVED;
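[Editorial note] For reference, the userspace side of this path: a perf buffer is one metadata page plus a power-of-two number of data pages, and — per the event->data branch above — a second mmap() of the same event must request exactly the same size or fail with EINVAL. A small sketch (map_perf_buffer is a hypothetical helper, not from this patch):

        #include <sys/mman.h>
        #include <unistd.h>

        static void *map_perf_buffer(int perf_fd, unsigned int data_pages)
        {
                long psz = sysconf(_SC_PAGESIZE);   /* data_pages: power of two */
                size_t len = (size_t)(1 + data_pages) * psz;

                return mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
                            perf_fd, 0);
        }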
@@ -2993,7 +3041,6 @@ int perf_output_begin(struct perf_output_handle *handle,
                    struct perf_event *event, unsigned int size,
                    int nmi, int sample)
 {
-       struct perf_event *output_event;
        struct perf_mmap_data *data;
        unsigned long tail, offset, head;
        int have_lost;
@@ -3010,10 +3057,6 @@ int perf_output_begin(struct perf_output_handle *handle,
        if (event->parent)
                event = event->parent;

-       output_event = rcu_dereference(event->output);
-       if (output_event)
-               event = output_event;
-
        data = rcu_dereference(event->data);
        if (!data)
                goto out;
@@ -4912,39 +4955,17 @@ err_size:
        goto out;
 }

-static int perf_event_set_output(struct perf_event *event, int output_fd)
+static int
+perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-       struct perf_event *output_event = NULL;
-       struct file *output_file = NULL;
-       struct perf_event *old_output;
-       int fput_needed = 0;
+       struct perf_mmap_data *data = NULL, *old_data = NULL;
        int ret = -EINVAL;

-       /*
-        * Don't allow output of inherited per-task events. This would
-        * create performance issues due to cross cpu access.
-        */
-       if (event->cpu == -1 && event->attr.inherit)
-               return -EINVAL;
-
-       if (!output_fd)
+       if (!output_event)
                goto set;

-       output_file = fget_light(output_fd, &fput_needed);
-       if (!output_file)
-               return -EBADF;
-
-       if (output_file->f_op != &perf_fops)
-               goto out;
-
-       output_event = output_file->private_data;
-
-       /* Don't chain output fds */
-       if (output_event->output)
-               goto out;
-
-       /* Don't set an output fd when we already have an output channel */
-       if (event->data)
+       /* don't allow circular references */
+       if (event == output_event)
                goto out;

        /*
@@ -4959,26 +4980,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
        if (output_event->cpu == -1 && output_event->ctx != event->ctx)
                goto out;

-       atomic_long_inc(&output_file->f_count);
-
 set:
        mutex_lock(&event->mmap_mutex);
-       old_output = event->output;
-       rcu_assign_pointer(event->output, output_event);
-       mutex_unlock(&event->mmap_mutex);
+       /* Can't redirect output if we've got an active mmap() */
+       if (atomic_read(&event->mmap_count))
+               goto unlock;

-       if (old_output) {
-               /*
-                * we need to make sure no existing perf_output_*()
-                * is still referencing this event.
-                */
-               synchronize_rcu();
-               fput(old_output->filp);
+       if (output_event) {
+               /* get the buffer we want to redirect to */
+               data = perf_mmap_data_get(output_event);
+               if (!data)
+                       goto unlock;
        }

+       old_data = event->data;
+       rcu_assign_pointer(event->data, data);
        ret = 0;
+unlock:
+       mutex_unlock(&event->mmap_mutex);
+
+       if (old_data)
+               perf_mmap_data_put(old_data);
 out:
-       fput_light(output_file, fput_needed);
        return ret;
 }

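[Editorial note] One behavioral consequence, sketched with the hypothetical map_perf_buffer() helper from the mmap note above: redirection is now refused outright while the event has an active mmap(), where the old checks could race against a concurrent mmap():

        void *buf = map_perf_buffer(fd2, 8);   /* fd2 gains an active mmap */
        if (ioctl(fd2, PERF_EVENT_IOC_SET_OUTPUT, fd1) < 0) {
                /* expected here: errno == EINVAL, from the mmap_count
                 * check in the hunk above */
        }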
@@ -4994,7 +5017,7 @@ SYSCALL_DEFINE5(perf_event_open,
                struct perf_event_attr __user *, attr_uptr,
                pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
-       struct perf_event *event, *group_leader;
+       struct perf_event *event, *group_leader = NULL, *output_event = NULL;
        struct perf_event_attr attr;
        struct perf_event_context *ctx;
        struct file *event_file = NULL;
@@ -5034,19 +5057,25 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_fd;
        }

+       if (group_fd != -1) {
+               group_leader = perf_fget_light(group_fd, &fput_needed);
+               if (IS_ERR(group_leader)) {
+                       err = PTR_ERR(group_leader);
+                       goto err_put_context;
+               }
+               group_file = group_leader->filp;
+               if (flags & PERF_FLAG_FD_OUTPUT)
+                       output_event = group_leader;
+               if (flags & PERF_FLAG_FD_NO_GROUP)
+                       group_leader = NULL;
+       }
+
        /*
         * Look up the group leader (we will attach this event to it):
         */
-       group_leader = NULL;
-       if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+       if (group_leader) {
                err = -EINVAL;
-               group_file = fget_light(group_fd, &fput_needed);
-               if (!group_file)
-                       goto err_put_context;
-               if (group_file->f_op != &perf_fops)
-                       goto err_put_context;

-               group_leader = group_file->private_data;
                /*
                 * Do not allow a recursive hierarchy (this new sibling
                 * becoming part of another group-sibling):
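[Editorial note] The open-time path now funnels through the same perf_fget_light() and perf_event_set_output() as the ioctl. From userspace nothing changes: PERF_FLAG_FD_OUTPUT with a group fd still means "write my samples into the group leader's buffer". A sketch (sys_perf_event_open and open_pair are illustrative wrappers, not kernel code):

        #include <linux/perf_event.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                       int cpu, int group_fd, unsigned long flags)
        {
                return syscall(__NR_perf_event_open, attr, pid, cpu,
                               group_fd, flags);
        }

        static int open_pair(struct perf_event_attr *a1, struct perf_event_attr *a2)
        {
                int leader = sys_perf_event_open(a1, 0, -1, -1, 0);

                /* the sibling joins leader's group and shares leader's buffer */
                return sys_perf_event_open(a2, 0, -1, leader, PERF_FLAG_FD_OUTPUT);
        }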
@@ -5068,9 +5097,16 @@ SYSCALL_DEFINE5(perf_event_open,

        event = perf_event_alloc(&attr, cpu, ctx, group_leader,
                                 NULL, NULL, GFP_KERNEL);
-       err = PTR_ERR(event);
-       if (IS_ERR(event))
+       if (IS_ERR(event)) {
+               err = PTR_ERR(event);
                goto err_put_context;
+       }
+
+       if (output_event) {
+               err = perf_event_set_output(event, output_event);
+               if (err)
+                       goto err_free_put_context;
+       }

        event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
        if (IS_ERR(event_file)) {
@@ -5078,12 +5114,6 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_free_put_context;
        }

-       if (flags & PERF_FLAG_FD_OUTPUT) {
-               err = perf_event_set_output(event, group_fd);
-               if (err)
-                       goto err_fput_free_put_context;
-       }
-
        event->filp = event_file;
        WARN_ON_ONCE(ctx->parent_ctx);
        mutex_lock(&ctx->mutex);
@@ -5101,8 +5131,6 @@ SYSCALL_DEFINE5(perf_event_open,
        fd_install(event_fd, event_file);
        return event_fd;

-err_fput_free_put_context:
-       fput(event_file);
 err_free_put_context:
        free_event(event);
 err_put_context: