Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  356
1 files changed, 220 insertions, 136 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bd7ce8ca5bb9..ff86c558af4c 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	struct perf_event *group_leader = event->group_leader;
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+	event->attach_state |= PERF_ATTACH_CONTEXT;
 
 	/*
-	 * Depending on whether it is a standalone or sibling event,
-	 * add it straight to the context's event list, or to the group
-	 * leader's sibling list:
+	 * If we're a stand alone event or group leader, we go to the context
+	 * list, group events are kept attached to the group so that
+	 * perf_group_detach can, at all times, locate all siblings.
 	 */
-	if (group_leader == event) {
+	if (event->group_leader == event) {
 		struct list_head *list;
 
 		if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 
 		list = ctx_group_list(event, ctx);
 		list_add_tail(&event->group_entry, list);
-	} else {
-		if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
-		    !is_software_event(event))
-			group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
-
-		list_add_tail(&event->group_entry, &group_leader->sibling_list);
-		group_leader->nr_siblings++;
 	}
 
 	list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_stat++;
 }
 
+static void perf_group_attach(struct perf_event *event)
+{
+	struct perf_event *group_leader = event->group_leader;
+
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
+	event->attach_state |= PERF_ATTACH_GROUP;
+
+	if (group_leader == event)
+		return;
+
+	if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
+			!is_software_event(event))
+		group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
+
+	list_add_tail(&event->group_entry, &group_leader->sibling_list);
+	group_leader->nr_siblings++;
+}
+
 /*
  * Remove a event from the lists for its context.
  * Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
 static void
 list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 {
-	if (list_empty(&event->group_entry))
+	/*
+	 * We can have double detach due to exit/hot-unplug + close.
+	 */
+	if (!(event->attach_state & PERF_ATTACH_CONTEXT))
 		return;
 
+	event->attach_state &= ~PERF_ATTACH_CONTEXT;
+
 	ctx->nr_events--;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat--;
 
-	list_del_init(&event->group_entry);
 	list_del_rcu(&event->event_entry);
 
-	if (event->group_leader != event)
-		event->group_leader->nr_siblings--;
+	if (event->group_leader == event)
+		list_del_init(&event->group_entry);
 
 	update_group_times(event);
 
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	event->state = PERF_EVENT_STATE_OFF;
 }
 
-static void
-perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
+static void perf_group_detach(struct perf_event *event)
 {
 	struct perf_event *sibling, *tmp;
+	struct list_head *list = NULL;
+
+	/*
+	 * We can have double detach due to exit/hot-unplug + close.
+	 */
+	if (!(event->attach_state & PERF_ATTACH_GROUP))
+		return;
+
+	event->attach_state &= ~PERF_ATTACH_GROUP;
+
+	/*
+	 * If this is a sibling, remove it from its group.
+	 */
+	if (event->group_leader != event) {
+		list_del_init(&event->group_entry);
+		event->group_leader->nr_siblings--;
+		return;
+	}
+
+	if (!list_empty(&event->group_entry))
+		list = &event->group_entry;
 
 	/*
 	 * If this was a group event with sibling events then
 	 * upgrade the siblings to singleton events by adding them
-	 * to the context list directly:
+	 * to whatever list we are on.
 	 */
 	list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
-		struct list_head *list;
-
-		list = ctx_group_list(event, ctx);
-		list_move_tail(&sibling->group_entry, list);
+		if (list)
+			list_move_tail(&sibling->group_entry, list);
 		sibling->group_leader = sibling;
 
 		/* Inherit group flags from the previous leader */
@@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event,
 	if (txn)
 		pmu->start_txn(pmu);
 
-	if (event_sched_in(group_event, cpuctx, ctx))
+	if (event_sched_in(group_event, cpuctx, ctx)) {
+		if (txn)
+			pmu->cancel_txn(pmu);
 		return -EAGAIN;
+	}
 
 	/*
 	 * Schedule in siblings as one group (if any):
@@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event,
 	}
 
 group_error:
-	if (txn)
-		pmu->cancel_txn(pmu);
-
 	/*
 	 * Groups can be scheduled in as one unit only, so undo any
 	 * partial group before returning:
@@ -689,6 +724,9 @@ group_error:
 	}
 	event_sched_out(group_event, cpuctx, ctx);
 
+	if (txn)
+		pmu->cancel_txn(pmu);
+
 	return -EAGAIN;
 }
 
@@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event,
 			  struct perf_event_context *ctx)
 {
 	list_add_event(event, ctx);
+	perf_group_attach(event);
 	event->tstamp_enabled = ctx->time;
 	event->tstamp_running = ctx->time;
 	event->tstamp_stopped = ctx->time;
@@ -1468,6 +1507,9 @@ do { \
 		divisor = nsec * frequency;
 	}
 
+	if (!divisor)
+		return dividend;
+
 	return div64_u64(dividend, divisor);
 }
 
@@ -1490,7 +1532,7 @@ static int perf_event_start(struct perf_event *event)
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	u64 period, sample_period;
+	s64 period, sample_period;
 	s64 delta;
 
 	period = perf_calculate_period(event, nsec, count);
@@ -1841,6 +1883,7 @@ static void free_event_rcu(struct rcu_head *head)
 }
 
 static void perf_pending_sync(struct perf_event *event);
+static void perf_mmap_data_put(struct perf_mmap_data *data);
 
 static void free_event(struct perf_event *event)
 {
@@ -1856,9 +1899,9 @@ static void free_event(struct perf_event *event)
 		atomic_dec(&nr_task_events);
 	}
 
-	if (event->output) {
-		fput(event->output->filp);
-		event->output = NULL;
+	if (event->data) {
+		perf_mmap_data_put(event->data);
+		event->data = NULL;
 	}
 
 	if (event->destroy)
@@ -1893,8 +1936,8 @@ int perf_event_release_kernel(struct perf_event *event)
 	 */
 	mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
 	raw_spin_lock_irq(&ctx->lock);
+	perf_group_detach(event);
 	list_del_event(event, ctx);
-	perf_destroy_group(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 	mutex_unlock(&ctx->mutex);
 
@@ -2175,7 +2218,27 @@ unlock:
 	return ret;
 }
 
-static int perf_event_set_output(struct perf_event *event, int output_fd);
+static const struct file_operations perf_fops;
+
+static struct perf_event *perf_fget_light(int fd, int *fput_needed)
+{
+	struct file *file;
+
+	file = fget_light(fd, fput_needed);
+	if (!file)
+		return ERR_PTR(-EBADF);
+
+	if (file->f_op != &perf_fops) {
+		fput_light(file, *fput_needed);
+		*fput_needed = 0;
+		return ERR_PTR(-EBADF);
+	}
+
+	return file->private_data;
+}
+
+static int perf_event_set_output(struct perf_event *event,
+				 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -2202,7 +2265,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return perf_event_period(event, (u64 __user *)arg);
 
 	case PERF_EVENT_IOC_SET_OUTPUT:
-		return perf_event_set_output(event, arg);
+	{
+		struct perf_event *output_event = NULL;
+		int fput_needed = 0;
+		int ret;
+
+		if (arg != -1) {
+			output_event = perf_fget_light(arg, &fput_needed);
+			if (IS_ERR(output_event))
+				return PTR_ERR(output_event);
+		}
+
+		ret = perf_event_set_output(event, output_event);
+		if (output_event)
+			fput_light(output_event->filp, fput_needed);
+
+		return ret;
+	}
 
 	case PERF_EVENT_IOC_SET_FILTER:
 		return perf_event_set_filter(event, (void __user *)arg);
@@ -2335,8 +2414,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 	unsigned long size;
 	int i;
 
-	WARN_ON(atomic_read(&event->mmap_count));
-
 	size = sizeof(struct perf_mmap_data);
 	size += nr_pages * sizeof(void *);
 
@@ -2452,8 +2529,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
 	unsigned long size;
 	void *all_buf;
 
-	WARN_ON(atomic_read(&event->mmap_count));
-
 	size = sizeof(struct perf_mmap_data);
 	size += sizeof(void *);
 
@@ -2536,7 +2611,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
 	if (!data->watermark)
 		data->watermark = max_size / 2;
 
-
+	atomic_set(&data->refcount, 1);
 	rcu_assign_pointer(event->data, data);
 }
 
@@ -2548,13 +2623,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
 	perf_mmap_data_free(data);
 }
 
-static void perf_mmap_data_release(struct perf_event *event)
+static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
 {
-	struct perf_mmap_data *data = event->data;
+	struct perf_mmap_data *data;
+
+	rcu_read_lock();
+	data = rcu_dereference(event->data);
+	if (data) {
+		if (!atomic_inc_not_zero(&data->refcount))
+			data = NULL;
+	}
+	rcu_read_unlock();
+
+	return data;
+}
 
-	WARN_ON(atomic_read(&event->mmap_count));
+static void perf_mmap_data_put(struct perf_mmap_data *data)
+{
+	if (!atomic_dec_and_test(&data->refcount))
+		return;
 
-	rcu_assign_pointer(event->data, NULL);
 	call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
 }
 
@@ -2569,15 +2657,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 {
 	struct perf_event *event = vma->vm_file->private_data;
 
-	WARN_ON_ONCE(event->ctx->parent_ctx);
 	if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
 		unsigned long size = perf_data_size(event->data);
-		struct user_struct *user = current_user();
+		struct user_struct *user = event->mmap_user;
+		struct perf_mmap_data *data = event->data;
 
 		atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
-		vma->vm_mm->locked_vm -= event->data->nr_locked;
-		perf_mmap_data_release(event);
+		vma->vm_mm->locked_vm -= event->mmap_locked;
+		rcu_assign_pointer(event->data, NULL);
 		mutex_unlock(&event->mmap_mutex);
+
+		perf_mmap_data_put(data);
+		free_uid(user);
 	}
 }
 
@@ -2629,13 +2720,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 	mutex_lock(&event->mmap_mutex);
-	if (event->output) {
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	if (atomic_inc_not_zero(&event->mmap_count)) {
-		if (nr_pages != event->data->nr_pages)
+	if (event->data) {
+		if (event->data->nr_pages == nr_pages)
+			atomic_inc(&event->data->refcount);
+		else
 			ret = -EINVAL;
 		goto unlock;
 	}
@@ -2667,21 +2755,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	WARN_ON(event->data);
 
 	data = perf_mmap_data_alloc(event, nr_pages);
-	ret = -ENOMEM;
-	if (!data)
+	if (!data) {
+		ret = -ENOMEM;
 		goto unlock;
+	}
 
-	ret = 0;
 	perf_mmap_data_init(event, data);
-
-	atomic_set(&event->mmap_count, 1);
-	atomic_long_add(user_extra, &user->locked_vm);
-	vma->vm_mm->locked_vm += extra;
-	event->data->nr_locked = extra;
 	if (vma->vm_flags & VM_WRITE)
 		event->data->writable = 1;
 
+	atomic_long_add(user_extra, &user->locked_vm);
+	event->mmap_locked = extra;
+	event->mmap_user = get_current_user();
+	vma->vm_mm->locked_vm += event->mmap_locked;
+
 unlock:
+	if (!ret)
+		atomic_inc(&event->mmap_count);
 	mutex_unlock(&event->mmap_mutex);
 
 	vma->vm_flags |= VM_RESERVED;
@@ -2977,6 +3067,7 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
 
 		len -= size;
 		handle->addr += size;
+		buf += size;
 		handle->size -= size;
 		if (!handle->size) {
 			struct perf_mmap_data *data = handle->data;
@@ -2993,7 +3084,6 @@ int perf_output_begin(struct perf_output_handle *handle,
 		     struct perf_event *event, unsigned int size,
 		     int nmi, int sample)
 {
-	struct perf_event *output_event;
 	struct perf_mmap_data *data;
 	unsigned long tail, offset, head;
 	int have_lost;
@@ -3010,10 +3100,6 @@ int perf_output_begin(struct perf_output_handle *handle,
 	if (event->parent)
 		event = event->parent;
 
-	output_event = rcu_dereference(event->output);
-	if (output_event)
-		event = output_event;
-
 	data = rcu_dereference(event->data);
 	if (!data)
 		goto out;
@@ -3972,13 +4058,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
 	}
 }
 
-static void perf_swevent_unthrottle(struct perf_event *event)
-{
-	/*
-	 * Nothing to do, we already reset hwc->interrupts.
-	 */
-}
-
 static void perf_swevent_add(struct perf_event *event, u64 nr,
 			       int nmi, struct perf_sample_data *data,
 			       struct pt_regs *regs)
@@ -4193,11 +4272,22 @@ static void perf_swevent_disable(struct perf_event *event)
 	hlist_del_rcu(&event->hlist_entry);
 }
 
+static void perf_swevent_void(struct perf_event *event)
+{
+}
+
+static int perf_swevent_int(struct perf_event *event)
+{
+	return 0;
+}
+
 static const struct pmu perf_ops_generic = {
 	.enable		= perf_swevent_enable,
 	.disable	= perf_swevent_disable,
+	.start		= perf_swevent_int,
+	.stop		= perf_swevent_void,
 	.read		= perf_swevent_read,
-	.unthrottle	= perf_swevent_unthrottle,
+	.unthrottle	= perf_swevent_void, /* hwc->interrupts already reset */
 };
 
 /*
@@ -4478,8 +4568,10 @@ static int swevent_hlist_get(struct perf_event *event)
 static const struct pmu perf_ops_tracepoint = {
 	.enable		= perf_trace_enable,
 	.disable	= perf_trace_disable,
+	.start		= perf_swevent_int,
+	.stop		= perf_swevent_void,
 	.read		= perf_swevent_read,
-	.unthrottle	= perf_swevent_unthrottle,
+	.unthrottle	= perf_swevent_void,
 };
 
 static int perf_tp_filter_match(struct perf_event *event,
@@ -4912,39 +5004,17 @@ err_size:
 	goto out;
 }
 
-static int perf_event_set_output(struct perf_event *event, int output_fd)
+static int
+perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 {
-	struct perf_event *output_event = NULL;
-	struct file *output_file = NULL;
-	struct perf_event *old_output;
-	int fput_needed = 0;
+	struct perf_mmap_data *data = NULL, *old_data = NULL;
 	int ret = -EINVAL;
 
-	/*
-	 * Don't allow output of inherited per-task events. This would
-	 * create performance issues due to cross cpu access.
-	 */
-	if (event->cpu == -1 && event->attr.inherit)
-		return -EINVAL;
-
-	if (!output_fd)
+	if (!output_event)
 		goto set;
 
-	output_file = fget_light(output_fd, &fput_needed);
-	if (!output_file)
-		return -EBADF;
-
-	if (output_file->f_op != &perf_fops)
-		goto out;
-
-	output_event = output_file->private_data;
-
-	/* Don't chain output fds */
-	if (output_event->output)
-		goto out;
-
-	/* Don't set an output fd when we already have an output channel */
-	if (event->data)
+	/* don't allow circular references */
+	if (event == output_event)
 		goto out;
 
 	/*
@@ -4959,26 +5029,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
 	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
 		goto out;
 
-	atomic_long_inc(&output_file->f_count);
-
 set:
 	mutex_lock(&event->mmap_mutex);
-	old_output = event->output;
-	rcu_assign_pointer(event->output, output_event);
-	mutex_unlock(&event->mmap_mutex);
+	/* Can't redirect output if we've got an active mmap() */
+	if (atomic_read(&event->mmap_count))
+		goto unlock;
 
-	if (old_output) {
-		/*
-		 * we need to make sure no existing perf_output_*()
-		 * is still referencing this event.
-		 */
-		synchronize_rcu();
-		fput(old_output->filp);
+	if (output_event) {
+		/* get the buffer we want to redirect to */
+		data = perf_mmap_data_get(output_event);
+		if (!data)
+			goto unlock;
 	}
 
+	old_data = event->data;
+	rcu_assign_pointer(event->data, data);
 	ret = 0;
+unlock:
+	mutex_unlock(&event->mmap_mutex);
+
+	if (old_data)
+		perf_mmap_data_put(old_data);
 out:
-	fput_light(output_file, fput_needed);
 	return ret;
 }
 
@@ -4994,7 +5066,7 @@ SYSCALL_DEFINE5(perf_event_open,
 		struct perf_event_attr __user *, attr_uptr,
 		pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
 {
-	struct perf_event *event, *group_leader;
+	struct perf_event *event, *group_leader = NULL, *output_event = NULL;
 	struct perf_event_attr attr;
 	struct perf_event_context *ctx;
 	struct file *event_file = NULL;
@@ -5034,19 +5106,25 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_fd;
 	}
 
+	if (group_fd != -1) {
+		group_leader = perf_fget_light(group_fd, &fput_needed);
+		if (IS_ERR(group_leader)) {
+			err = PTR_ERR(group_leader);
+			goto err_put_context;
+		}
+		group_file = group_leader->filp;
+		if (flags & PERF_FLAG_FD_OUTPUT)
+			output_event = group_leader;
+		if (flags & PERF_FLAG_FD_NO_GROUP)
+			group_leader = NULL;
+	}
+
 	/*
 	 * Look up the group leader (we will attach this event to it):
 	 */
-	group_leader = NULL;
-	if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
+	if (group_leader) {
 		err = -EINVAL;
-		group_file = fget_light(group_fd, &fput_needed);
-		if (!group_file)
-			goto err_put_context;
-		if (group_file->f_op != &perf_fops)
-			goto err_put_context;
 
-		group_leader = group_file->private_data;
 		/*
 		 * Do not allow a recursive hierarchy (this new sibling
 		 * becoming part of another group-sibling):
@@ -5068,9 +5146,16 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	event = perf_event_alloc(&attr, cpu, ctx, group_leader,
 				 NULL, NULL, GFP_KERNEL);
-	err = PTR_ERR(event);
-	if (IS_ERR(event))
+	if (IS_ERR(event)) {
+		err = PTR_ERR(event);
 		goto err_put_context;
+	}
+
+	if (output_event) {
+		err = perf_event_set_output(event, output_event);
+		if (err)
+			goto err_free_put_context;
+	}
 
 	event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
 	if (IS_ERR(event_file)) {
@@ -5078,12 +5163,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_free_put_context;
 	}
 
-	if (flags & PERF_FLAG_FD_OUTPUT) {
-		err = perf_event_set_output(event, group_fd);
-		if (err)
-			goto err_fput_free_put_context;
-	}
-
 	event->filp = event_file;
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
@@ -5097,12 +5176,16 @@ SYSCALL_DEFINE5(perf_event_open,
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
 
+	/*
+	 * Drop the reference on the group_event after placing the
+	 * new event on the sibling_list. This ensures destruction
+	 * of the group leader will find the pointer to itself in
+	 * perf_group_detach().
+	 */
 	fput_light(group_file, fput_needed);
 	fd_install(event_fd, event_file);
 	return event_fd;
 
-err_fput_free_put_context:
-	fput(event_file);
 err_free_put_context:
 	free_event(event);
 err_put_context:
@@ -5420,6 +5503,7 @@ static void perf_free_event(struct perf_event *event,
 
 	fput(parent->filp);
 
+	perf_group_detach(event);
 	list_del_event(event, ctx);
 	free_event(event);
 }