aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-06-03 18:45:26 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-06-03 18:45:26 -0400
commitf150dba6d4a1e275b62ca76572c2786c71b91e85 (patch)
tree1fb8ed0a64f69431e8a3304a1b346a19979028b9 /kernel
parent636667a545b2d16797f27002a65d688c195c9b60 (diff)
parentc6df8d5ab87a246942d138321e1721edbb69f6e1 (diff)
Merge branch 'perf-fixes-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-fixes-for-linus-2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: perf: Fix crash in swevents perf buildid-list: Fix --with-hits event processing perf scripts python: Give field dict to unhandled callback perf hist: fix objdump output parsing perf-record: Check correct pid when forking perf: Do the comm inheritance per thread in event__process_task perf: Use event__process_task from perf sched perf: Process comm events by tid blktrace: Fix new kernel-doc warnings perf_events: Fix unincremented buffer base on partial copy perf_events: Fix event scheduling issues introduced by transactional API perf_events, trace: Fix perf_trace_destroy(), mutex went missing perf_events, trace: Fix probe unregister race perf_events: Fix races in group composition perf_events: Fix races and clean up perf_event and perf_mmap_data interaction
Diffstat (limited to 'kernel')
-rw-r--r--kernel/perf_event.c351
-rw-r--r--kernel/trace/blktrace.c2
-rw-r--r--kernel/trace/trace_event_perf.c15
-rw-r--r--kernel/trace/trace_kprobe.c4
-rw-r--r--kernel/trace/trace_syscalls.c4
5 files changed, 234 insertions, 142 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index bd7ce8ca5bb..31d6afe9259 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -283,14 +283,15 @@ ctx_group_list(struct perf_event *event, struct perf_event_context *ctx)
283static void 283static void
284list_add_event(struct perf_event *event, struct perf_event_context *ctx) 284list_add_event(struct perf_event *event, struct perf_event_context *ctx)
285{ 285{
286 struct perf_event *group_leader = event->group_leader; 286 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
287 event->attach_state |= PERF_ATTACH_CONTEXT;
287 288
288 /* 289 /*
289 * Depending on whether it is a standalone or sibling event, 290 * If we're a stand alone event or group leader, we go to the context
290 * add it straight to the context's event list, or to the group 291 * list, group events are kept attached to the group so that
291 * leader's sibling list: 292 * perf_group_detach can, at all times, locate all siblings.
292 */ 293 */
293 if (group_leader == event) { 294 if (event->group_leader == event) {
294 struct list_head *list; 295 struct list_head *list;
295 296
296 if (is_software_event(event)) 297 if (is_software_event(event))
@@ -298,13 +299,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
298 299
299 list = ctx_group_list(event, ctx); 300 list = ctx_group_list(event, ctx);
300 list_add_tail(&event->group_entry, list); 301 list_add_tail(&event->group_entry, list);
301 } else {
302 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
303 !is_software_event(event))
304 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
305
306 list_add_tail(&event->group_entry, &group_leader->sibling_list);
307 group_leader->nr_siblings++;
308 } 302 }
309 303
310 list_add_rcu(&event->event_entry, &ctx->event_list); 304 list_add_rcu(&event->event_entry, &ctx->event_list);
@@ -313,6 +307,24 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
313 ctx->nr_stat++; 307 ctx->nr_stat++;
314} 308}
315 309
310static void perf_group_attach(struct perf_event *event)
311{
312 struct perf_event *group_leader = event->group_leader;
313
314 WARN_ON_ONCE(event->attach_state & PERF_ATTACH_GROUP);
315 event->attach_state |= PERF_ATTACH_GROUP;
316
317 if (group_leader == event)
318 return;
319
320 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
321 !is_software_event(event))
322 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
323
324 list_add_tail(&event->group_entry, &group_leader->sibling_list);
325 group_leader->nr_siblings++;
326}
327
316/* 328/*
317 * Remove a event from the lists for its context. 329 * Remove a event from the lists for its context.
318 * Must be called with ctx->mutex and ctx->lock held. 330 * Must be called with ctx->mutex and ctx->lock held.
@@ -320,17 +332,22 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
320static void 332static void
321list_del_event(struct perf_event *event, struct perf_event_context *ctx) 333list_del_event(struct perf_event *event, struct perf_event_context *ctx)
322{ 334{
323 if (list_empty(&event->group_entry)) 335 /*
336 * We can have double detach due to exit/hot-unplug + close.
337 */
338 if (!(event->attach_state & PERF_ATTACH_CONTEXT))
324 return; 339 return;
340
341 event->attach_state &= ~PERF_ATTACH_CONTEXT;
342
325 ctx->nr_events--; 343 ctx->nr_events--;
326 if (event->attr.inherit_stat) 344 if (event->attr.inherit_stat)
327 ctx->nr_stat--; 345 ctx->nr_stat--;
328 346
329 list_del_init(&event->group_entry);
330 list_del_rcu(&event->event_entry); 347 list_del_rcu(&event->event_entry);
331 348
332 if (event->group_leader != event) 349 if (event->group_leader == event)
333 event->group_leader->nr_siblings--; 350 list_del_init(&event->group_entry);
334 351
335 update_group_times(event); 352 update_group_times(event);
336 353
@@ -345,21 +362,39 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
345 event->state = PERF_EVENT_STATE_OFF; 362 event->state = PERF_EVENT_STATE_OFF;
346} 363}
347 364
348static void 365static void perf_group_detach(struct perf_event *event)
349perf_destroy_group(struct perf_event *event, struct perf_event_context *ctx)
350{ 366{
351 struct perf_event *sibling, *tmp; 367 struct perf_event *sibling, *tmp;
368 struct list_head *list = NULL;
369
370 /*
371 * We can have double detach due to exit/hot-unplug + close.
372 */
373 if (!(event->attach_state & PERF_ATTACH_GROUP))
374 return;
375
376 event->attach_state &= ~PERF_ATTACH_GROUP;
377
378 /*
379 * If this is a sibling, remove it from its group.
380 */
381 if (event->group_leader != event) {
382 list_del_init(&event->group_entry);
383 event->group_leader->nr_siblings--;
384 return;
385 }
386
387 if (!list_empty(&event->group_entry))
388 list = &event->group_entry;
352 389
353 /* 390 /*
354 * If this was a group event with sibling events then 391 * If this was a group event with sibling events then
355 * upgrade the siblings to singleton events by adding them 392 * upgrade the siblings to singleton events by adding them
356 * to the context list directly: 393 * to whatever list we are on.
357 */ 394 */
358 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) { 395 list_for_each_entry_safe(sibling, tmp, &event->sibling_list, group_entry) {
359 struct list_head *list; 396 if (list)
360 397 list_move_tail(&sibling->group_entry, list);
361 list = ctx_group_list(event, ctx);
362 list_move_tail(&sibling->group_entry, list);
363 sibling->group_leader = sibling; 398 sibling->group_leader = sibling;
364 399
365 /* Inherit group flags from the previous leader */ 400 /* Inherit group flags from the previous leader */
@@ -652,8 +687,11 @@ group_sched_in(struct perf_event *group_event,
652 if (txn) 687 if (txn)
653 pmu->start_txn(pmu); 688 pmu->start_txn(pmu);
654 689
655 if (event_sched_in(group_event, cpuctx, ctx)) 690 if (event_sched_in(group_event, cpuctx, ctx)) {
691 if (txn)
692 pmu->cancel_txn(pmu);
656 return -EAGAIN; 693 return -EAGAIN;
694 }
657 695
658 /* 696 /*
659 * Schedule in siblings as one group (if any): 697 * Schedule in siblings as one group (if any):
@@ -675,9 +713,6 @@ group_sched_in(struct perf_event *group_event,
675 } 713 }
676 714
677group_error: 715group_error:
678 if (txn)
679 pmu->cancel_txn(pmu);
680
681 /* 716 /*
682 * Groups can be scheduled in as one unit only, so undo any 717 * Groups can be scheduled in as one unit only, so undo any
683 * partial group before returning: 718 * partial group before returning:
@@ -689,6 +724,9 @@ group_error:
689 } 724 }
690 event_sched_out(group_event, cpuctx, ctx); 725 event_sched_out(group_event, cpuctx, ctx);
691 726
727 if (txn)
728 pmu->cancel_txn(pmu);
729
692 return -EAGAIN; 730 return -EAGAIN;
693} 731}
694 732
@@ -727,6 +765,7 @@ static void add_event_to_ctx(struct perf_event *event,
727 struct perf_event_context *ctx) 765 struct perf_event_context *ctx)
728{ 766{
729 list_add_event(event, ctx); 767 list_add_event(event, ctx);
768 perf_group_attach(event);
730 event->tstamp_enabled = ctx->time; 769 event->tstamp_enabled = ctx->time;
731 event->tstamp_running = ctx->time; 770 event->tstamp_running = ctx->time;
732 event->tstamp_stopped = ctx->time; 771 event->tstamp_stopped = ctx->time;
@@ -1841,6 +1880,7 @@ static void free_event_rcu(struct rcu_head *head)
1841} 1880}
1842 1881
1843static void perf_pending_sync(struct perf_event *event); 1882static void perf_pending_sync(struct perf_event *event);
1883static void perf_mmap_data_put(struct perf_mmap_data *data);
1844 1884
1845static void free_event(struct perf_event *event) 1885static void free_event(struct perf_event *event)
1846{ 1886{
@@ -1856,9 +1896,9 @@ static void free_event(struct perf_event *event)
1856 atomic_dec(&nr_task_events); 1896 atomic_dec(&nr_task_events);
1857 } 1897 }
1858 1898
1859 if (event->output) { 1899 if (event->data) {
1860 fput(event->output->filp); 1900 perf_mmap_data_put(event->data);
1861 event->output = NULL; 1901 event->data = NULL;
1862 } 1902 }
1863 1903
1864 if (event->destroy) 1904 if (event->destroy)
@@ -1893,8 +1933,8 @@ int perf_event_release_kernel(struct perf_event *event)
1893 */ 1933 */
1894 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 1934 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING);
1895 raw_spin_lock_irq(&ctx->lock); 1935 raw_spin_lock_irq(&ctx->lock);
1936 perf_group_detach(event);
1896 list_del_event(event, ctx); 1937 list_del_event(event, ctx);
1897 perf_destroy_group(event, ctx);
1898 raw_spin_unlock_irq(&ctx->lock); 1938 raw_spin_unlock_irq(&ctx->lock);
1899 mutex_unlock(&ctx->mutex); 1939 mutex_unlock(&ctx->mutex);
1900 1940
@@ -2175,7 +2215,27 @@ unlock:
2175 return ret; 2215 return ret;
2176} 2216}
2177 2217
2178static int perf_event_set_output(struct perf_event *event, int output_fd); 2218static const struct file_operations perf_fops;
2219
2220static struct perf_event *perf_fget_light(int fd, int *fput_needed)
2221{
2222 struct file *file;
2223
2224 file = fget_light(fd, fput_needed);
2225 if (!file)
2226 return ERR_PTR(-EBADF);
2227
2228 if (file->f_op != &perf_fops) {
2229 fput_light(file, *fput_needed);
2230 *fput_needed = 0;
2231 return ERR_PTR(-EBADF);
2232 }
2233
2234 return file->private_data;
2235}
2236
2237static int perf_event_set_output(struct perf_event *event,
2238 struct perf_event *output_event);
2179static int perf_event_set_filter(struct perf_event *event, void __user *arg); 2239static int perf_event_set_filter(struct perf_event *event, void __user *arg);
2180 2240
2181static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 2241static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -2202,7 +2262,23 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
2202 return perf_event_period(event, (u64 __user *)arg); 2262 return perf_event_period(event, (u64 __user *)arg);
2203 2263
2204 case PERF_EVENT_IOC_SET_OUTPUT: 2264 case PERF_EVENT_IOC_SET_OUTPUT:
2205 return perf_event_set_output(event, arg); 2265 {
2266 struct perf_event *output_event = NULL;
2267 int fput_needed = 0;
2268 int ret;
2269
2270 if (arg != -1) {
2271 output_event = perf_fget_light(arg, &fput_needed);
2272 if (IS_ERR(output_event))
2273 return PTR_ERR(output_event);
2274 }
2275
2276 ret = perf_event_set_output(event, output_event);
2277 if (output_event)
2278 fput_light(output_event->filp, fput_needed);
2279
2280 return ret;
2281 }
2206 2282
2207 case PERF_EVENT_IOC_SET_FILTER: 2283 case PERF_EVENT_IOC_SET_FILTER:
2208 return perf_event_set_filter(event, (void __user *)arg); 2284 return perf_event_set_filter(event, (void __user *)arg);
@@ -2335,8 +2411,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2335 unsigned long size; 2411 unsigned long size;
2336 int i; 2412 int i;
2337 2413
2338 WARN_ON(atomic_read(&event->mmap_count));
2339
2340 size = sizeof(struct perf_mmap_data); 2414 size = sizeof(struct perf_mmap_data);
2341 size += nr_pages * sizeof(void *); 2415 size += nr_pages * sizeof(void *);
2342 2416
@@ -2452,8 +2526,6 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
2452 unsigned long size; 2526 unsigned long size;
2453 void *all_buf; 2527 void *all_buf;
2454 2528
2455 WARN_ON(atomic_read(&event->mmap_count));
2456
2457 size = sizeof(struct perf_mmap_data); 2529 size = sizeof(struct perf_mmap_data);
2458 size += sizeof(void *); 2530 size += sizeof(void *);
2459 2531
@@ -2536,7 +2608,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
2536 if (!data->watermark) 2608 if (!data->watermark)
2537 data->watermark = max_size / 2; 2609 data->watermark = max_size / 2;
2538 2610
2539 2611 atomic_set(&data->refcount, 1);
2540 rcu_assign_pointer(event->data, data); 2612 rcu_assign_pointer(event->data, data);
2541} 2613}
2542 2614
@@ -2548,13 +2620,26 @@ static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
2548 perf_mmap_data_free(data); 2620 perf_mmap_data_free(data);
2549} 2621}
2550 2622
2551static void perf_mmap_data_release(struct perf_event *event) 2623static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
2552{ 2624{
2553 struct perf_mmap_data *data = event->data; 2625 struct perf_mmap_data *data;
2554 2626
2555 WARN_ON(atomic_read(&event->mmap_count)); 2627 rcu_read_lock();
2628 data = rcu_dereference(event->data);
2629 if (data) {
2630 if (!atomic_inc_not_zero(&data->refcount))
2631 data = NULL;
2632 }
2633 rcu_read_unlock();
2634
2635 return data;
2636}
2637
2638static void perf_mmap_data_put(struct perf_mmap_data *data)
2639{
2640 if (!atomic_dec_and_test(&data->refcount))
2641 return;
2556 2642
2557 rcu_assign_pointer(event->data, NULL);
2558 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu); 2643 call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
2559} 2644}
2560 2645
@@ -2569,15 +2654,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
2569{ 2654{
2570 struct perf_event *event = vma->vm_file->private_data; 2655 struct perf_event *event = vma->vm_file->private_data;
2571 2656
2572 WARN_ON_ONCE(event->ctx->parent_ctx);
2573 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) { 2657 if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
2574 unsigned long size = perf_data_size(event->data); 2658 unsigned long size = perf_data_size(event->data);
2575 struct user_struct *user = current_user(); 2659 struct user_struct *user = event->mmap_user;
2660 struct perf_mmap_data *data = event->data;
2576 2661
2577 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm); 2662 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
2578 vma->vm_mm->locked_vm -= event->data->nr_locked; 2663 vma->vm_mm->locked_vm -= event->mmap_locked;
2579 perf_mmap_data_release(event); 2664 rcu_assign_pointer(event->data, NULL);
2580 mutex_unlock(&event->mmap_mutex); 2665 mutex_unlock(&event->mmap_mutex);
2666
2667 perf_mmap_data_put(data);
2668 free_uid(user);
2581 } 2669 }
2582} 2670}
2583 2671
@@ -2629,13 +2717,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2629 2717
2630 WARN_ON_ONCE(event->ctx->parent_ctx); 2718 WARN_ON_ONCE(event->ctx->parent_ctx);
2631 mutex_lock(&event->mmap_mutex); 2719 mutex_lock(&event->mmap_mutex);
2632 if (event->output) { 2720 if (event->data) {
2633 ret = -EINVAL; 2721 if (event->data->nr_pages == nr_pages)
2634 goto unlock; 2722 atomic_inc(&event->data->refcount);
2635 } 2723 else
2636
2637 if (atomic_inc_not_zero(&event->mmap_count)) {
2638 if (nr_pages != event->data->nr_pages)
2639 ret = -EINVAL; 2724 ret = -EINVAL;
2640 goto unlock; 2725 goto unlock;
2641 } 2726 }
@@ -2667,21 +2752,23 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
2667 WARN_ON(event->data); 2752 WARN_ON(event->data);
2668 2753
2669 data = perf_mmap_data_alloc(event, nr_pages); 2754 data = perf_mmap_data_alloc(event, nr_pages);
2670 ret = -ENOMEM; 2755 if (!data) {
2671 if (!data) 2756 ret = -ENOMEM;
2672 goto unlock; 2757 goto unlock;
2758 }
2673 2759
2674 ret = 0;
2675 perf_mmap_data_init(event, data); 2760 perf_mmap_data_init(event, data);
2676
2677 atomic_set(&event->mmap_count, 1);
2678 atomic_long_add(user_extra, &user->locked_vm);
2679 vma->vm_mm->locked_vm += extra;
2680 event->data->nr_locked = extra;
2681 if (vma->vm_flags & VM_WRITE) 2761 if (vma->vm_flags & VM_WRITE)
2682 event->data->writable = 1; 2762 event->data->writable = 1;
2683 2763
2764 atomic_long_add(user_extra, &user->locked_vm);
2765 event->mmap_locked = extra;
2766 event->mmap_user = get_current_user();
2767 vma->vm_mm->locked_vm += event->mmap_locked;
2768
2684unlock: 2769unlock:
2770 if (!ret)
2771 atomic_inc(&event->mmap_count);
2685 mutex_unlock(&event->mmap_mutex); 2772 mutex_unlock(&event->mmap_mutex);
2686 2773
2687 vma->vm_flags |= VM_RESERVED; 2774 vma->vm_flags |= VM_RESERVED;
@@ -2977,6 +3064,7 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
2977 3064
2978 len -= size; 3065 len -= size;
2979 handle->addr += size; 3066 handle->addr += size;
3067 buf += size;
2980 handle->size -= size; 3068 handle->size -= size;
2981 if (!handle->size) { 3069 if (!handle->size) {
2982 struct perf_mmap_data *data = handle->data; 3070 struct perf_mmap_data *data = handle->data;
@@ -2993,7 +3081,6 @@ int perf_output_begin(struct perf_output_handle *handle,
2993 struct perf_event *event, unsigned int size, 3081 struct perf_event *event, unsigned int size,
2994 int nmi, int sample) 3082 int nmi, int sample)
2995{ 3083{
2996 struct perf_event *output_event;
2997 struct perf_mmap_data *data; 3084 struct perf_mmap_data *data;
2998 unsigned long tail, offset, head; 3085 unsigned long tail, offset, head;
2999 int have_lost; 3086 int have_lost;
@@ -3010,10 +3097,6 @@ int perf_output_begin(struct perf_output_handle *handle,
3010 if (event->parent) 3097 if (event->parent)
3011 event = event->parent; 3098 event = event->parent;
3012 3099
3013 output_event = rcu_dereference(event->output);
3014 if (output_event)
3015 event = output_event;
3016
3017 data = rcu_dereference(event->data); 3100 data = rcu_dereference(event->data);
3018 if (!data) 3101 if (!data)
3019 goto out; 3102 goto out;
@@ -3972,13 +4055,6 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
3972 } 4055 }
3973} 4056}
3974 4057
3975static void perf_swevent_unthrottle(struct perf_event *event)
3976{
3977 /*
3978 * Nothing to do, we already reset hwc->interrupts.
3979 */
3980}
3981
3982static void perf_swevent_add(struct perf_event *event, u64 nr, 4058static void perf_swevent_add(struct perf_event *event, u64 nr,
3983 int nmi, struct perf_sample_data *data, 4059 int nmi, struct perf_sample_data *data,
3984 struct pt_regs *regs) 4060 struct pt_regs *regs)
@@ -4193,11 +4269,22 @@ static void perf_swevent_disable(struct perf_event *event)
4193 hlist_del_rcu(&event->hlist_entry); 4269 hlist_del_rcu(&event->hlist_entry);
4194} 4270}
4195 4271
4272static void perf_swevent_void(struct perf_event *event)
4273{
4274}
4275
4276static int perf_swevent_int(struct perf_event *event)
4277{
4278 return 0;
4279}
4280
4196static const struct pmu perf_ops_generic = { 4281static const struct pmu perf_ops_generic = {
4197 .enable = perf_swevent_enable, 4282 .enable = perf_swevent_enable,
4198 .disable = perf_swevent_disable, 4283 .disable = perf_swevent_disable,
4284 .start = perf_swevent_int,
4285 .stop = perf_swevent_void,
4199 .read = perf_swevent_read, 4286 .read = perf_swevent_read,
4200 .unthrottle = perf_swevent_unthrottle, 4287 .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */
4201}; 4288};
4202 4289
4203/* 4290/*
@@ -4478,8 +4565,10 @@ static int swevent_hlist_get(struct perf_event *event)
4478static const struct pmu perf_ops_tracepoint = { 4565static const struct pmu perf_ops_tracepoint = {
4479 .enable = perf_trace_enable, 4566 .enable = perf_trace_enable,
4480 .disable = perf_trace_disable, 4567 .disable = perf_trace_disable,
4568 .start = perf_swevent_int,
4569 .stop = perf_swevent_void,
4481 .read = perf_swevent_read, 4570 .read = perf_swevent_read,
4482 .unthrottle = perf_swevent_unthrottle, 4571 .unthrottle = perf_swevent_void,
4483}; 4572};
4484 4573
4485static int perf_tp_filter_match(struct perf_event *event, 4574static int perf_tp_filter_match(struct perf_event *event,
@@ -4912,39 +5001,17 @@ err_size:
4912 goto out; 5001 goto out;
4913} 5002}
4914 5003
4915static int perf_event_set_output(struct perf_event *event, int output_fd) 5004static int
5005perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
4916{ 5006{
4917 struct perf_event *output_event = NULL; 5007 struct perf_mmap_data *data = NULL, *old_data = NULL;
4918 struct file *output_file = NULL;
4919 struct perf_event *old_output;
4920 int fput_needed = 0;
4921 int ret = -EINVAL; 5008 int ret = -EINVAL;
4922 5009
4923 /* 5010 if (!output_event)
4924 * Don't allow output of inherited per-task events. This would
4925 * create performance issues due to cross cpu access.
4926 */
4927 if (event->cpu == -1 && event->attr.inherit)
4928 return -EINVAL;
4929
4930 if (!output_fd)
4931 goto set; 5011 goto set;
4932 5012
4933 output_file = fget_light(output_fd, &fput_needed); 5013 /* don't allow circular references */
4934 if (!output_file) 5014 if (event == output_event)
4935 return -EBADF;
4936
4937 if (output_file->f_op != &perf_fops)
4938 goto out;
4939
4940 output_event = output_file->private_data;
4941
4942 /* Don't chain output fds */
4943 if (output_event->output)
4944 goto out;
4945
4946 /* Don't set an output fd when we already have an output channel */
4947 if (event->data)
4948 goto out; 5015 goto out;
4949 5016
4950 /* 5017 /*
@@ -4959,26 +5026,28 @@ static int perf_event_set_output(struct perf_event *event, int output_fd)
4959 if (output_event->cpu == -1 && output_event->ctx != event->ctx) 5026 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
4960 goto out; 5027 goto out;
4961 5028
4962 atomic_long_inc(&output_file->f_count);
4963
4964set: 5029set:
4965 mutex_lock(&event->mmap_mutex); 5030 mutex_lock(&event->mmap_mutex);
4966 old_output = event->output; 5031 /* Can't redirect output if we've got an active mmap() */
4967 rcu_assign_pointer(event->output, output_event); 5032 if (atomic_read(&event->mmap_count))
4968 mutex_unlock(&event->mmap_mutex); 5033 goto unlock;
4969 5034
4970 if (old_output) { 5035 if (output_event) {
4971 /* 5036 /* get the buffer we want to redirect to */
4972 * we need to make sure no existing perf_output_*() 5037 data = perf_mmap_data_get(output_event);
4973 * is still referencing this event. 5038 if (!data)
4974 */ 5039 goto unlock;
4975 synchronize_rcu();
4976 fput(old_output->filp);
4977 } 5040 }
4978 5041
5042 old_data = event->data;
5043 rcu_assign_pointer(event->data, data);
4979 ret = 0; 5044 ret = 0;
5045unlock:
5046 mutex_unlock(&event->mmap_mutex);
5047
5048 if (old_data)
5049 perf_mmap_data_put(old_data);
4980out: 5050out:
4981 fput_light(output_file, fput_needed);
4982 return ret; 5051 return ret;
4983} 5052}
4984 5053
@@ -4994,7 +5063,7 @@ SYSCALL_DEFINE5(perf_event_open,
4994 struct perf_event_attr __user *, attr_uptr, 5063 struct perf_event_attr __user *, attr_uptr,
4995 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) 5064 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
4996{ 5065{
4997 struct perf_event *event, *group_leader; 5066 struct perf_event *event, *group_leader = NULL, *output_event = NULL;
4998 struct perf_event_attr attr; 5067 struct perf_event_attr attr;
4999 struct perf_event_context *ctx; 5068 struct perf_event_context *ctx;
5000 struct file *event_file = NULL; 5069 struct file *event_file = NULL;
@@ -5034,19 +5103,25 @@ SYSCALL_DEFINE5(perf_event_open,
5034 goto err_fd; 5103 goto err_fd;
5035 } 5104 }
5036 5105
5106 if (group_fd != -1) {
5107 group_leader = perf_fget_light(group_fd, &fput_needed);
5108 if (IS_ERR(group_leader)) {
5109 err = PTR_ERR(group_leader);
5110 goto err_put_context;
5111 }
5112 group_file = group_leader->filp;
5113 if (flags & PERF_FLAG_FD_OUTPUT)
5114 output_event = group_leader;
5115 if (flags & PERF_FLAG_FD_NO_GROUP)
5116 group_leader = NULL;
5117 }
5118
5037 /* 5119 /*
5038 * Look up the group leader (we will attach this event to it): 5120 * Look up the group leader (we will attach this event to it):
5039 */ 5121 */
5040 group_leader = NULL; 5122 if (group_leader) {
5041 if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) {
5042 err = -EINVAL; 5123 err = -EINVAL;
5043 group_file = fget_light(group_fd, &fput_needed);
5044 if (!group_file)
5045 goto err_put_context;
5046 if (group_file->f_op != &perf_fops)
5047 goto err_put_context;
5048 5124
5049 group_leader = group_file->private_data;
5050 /* 5125 /*
5051 * Do not allow a recursive hierarchy (this new sibling 5126 * Do not allow a recursive hierarchy (this new sibling
5052 * becoming part of another group-sibling): 5127 * becoming part of another group-sibling):
@@ -5068,9 +5143,16 @@ SYSCALL_DEFINE5(perf_event_open,
5068 5143
5069 event = perf_event_alloc(&attr, cpu, ctx, group_leader, 5144 event = perf_event_alloc(&attr, cpu, ctx, group_leader,
5070 NULL, NULL, GFP_KERNEL); 5145 NULL, NULL, GFP_KERNEL);
5071 err = PTR_ERR(event); 5146 if (IS_ERR(event)) {
5072 if (IS_ERR(event)) 5147 err = PTR_ERR(event);
5073 goto err_put_context; 5148 goto err_put_context;
5149 }
5150
5151 if (output_event) {
5152 err = perf_event_set_output(event, output_event);
5153 if (err)
5154 goto err_free_put_context;
5155 }
5074 5156
5075 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR); 5157 event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
5076 if (IS_ERR(event_file)) { 5158 if (IS_ERR(event_file)) {
@@ -5078,12 +5160,6 @@ SYSCALL_DEFINE5(perf_event_open,
5078 goto err_free_put_context; 5160 goto err_free_put_context;
5079 } 5161 }
5080 5162
5081 if (flags & PERF_FLAG_FD_OUTPUT) {
5082 err = perf_event_set_output(event, group_fd);
5083 if (err)
5084 goto err_fput_free_put_context;
5085 }
5086
5087 event->filp = event_file; 5163 event->filp = event_file;
5088 WARN_ON_ONCE(ctx->parent_ctx); 5164 WARN_ON_ONCE(ctx->parent_ctx);
5089 mutex_lock(&ctx->mutex); 5165 mutex_lock(&ctx->mutex);
@@ -5097,12 +5173,16 @@ SYSCALL_DEFINE5(perf_event_open,
5097 list_add_tail(&event->owner_entry, &current->perf_event_list); 5173 list_add_tail(&event->owner_entry, &current->perf_event_list);
5098 mutex_unlock(&current->perf_event_mutex); 5174 mutex_unlock(&current->perf_event_mutex);
5099 5175
5176 /*
5177 * Drop the reference on the group_event after placing the
5178 * new event on the sibling_list. This ensures destruction
5179 * of the group leader will find the pointer to itself in
5180 * perf_group_detach().
5181 */
5100 fput_light(group_file, fput_needed); 5182 fput_light(group_file, fput_needed);
5101 fd_install(event_fd, event_file); 5183 fd_install(event_fd, event_file);
5102 return event_fd; 5184 return event_fd;
5103 5185
5104err_fput_free_put_context:
5105 fput(event_file);
5106err_free_put_context: 5186err_free_put_context:
5107 free_event(event); 5187 free_event(event);
5108err_put_context: 5188err_put_context:
@@ -5420,6 +5500,7 @@ static void perf_free_event(struct perf_event *event,
5420 5500
5421 fput(parent->filp); 5501 fput(parent->filp);
5422 5502
5503 perf_group_detach(event);
5423 list_del_event(event, ctx); 5504 list_del_event(event, ctx);
5424 free_event(event); 5505 free_event(event);
5425} 5506}
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 36ea2b65dcd..638711c1750 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -842,6 +842,7 @@ static void blk_add_trace_split(void *ignore,
842 842
843/** 843/**
844 * blk_add_trace_remap - Add a trace for a remap operation 844 * blk_add_trace_remap - Add a trace for a remap operation
845 * @ignore: trace callback data parameter (not used)
845 * @q: queue the io is for 846 * @q: queue the io is for
846 * @bio: the source bio 847 * @bio: the source bio
847 * @dev: target device 848 * @dev: target device
@@ -873,6 +874,7 @@ static void blk_add_trace_remap(void *ignore,
873 874
874/** 875/**
875 * blk_add_trace_rq_remap - Add a trace for a request-remap operation 876 * blk_add_trace_rq_remap - Add a trace for a request-remap operation
877 * @ignore: trace callback data parameter (not used)
876 * @q: queue the io is for 878 * @q: queue the io is for
877 * @rq: the source request 879 * @rq: the source request
878 * @dev: target device 880 * @dev: target device
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index cb6f365016e..e6f65887842 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -116,7 +116,7 @@ int perf_trace_enable(struct perf_event *p_event)
116 if (WARN_ON_ONCE(!list)) 116 if (WARN_ON_ONCE(!list))
117 return -EINVAL; 117 return -EINVAL;
118 118
119 list = per_cpu_ptr(list, smp_processor_id()); 119 list = this_cpu_ptr(list);
120 hlist_add_head_rcu(&p_event->hlist_entry, list); 120 hlist_add_head_rcu(&p_event->hlist_entry, list);
121 121
122 return 0; 122 return 0;
@@ -132,8 +132,9 @@ void perf_trace_destroy(struct perf_event *p_event)
132 struct ftrace_event_call *tp_event = p_event->tp_event; 132 struct ftrace_event_call *tp_event = p_event->tp_event;
133 int i; 133 int i;
134 134
135 mutex_lock(&event_mutex);
135 if (--tp_event->perf_refcount > 0) 136 if (--tp_event->perf_refcount > 0)
136 return; 137 goto out;
137 138
138 if (tp_event->class->reg) 139 if (tp_event->class->reg)
139 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER); 140 tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
@@ -142,6 +143,12 @@ void perf_trace_destroy(struct perf_event *p_event)
142 tp_event->class->perf_probe, 143 tp_event->class->perf_probe,
143 tp_event); 144 tp_event);
144 145
146 /*
147 * Ensure our callback won't be called anymore. See
148 * tracepoint_probe_unregister() and __DO_TRACE().
149 */
150 synchronize_sched();
151
145 free_percpu(tp_event->perf_events); 152 free_percpu(tp_event->perf_events);
146 tp_event->perf_events = NULL; 153 tp_event->perf_events = NULL;
147 154
@@ -151,6 +158,8 @@ void perf_trace_destroy(struct perf_event *p_event)
151 perf_trace_buf[i] = NULL; 158 perf_trace_buf[i] = NULL;
152 } 159 }
153 } 160 }
161out:
162 mutex_unlock(&event_mutex);
154} 163}
155 164
156__kprobes void *perf_trace_buf_prepare(int size, unsigned short type, 165__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
@@ -169,7 +178,7 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
169 if (*rctxp < 0) 178 if (*rctxp < 0)
170 return NULL; 179 return NULL;
171 180
172 raw_data = per_cpu_ptr(perf_trace_buf[*rctxp], smp_processor_id()); 181 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
173 182
174 /* zero the dead bytes from align to not leak stack to user */ 183 /* zero the dead bytes from align to not leak stack to user */
175 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); 184 memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index faf7cefd15d..f52b5f50299 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1359,7 +1359,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
1359 for (i = 0; i < tp->nr_args; i++) 1359 for (i = 0; i < tp->nr_args; i++)
1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1360 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1361 1361
1362 head = per_cpu_ptr(call->perf_events, smp_processor_id()); 1362 head = this_cpu_ptr(call->perf_events);
1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head); 1363 perf_trace_buf_submit(entry, size, rctx, entry->ip, 1, regs, head);
1364} 1364}
1365 1365
@@ -1392,7 +1392,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1392 for (i = 0; i < tp->nr_args; i++) 1392 for (i = 0; i < tp->nr_args; i++)
1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset); 1393 call_fetch(&tp->args[i].fetch, regs, data + tp->args[i].offset);
1394 1394
1395 head = per_cpu_ptr(call->perf_events, smp_processor_id()); 1395 head = this_cpu_ptr(call->perf_events);
1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head); 1396 perf_trace_buf_submit(entry, size, rctx, entry->ret_ip, 1, regs, head);
1397} 1397}
1398 1398
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index d2c859cec9e..34e35804304 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -519,7 +519,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
519 syscall_get_arguments(current, regs, 0, sys_data->nb_args, 519 syscall_get_arguments(current, regs, 0, sys_data->nb_args,
520 (unsigned long *)&rec->args); 520 (unsigned long *)&rec->args);
521 521
522 head = per_cpu_ptr(sys_data->enter_event->perf_events, smp_processor_id()); 522 head = this_cpu_ptr(sys_data->enter_event->perf_events);
523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 523 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
524} 524}
525 525
@@ -595,7 +595,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
595 rec->nr = syscall_nr; 595 rec->nr = syscall_nr;
596 rec->ret = syscall_get_return_value(current, regs); 596 rec->ret = syscall_get_return_value(current, regs);
597 597
598 head = per_cpu_ptr(sys_data->exit_event->perf_events, smp_processor_id()); 598 head = this_cpu_ptr(sys_data->exit_event->perf_events);
599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head); 599 perf_trace_buf_submit(rec, size, rctx, 0, 1, regs, head);
600} 600}
601 601