author    Peter Zijlstra <peterz@infradead.org>  2016-01-15 09:07:41 -0500
committer Ingo Molnar <mingo@kernel.org>         2016-01-29 02:35:35 -0500
commit    c6e5b73242d2d9172ea880483bc4ba7ffca0cfb2 (patch)
tree      971eae178c1644b5f0e9aebd14fac11b5a02d9c0
parent    60beda849343494b2a598b927630bbe293c1cc6e (diff)
perf: Synchronously clean up child events
The orphan cleanup workqueue doesn't always catch orphans, for example
if they never schedule after they are orphaned. IOW, the event leak is
still very real. It also wouldn't work for kernel counters.

Doing it synchronously is a little hairy due to lock inversion issues,
but it is made to work.

Patch based on work by Alexander Shishkin.

Suggested-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: vince@deater.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
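[Editor's note] The lock inversion the message alludes to is the heart of the
patch: ctx::mutex is the outer lock and event->child_mutex the inner one, but
the release path starts out holding only the inner lock. The new
perf_event_release_kernel() therefore pins the context with a reference, drops
the inner lock, takes the outer lock, re-takes the inner lock, and then
revalidates that the child it was about to free is still the first entry on
the list. Below is a minimal userspace sketch of that same
unlock/relock-and-revalidate dance; it is illustrative only, using pthread
mutexes and an invented node/release_children() in place of the kernel's
types, not the kernel code itself.

/*
 * Minimal sketch (not kernel code) of the lock-inversion dance in
 * perf_event_release_kernel(): ctx_mutex stands in for ctx::mutex
 * (outer), child_mutex for event->child_mutex (inner), and a singly
 * linked list for the child_list. All names are illustrative.
 */
#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; };

static pthread_mutex_t ctx_mutex   = PTHREAD_MUTEX_INITIALIZER; /* outer lock */
static pthread_mutex_t child_mutex = PTHREAD_MUTEX_INITIALIZER; /* inner lock */
static struct node *child_list;          /* head of the "child" list */

static void release_children(void)
{
again:
	pthread_mutex_lock(&child_mutex);
	if (child_list) {
		struct node *child = child_list;

		/*
		 * Lock order is ctx_mutex -> child_mutex, but we hold only
		 * the inner lock. Drop it, take the outer lock, re-take the
		 * inner one. Another thread may free "child" in that window,
		 * so don't touch it again until we revalidate.
		 */
		pthread_mutex_unlock(&child_mutex);
		pthread_mutex_lock(&ctx_mutex);
		pthread_mutex_lock(&child_mutex);

		/* Still the first entry? Then it wasn't freed; remove it. */
		if (child_list == child) {
			child_list = child->next;
			printf("released one child\n");
		}

		pthread_mutex_unlock(&child_mutex);
		pthread_mutex_unlock(&ctx_mutex);
		goto again;	/* rescan from the top, as the patch does */
	}
	pthread_mutex_unlock(&child_mutex);
}

int main(void)
{
	struct node a = { NULL }, b = { &a };

	child_list = &b;	/* two "children": b -> a */
	release_children();
	return 0;
}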
-rw-r--r--  include/linux/perf_event.h    3
-rw-r--r--  kernel/events/core.c        174
2 files changed, 84 insertions(+), 93 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4f90434b8d64..b35a61a481fa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -634,9 +634,6 @@ struct perf_event_context {
 	int				nr_cgroups;	 /* cgroup evts */
 	void				*task_ctx_data; /* pmu specific data */
 	struct rcu_head			rcu_head;
-
-	struct delayed_work		orphans_remove;
-	bool				orphans_remove_sched;
 };
 
 /*
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e549cf2accdd..98c862aff8fa 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -49,8 +49,6 @@
 
 #include <asm/irq_regs.h>
 
-static struct workqueue_struct *perf_wq;
-
 typedef int (*remote_function_f)(void *);
 
 struct remote_function_call {
@@ -1645,45 +1643,11 @@ out:
 	perf_event__header_size(tmp);
 }
 
-/*
- * User event without the task.
- */
 static bool is_orphaned_event(struct perf_event *event)
 {
-	return event && event->state == PERF_EVENT_STATE_EXIT;
-}
-
-/*
- * Event has a parent but parent's task finished and it's
- * alive only because of children holding refference.
- */
-static bool is_orphaned_child(struct perf_event *event)
-{
-	return is_orphaned_event(event->parent);
-}
-
-static void orphans_remove_work(struct work_struct *work);
-
-static void schedule_orphans_remove(struct perf_event_context *ctx)
-{
-	if (!ctx->task || ctx->orphans_remove_sched || !perf_wq)
-		return;
-
-	if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) {
-		get_ctx(ctx);
-		ctx->orphans_remove_sched = true;
-	}
-}
-
-static int __init perf_workqueue_init(void)
-{
-	perf_wq = create_singlethread_workqueue("perf");
-	WARN(!perf_wq, "failed to create perf workqueue\n");
-	return perf_wq ? 0 : -1;
+	return event->state == PERF_EVENT_STATE_EXIT;
 }
 
-core_initcall(perf_workqueue_init);
-
 static inline int pmu_filter_match(struct perf_event *event)
 {
 	struct pmu *pmu = event->pmu;
@@ -1744,9 +1708,6 @@ event_sched_out(struct perf_event *event,
 	if (event->attr.exclusive || !cpuctx->active_oncpu)
 		cpuctx->exclusive = 0;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 	perf_pmu_enable(event->pmu);
 }
 
@@ -1984,9 +1945,6 @@ event_sched_in(struct perf_event *event,
 	if (event->attr.exclusive)
 		cpuctx->exclusive = 1;
 
-	if (is_orphaned_child(event))
-		schedule_orphans_remove(ctx);
-
 out:
 	perf_pmu_enable(event->pmu);
 
@@ -3369,7 +3327,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
 	INIT_LIST_HEAD(&ctx->flexible_groups);
 	INIT_LIST_HEAD(&ctx->event_list);
 	atomic_set(&ctx->refcount, 1);
-	INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work);
 }
 
 static struct perf_event_context *
@@ -3782,11 +3739,22 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
-	struct perf_event_context *ctx;
-
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	_free_event(event);
+}
+
+/*
+ * Kill an event dead; while event::refcount will preserve the event
+ * object, it will not preserve its functionality. Once the last 'user'
+ * gives up the object, we'll destroy the thing.
+ */
+int perf_event_release_kernel(struct perf_event *event)
+{
+	struct perf_event_context *ctx;
+	struct perf_event *child, *tmp;
+
 	if (!is_kernel_event(event))
 		perf_remove_from_owner(event);
 
@@ -3811,14 +3779,70 @@ static void put_event(struct perf_event *event)
 	 * At this point we must have event->state == PERF_EVENT_STATE_EXIT,
 	 * either from the above perf_remove_from_context() or through
 	 * perf_event_exit_event().
+	 *
+	 * Therefore, anybody acquiring event->child_mutex after the below
+	 * loop _must_ also see this, most importantly inherit_event() which
+	 * will avoid placing more children on the list.
+	 *
+	 * Thus this guarantees that we will in fact observe and kill _ALL_
+	 * child events.
 	 */
 	WARN_ON_ONCE(event->state != PERF_EVENT_STATE_EXIT);
 
-	_free_event(event);
-}
-
-int perf_event_release_kernel(struct perf_event *event)
-{
+again:
+	mutex_lock(&event->child_mutex);
+	list_for_each_entry(child, &event->child_list, child_list) {
+
+		/*
+		 * Cannot change, child events are not migrated, see the
+		 * comment with perf_event_ctx_lock_nested().
+		 */
+		ctx = lockless_dereference(child->ctx);
+		/*
+		 * Since child_mutex nests inside ctx::mutex, we must jump
+		 * through hoops. We start by grabbing a reference on the ctx.
+		 *
+		 * Since the event cannot get freed while we hold the
+		 * child_mutex, the context must also exist and have a !0
+		 * reference count.
+		 */
+		get_ctx(ctx);
+
+		/*
+		 * Now that we have a ctx ref, we can drop child_mutex, and
+		 * acquire ctx::mutex without fear of it going away. Then we
+		 * can re-acquire child_mutex.
+		 */
+		mutex_unlock(&event->child_mutex);
+		mutex_lock(&ctx->mutex);
+		mutex_lock(&event->child_mutex);
+
+		/*
+		 * Now that we hold ctx::mutex and child_mutex, revalidate our
+		 * state, if child is still the first entry, it didn't get freed
+		 * and we can continue doing so.
+		 */
+		tmp = list_first_entry_or_null(&event->child_list,
+					       struct perf_event, child_list);
+		if (tmp == child) {
+			perf_remove_from_context(child, DETACH_GROUP);
+			list_del(&child->child_list);
+			free_event(child);
+			/*
+			 * This matches the refcount bump in inherit_event();
+			 * this can't be the last reference.
+			 */
+			put_event(event);
+		}
+
+		mutex_unlock(&event->child_mutex);
+		mutex_unlock(&ctx->mutex);
+		put_ctx(ctx);
+		goto again;
+	}
+	mutex_unlock(&event->child_mutex);
+
+	/* Must be the last reference */
 	put_event(event);
 	return 0;
 }
@@ -3829,46 +3853,10 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
  */
 static int perf_release(struct inode *inode, struct file *file)
 {
-	put_event(file->private_data);
+	perf_event_release_kernel(file->private_data);
 	return 0;
 }
 
-/*
- * Remove all orphanes events from the context.
- */
-static void orphans_remove_work(struct work_struct *work)
-{
-	struct perf_event_context *ctx;
-	struct perf_event *event, *tmp;
-
-	ctx = container_of(work, struct perf_event_context,
-			   orphans_remove.work);
-
-	mutex_lock(&ctx->mutex);
-	list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) {
-		struct perf_event *parent_event = event->parent;
-
-		if (!is_orphaned_child(event))
-			continue;
-
-		perf_remove_from_context(event, DETACH_GROUP);
-
-		mutex_lock(&parent_event->child_mutex);
-		list_del_init(&event->child_list);
-		mutex_unlock(&parent_event->child_mutex);
-
-		free_event(event);
-		put_event(parent_event);
-	}
-
-	raw_spin_lock_irq(&ctx->lock);
-	ctx->orphans_remove_sched = false;
-	raw_spin_unlock_irq(&ctx->lock);
-	mutex_unlock(&ctx->mutex);
-
-	put_ctx(ctx);
-}
-
 u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
 {
 	struct perf_event *child;
@@ -8719,7 +8707,7 @@ perf_event_exit_event(struct perf_event *child_event,
 	if (parent_event)
 		perf_group_detach(child_event);
 	list_del_event(child_event, child_ctx);
-	child_event->state = PERF_EVENT_STATE_EXIT;
+	child_event->state = PERF_EVENT_STATE_EXIT; /* see perf_event_release_kernel() */
 	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
@@ -8977,8 +8965,16 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+	/*
+	 * is_orphaned_event() and list_add_tail(&parent_event->child_list)
+	 * must be under the same lock in order to serialize against
+	 * perf_event_release_kernel(), such that either we must observe
+	 * is_orphaned_event() or they will observe us on the child_list.
+	 */
+	mutex_lock(&parent_event->child_mutex);
 	if (is_orphaned_event(parent_event) ||
 	    !atomic_long_inc_not_zero(&parent_event->refcount)) {
+		mutex_unlock(&parent_event->child_mutex);
 		free_event(child_event);
 		return NULL;
 	}
@@ -9026,8 +9022,6 @@ inherit_event(struct perf_event *parent_event,
 	/*
 	 * Link this into the parent event's child list
 	 */
-	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
-	mutex_lock(&parent_event->child_mutex);
 	list_add_tail(&child_event->child_list, &parent_event->child_list);
 	mutex_unlock(&parent_event->child_mutex);
 
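[Editor's note] The inherit_event() hunks above implement the other half of
the guarantee: the orphan check and the list insertion now sit inside one
parent_event->child_mutex critical section, so either the inheriting task
observes the parent already in PERF_EVENT_STATE_EXIT and bails, or the
releasing task observes the new child on child_list and reaps it. The sketch
below models that either/or serialization in userspace; the flag, counter,
and function names are invented for illustration and deliberately simplified
(a bool and an int stand in for the event state and the child_list).

/*
 * Sketch of the check-and-publish vs. mark-and-reap serialization:
 * both sides run under the same mutex, so one must observe the other.
 * Illustrative only; not the kernel's data structures.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t child_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool parent_exited;	/* stands in for PERF_EVENT_STATE_EXIT */
static int  nchildren;		/* stands in for entries on child_list */

/* inherit side: orphan check and list insertion under one lock hold */
static bool inherit_child(void)
{
	bool ok;

	pthread_mutex_lock(&child_mutex);
	ok = !parent_exited;		/* is_orphaned_event()? */
	if (ok)
		nchildren++;		/* list_add_tail(...) */
	pthread_mutex_unlock(&child_mutex);
	return ok;
}

/* release side: mark EXIT and reap children under the same lock */
static void release_parent(void)
{
	pthread_mutex_lock(&child_mutex);
	parent_exited = true;
	printf("reaping %d child(ren)\n", nchildren);
	nchildren = 0;			/* the reap loop */
	pthread_mutex_unlock(&child_mutex);
}

int main(void)
{
	inherit_child();		/* parent alive: child is published */
	release_parent();		/* observes and reaps that child */
	printf("late inherit %s\n",
	       inherit_child() ? "succeeded" : "refused");	/* refused */
	return 0;
}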