author     Peter Zijlstra <peterz@infradead.org>    2015-01-23 06:24:14 -0500
committer  Ingo Molnar <mingo@kernel.org>           2015-02-04 02:07:10 -0500
commit     f63a8daa5812afef4f06c962351687e1ff9ccb2b (patch)
tree       dd488b9bda2685c213a09d3feeb9e4f8a6d9dcaa /kernel/events
parent     652884fe0c7bd57f534c5fe68d6def0dc8c4b7ed (diff)
perf: Fix event->ctx locking
There have been a few reported issues wrt. the lack of locking around
changing event->ctx. This patch tries to address those.
It avoids the whole rwsem thing; and while it appears to work, please
give it some thought in review.
What I did fail at is sensible runtime checks on the use of
event->ctx; the RCU use makes it very hard.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/20150123125834.209535886@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r--   kernel/events/core.c   244
1 file changed, 207 insertions, 37 deletions
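For readers skimming the patch, the core of the change is the new perf_event_ctx_lock() helper added in the first hunk below: it pins event->ctx before sleeping on its mutex, and retries if the context is torn down or swizzled while waiting. The following is an annotated restatement of that helper, copied from the hunk with explanatory comments added; the hunk itself is the authoritative version.

/*
 * Annotated restatement of the perf_event_ctx_lock() helper added by this
 * patch (see the first hunk below for the authoritative version).
 */
static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
{
        struct perf_event_context *ctx;

again:
        rcu_read_lock();
        ctx = ACCESS_ONCE(event->ctx);          /* snapshot the possibly-moving pointer */
        if (!atomic_inc_not_zero(&ctx->refcount)) {
                /* ctx is already on its way out; reload and retry */
                rcu_read_unlock();
                goto again;
        }
        rcu_read_unlock();                      /* the reference now pins ctx */

        mutex_lock(&ctx->mutex);                /* may sleep; ctx can be swizzled meanwhile */
        if (event->ctx != ctx) {
                /* lost a race with move_group / perf_pmu_migrate_context() */
                mutex_unlock(&ctx->mutex);
                put_ctx(ctx);
                goto again;
        }

        return ctx;                             /* returns with a reference and ctx->mutex held */
}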
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b358cb38e4a5..417a96bf3d41 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -907,6 +907,77 @@ static void put_ctx(struct perf_event_context *ctx)
 }
 
 /*
+ * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
+ * perf_pmu_migrate_context() we need some magic.
+ *
+ * Those places that change perf_event::ctx will hold both
+ * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
+ *
+ * Lock ordering is by mutex address. There is one other site where
+ * perf_event_context::mutex nests and that is put_event(). But remember that
+ * that is a parent<->child context relation, and migration does not affect
+ * children, therefore these two orderings should not interact.
+ *
+ * The change in perf_event::ctx does not affect children (as claimed above)
+ * because the sys_perf_event_open() case will install a new event and break
+ * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
+ * concerned with cpuctx and that doesn't have children.
+ *
+ * The places that change perf_event::ctx will issue:
+ *
+ *   perf_remove_from_context();
+ *   synchronize_rcu();
+ *   perf_install_in_context();
+ *
+ * to affect the change. The remove_from_context() + synchronize_rcu() should
+ * quiesce the event, after which we can install it in the new location. This
+ * means that only external vectors (perf_fops, prctl) can perturb the event
+ * while in transit. Therefore all such accessors should also acquire
+ * perf_event_context::mutex to serialize against this.
+ *
+ * However; because event->ctx can change while we're waiting to acquire
+ * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
+ * function.
+ *
+ * Lock order:
+ *   task_struct::perf_event_mutex
+ *     perf_event_context::mutex
+ *       perf_event_context::lock
+ *       perf_event::child_mutex;
+ *       perf_event::mmap_mutex
+ *       mmap_sem
+ */
+static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
+{
+        struct perf_event_context *ctx;
+
+again:
+        rcu_read_lock();
+        ctx = ACCESS_ONCE(event->ctx);
+        if (!atomic_inc_not_zero(&ctx->refcount)) {
+                rcu_read_unlock();
+                goto again;
+        }
+        rcu_read_unlock();
+
+        mutex_lock(&ctx->mutex);
+        if (event->ctx != ctx) {
+                mutex_unlock(&ctx->mutex);
+                put_ctx(ctx);
+                goto again;
+        }
+
+        return ctx;
+}
+
+static void perf_event_ctx_unlock(struct perf_event *event,
+                                  struct perf_event_context *ctx)
+{
+        mutex_unlock(&ctx->mutex);
+        put_ctx(ctx);
+}
+
+/*
  * This must be done under the ctx->lock, such as to serialize against
  * context_equiv(), therefore we cannot call put_ctx() since that might end up
  * calling scheduler related locks and ctx->lock nests inside those.
@@ -1666,7 +1737,7 @@ int __perf_event_disable(void *info)
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
  */
-void perf_event_disable(struct perf_event *event)
+static void _perf_event_disable(struct perf_event *event)
 {
         struct perf_event_context *ctx = event->ctx;
         struct task_struct *task = ctx->task;
@@ -1707,6 +1778,19 @@ retry:
         }
         raw_spin_unlock_irq(&ctx->lock);
 }
+
+/*
+ * Strictly speaking kernel users cannot create groups and therefore this
+ * interface does not need the perf_event_ctx_lock() magic.
+ */
+void perf_event_disable(struct perf_event *event)
+{
+        struct perf_event_context *ctx;
+
+        ctx = perf_event_ctx_lock(event);
+        _perf_event_disable(event);
+        perf_event_ctx_unlock(event, ctx);
+}
 EXPORT_SYMBOL_GPL(perf_event_disable);
 
 static void perf_set_shadow_time(struct perf_event *event,
@@ -2170,7 +2254,7 @@ unlock:
  * perf_event_for_each_child or perf_event_for_each as described
  * for perf_event_disable.
  */
-void perf_event_enable(struct perf_event *event)
+static void _perf_event_enable(struct perf_event *event)
 {
         struct perf_event_context *ctx = event->ctx;
         struct task_struct *task = ctx->task;
@@ -2226,9 +2310,21 @@ retry:
 out:
         raw_spin_unlock_irq(&ctx->lock);
 }
+
+/*
+ * See perf_event_disable();
+ */
+void perf_event_enable(struct perf_event *event)
+{
+        struct perf_event_context *ctx;
+
+        ctx = perf_event_ctx_lock(event);
+        _perf_event_enable(event);
+        perf_event_ctx_unlock(event, ctx);
+}
 EXPORT_SYMBOL_GPL(perf_event_enable);
 
-int perf_event_refresh(struct perf_event *event, int refresh)
+static int _perf_event_refresh(struct perf_event *event, int refresh)
 {
         /*
          * not supported on inherited events
@@ -2237,10 +2333,25 @@ int perf_event_refresh(struct perf_event *event, int refresh)
                 return -EINVAL;
 
         atomic_add(refresh, &event->event_limit);
-        perf_event_enable(event);
+        _perf_event_enable(event);
 
         return 0;
 }
+
+/*
+ * See perf_event_disable()
+ */
+int perf_event_refresh(struct perf_event *event, int refresh)
+{
+        struct perf_event_context *ctx;
+        int ret;
+
+        ctx = perf_event_ctx_lock(event);
+        ret = _perf_event_refresh(event, refresh);
+        perf_event_ctx_unlock(event, ctx);
+
+        return ret;
+}
 EXPORT_SYMBOL_GPL(perf_event_refresh);
 
 static void ctx_sched_out(struct perf_event_context *ctx,
@@ -3433,7 +3544,16 @@ static void perf_remove_from_owner(struct perf_event *event)
         rcu_read_unlock();
 
         if (owner) {
-                mutex_lock(&owner->perf_event_mutex);
+                /*
+                 * If we're here through perf_event_exit_task() we're already
+                 * holding ctx->mutex which would be an inversion wrt. the
+                 * normal lock order.
+                 *
+                 * However we can safely take this lock because its the child
+                 * ctx->mutex.
+                 */
+                mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
+
                 /*
                  * We have to re-check the event->owner field, if it is cleared
                  * we raced with perf_event_exit_task(), acquiring the mutex
@@ -3559,12 +3679,13 @@ static int perf_event_read_group(struct perf_event *event,
                                  u64 read_format, char __user *buf)
 {
         struct perf_event *leader = event->group_leader, *sub;
-        int n = 0, size = 0, ret = -EFAULT;
         struct perf_event_context *ctx = leader->ctx;
-        u64 values[5];
+        int n = 0, size = 0, ret;
         u64 count, enabled, running;
+        u64 values[5];
+
+        lockdep_assert_held(&ctx->mutex);
 
-        mutex_lock(&ctx->mutex);
         count = perf_event_read_value(leader, &enabled, &running);
 
         values[n++] = 1 + leader->nr_siblings;
@@ -3579,7 +3700,7 @@ static int perf_event_read_group(struct perf_event *event,
         size = n * sizeof(u64);
 
         if (copy_to_user(buf, values, size))
-                goto unlock;
+                return -EFAULT;
 
         ret = size;
 
@@ -3593,14 +3714,11 @@ static int perf_event_read_group(struct perf_event *event,
                 size = n * sizeof(u64);
 
                 if (copy_to_user(buf + ret, values, size)) {
-                        ret = -EFAULT;
-                        goto unlock;
+                        return -EFAULT;
                 }
 
                 ret += size;
         }
-unlock:
-        mutex_unlock(&ctx->mutex);
 
         return ret;
 }
@@ -3672,8 +3790,14 @@ static ssize_t
 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
         struct perf_event *event = file->private_data;
+        struct perf_event_context *ctx;
+        int ret;
 
-        return perf_read_hw(event, buf, count);
+        ctx = perf_event_ctx_lock(event);
+        ret = perf_read_hw(event, buf, count);
+        perf_event_ctx_unlock(event, ctx);
+
+        return ret;
 }
 
 static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -3699,7 +3823,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
         return events;
 }
 
-static void perf_event_reset(struct perf_event *event)
+static void _perf_event_reset(struct perf_event *event)
 {
         (void)perf_event_read(event);
         local64_set(&event->count, 0);
@@ -3718,6 +3842,7 @@ static void perf_event_for_each_child(struct perf_event *event,
         struct perf_event *child;
 
         WARN_ON_ONCE(event->ctx->parent_ctx);
+
         mutex_lock(&event->child_mutex);
         func(event);
         list_for_each_entry(child, &event->child_list, child_list)
@@ -3731,14 +3856,13 @@ static void perf_event_for_each(struct perf_event *event,
         struct perf_event_context *ctx = event->ctx;
         struct perf_event *sibling;
 
-        WARN_ON_ONCE(ctx->parent_ctx);
-        mutex_lock(&ctx->mutex);
+        lockdep_assert_held(&ctx->mutex);
+
         event = event->group_leader;
 
         perf_event_for_each_child(event, func);
         list_for_each_entry(sibling, &event->sibling_list, group_entry)
                 perf_event_for_each_child(sibling, func);
-        mutex_unlock(&ctx->mutex);
 }
 
 static int perf_event_period(struct perf_event *event, u64 __user *arg)
@@ -3808,25 +3932,24 @@ static int perf_event_set_output(struct perf_event *event,
                                  struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
 
-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
-        struct perf_event *event = file->private_data;
         void (*func)(struct perf_event *);
         u32 flags = arg;
 
         switch (cmd) {
         case PERF_EVENT_IOC_ENABLE:
-                func = perf_event_enable;
+                func = _perf_event_enable;
                 break;
         case PERF_EVENT_IOC_DISABLE:
-                func = perf_event_disable;
+                func = _perf_event_disable;
                 break;
         case PERF_EVENT_IOC_RESET:
-                func = perf_event_reset;
+                func = _perf_event_reset;
                 break;
 
         case PERF_EVENT_IOC_REFRESH:
-                return perf_event_refresh(event, arg);
+                return _perf_event_refresh(event, arg);
 
         case PERF_EVENT_IOC_PERIOD:
                 return perf_event_period(event, (u64 __user *)arg);
@@ -3873,6 +3996,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
         return 0;
 }
 
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+        struct perf_event *event = file->private_data;
+        struct perf_event_context *ctx;
+        long ret;
+
+        ctx = perf_event_ctx_lock(event);
+        ret = _perf_ioctl(event, cmd, arg);
+        perf_event_ctx_unlock(event, ctx);
+
+        return ret;
+}
+
 #ifdef CONFIG_COMPAT
 static long perf_compat_ioctl(struct file *file, unsigned int cmd,
                               unsigned long arg)
@@ -3895,11 +4031,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
 
 int perf_event_task_enable(void)
 {
+        struct perf_event_context *ctx;
         struct perf_event *event;
 
         mutex_lock(&current->perf_event_mutex);
-        list_for_each_entry(event, &current->perf_event_list, owner_entry)
-                perf_event_for_each_child(event, perf_event_enable);
+        list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+                ctx = perf_event_ctx_lock(event);
+                perf_event_for_each_child(event, _perf_event_enable);
+                perf_event_ctx_unlock(event, ctx);
+        }
         mutex_unlock(&current->perf_event_mutex);
 
         return 0;
@@ -3907,11 +4047,15 @@ int perf_event_task_enable(void)
 
 int perf_event_task_disable(void)
 {
+        struct perf_event_context *ctx;
         struct perf_event *event;
 
         mutex_lock(&current->perf_event_mutex);
-        list_for_each_entry(event, &current->perf_event_list, owner_entry)
-                perf_event_for_each_child(event, perf_event_disable);
+        list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+                ctx = perf_event_ctx_lock(event);
+                perf_event_for_each_child(event, _perf_event_disable);
+                perf_event_ctx_unlock(event, ctx);
+        }
         mutex_unlock(&current->perf_event_mutex);
 
         return 0;
@@ -7269,6 +7413,15 @@ out:
         return ret;
 }
 
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+        if (b < a)
+                swap(a, b);
+
+        mutex_lock(a);
+        mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -7284,7 +7437,7 @@ SYSCALL_DEFINE5(perf_event_open,
         struct perf_event *group_leader = NULL, *output_event = NULL;
         struct perf_event *event, *sibling;
         struct perf_event_attr attr;
-        struct perf_event_context *ctx;
+        struct perf_event_context *ctx, *uninitialized_var(gctx);
         struct file *event_file = NULL;
         struct fd group = {NULL, 0};
         struct task_struct *task = NULL;
@@ -7482,9 +7635,14 @@ SYSCALL_DEFINE5(perf_event_open,
         }
 
         if (move_group) {
-                struct perf_event_context *gctx = group_leader->ctx;
+                gctx = group_leader->ctx;
+
+                /*
+                 * See perf_event_ctx_lock() for comments on the details
+                 * of swizzling perf_event::ctx.
+                 */
+                mutex_lock_double(&gctx->mutex, &ctx->mutex);
 
-                mutex_lock(&gctx->mutex);
                 perf_remove_from_context(group_leader, false);
 
                 /*
@@ -7499,15 +7657,19 @@ SYSCALL_DEFINE5(perf_event_open,
                         perf_event__state_init(sibling);
                         put_ctx(gctx);
                 }
-                mutex_unlock(&gctx->mutex);
-                put_ctx(gctx);
+        } else {
+                mutex_lock(&ctx->mutex);
         }
 
         WARN_ON_ONCE(ctx->parent_ctx);
-        mutex_lock(&ctx->mutex);
 
         if (move_group) {
+                /*
+                 * Wait for everybody to stop referencing the events through
+                 * the old lists, before installing it on new lists.
+                 */
                 synchronize_rcu();
+
                 perf_install_in_context(ctx, group_leader, group_leader->cpu);
                 get_ctx(ctx);
                 list_for_each_entry(sibling, &group_leader->sibling_list,
@@ -7519,6 +7681,11 @@ SYSCALL_DEFINE5(perf_event_open,
 
         perf_install_in_context(ctx, event, event->cpu);
         perf_unpin_context(ctx);
+
+        if (move_group) {
+                mutex_unlock(&gctx->mutex);
+                put_ctx(gctx);
+        }
         mutex_unlock(&ctx->mutex);
 
         put_online_cpus();
@@ -7626,7 +7793,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
         src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
         dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
 
-        mutex_lock(&src_ctx->mutex);
+        /*
+         * See perf_event_ctx_lock() for comments on the details
+         * of swizzling perf_event::ctx.
+         */
+        mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
         list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
                                  event_entry) {
                 perf_remove_from_context(event, false);
@@ -7634,11 +7805,9 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
                 put_ctx(src_ctx);
                 list_add(&event->migrate_entry, &events);
         }
-        mutex_unlock(&src_ctx->mutex);
 
         synchronize_rcu();
 
-        mutex_lock(&dst_ctx->mutex);
         list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
                 list_del(&event->migrate_entry);
                 if (event->state >= PERF_EVENT_STATE_OFF)
@@ -7648,6 +7817,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
                 get_ctx(dst_ctx);
         }
         mutex_unlock(&dst_ctx->mutex);
+        mutex_unlock(&src_ctx->mutex);
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
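The mutex_lock_double() helper introduced above avoids ABBA deadlocks by always taking the two context mutexes in a fixed global order, here ordered by address. The same idiom works outside the kernel; a minimal standalone pthreads sketch (hypothetical names, not kernel code) is:

#include <pthread.h>
#include <stdint.h>

/*
 * Acquire two mutexes in address order so that any two threads locking the
 * same pair agree on the acquisition order and cannot deadlock ABBA-style.
 */
static void lock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
        if ((uintptr_t)b < (uintptr_t)a) {
                pthread_mutex_t *tmp = a;
                a = b;
                b = tmp;
        }
        pthread_mutex_lock(a);
        pthread_mutex_lock(b);
}

/* Unlock order does not matter for deadlock avoidance. */
static void unlock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
        pthread_mutex_unlock(a);
        pthread_mutex_unlock(b);
}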