path: root/kernel/events/core.c
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--  kernel/events/core.c  490
 1 file changed, 368 insertions, 122 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 19efcf13375a..f04daabfd1cf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu)
872 pmu->pmu_enable(pmu); 872 pmu->pmu_enable(pmu);
873} 873}
874 874
875static DEFINE_PER_CPU(struct list_head, rotation_list); 875static DEFINE_PER_CPU(struct list_head, active_ctx_list);
876 876
877/* 877/*
878 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized 878 * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and
879 * because they're strictly cpu affine and rotate_start is called with IRQs 879 * perf_event_task_tick() are fully serialized because they're strictly cpu
880 * disabled, while rotate_context is called from IRQ context. 880 * affine and perf_event_ctx{activate,deactivate} are called with IRQs
881 * disabled, while perf_event_task_tick is called from IRQ context.
881 */ 882 */
882static void perf_pmu_rotate_start(struct pmu *pmu) 883static void perf_event_ctx_activate(struct perf_event_context *ctx)
883{ 884{
884 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 885 struct list_head *head = this_cpu_ptr(&active_ctx_list);
885 struct list_head *head = this_cpu_ptr(&rotation_list);
886 886
887 WARN_ON(!irqs_disabled()); 887 WARN_ON(!irqs_disabled());
888 888
889 if (list_empty(&cpuctx->rotation_list)) 889 WARN_ON(!list_empty(&ctx->active_ctx_list));
890 list_add(&cpuctx->rotation_list, head); 890
891 list_add(&ctx->active_ctx_list, head);
892}
893
894static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
895{
896 WARN_ON(!irqs_disabled());
897
898 WARN_ON(list_empty(&ctx->active_ctx_list));
899
900 list_del_init(&ctx->active_ctx_list);
891} 901}
892 902
893static void get_ctx(struct perf_event_context *ctx) 903static void get_ctx(struct perf_event_context *ctx)
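[Editor's note: the new per-CPU active_ctx_list replaces the old rotation_list; a context is linked on it exactly while it has at least one active event, as the later event_sched_in()/event_sched_out() hunks show with their 0<->1 transitions on ctx->nr_active. Below is a minimal userspace sketch of that bookkeeping, with illustrative names, an ordinary global list instead of per-CPU data, and none of the IRQ-disabling the kernel relies on.]

#include <assert.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }
static int  list_empty(const struct list_head *h) { return h->next == h; }

static void list_add(struct list_head *new, struct list_head *head)
{
	new->next = head->next;
	new->prev = head;
	head->next->prev = new;
	head->next = new;
}

static void list_del_init(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
	list_init(e);
}

struct ctx {
	int nr_active;
	struct list_head active_entry;
};

static struct list_head active_list;	/* stands in for the per-CPU list */

static void event_sched_in(struct ctx *ctx)
{
	if (!ctx->nr_active++)			/* 0 -> 1: context becomes active */
		list_add(&ctx->active_entry, &active_list);
}

static void event_sched_out(struct ctx *ctx)
{
	if (!--ctx->nr_active)			/* 1 -> 0: context goes idle */
		list_del_init(&ctx->active_entry);
}

int main(void)
{
	struct ctx c = { .nr_active = 0 };

	list_init(&active_list);
	list_init(&c.active_entry);

	event_sched_in(&c);
	event_sched_in(&c);
	event_sched_out(&c);
	assert(!list_empty(&active_list));	/* one event still active */
	event_sched_out(&c);
	assert(list_empty(&active_list));	/* fully idle again */
	puts("active-list bookkeeping ok");
	return 0;
}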
@@ -907,6 +917,84 @@ static void put_ctx(struct perf_event_context *ctx)
907} 917}
908 918
909/* 919/*
920 * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
921 * perf_pmu_migrate_context() we need some magic.
922 *
923 * Those places that change perf_event::ctx will hold both
924 * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
925 *
926 * Lock ordering is by mutex address. There is one other site where
927 * perf_event_context::mutex nests and that is put_event(). But remember that
928 * that is a parent<->child context relation, and migration does not affect
929 * children, therefore these two orderings should not interact.
930 *
931 * The change in perf_event::ctx does not affect children (as claimed above)
932 * because the sys_perf_event_open() case will install a new event and break
933 * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
934 * concerned with cpuctx and that doesn't have children.
935 *
936 * The places that change perf_event::ctx will issue:
937 *
938 * perf_remove_from_context();
939 * synchronize_rcu();
940 * perf_install_in_context();
941 *
942 * to affect the change. The remove_from_context() + synchronize_rcu() should
943 * quiesce the event, after which we can install it in the new location. This
944 * means that only external vectors (perf_fops, prctl) can perturb the event
945 * while in transit. Therefore all such accessors should also acquire
946 * perf_event_context::mutex to serialize against this.
947 *
948 * However; because event->ctx can change while we're waiting to acquire
949 * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
950 * function.
951 *
952 * Lock order:
953 * task_struct::perf_event_mutex
954 * perf_event_context::mutex
955 * perf_event_context::lock
956 * perf_event::child_mutex;
957 * perf_event::mmap_mutex
958 * mmap_sem
959 */
960static struct perf_event_context *
961perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
962{
963 struct perf_event_context *ctx;
964
965again:
966 rcu_read_lock();
967 ctx = ACCESS_ONCE(event->ctx);
968 if (!atomic_inc_not_zero(&ctx->refcount)) {
969 rcu_read_unlock();
970 goto again;
971 }
972 rcu_read_unlock();
973
974 mutex_lock_nested(&ctx->mutex, nesting);
975 if (event->ctx != ctx) {
976 mutex_unlock(&ctx->mutex);
977 put_ctx(ctx);
978 goto again;
979 }
980
981 return ctx;
982}
983
984static inline struct perf_event_context *
985perf_event_ctx_lock(struct perf_event *event)
986{
987 return perf_event_ctx_lock_nested(event, 0);
988}
989
990static void perf_event_ctx_unlock(struct perf_event *event,
991 struct perf_event_context *ctx)
992{
993 mutex_unlock(&ctx->mutex);
994 put_ctx(ctx);
995}
996
997/*
910 * This must be done under the ctx->lock, such as to serialize against 998 * This must be done under the ctx->lock, such as to serialize against
911 * context_equiv(), therefore we cannot call put_ctx() since that might end up 999 * context_equiv(), therefore we cannot call put_ctx() since that might end up
912 * calling scheduler related locks and ctx->lock nests inside those. 1000 * calling scheduler related locks and ctx->lock nests inside those.
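[Editor's note: perf_event_ctx_lock_nested() above handles event->ctx changing while we sleep: take a reference on the ctx we observed, acquire its mutex, then re-check that the event still points at it, retrying otherwise. The following is a hedged userspace model of that retry loop using C11 atomics and pthreads; it deliberately elides the RCU grace period the kernel uses to keep the ctx memory valid between the pointer load and the refcount increment.]

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct ctx {
	pthread_mutex_t mutex;
	atomic_int refcount;		/* 0 means "being freed" */
};

struct event {
	struct ctx *_Atomic ctx;	/* may be re-pointed while we sleep */
};

/* Take a reference only if the count has not already dropped to zero. */
static int refcount_inc_not_zero(atomic_int *r)
{
	int old = atomic_load(r);

	while (old) {
		if (atomic_compare_exchange_weak(r, &old, old + 1))
			return 1;
	}
	return 0;
}

static void put_ctx(struct ctx *ctx)
{
	if (atomic_fetch_sub(&ctx->refcount, 1) == 1)
		free(ctx);
}

static struct ctx *event_ctx_lock(struct event *event)
{
	struct ctx *ctx;

again:
	ctx = atomic_load(&event->ctx);
	/*
	 * NOTE: unlike the kernel, nothing here guarantees *ctx stays
	 * allocated between the load above and the increment below; the
	 * kernel closes that window with RCU.
	 */
	if (!refcount_inc_not_zero(&ctx->refcount))
		goto again;		/* ctx is going away; reload the pointer */

	pthread_mutex_lock(&ctx->mutex);
	if (atomic_load(&event->ctx) != ctx) {
		/* The event migrated while we blocked on the mutex: retry. */
		pthread_mutex_unlock(&ctx->mutex);
		put_ctx(ctx);
		goto again;
	}
	return ctx;			/* locked and still current */
}

static void event_ctx_unlock(struct ctx *ctx)
{
	pthread_mutex_unlock(&ctx->mutex);
	put_ctx(ctx);
}

int main(void)
{
	struct ctx *c = malloc(sizeof(*c));
	struct event e;

	pthread_mutex_init(&c->mutex, NULL);
	atomic_init(&c->refcount, 1);		/* the event's own reference */
	atomic_init(&e.ctx, c);

	event_ctx_unlock(event_ctx_lock(&e));	/* lock, recheck, unlock */
	put_ctx(c);				/* drop the last reference */
	return 0;
}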
@@ -1155,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1155 ctx->nr_branch_stack++; 1243 ctx->nr_branch_stack++;
1156 1244
1157 list_add_rcu(&event->event_entry, &ctx->event_list); 1245 list_add_rcu(&event->event_entry, &ctx->event_list);
1158 if (!ctx->nr_events)
1159 perf_pmu_rotate_start(ctx->pmu);
1160 ctx->nr_events++; 1246 ctx->nr_events++;
1161 if (event->attr.inherit_stat) 1247 if (event->attr.inherit_stat)
1162 ctx->nr_stat++; 1248 ctx->nr_stat++;
@@ -1275,6 +1361,8 @@ static void perf_group_attach(struct perf_event *event)
1275 if (group_leader == event) 1361 if (group_leader == event)
1276 return; 1362 return;
1277 1363
1364 WARN_ON_ONCE(group_leader->ctx != event->ctx);
1365
1278 if (group_leader->group_flags & PERF_GROUP_SOFTWARE && 1366 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
1279 !is_software_event(event)) 1367 !is_software_event(event))
1280 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; 1368 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
@@ -1296,6 +1384,10 @@ static void
1296list_del_event(struct perf_event *event, struct perf_event_context *ctx) 1384list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1297{ 1385{
1298 struct perf_cpu_context *cpuctx; 1386 struct perf_cpu_context *cpuctx;
1387
1388 WARN_ON_ONCE(event->ctx != ctx);
1389 lockdep_assert_held(&ctx->lock);
1390
1299 /* 1391 /*
1300 * We can have double detach due to exit/hot-unplug + close. 1392 * We can have double detach due to exit/hot-unplug + close.
1301 */ 1393 */
@@ -1380,6 +1472,8 @@ static void perf_group_detach(struct perf_event *event)
1380 1472
1381 /* Inherit group flags from the previous leader */ 1473 /* Inherit group flags from the previous leader */
1382 sibling->group_flags = event->group_flags; 1474 sibling->group_flags = event->group_flags;
1475
1476 WARN_ON_ONCE(sibling->ctx != event->ctx);
1383 } 1477 }
1384 1478
1385out: 1479out:
@@ -1442,6 +1536,10 @@ event_sched_out(struct perf_event *event,
1442{ 1536{
1443 u64 tstamp = perf_event_time(event); 1537 u64 tstamp = perf_event_time(event);
1444 u64 delta; 1538 u64 delta;
1539
1540 WARN_ON_ONCE(event->ctx != ctx);
1541 lockdep_assert_held(&ctx->lock);
1542
1445 /* 1543 /*
1446 * An event which could not be activated because of 1544 * An event which could not be activated because of
1447 * filter mismatch still needs to have its timings 1545 * filter mismatch still needs to have its timings
@@ -1471,7 +1569,8 @@ event_sched_out(struct perf_event *event,
1471 1569
1472 if (!is_software_event(event)) 1570 if (!is_software_event(event))
1473 cpuctx->active_oncpu--; 1571 cpuctx->active_oncpu--;
1474 ctx->nr_active--; 1572 if (!--ctx->nr_active)
1573 perf_event_ctx_deactivate(ctx);
1475 if (event->attr.freq && event->attr.sample_freq) 1574 if (event->attr.freq && event->attr.sample_freq)
1476 ctx->nr_freq--; 1575 ctx->nr_freq--;
1477 if (event->attr.exclusive || !cpuctx->active_oncpu) 1576 if (event->attr.exclusive || !cpuctx->active_oncpu)
@@ -1654,7 +1753,7 @@ int __perf_event_disable(void *info)
1654 * is the current context on this CPU and preemption is disabled, 1753 * is the current context on this CPU and preemption is disabled,
1655 * hence we can't get into perf_event_task_sched_out for this context. 1754 * hence we can't get into perf_event_task_sched_out for this context.
1656 */ 1755 */
1657void perf_event_disable(struct perf_event *event) 1756static void _perf_event_disable(struct perf_event *event)
1658{ 1757{
1659 struct perf_event_context *ctx = event->ctx; 1758 struct perf_event_context *ctx = event->ctx;
1660 struct task_struct *task = ctx->task; 1759 struct task_struct *task = ctx->task;
@@ -1695,6 +1794,19 @@ retry:
1695 } 1794 }
1696 raw_spin_unlock_irq(&ctx->lock); 1795 raw_spin_unlock_irq(&ctx->lock);
1697} 1796}
1797
1798/*
1799 * Strictly speaking kernel users cannot create groups and therefore this
1800 * interface does not need the perf_event_ctx_lock() magic.
1801 */
1802void perf_event_disable(struct perf_event *event)
1803{
1804 struct perf_event_context *ctx;
1805
1806 ctx = perf_event_ctx_lock(event);
1807 _perf_event_disable(event);
1808 perf_event_ctx_unlock(event, ctx);
1809}
1698EXPORT_SYMBOL_GPL(perf_event_disable); 1810EXPORT_SYMBOL_GPL(perf_event_disable);
1699 1811
1700static void perf_set_shadow_time(struct perf_event *event, 1812static void perf_set_shadow_time(struct perf_event *event,
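[Editor's note: the shape introduced here, and repeated for perf_event_enable(), perf_event_refresh(), perf_read() and perf_ioctl() further down, is an internal helper that assumes ctx->mutex is already held, plus a thin exported wrapper that takes the context lock around it. Schematically, in userspace C with hypothetical names (the real wrapper uses the retrying perf_event_ctx_lock() shown earlier rather than a plain mutex):]

#include <pthread.h>

struct ctx {
	pthread_mutex_t mutex;
};

struct event {
	struct ctx *ctx;
	int enabled;
};

/* Internal helper: the caller must already hold event->ctx->mutex
 * (this is what the ioctl path does via _perf_ioctl()). */
static void _event_disable(struct event *event)
{
	event->enabled = 0;
}

/* Exported wrapper: acquire the context mutex, delegate, release. */
void event_disable(struct event *event)
{
	struct ctx *ctx = event->ctx;

	pthread_mutex_lock(&ctx->mutex);
	_event_disable(event);
	pthread_mutex_unlock(&ctx->mutex);
}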
@@ -1782,7 +1894,8 @@ event_sched_in(struct perf_event *event,
1782 1894
1783 if (!is_software_event(event)) 1895 if (!is_software_event(event))
1784 cpuctx->active_oncpu++; 1896 cpuctx->active_oncpu++;
1785 ctx->nr_active++; 1897 if (!ctx->nr_active++)
1898 perf_event_ctx_activate(ctx);
1786 if (event->attr.freq && event->attr.sample_freq) 1899 if (event->attr.freq && event->attr.sample_freq)
1787 ctx->nr_freq++; 1900 ctx->nr_freq++;
1788 1901
@@ -2158,7 +2271,7 @@ unlock:
2158 * perf_event_for_each_child or perf_event_for_each as described 2271 * perf_event_for_each_child or perf_event_for_each as described
2159 * for perf_event_disable. 2272 * for perf_event_disable.
2160 */ 2273 */
2161void perf_event_enable(struct perf_event *event) 2274static void _perf_event_enable(struct perf_event *event)
2162{ 2275{
2163 struct perf_event_context *ctx = event->ctx; 2276 struct perf_event_context *ctx = event->ctx;
2164 struct task_struct *task = ctx->task; 2277 struct task_struct *task = ctx->task;
@@ -2214,9 +2327,21 @@ retry:
2214out: 2327out:
2215 raw_spin_unlock_irq(&ctx->lock); 2328 raw_spin_unlock_irq(&ctx->lock);
2216} 2329}
2330
2331/*
2332 * See perf_event_disable();
2333 */
2334void perf_event_enable(struct perf_event *event)
2335{
2336 struct perf_event_context *ctx;
2337
2338 ctx = perf_event_ctx_lock(event);
2339 _perf_event_enable(event);
2340 perf_event_ctx_unlock(event, ctx);
2341}
2217EXPORT_SYMBOL_GPL(perf_event_enable); 2342EXPORT_SYMBOL_GPL(perf_event_enable);
2218 2343
2219int perf_event_refresh(struct perf_event *event, int refresh) 2344static int _perf_event_refresh(struct perf_event *event, int refresh)
2220{ 2345{
2221 /* 2346 /*
2222 * not supported on inherited events 2347 * not supported on inherited events
@@ -2225,10 +2350,25 @@ int perf_event_refresh(struct perf_event *event, int refresh)
2225 return -EINVAL; 2350 return -EINVAL;
2226 2351
2227 atomic_add(refresh, &event->event_limit); 2352 atomic_add(refresh, &event->event_limit);
2228 perf_event_enable(event); 2353 _perf_event_enable(event);
2229 2354
2230 return 0; 2355 return 0;
2231} 2356}
2357
2358/*
2359 * See perf_event_disable()
2360 */
2361int perf_event_refresh(struct perf_event *event, int refresh)
2362{
2363 struct perf_event_context *ctx;
2364 int ret;
2365
2366 ctx = perf_event_ctx_lock(event);
2367 ret = _perf_event_refresh(event, refresh);
2368 perf_event_ctx_unlock(event, ctx);
2369
2370 return ret;
2371}
2232EXPORT_SYMBOL_GPL(perf_event_refresh); 2372EXPORT_SYMBOL_GPL(perf_event_refresh);
2233 2373
2234static void ctx_sched_out(struct perf_event_context *ctx, 2374static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2612,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2612 2752
2613 perf_pmu_enable(ctx->pmu); 2753 perf_pmu_enable(ctx->pmu);
2614 perf_ctx_unlock(cpuctx, ctx); 2754 perf_ctx_unlock(cpuctx, ctx);
2615
2616 /*
2617 * Since these rotations are per-cpu, we need to ensure the
2618 * cpu-context we got scheduled on is actually rotating.
2619 */
2620 perf_pmu_rotate_start(ctx->pmu);
2621} 2755}
2622 2756
2623/* 2757/*
@@ -2905,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx)
2905 list_rotate_left(&ctx->flexible_groups); 3039 list_rotate_left(&ctx->flexible_groups);
2906} 3040}
2907 3041
2908/*
2909 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
2910 * because they're strictly cpu affine and rotate_start is called with IRQs
2911 * disabled, while rotate_context is called from IRQ context.
2912 */
2913static int perf_rotate_context(struct perf_cpu_context *cpuctx) 3042static int perf_rotate_context(struct perf_cpu_context *cpuctx)
2914{ 3043{
2915 struct perf_event_context *ctx = NULL; 3044 struct perf_event_context *ctx = NULL;
2916 int rotate = 0, remove = 1; 3045 int rotate = 0;
2917 3046
2918 if (cpuctx->ctx.nr_events) { 3047 if (cpuctx->ctx.nr_events) {
2919 remove = 0;
2920 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 3048 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
2921 rotate = 1; 3049 rotate = 1;
2922 } 3050 }
2923 3051
2924 ctx = cpuctx->task_ctx; 3052 ctx = cpuctx->task_ctx;
2925 if (ctx && ctx->nr_events) { 3053 if (ctx && ctx->nr_events) {
2926 remove = 0;
2927 if (ctx->nr_events != ctx->nr_active) 3054 if (ctx->nr_events != ctx->nr_active)
2928 rotate = 1; 3055 rotate = 1;
2929 } 3056 }
@@ -2947,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
2947 perf_pmu_enable(cpuctx->ctx.pmu); 3074 perf_pmu_enable(cpuctx->ctx.pmu);
2948 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); 3075 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2949done: 3076done:
2950 if (remove)
2951 list_del_init(&cpuctx->rotation_list);
2952 3077
2953 return rotate; 3078 return rotate;
2954} 3079}
@@ -2966,9 +3091,8 @@ bool perf_event_can_stop_tick(void)
2966 3091
2967void perf_event_task_tick(void) 3092void perf_event_task_tick(void)
2968{ 3093{
2969 struct list_head *head = this_cpu_ptr(&rotation_list); 3094 struct list_head *head = this_cpu_ptr(&active_ctx_list);
2970 struct perf_cpu_context *cpuctx, *tmp; 3095 struct perf_event_context *ctx, *tmp;
2971 struct perf_event_context *ctx;
2972 int throttled; 3096 int throttled;
2973 3097
2974 WARN_ON(!irqs_disabled()); 3098 WARN_ON(!irqs_disabled());
@@ -2976,14 +3100,8 @@ void perf_event_task_tick(void)
2976 __this_cpu_inc(perf_throttled_seq); 3100 __this_cpu_inc(perf_throttled_seq);
2977 throttled = __this_cpu_xchg(perf_throttled_count, 0); 3101 throttled = __this_cpu_xchg(perf_throttled_count, 0);
2978 3102
2979 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { 3103 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
2980 ctx = &cpuctx->ctx;
2981 perf_adjust_freq_unthr_context(ctx, throttled); 3104 perf_adjust_freq_unthr_context(ctx, throttled);
2982
2983 ctx = cpuctx->task_ctx;
2984 if (ctx)
2985 perf_adjust_freq_unthr_context(ctx, throttled);
2986 }
2987} 3105}
2988 3106
2989static int event_enable_on_exec(struct perf_event *event, 3107static int event_enable_on_exec(struct perf_event *event,
@@ -3142,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
3142{ 3260{
3143 raw_spin_lock_init(&ctx->lock); 3261 raw_spin_lock_init(&ctx->lock);
3144 mutex_init(&ctx->mutex); 3262 mutex_init(&ctx->mutex);
3263 INIT_LIST_HEAD(&ctx->active_ctx_list);
3145 INIT_LIST_HEAD(&ctx->pinned_groups); 3264 INIT_LIST_HEAD(&ctx->pinned_groups);
3146 INIT_LIST_HEAD(&ctx->flexible_groups); 3265 INIT_LIST_HEAD(&ctx->flexible_groups);
3147 INIT_LIST_HEAD(&ctx->event_list); 3266 INIT_LIST_HEAD(&ctx->event_list);
@@ -3421,7 +3540,16 @@ static void perf_remove_from_owner(struct perf_event *event)
3421 rcu_read_unlock(); 3540 rcu_read_unlock();
3422 3541
3423 if (owner) { 3542 if (owner) {
3424 mutex_lock(&owner->perf_event_mutex); 3543 /*
3544 * If we're here through perf_event_exit_task() we're already
3545 * holding ctx->mutex which would be an inversion wrt. the
3546 * normal lock order.
3547 *
3548 * However we can safely take this lock because its the child
3549 * ctx->mutex.
3550 */
3551 mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
3552
3425 /* 3553 /*
3426 * We have to re-check the event->owner field, if it is cleared 3554 * We have to re-check the event->owner field, if it is cleared
3427 * we raced with perf_event_exit_task(), acquiring the mutex 3555 * we raced with perf_event_exit_task(), acquiring the mutex
@@ -3440,7 +3568,7 @@ static void perf_remove_from_owner(struct perf_event *event)
3440 */ 3568 */
3441static void put_event(struct perf_event *event) 3569static void put_event(struct perf_event *event)
3442{ 3570{
3443 struct perf_event_context *ctx = event->ctx; 3571 struct perf_event_context *ctx;
3444 3572
3445 if (!atomic_long_dec_and_test(&event->refcount)) 3573 if (!atomic_long_dec_and_test(&event->refcount))
3446 return; 3574 return;
@@ -3448,7 +3576,6 @@ static void put_event(struct perf_event *event)
3448 if (!is_kernel_event(event)) 3576 if (!is_kernel_event(event))
3449 perf_remove_from_owner(event); 3577 perf_remove_from_owner(event);
3450 3578
3451 WARN_ON_ONCE(ctx->parent_ctx);
3452 /* 3579 /*
3453 * There are two ways this annotation is useful: 3580 * There are two ways this annotation is useful:
3454 * 3581 *
@@ -3461,7 +3588,8 @@ static void put_event(struct perf_event *event)
3461 * the last filedesc died, so there is no possibility 3588 * the last filedesc died, so there is no possibility
3462 * to trigger the AB-BA case. 3589 * to trigger the AB-BA case.
3463 */ 3590 */
3464 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 3591 ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
3592 WARN_ON_ONCE(ctx->parent_ctx);
3465 perf_remove_from_context(event, true); 3593 perf_remove_from_context(event, true);
3466 mutex_unlock(&ctx->mutex); 3594 mutex_unlock(&ctx->mutex);
3467 3595
@@ -3547,12 +3675,13 @@ static int perf_event_read_group(struct perf_event *event,
3547 u64 read_format, char __user *buf) 3675 u64 read_format, char __user *buf)
3548{ 3676{
3549 struct perf_event *leader = event->group_leader, *sub; 3677 struct perf_event *leader = event->group_leader, *sub;
3550 int n = 0, size = 0, ret = -EFAULT;
3551 struct perf_event_context *ctx = leader->ctx; 3678 struct perf_event_context *ctx = leader->ctx;
3552 u64 values[5]; 3679 int n = 0, size = 0, ret;
3553 u64 count, enabled, running; 3680 u64 count, enabled, running;
3681 u64 values[5];
3682
3683 lockdep_assert_held(&ctx->mutex);
3554 3684
3555 mutex_lock(&ctx->mutex);
3556 count = perf_event_read_value(leader, &enabled, &running); 3685 count = perf_event_read_value(leader, &enabled, &running);
3557 3686
3558 values[n++] = 1 + leader->nr_siblings; 3687 values[n++] = 1 + leader->nr_siblings;
@@ -3567,7 +3696,7 @@ static int perf_event_read_group(struct perf_event *event,
3567 size = n * sizeof(u64); 3696 size = n * sizeof(u64);
3568 3697
3569 if (copy_to_user(buf, values, size)) 3698 if (copy_to_user(buf, values, size))
3570 goto unlock; 3699 return -EFAULT;
3571 3700
3572 ret = size; 3701 ret = size;
3573 3702
@@ -3581,14 +3710,11 @@ static int perf_event_read_group(struct perf_event *event,
3581 size = n * sizeof(u64); 3710 size = n * sizeof(u64);
3582 3711
3583 if (copy_to_user(buf + ret, values, size)) { 3712 if (copy_to_user(buf + ret, values, size)) {
3584 ret = -EFAULT; 3713 return -EFAULT;
3585 goto unlock;
3586 } 3714 }
3587 3715
3588 ret += size; 3716 ret += size;
3589 } 3717 }
3590unlock:
3591 mutex_unlock(&ctx->mutex);
3592 3718
3593 return ret; 3719 return ret;
3594} 3720}
@@ -3660,8 +3786,14 @@ static ssize_t
3660perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 3786perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
3661{ 3787{
3662 struct perf_event *event = file->private_data; 3788 struct perf_event *event = file->private_data;
3789 struct perf_event_context *ctx;
3790 int ret;
3791
3792 ctx = perf_event_ctx_lock(event);
3793 ret = perf_read_hw(event, buf, count);
3794 perf_event_ctx_unlock(event, ctx);
3663 3795
3664 return perf_read_hw(event, buf, count); 3796 return ret;
3665} 3797}
3666 3798
3667static unsigned int perf_poll(struct file *file, poll_table *wait) 3799static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -3687,7 +3819,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
3687 return events; 3819 return events;
3688} 3820}
3689 3821
3690static void perf_event_reset(struct perf_event *event) 3822static void _perf_event_reset(struct perf_event *event)
3691{ 3823{
3692 (void)perf_event_read(event); 3824 (void)perf_event_read(event);
3693 local64_set(&event->count, 0); 3825 local64_set(&event->count, 0);
@@ -3706,6 +3838,7 @@ static void perf_event_for_each_child(struct perf_event *event,
3706 struct perf_event *child; 3838 struct perf_event *child;
3707 3839
3708 WARN_ON_ONCE(event->ctx->parent_ctx); 3840 WARN_ON_ONCE(event->ctx->parent_ctx);
3841
3709 mutex_lock(&event->child_mutex); 3842 mutex_lock(&event->child_mutex);
3710 func(event); 3843 func(event);
3711 list_for_each_entry(child, &event->child_list, child_list) 3844 list_for_each_entry(child, &event->child_list, child_list)
@@ -3719,14 +3852,13 @@ static void perf_event_for_each(struct perf_event *event,
3719 struct perf_event_context *ctx = event->ctx; 3852 struct perf_event_context *ctx = event->ctx;
3720 struct perf_event *sibling; 3853 struct perf_event *sibling;
3721 3854
3722 WARN_ON_ONCE(ctx->parent_ctx); 3855 lockdep_assert_held(&ctx->mutex);
3723 mutex_lock(&ctx->mutex); 3856
3724 event = event->group_leader; 3857 event = event->group_leader;
3725 3858
3726 perf_event_for_each_child(event, func); 3859 perf_event_for_each_child(event, func);
3727 list_for_each_entry(sibling, &event->sibling_list, group_entry) 3860 list_for_each_entry(sibling, &event->sibling_list, group_entry)
3728 perf_event_for_each_child(sibling, func); 3861 perf_event_for_each_child(sibling, func);
3729 mutex_unlock(&ctx->mutex);
3730} 3862}
3731 3863
3732static int perf_event_period(struct perf_event *event, u64 __user *arg) 3864static int perf_event_period(struct perf_event *event, u64 __user *arg)
@@ -3796,25 +3928,24 @@ static int perf_event_set_output(struct perf_event *event,
3796 struct perf_event *output_event); 3928 struct perf_event *output_event);
3797static int perf_event_set_filter(struct perf_event *event, void __user *arg); 3929static int perf_event_set_filter(struct perf_event *event, void __user *arg);
3798 3930
3799static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 3931static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
3800{ 3932{
3801 struct perf_event *event = file->private_data;
3802 void (*func)(struct perf_event *); 3933 void (*func)(struct perf_event *);
3803 u32 flags = arg; 3934 u32 flags = arg;
3804 3935
3805 switch (cmd) { 3936 switch (cmd) {
3806 case PERF_EVENT_IOC_ENABLE: 3937 case PERF_EVENT_IOC_ENABLE:
3807 func = perf_event_enable; 3938 func = _perf_event_enable;
3808 break; 3939 break;
3809 case PERF_EVENT_IOC_DISABLE: 3940 case PERF_EVENT_IOC_DISABLE:
3810 func = perf_event_disable; 3941 func = _perf_event_disable;
3811 break; 3942 break;
3812 case PERF_EVENT_IOC_RESET: 3943 case PERF_EVENT_IOC_RESET:
3813 func = perf_event_reset; 3944 func = _perf_event_reset;
3814 break; 3945 break;
3815 3946
3816 case PERF_EVENT_IOC_REFRESH: 3947 case PERF_EVENT_IOC_REFRESH:
3817 return perf_event_refresh(event, arg); 3948 return _perf_event_refresh(event, arg);
3818 3949
3819 case PERF_EVENT_IOC_PERIOD: 3950 case PERF_EVENT_IOC_PERIOD:
3820 return perf_event_period(event, (u64 __user *)arg); 3951 return perf_event_period(event, (u64 __user *)arg);
@@ -3861,6 +3992,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3861 return 0; 3992 return 0;
3862} 3993}
3863 3994
3995static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3996{
3997 struct perf_event *event = file->private_data;
3998 struct perf_event_context *ctx;
3999 long ret;
4000
4001 ctx = perf_event_ctx_lock(event);
4002 ret = _perf_ioctl(event, cmd, arg);
4003 perf_event_ctx_unlock(event, ctx);
4004
4005 return ret;
4006}
4007
3864#ifdef CONFIG_COMPAT 4008#ifdef CONFIG_COMPAT
3865static long perf_compat_ioctl(struct file *file, unsigned int cmd, 4009static long perf_compat_ioctl(struct file *file, unsigned int cmd,
3866 unsigned long arg) 4010 unsigned long arg)
@@ -3883,11 +4027,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
3883 4027
3884int perf_event_task_enable(void) 4028int perf_event_task_enable(void)
3885{ 4029{
4030 struct perf_event_context *ctx;
3886 struct perf_event *event; 4031 struct perf_event *event;
3887 4032
3888 mutex_lock(&current->perf_event_mutex); 4033 mutex_lock(&current->perf_event_mutex);
3889 list_for_each_entry(event, &current->perf_event_list, owner_entry) 4034 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
3890 perf_event_for_each_child(event, perf_event_enable); 4035 ctx = perf_event_ctx_lock(event);
4036 perf_event_for_each_child(event, _perf_event_enable);
4037 perf_event_ctx_unlock(event, ctx);
4038 }
3891 mutex_unlock(&current->perf_event_mutex); 4039 mutex_unlock(&current->perf_event_mutex);
3892 4040
3893 return 0; 4041 return 0;
@@ -3895,11 +4043,15 @@ int perf_event_task_enable(void)
3895 4043
3896int perf_event_task_disable(void) 4044int perf_event_task_disable(void)
3897{ 4045{
4046 struct perf_event_context *ctx;
3898 struct perf_event *event; 4047 struct perf_event *event;
3899 4048
3900 mutex_lock(&current->perf_event_mutex); 4049 mutex_lock(&current->perf_event_mutex);
3901 list_for_each_entry(event, &current->perf_event_list, owner_entry) 4050 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
3902 perf_event_for_each_child(event, perf_event_disable); 4051 ctx = perf_event_ctx_lock(event);
4052 perf_event_for_each_child(event, _perf_event_disable);
4053 perf_event_ctx_unlock(event, ctx);
4054 }
3903 mutex_unlock(&current->perf_event_mutex); 4055 mutex_unlock(&current->perf_event_mutex);
3904 4056
3905 return 0; 4057 return 0;
@@ -3949,7 +4101,8 @@ unlock:
3949 rcu_read_unlock(); 4101 rcu_read_unlock();
3950} 4102}
3951 4103
3952void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 4104void __weak arch_perf_update_userpage(
4105 struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now)
3953{ 4106{
3954} 4107}
3955 4108
@@ -3999,7 +4152,7 @@ void perf_event_update_userpage(struct perf_event *event)
3999 userpg->time_running = running + 4152 userpg->time_running = running +
4000 atomic64_read(&event->child_total_time_running); 4153 atomic64_read(&event->child_total_time_running);
4001 4154
4002 arch_perf_update_userpage(userpg, now); 4155 arch_perf_update_userpage(event, userpg, now);
4003 4156
4004 barrier(); 4157 barrier();
4005 ++userpg->lock; 4158 ++userpg->lock;
@@ -4141,6 +4294,9 @@ static void perf_mmap_open(struct vm_area_struct *vma)
4141 4294
4142 atomic_inc(&event->mmap_count); 4295 atomic_inc(&event->mmap_count);
4143 atomic_inc(&event->rb->mmap_count); 4296 atomic_inc(&event->rb->mmap_count);
4297
4298 if (event->pmu->event_mapped)
4299 event->pmu->event_mapped(event);
4144} 4300}
4145 4301
4146/* 4302/*
@@ -4160,6 +4316,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
4160 int mmap_locked = rb->mmap_locked; 4316 int mmap_locked = rb->mmap_locked;
4161 unsigned long size = perf_data_size(rb); 4317 unsigned long size = perf_data_size(rb);
4162 4318
4319 if (event->pmu->event_unmapped)
4320 event->pmu->event_unmapped(event);
4321
4163 atomic_dec(&rb->mmap_count); 4322 atomic_dec(&rb->mmap_count);
4164 4323
4165 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) 4324 if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
@@ -4361,6 +4520,9 @@ unlock:
4361 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; 4520 vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
4362 vma->vm_ops = &perf_mmap_vmops; 4521 vma->vm_ops = &perf_mmap_vmops;
4363 4522
4523 if (event->pmu->event_mapped)
4524 event->pmu->event_mapped(event);
4525
4364 return ret; 4526 return ret;
4365} 4527}
4366 4528
@@ -5889,6 +6051,8 @@ end:
5889 rcu_read_unlock(); 6051 rcu_read_unlock();
5890} 6052}
5891 6053
6054DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
6055
5892int perf_swevent_get_recursion_context(void) 6056int perf_swevent_get_recursion_context(void)
5893{ 6057{
5894 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); 6058 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
@@ -5904,21 +6068,30 @@ inline void perf_swevent_put_recursion_context(int rctx)
5904 put_recursion_context(swhash->recursion, rctx); 6068 put_recursion_context(swhash->recursion, rctx);
5905} 6069}
5906 6070
5907void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) 6071void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
5908{ 6072{
5909 struct perf_sample_data data; 6073 struct perf_sample_data data;
5910 int rctx;
5911 6074
5912 preempt_disable_notrace(); 6075 if (WARN_ON_ONCE(!regs))
5913 rctx = perf_swevent_get_recursion_context();
5914 if (rctx < 0)
5915 return; 6076 return;
5916 6077
5917 perf_sample_data_init(&data, addr, 0); 6078 perf_sample_data_init(&data, addr, 0);
5918
5919 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); 6079 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
6080}
6081
6082void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
6083{
6084 int rctx;
6085
6086 preempt_disable_notrace();
6087 rctx = perf_swevent_get_recursion_context();
6088 if (unlikely(rctx < 0))
6089 goto fail;
6090
6091 ___perf_sw_event(event_id, nr, regs, addr);
5920 6092
5921 perf_swevent_put_recursion_context(rctx); 6093 perf_swevent_put_recursion_context(rctx);
6094fail:
5922 preempt_enable_notrace(); 6095 preempt_enable_notrace();
5923} 6096}
5924 6097
@@ -6780,7 +6953,6 @@ skip_type:
6780 6953
6781 __perf_cpu_hrtimer_init(cpuctx, cpu); 6954 __perf_cpu_hrtimer_init(cpuctx, cpu);
6782 6955
6783 INIT_LIST_HEAD(&cpuctx->rotation_list);
6784 cpuctx->unique_pmu = pmu; 6956 cpuctx->unique_pmu = pmu;
6785 } 6957 }
6786 6958
@@ -6853,6 +7025,20 @@ void perf_pmu_unregister(struct pmu *pmu)
6853} 7025}
6854EXPORT_SYMBOL_GPL(perf_pmu_unregister); 7026EXPORT_SYMBOL_GPL(perf_pmu_unregister);
6855 7027
7028static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
7029{
7030 int ret;
7031
7032 if (!try_module_get(pmu->module))
7033 return -ENODEV;
7034 event->pmu = pmu;
7035 ret = pmu->event_init(event);
7036 if (ret)
7037 module_put(pmu->module);
7038
7039 return ret;
7040}
7041
6856struct pmu *perf_init_event(struct perf_event *event) 7042struct pmu *perf_init_event(struct perf_event *event)
6857{ 7043{
6858 struct pmu *pmu = NULL; 7044 struct pmu *pmu = NULL;
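[Editor's note: perf_try_init_event() factors out the "pin the PMU's module, try to initialise, unpin on failure" sequence, which the next hunk then reuses in both lookup paths of perf_init_event(). A rough userspace analogue of that acquire-on-success / release-on-failure shape, with hypothetical names; note the real try_module_get() does its liveness check and increment atomically, which this sketch does not.]

#include <errno.h>
#include <stdatomic.h>

struct event;

struct pmu {
	atomic_int refcnt;		/* stands in for the module refcount */
	atomic_bool live;		/* cleared when the module is unloading */
	int (*event_init)(struct pmu *pmu, struct event *event);
};

static int pmu_try_get(struct pmu *pmu)
{
	if (!atomic_load(&pmu->live))	/* like try_module_get() failing */
		return 0;
	atomic_fetch_add(&pmu->refcnt, 1);
	return 1;
}

static void pmu_put(struct pmu *pmu)
{
	atomic_fetch_sub(&pmu->refcnt, 1);
}

/* Keep the reference only if event_init() succeeds. */
static int try_init_event(struct pmu *pmu, struct event *event)
{
	int ret;

	if (!pmu_try_get(pmu))
		return -ENODEV;

	ret = pmu->event_init(pmu, event);
	if (ret)
		pmu_put(pmu);		/* init failed: drop the reference again */

	return ret;
}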
@@ -6865,24 +7051,14 @@ struct pmu *perf_init_event(struct perf_event *event)
6865 pmu = idr_find(&pmu_idr, event->attr.type); 7051 pmu = idr_find(&pmu_idr, event->attr.type);
6866 rcu_read_unlock(); 7052 rcu_read_unlock();
6867 if (pmu) { 7053 if (pmu) {
6868 if (!try_module_get(pmu->module)) { 7054 ret = perf_try_init_event(pmu, event);
6869 pmu = ERR_PTR(-ENODEV);
6870 goto unlock;
6871 }
6872 event->pmu = pmu;
6873 ret = pmu->event_init(event);
6874 if (ret) 7055 if (ret)
6875 pmu = ERR_PTR(ret); 7056 pmu = ERR_PTR(ret);
6876 goto unlock; 7057 goto unlock;
6877 } 7058 }
6878 7059
6879 list_for_each_entry_rcu(pmu, &pmus, entry) { 7060 list_for_each_entry_rcu(pmu, &pmus, entry) {
6880 if (!try_module_get(pmu->module)) { 7061 ret = perf_try_init_event(pmu, event);
6881 pmu = ERR_PTR(-ENODEV);
6882 goto unlock;
6883 }
6884 event->pmu = pmu;
6885 ret = pmu->event_init(event);
6886 if (!ret) 7062 if (!ret)
6887 goto unlock; 7063 goto unlock;
6888 7064
@@ -7246,6 +7422,15 @@ out:
7246 return ret; 7422 return ret;
7247} 7423}
7248 7424
7425static void mutex_lock_double(struct mutex *a, struct mutex *b)
7426{
7427 if (b < a)
7428 swap(a, b);
7429
7430 mutex_lock(a);
7431 mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
7432}
7433
7249/** 7434/**
7250 * sys_perf_event_open - open a performance event, associate it to a task/cpu 7435 * sys_perf_event_open - open a performance event, associate it to a task/cpu
7251 * 7436 *
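[Editor's note: mutex_lock_double() encodes the "lock ordering is by mutex address" rule from the big comment added earlier: always take the lower-addressed mutex first, so two tasks swizzling events between the same pair of contexts cannot deadlock against each other. It is used below by the move_group path of sys_perf_event_open() and by perf_pmu_migrate_context(). A userspace rendering of the same idea, mirroring the kernel helper's address comparison:]

#include <pthread.h>

/* Acquire two mutexes in a globally consistent order (by address), so
 * concurrent callers locking the same pair can never deadlock. */
static void mutex_lock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (b < a) {			/* order by address */
		pthread_mutex_t *tmp = a;

		a = b;
		b = tmp;
	}

	pthread_mutex_lock(a);
	pthread_mutex_lock(b);
}

static void mutex_unlock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
	/* Unlock order does not matter for correctness. */
	pthread_mutex_unlock(a);
	pthread_mutex_unlock(b);
}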
@@ -7261,7 +7446,7 @@ SYSCALL_DEFINE5(perf_event_open,
7261 struct perf_event *group_leader = NULL, *output_event = NULL; 7446 struct perf_event *group_leader = NULL, *output_event = NULL;
7262 struct perf_event *event, *sibling; 7447 struct perf_event *event, *sibling;
7263 struct perf_event_attr attr; 7448 struct perf_event_attr attr;
7264 struct perf_event_context *ctx; 7449 struct perf_event_context *ctx, *uninitialized_var(gctx);
7265 struct file *event_file = NULL; 7450 struct file *event_file = NULL;
7266 struct fd group = {NULL, 0}; 7451 struct fd group = {NULL, 0};
7267 struct task_struct *task = NULL; 7452 struct task_struct *task = NULL;
@@ -7459,43 +7644,68 @@ SYSCALL_DEFINE5(perf_event_open,
7459 } 7644 }
7460 7645
7461 if (move_group) { 7646 if (move_group) {
7462 struct perf_event_context *gctx = group_leader->ctx; 7647 gctx = group_leader->ctx;
7463
7464 mutex_lock(&gctx->mutex);
7465 perf_remove_from_context(group_leader, false);
7466 7648
7467 /* 7649 /*
7468 * Removing from the context ends up with disabled 7650 * See perf_event_ctx_lock() for comments on the details
7469 * event. What we want here is event in the initial 7651 * of swizzling perf_event::ctx.
7470 * startup state, ready to be add into new context.
7471 */ 7652 */
7472 perf_event__state_init(group_leader); 7653 mutex_lock_double(&gctx->mutex, &ctx->mutex);
7654
7655 perf_remove_from_context(group_leader, false);
7656
7473 list_for_each_entry(sibling, &group_leader->sibling_list, 7657 list_for_each_entry(sibling, &group_leader->sibling_list,
7474 group_entry) { 7658 group_entry) {
7475 perf_remove_from_context(sibling, false); 7659 perf_remove_from_context(sibling, false);
7476 perf_event__state_init(sibling);
7477 put_ctx(gctx); 7660 put_ctx(gctx);
7478 } 7661 }
7479 mutex_unlock(&gctx->mutex); 7662 } else {
7480 put_ctx(gctx); 7663 mutex_lock(&ctx->mutex);
7481 } 7664 }
7482 7665
7483 WARN_ON_ONCE(ctx->parent_ctx); 7666 WARN_ON_ONCE(ctx->parent_ctx);
7484 mutex_lock(&ctx->mutex);
7485 7667
7486 if (move_group) { 7668 if (move_group) {
7669 /*
7670 * Wait for everybody to stop referencing the events through
7671 * the old lists, before installing it on new lists.
7672 */
7487 synchronize_rcu(); 7673 synchronize_rcu();
7488 perf_install_in_context(ctx, group_leader, group_leader->cpu); 7674
7489 get_ctx(ctx); 7675 /*
7676 * Install the group siblings before the group leader.
7677 *
7678 * Because a group leader will try and install the entire group
7679 * (through the sibling list, which is still in-tact), we can
7680 * end up with siblings installed in the wrong context.
7681 *
7682 * By installing siblings first we NO-OP because they're not
7683 * reachable through the group lists.
7684 */
7490 list_for_each_entry(sibling, &group_leader->sibling_list, 7685 list_for_each_entry(sibling, &group_leader->sibling_list,
7491 group_entry) { 7686 group_entry) {
7687 perf_event__state_init(sibling);
7492 perf_install_in_context(ctx, sibling, sibling->cpu); 7688 perf_install_in_context(ctx, sibling, sibling->cpu);
7493 get_ctx(ctx); 7689 get_ctx(ctx);
7494 } 7690 }
7691
7692 /*
7693 * Removing from the context ends up with disabled
7694 * event. What we want here is event in the initial
7695 * startup state, ready to be add into new context.
7696 */
7697 perf_event__state_init(group_leader);
7698 perf_install_in_context(ctx, group_leader, group_leader->cpu);
7699 get_ctx(ctx);
7495 } 7700 }
7496 7701
7497 perf_install_in_context(ctx, event, event->cpu); 7702 perf_install_in_context(ctx, event, event->cpu);
7498 perf_unpin_context(ctx); 7703 perf_unpin_context(ctx);
7704
7705 if (move_group) {
7706 mutex_unlock(&gctx->mutex);
7707 put_ctx(gctx);
7708 }
7499 mutex_unlock(&ctx->mutex); 7709 mutex_unlock(&ctx->mutex);
7500 7710
7501 put_online_cpus(); 7711 put_online_cpus();
@@ -7603,7 +7813,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7603 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; 7813 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
7604 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; 7814 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
7605 7815
7606 mutex_lock(&src_ctx->mutex); 7816 /*
7817 * See perf_event_ctx_lock() for comments on the details
7818 * of swizzling perf_event::ctx.
7819 */
7820 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
7607 list_for_each_entry_safe(event, tmp, &src_ctx->event_list, 7821 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
7608 event_entry) { 7822 event_entry) {
7609 perf_remove_from_context(event, false); 7823 perf_remove_from_context(event, false);
@@ -7611,11 +7825,36 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7611 put_ctx(src_ctx); 7825 put_ctx(src_ctx);
7612 list_add(&event->migrate_entry, &events); 7826 list_add(&event->migrate_entry, &events);
7613 } 7827 }
7614 mutex_unlock(&src_ctx->mutex);
7615 7828
7829 /*
7830 * Wait for the events to quiesce before re-instating them.
7831 */
7616 synchronize_rcu(); 7832 synchronize_rcu();
7617 7833
7618 mutex_lock(&dst_ctx->mutex); 7834 /*
7835 * Re-instate events in 2 passes.
7836 *
7837 * Skip over group leaders and only install siblings on this first
7838 * pass, siblings will not get enabled without a leader, however a
7839 * leader will enable its siblings, even if those are still on the old
7840 * context.
7841 */
7842 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
7843 if (event->group_leader == event)
7844 continue;
7845
7846 list_del(&event->migrate_entry);
7847 if (event->state >= PERF_EVENT_STATE_OFF)
7848 event->state = PERF_EVENT_STATE_INACTIVE;
7849 account_event_cpu(event, dst_cpu);
7850 perf_install_in_context(dst_ctx, event, dst_cpu);
7851 get_ctx(dst_ctx);
7852 }
7853
7854 /*
7855 * Once all the siblings are setup properly, install the group leaders
7856 * to make it go.
7857 */
7619 list_for_each_entry_safe(event, tmp, &events, migrate_entry) { 7858 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
7620 list_del(&event->migrate_entry); 7859 list_del(&event->migrate_entry);
7621 if (event->state >= PERF_EVENT_STATE_OFF) 7860 if (event->state >= PERF_EVENT_STATE_OFF)
@@ -7625,6 +7864,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7625 get_ctx(dst_ctx); 7864 get_ctx(dst_ctx);
7626 } 7865 }
7627 mutex_unlock(&dst_ctx->mutex); 7866 mutex_unlock(&dst_ctx->mutex);
7867 mutex_unlock(&src_ctx->mutex);
7628} 7868}
7629EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); 7869EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
7630 7870
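[Editor's note: both the move_group path in sys_perf_event_open() and perf_pmu_migrate_context() now install group siblings before their leader, because installing a leader pulls in its whole sibling list and could otherwise drag siblings into the wrong context. A small sketch of that two-pass ordering over a flat array, with illustrative types rather than the kernel's lists:]

#include <stdio.h>

struct event {
	const char *name;
	struct event *group_leader;	/* points to itself for a leader */
};

static void install(struct event *ev)
{
	printf("install %s\n", ev->name);
}

/* Pass 1: siblings only -- they stay inert without their leader.
 * Pass 2: leaders -- installing a leader activates its whole group,
 * and by then every sibling already lives in the destination context. */
static void install_all(struct event **evs, int n)
{
	int i;

	for (i = 0; i < n; i++)
		if (evs[i]->group_leader != evs[i])
			install(evs[i]);

	for (i = 0; i < n; i++)
		if (evs[i]->group_leader == evs[i])
			install(evs[i]);
}

int main(void)
{
	struct event leader = { "leader", &leader };
	struct event sib1 = { "sibling1", &leader };
	struct event sib2 = { "sibling2", &leader };
	struct event *evs[] = { &sib1, &leader, &sib2 };

	install_all(evs, 3);	/* prints the siblings first, the leader last */
	return 0;
}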
@@ -7811,14 +8051,19 @@ static void perf_free_event(struct perf_event *event,
7811 8051
7812 put_event(parent); 8052 put_event(parent);
7813 8053
8054 raw_spin_lock_irq(&ctx->lock);
7814 perf_group_detach(event); 8055 perf_group_detach(event);
7815 list_del_event(event, ctx); 8056 list_del_event(event, ctx);
8057 raw_spin_unlock_irq(&ctx->lock);
7816 free_event(event); 8058 free_event(event);
7817} 8059}
7818 8060
7819/* 8061/*
7820 * free an unexposed, unused context as created by inheritance by 8062 * Free an unexposed, unused context as created by inheritance by
7821 * perf_event_init_task below, used by fork() in case of fail. 8063 * perf_event_init_task below, used by fork() in case of fail.
8064 *
8065 * Not all locks are strictly required, but take them anyway to be nice and
8066 * help out with the lockdep assertions.
7822 */ 8067 */
7823void perf_event_free_task(struct task_struct *task) 8068void perf_event_free_task(struct task_struct *task)
7824{ 8069{
@@ -8137,7 +8382,7 @@ static void __init perf_event_init_all_cpus(void)
8137 for_each_possible_cpu(cpu) { 8382 for_each_possible_cpu(cpu) {
8138 swhash = &per_cpu(swevent_htable, cpu); 8383 swhash = &per_cpu(swevent_htable, cpu);
8139 mutex_init(&swhash->hlist_mutex); 8384 mutex_init(&swhash->hlist_mutex);
8140 INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); 8385 INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
8141 } 8386 }
8142} 8387}
8143 8388
@@ -8158,22 +8403,11 @@ static void perf_event_init_cpu(int cpu)
8158} 8403}
8159 8404
8160#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC 8405#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
8161static void perf_pmu_rotate_stop(struct pmu *pmu)
8162{
8163 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
8164
8165 WARN_ON(!irqs_disabled());
8166
8167 list_del_init(&cpuctx->rotation_list);
8168}
8169
8170static void __perf_event_exit_context(void *__info) 8406static void __perf_event_exit_context(void *__info)
8171{ 8407{
8172 struct remove_event re = { .detach_group = true }; 8408 struct remove_event re = { .detach_group = true };
8173 struct perf_event_context *ctx = __info; 8409 struct perf_event_context *ctx = __info;
8174 8410
8175 perf_pmu_rotate_stop(ctx->pmu);
8176
8177 rcu_read_lock(); 8411 rcu_read_lock();
8178 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) 8412 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
8179 __perf_remove_from_context(&re); 8413 __perf_remove_from_context(&re);
@@ -8284,6 +8518,18 @@ void __init perf_event_init(void)
8284 != 1024); 8518 != 1024);
8285} 8519}
8286 8520
8521ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr,
8522 char *page)
8523{
8524 struct perf_pmu_events_attr *pmu_attr =
8525 container_of(attr, struct perf_pmu_events_attr, attr);
8526
8527 if (pmu_attr->event_str)
8528 return sprintf(page, "%s\n", pmu_attr->event_str);
8529
8530 return 0;
8531}
8532
8287static int __init perf_event_sysfs_init(void) 8533static int __init perf_event_sysfs_init(void)
8288{ 8534{
8289 struct pmu *pmu; 8535 struct pmu *pmu;