Diffstat (limited to 'kernel/events/core.c')
| -rw-r--r-- | kernel/events/core.c | 490 |
1 file changed, 368 insertions(+), 122 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 19efcf13375a..f04daabfd1cf 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
| @@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu) | |||
| 872 | pmu->pmu_enable(pmu); | 872 | pmu->pmu_enable(pmu); |
| 873 | } | 873 | } |
| 874 | 874 | ||
| 875 | static DEFINE_PER_CPU(struct list_head, rotation_list); | 875 | static DEFINE_PER_CPU(struct list_head, active_ctx_list); |
| 876 | 876 | ||
| 877 | /* | 877 | /* |
| 878 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | 878 | * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and |
| 879 | * because they're strictly cpu affine and rotate_start is called with IRQs | 879 | * perf_event_task_tick() are fully serialized because they're strictly cpu |
| 880 | * disabled, while rotate_context is called from IRQ context. | 880 | * affine and perf_event_ctx{activate,deactivate} are called with IRQs |
| 881 | * disabled, while perf_event_task_tick is called from IRQ context. | ||
| 881 | */ | 882 | */ |
| 882 | static void perf_pmu_rotate_start(struct pmu *pmu) | 883 | static void perf_event_ctx_activate(struct perf_event_context *ctx) |
| 883 | { | 884 | { |
| 884 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | 885 | struct list_head *head = this_cpu_ptr(&active_ctx_list); |
| 885 | struct list_head *head = this_cpu_ptr(&rotation_list); | ||
| 886 | 886 | ||
| 887 | WARN_ON(!irqs_disabled()); | 887 | WARN_ON(!irqs_disabled()); |
| 888 | 888 | ||
| 889 | if (list_empty(&cpuctx->rotation_list)) | 889 | WARN_ON(!list_empty(&ctx->active_ctx_list)); |
| 890 | list_add(&cpuctx->rotation_list, head); | 890 | |
| 891 | list_add(&ctx->active_ctx_list, head); | ||
| 892 | } | ||
| 893 | |||
| 894 | static void perf_event_ctx_deactivate(struct perf_event_context *ctx) | ||
| 895 | { | ||
| 896 | WARN_ON(!irqs_disabled()); | ||
| 897 | |||
| 898 | WARN_ON(list_empty(&ctx->active_ctx_list)); | ||
| 899 | |||
| 900 | list_del_init(&ctx->active_ctx_list); | ||
| 891 | } | 901 | } |
| 892 | 902 | ||
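The event_sched_in()/event_sched_out() and perf_event_task_tick() changes that drive these two helpers appear in later hunks of this patch; a condensed sketch of how the three pieces fit together (the sketch_* names are hypothetical, not part of the patch):

```c
/*
 * Illustrative sketch only: a context parks itself on this CPU's
 * active_ctx_list while it has at least one active event, so the tick
 * only walks contexts that actually need freq-adjust/unthrottle work.
 * Everything below runs with IRQs disabled on the local CPU, which is
 * the whole serialization story described in the comment above.
 */
static void sketch_event_start(struct perf_event_context *ctx)
{
	if (!ctx->nr_active++)			/* 0 -> 1: first active event */
		list_add(&ctx->active_ctx_list,
			 this_cpu_ptr(&active_ctx_list));
}

static void sketch_event_stop(struct perf_event_context *ctx)
{
	if (!--ctx->nr_active)			/* 1 -> 0: last active event */
		list_del_init(&ctx->active_ctx_list);
}

static void sketch_tick(void)
{
	struct perf_event_context *ctx, *tmp;

	list_for_each_entry_safe(ctx, tmp, this_cpu_ptr(&active_ctx_list),
				 active_ctx_list)
		perf_adjust_freq_unthr_context(ctx, 0);
}
```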
| 893 | static void get_ctx(struct perf_event_context *ctx) | 903 | static void get_ctx(struct perf_event_context *ctx) |
| @@ -907,6 +917,84 @@ static void put_ctx(struct perf_event_context *ctx) | |||
| 907 | } | 917 | } |
| 908 | 918 | ||
| 909 | /* | 919 | /* |
| 920 | * Because of perf_event::ctx migration in sys_perf_event_open::move_group and | ||
| 921 | * perf_pmu_migrate_context() we need some magic. | ||
| 922 | * | ||
| 923 | * Those places that change perf_event::ctx will hold both | ||
| 924 | * perf_event_ctx::mutex of the 'old' and 'new' ctx value. | ||
| 925 | * | ||
| 926 | * Lock ordering is by mutex address. There is one other site where | ||
| 927 | * perf_event_context::mutex nests and that is put_event(). But remember that | ||
| 928 | * that is a parent<->child context relation, and migration does not affect | ||
| 929 | * children, therefore these two orderings should not interact. | ||
| 930 | * | ||
| 931 | * The change in perf_event::ctx does not affect children (as claimed above) | ||
| 932 | * because the sys_perf_event_open() case will install a new event and break | ||
| 933 | * the ctx parent<->child relation, and perf_pmu_migrate_context() is only | ||
| 934 | * concerned with cpuctx and that doesn't have children. | ||
| 935 | * | ||
| 936 | * The places that change perf_event::ctx will issue: | ||
| 937 | * | ||
| 938 | * perf_remove_from_context(); | ||
| 939 | * synchronize_rcu(); | ||
| 940 | * perf_install_in_context(); | ||
| 941 | * | ||
| 942 | * to effect the change. The remove_from_context() + synchronize_rcu() should | ||
| 943 | * quiesce the event, after which we can install it in the new location. This | ||
| 944 | * means that only external vectors (perf_fops, prctl) can perturb the event | ||
| 945 | * while in transit. Therefore all such accessors should also acquire | ||
| 946 | * perf_event_context::mutex to serialize against this. | ||
| 947 | * | ||
| 948 | * However; because event->ctx can change while we're waiting to acquire | ||
| 949 | * ctx->mutex we must be careful and use the below perf_event_ctx_lock() | ||
| 950 | * function. | ||
| 951 | * | ||
| 952 | * Lock order: | ||
| 953 | * task_struct::perf_event_mutex | ||
| 954 | * perf_event_context::mutex | ||
| 955 | * perf_event_context::lock | ||
| 956 | * perf_event::child_mutex; | ||
| 957 | * perf_event::mmap_mutex | ||
| 958 | * mmap_sem | ||
| 959 | */ | ||
| 960 | static struct perf_event_context * | ||
| 961 | perf_event_ctx_lock_nested(struct perf_event *event, int nesting) | ||
| 962 | { | ||
| 963 | struct perf_event_context *ctx; | ||
| 964 | |||
| 965 | again: | ||
| 966 | rcu_read_lock(); | ||
| 967 | ctx = ACCESS_ONCE(event->ctx); | ||
| 968 | if (!atomic_inc_not_zero(&ctx->refcount)) { | ||
| 969 | rcu_read_unlock(); | ||
| 970 | goto again; | ||
| 971 | } | ||
| 972 | rcu_read_unlock(); | ||
| 973 | |||
| 974 | mutex_lock_nested(&ctx->mutex, nesting); | ||
| 975 | if (event->ctx != ctx) { | ||
| 976 | mutex_unlock(&ctx->mutex); | ||
| 977 | put_ctx(ctx); | ||
| 978 | goto again; | ||
| 979 | } | ||
| 980 | |||
| 981 | return ctx; | ||
| 982 | } | ||
| 983 | |||
| 984 | static inline struct perf_event_context * | ||
| 985 | perf_event_ctx_lock(struct perf_event *event) | ||
| 986 | { | ||
| 987 | return perf_event_ctx_lock_nested(event, 0); | ||
| 988 | } | ||
| 989 | |||
| 990 | static void perf_event_ctx_unlock(struct perf_event *event, | ||
| 991 | struct perf_event_context *ctx) | ||
| 992 | { | ||
| 993 | mutex_unlock(&ctx->mutex); | ||
| 994 | put_ctx(ctx); | ||
| 995 | } | ||
| 996 | |||
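The retry loop above guards against event->ctx changing while we sleep on ctx->mutex; the refcount-under-RCU dance additionally keeps the old context from being freed in the meantime. A minimal, runnable userspace analogue of just the recheck-after-lock part (all names invented, no RCU or refcounting):

```c
#include <pthread.h>
#include <stdatomic.h>

struct ctx   { pthread_mutex_t mutex; };
struct event { _Atomic(struct ctx *) ctx; };

/*
 * Lock the context an event currently points at. Because the pointer can
 * be switched while we block on the mutex, re-check it after locking and
 * retry if it moved. The kernel version additionally pins ctx with
 * atomic_inc_not_zero() under rcu_read_lock() before taking the mutex,
 * so that 'c' cannot be freed between the load and the lock.
 */
static struct ctx *event_ctx_lock(struct event *e)
{
	struct ctx *c;

	for (;;) {
		c = atomic_load(&e->ctx);
		pthread_mutex_lock(&c->mutex);
		if (atomic_load(&e->ctx) == c)
			return c;			/* still the right ctx */
		pthread_mutex_unlock(&c->mutex);	/* it moved: try again */
	}
}
```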
| 997 | /* | ||
| 910 | * This must be done under the ctx->lock, such as to serialize against | 998 | * This must be done under the ctx->lock, such as to serialize against |
| 911 | * context_equiv(), therefore we cannot call put_ctx() since that might end up | 999 | * context_equiv(), therefore we cannot call put_ctx() since that might end up |
| 912 | * calling scheduler related locks and ctx->lock nests inside those. | 1000 | * calling scheduler related locks and ctx->lock nests inside those. |
| @@ -1155,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) | |||
| 1155 | ctx->nr_branch_stack++; | 1243 | ctx->nr_branch_stack++; |
| 1156 | 1244 | ||
| 1157 | list_add_rcu(&event->event_entry, &ctx->event_list); | 1245 | list_add_rcu(&event->event_entry, &ctx->event_list); |
| 1158 | if (!ctx->nr_events) | ||
| 1159 | perf_pmu_rotate_start(ctx->pmu); | ||
| 1160 | ctx->nr_events++; | 1246 | ctx->nr_events++; |
| 1161 | if (event->attr.inherit_stat) | 1247 | if (event->attr.inherit_stat) |
| 1162 | ctx->nr_stat++; | 1248 | ctx->nr_stat++; |
| @@ -1275,6 +1361,8 @@ static void perf_group_attach(struct perf_event *event) | |||
| 1275 | if (group_leader == event) | 1361 | if (group_leader == event) |
| 1276 | return; | 1362 | return; |
| 1277 | 1363 | ||
| 1364 | WARN_ON_ONCE(group_leader->ctx != event->ctx); | ||
| 1365 | |||
| 1278 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && | 1366 | if (group_leader->group_flags & PERF_GROUP_SOFTWARE && |
| 1279 | !is_software_event(event)) | 1367 | !is_software_event(event)) |
| 1280 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; | 1368 | group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; |
| @@ -1296,6 +1384,10 @@ static void | |||
| 1296 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) | 1384 | list_del_event(struct perf_event *event, struct perf_event_context *ctx) |
| 1297 | { | 1385 | { |
| 1298 | struct perf_cpu_context *cpuctx; | 1386 | struct perf_cpu_context *cpuctx; |
| 1387 | |||
| 1388 | WARN_ON_ONCE(event->ctx != ctx); | ||
| 1389 | lockdep_assert_held(&ctx->lock); | ||
| 1390 | |||
| 1299 | /* | 1391 | /* |
| 1300 | * We can have double detach due to exit/hot-unplug + close. | 1392 | * We can have double detach due to exit/hot-unplug + close. |
| 1301 | */ | 1393 | */ |
| @@ -1380,6 +1472,8 @@ static void perf_group_detach(struct perf_event *event) | |||
| 1380 | 1472 | ||
| 1381 | /* Inherit group flags from the previous leader */ | 1473 | /* Inherit group flags from the previous leader */ |
| 1382 | sibling->group_flags = event->group_flags; | 1474 | sibling->group_flags = event->group_flags; |
| 1475 | |||
| 1476 | WARN_ON_ONCE(sibling->ctx != event->ctx); | ||
| 1383 | } | 1477 | } |
| 1384 | 1478 | ||
| 1385 | out: | 1479 | out: |
| @@ -1442,6 +1536,10 @@ event_sched_out(struct perf_event *event, | |||
| 1442 | { | 1536 | { |
| 1443 | u64 tstamp = perf_event_time(event); | 1537 | u64 tstamp = perf_event_time(event); |
| 1444 | u64 delta; | 1538 | u64 delta; |
| 1539 | |||
| 1540 | WARN_ON_ONCE(event->ctx != ctx); | ||
| 1541 | lockdep_assert_held(&ctx->lock); | ||
| 1542 | |||
| 1445 | /* | 1543 | /* |
| 1446 | * An event which could not be activated because of | 1544 | * An event which could not be activated because of |
| 1447 | * filter mismatch still needs to have its timings | 1545 | * filter mismatch still needs to have its timings |
| @@ -1471,7 +1569,8 @@ event_sched_out(struct perf_event *event, | |||
| 1471 | 1569 | ||
| 1472 | if (!is_software_event(event)) | 1570 | if (!is_software_event(event)) |
| 1473 | cpuctx->active_oncpu--; | 1571 | cpuctx->active_oncpu--; |
| 1474 | ctx->nr_active--; | 1572 | if (!--ctx->nr_active) |
| 1573 | perf_event_ctx_deactivate(ctx); | ||
| 1475 | if (event->attr.freq && event->attr.sample_freq) | 1574 | if (event->attr.freq && event->attr.sample_freq) |
| 1476 | ctx->nr_freq--; | 1575 | ctx->nr_freq--; |
| 1477 | if (event->attr.exclusive || !cpuctx->active_oncpu) | 1576 | if (event->attr.exclusive || !cpuctx->active_oncpu) |
| @@ -1654,7 +1753,7 @@ int __perf_event_disable(void *info) | |||
| 1654 | * is the current context on this CPU and preemption is disabled, | 1753 | * is the current context on this CPU and preemption is disabled, |
| 1655 | * hence we can't get into perf_event_task_sched_out for this context. | 1754 | * hence we can't get into perf_event_task_sched_out for this context. |
| 1656 | */ | 1755 | */ |
| 1657 | void perf_event_disable(struct perf_event *event) | 1756 | static void _perf_event_disable(struct perf_event *event) |
| 1658 | { | 1757 | { |
| 1659 | struct perf_event_context *ctx = event->ctx; | 1758 | struct perf_event_context *ctx = event->ctx; |
| 1660 | struct task_struct *task = ctx->task; | 1759 | struct task_struct *task = ctx->task; |
| @@ -1695,6 +1794,19 @@ retry: | |||
| 1695 | } | 1794 | } |
| 1696 | raw_spin_unlock_irq(&ctx->lock); | 1795 | raw_spin_unlock_irq(&ctx->lock); |
| 1697 | } | 1796 | } |
| 1797 | |||
| 1798 | /* | ||
| 1799 | * Strictly speaking kernel users cannot create groups and therefore this | ||
| 1800 | * interface does not need the perf_event_ctx_lock() magic. | ||
| 1801 | */ | ||
| 1802 | void perf_event_disable(struct perf_event *event) | ||
| 1803 | { | ||
| 1804 | struct perf_event_context *ctx; | ||
| 1805 | |||
| 1806 | ctx = perf_event_ctx_lock(event); | ||
| 1807 | _perf_event_disable(event); | ||
| 1808 | perf_event_ctx_unlock(event, ctx); | ||
| 1809 | } | ||
| 1698 | EXPORT_SYMBOL_GPL(perf_event_disable); | 1810 | EXPORT_SYMBOL_GPL(perf_event_disable); |
| 1699 | 1811 | ||
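The split into _perf_event_disable() (lock already held) and the exported perf_event_disable() (takes the lock itself) is the convention the rest of the patch follows for enable, refresh, reset and the ioctl path. A sketch of what an internal call site looks like under this convention (the function name is hypothetical):

```c
/*
 * Hypothetical internal caller: take ctx->mutex once via
 * perf_event_ctx_lock() and then use the _-prefixed helpers, which
 * assume the lock is held rather than re-acquiring it.
 */
static void sketch_toggle_event(struct perf_event *event)
{
	struct perf_event_context *ctx = perf_event_ctx_lock(event);

	_perf_event_disable(event);
	/* ... manipulate event state while event->ctx cannot change ... */
	perf_event_ctx_unlock(event, ctx);
}
```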
| 1700 | static void perf_set_shadow_time(struct perf_event *event, | 1812 | static void perf_set_shadow_time(struct perf_event *event, |
| @@ -1782,7 +1894,8 @@ event_sched_in(struct perf_event *event, | |||
| 1782 | 1894 | ||
| 1783 | if (!is_software_event(event)) | 1895 | if (!is_software_event(event)) |
| 1784 | cpuctx->active_oncpu++; | 1896 | cpuctx->active_oncpu++; |
| 1785 | ctx->nr_active++; | 1897 | if (!ctx->nr_active++) |
| 1898 | perf_event_ctx_activate(ctx); | ||
| 1786 | if (event->attr.freq && event->attr.sample_freq) | 1899 | if (event->attr.freq && event->attr.sample_freq) |
| 1787 | ctx->nr_freq++; | 1900 | ctx->nr_freq++; |
| 1788 | 1901 | ||
| @@ -2158,7 +2271,7 @@ unlock: | |||
| 2158 | * perf_event_for_each_child or perf_event_for_each as described | 2271 | * perf_event_for_each_child or perf_event_for_each as described |
| 2159 | * for perf_event_disable. | 2272 | * for perf_event_disable. |
| 2160 | */ | 2273 | */ |
| 2161 | void perf_event_enable(struct perf_event *event) | 2274 | static void _perf_event_enable(struct perf_event *event) |
| 2162 | { | 2275 | { |
| 2163 | struct perf_event_context *ctx = event->ctx; | 2276 | struct perf_event_context *ctx = event->ctx; |
| 2164 | struct task_struct *task = ctx->task; | 2277 | struct task_struct *task = ctx->task; |
| @@ -2214,9 +2327,21 @@ retry: | |||
| 2214 | out: | 2327 | out: |
| 2215 | raw_spin_unlock_irq(&ctx->lock); | 2328 | raw_spin_unlock_irq(&ctx->lock); |
| 2216 | } | 2329 | } |
| 2330 | |||
| 2331 | /* | ||
| 2332 | * See perf_event_disable(); | ||
| 2333 | */ | ||
| 2334 | void perf_event_enable(struct perf_event *event) | ||
| 2335 | { | ||
| 2336 | struct perf_event_context *ctx; | ||
| 2337 | |||
| 2338 | ctx = perf_event_ctx_lock(event); | ||
| 2339 | _perf_event_enable(event); | ||
| 2340 | perf_event_ctx_unlock(event, ctx); | ||
| 2341 | } | ||
| 2217 | EXPORT_SYMBOL_GPL(perf_event_enable); | 2342 | EXPORT_SYMBOL_GPL(perf_event_enable); |
| 2218 | 2343 | ||
| 2219 | int perf_event_refresh(struct perf_event *event, int refresh) | 2344 | static int _perf_event_refresh(struct perf_event *event, int refresh) |
| 2220 | { | 2345 | { |
| 2221 | /* | 2346 | /* |
| 2222 | * not supported on inherited events | 2347 | * not supported on inherited events |
| @@ -2225,10 +2350,25 @@ int perf_event_refresh(struct perf_event *event, int refresh) | |||
| 2225 | return -EINVAL; | 2350 | return -EINVAL; |
| 2226 | 2351 | ||
| 2227 | atomic_add(refresh, &event->event_limit); | 2352 | atomic_add(refresh, &event->event_limit); |
| 2228 | perf_event_enable(event); | 2353 | _perf_event_enable(event); |
| 2229 | 2354 | ||
| 2230 | return 0; | 2355 | return 0; |
| 2231 | } | 2356 | } |
| 2357 | |||
| 2358 | /* | ||
| 2359 | * See perf_event_disable() | ||
| 2360 | */ | ||
| 2361 | int perf_event_refresh(struct perf_event *event, int refresh) | ||
| 2362 | { | ||
| 2363 | struct perf_event_context *ctx; | ||
| 2364 | int ret; | ||
| 2365 | |||
| 2366 | ctx = perf_event_ctx_lock(event); | ||
| 2367 | ret = _perf_event_refresh(event, refresh); | ||
| 2368 | perf_event_ctx_unlock(event, ctx); | ||
| 2369 | |||
| 2370 | return ret; | ||
| 2371 | } | ||
| 2232 | EXPORT_SYMBOL_GPL(perf_event_refresh); | 2372 | EXPORT_SYMBOL_GPL(perf_event_refresh); |
| 2233 | 2373 | ||
| 2234 | static void ctx_sched_out(struct perf_event_context *ctx, | 2374 | static void ctx_sched_out(struct perf_event_context *ctx, |
| @@ -2612,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, | |||
| 2612 | 2752 | ||
| 2613 | perf_pmu_enable(ctx->pmu); | 2753 | perf_pmu_enable(ctx->pmu); |
| 2614 | perf_ctx_unlock(cpuctx, ctx); | 2754 | perf_ctx_unlock(cpuctx, ctx); |
| 2615 | |||
| 2616 | /* | ||
| 2617 | * Since these rotations are per-cpu, we need to ensure the | ||
| 2618 | * cpu-context we got scheduled on is actually rotating. | ||
| 2619 | */ | ||
| 2620 | perf_pmu_rotate_start(ctx->pmu); | ||
| 2621 | } | 2755 | } |
| 2622 | 2756 | ||
| 2623 | /* | 2757 | /* |
| @@ -2905,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx) | |||
| 2905 | list_rotate_left(&ctx->flexible_groups); | 3039 | list_rotate_left(&ctx->flexible_groups); |
| 2906 | } | 3040 | } |
| 2907 | 3041 | ||
| 2908 | /* | ||
| 2909 | * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized | ||
| 2910 | * because they're strictly cpu affine and rotate_start is called with IRQs | ||
| 2911 | * disabled, while rotate_context is called from IRQ context. | ||
| 2912 | */ | ||
| 2913 | static int perf_rotate_context(struct perf_cpu_context *cpuctx) | 3042 | static int perf_rotate_context(struct perf_cpu_context *cpuctx) |
| 2914 | { | 3043 | { |
| 2915 | struct perf_event_context *ctx = NULL; | 3044 | struct perf_event_context *ctx = NULL; |
| 2916 | int rotate = 0, remove = 1; | 3045 | int rotate = 0; |
| 2917 | 3046 | ||
| 2918 | if (cpuctx->ctx.nr_events) { | 3047 | if (cpuctx->ctx.nr_events) { |
| 2919 | remove = 0; | ||
| 2920 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) | 3048 | if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) |
| 2921 | rotate = 1; | 3049 | rotate = 1; |
| 2922 | } | 3050 | } |
| 2923 | 3051 | ||
| 2924 | ctx = cpuctx->task_ctx; | 3052 | ctx = cpuctx->task_ctx; |
| 2925 | if (ctx && ctx->nr_events) { | 3053 | if (ctx && ctx->nr_events) { |
| 2926 | remove = 0; | ||
| 2927 | if (ctx->nr_events != ctx->nr_active) | 3054 | if (ctx->nr_events != ctx->nr_active) |
| 2928 | rotate = 1; | 3055 | rotate = 1; |
| 2929 | } | 3056 | } |
| @@ -2947,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx) | |||
| 2947 | perf_pmu_enable(cpuctx->ctx.pmu); | 3074 | perf_pmu_enable(cpuctx->ctx.pmu); |
| 2948 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); | 3075 | perf_ctx_unlock(cpuctx, cpuctx->task_ctx); |
| 2949 | done: | 3076 | done: |
| 2950 | if (remove) | ||
| 2951 | list_del_init(&cpuctx->rotation_list); | ||
| 2952 | 3077 | ||
| 2953 | return rotate; | 3078 | return rotate; |
| 2954 | } | 3079 | } |
| @@ -2966,9 +3091,8 @@ bool perf_event_can_stop_tick(void) | |||
| 2966 | 3091 | ||
| 2967 | void perf_event_task_tick(void) | 3092 | void perf_event_task_tick(void) |
| 2968 | { | 3093 | { |
| 2969 | struct list_head *head = this_cpu_ptr(&rotation_list); | 3094 | struct list_head *head = this_cpu_ptr(&active_ctx_list); |
| 2970 | struct perf_cpu_context *cpuctx, *tmp; | 3095 | struct perf_event_context *ctx, *tmp; |
| 2971 | struct perf_event_context *ctx; | ||
| 2972 | int throttled; | 3096 | int throttled; |
| 2973 | 3097 | ||
| 2974 | WARN_ON(!irqs_disabled()); | 3098 | WARN_ON(!irqs_disabled()); |
| @@ -2976,14 +3100,8 @@ void perf_event_task_tick(void) | |||
| 2976 | __this_cpu_inc(perf_throttled_seq); | 3100 | __this_cpu_inc(perf_throttled_seq); |
| 2977 | throttled = __this_cpu_xchg(perf_throttled_count, 0); | 3101 | throttled = __this_cpu_xchg(perf_throttled_count, 0); |
| 2978 | 3102 | ||
| 2979 | list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { | 3103 | list_for_each_entry_safe(ctx, tmp, head, active_ctx_list) |
| 2980 | ctx = &cpuctx->ctx; | ||
| 2981 | perf_adjust_freq_unthr_context(ctx, throttled); | 3104 | perf_adjust_freq_unthr_context(ctx, throttled); |
| 2982 | |||
| 2983 | ctx = cpuctx->task_ctx; | ||
| 2984 | if (ctx) | ||
| 2985 | perf_adjust_freq_unthr_context(ctx, throttled); | ||
| 2986 | } | ||
| 2987 | } | 3105 | } |
| 2988 | 3106 | ||
| 2989 | static int event_enable_on_exec(struct perf_event *event, | 3107 | static int event_enable_on_exec(struct perf_event *event, |
| @@ -3142,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) | |||
| 3142 | { | 3260 | { |
| 3143 | raw_spin_lock_init(&ctx->lock); | 3261 | raw_spin_lock_init(&ctx->lock); |
| 3144 | mutex_init(&ctx->mutex); | 3262 | mutex_init(&ctx->mutex); |
| 3263 | INIT_LIST_HEAD(&ctx->active_ctx_list); | ||
| 3145 | INIT_LIST_HEAD(&ctx->pinned_groups); | 3264 | INIT_LIST_HEAD(&ctx->pinned_groups); |
| 3146 | INIT_LIST_HEAD(&ctx->flexible_groups); | 3265 | INIT_LIST_HEAD(&ctx->flexible_groups); |
| 3147 | INIT_LIST_HEAD(&ctx->event_list); | 3266 | INIT_LIST_HEAD(&ctx->event_list); |
| @@ -3421,7 +3540,16 @@ static void perf_remove_from_owner(struct perf_event *event) | |||
| 3421 | rcu_read_unlock(); | 3540 | rcu_read_unlock(); |
| 3422 | 3541 | ||
| 3423 | if (owner) { | 3542 | if (owner) { |
| 3424 | mutex_lock(&owner->perf_event_mutex); | 3543 | /* |
| 3544 | * If we're here through perf_event_exit_task() we're already | ||
| 3545 | * holding ctx->mutex which would be an inversion wrt. the | ||
| 3546 | * normal lock order. | ||
| 3547 | * | ||
| 3548 | * However we can safely take this lock because its the child | ||
| 3549 | * ctx->mutex. | ||
| 3550 | */ | ||
| 3551 | mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING); | ||
| 3552 | |||
| 3425 | /* | 3553 | /* |
| 3426 | * We have to re-check the event->owner field, if it is cleared | 3554 | * We have to re-check the event->owner field, if it is cleared |
| 3427 | * we raced with perf_event_exit_task(), acquiring the mutex | 3555 | * we raced with perf_event_exit_task(), acquiring the mutex |
| @@ -3440,7 +3568,7 @@ static void perf_remove_from_owner(struct perf_event *event) | |||
| 3440 | */ | 3568 | */ |
| 3441 | static void put_event(struct perf_event *event) | 3569 | static void put_event(struct perf_event *event) |
| 3442 | { | 3570 | { |
| 3443 | struct perf_event_context *ctx = event->ctx; | 3571 | struct perf_event_context *ctx; |
| 3444 | 3572 | ||
| 3445 | if (!atomic_long_dec_and_test(&event->refcount)) | 3573 | if (!atomic_long_dec_and_test(&event->refcount)) |
| 3446 | return; | 3574 | return; |
| @@ -3448,7 +3576,6 @@ static void put_event(struct perf_event *event) | |||
| 3448 | if (!is_kernel_event(event)) | 3576 | if (!is_kernel_event(event)) |
| 3449 | perf_remove_from_owner(event); | 3577 | perf_remove_from_owner(event); |
| 3450 | 3578 | ||
| 3451 | WARN_ON_ONCE(ctx->parent_ctx); | ||
| 3452 | /* | 3579 | /* |
| 3453 | * There are two ways this annotation is useful: | 3580 | * There are two ways this annotation is useful: |
| 3454 | * | 3581 | * |
| @@ -3461,7 +3588,8 @@ static void put_event(struct perf_event *event) | |||
| 3461 | * the last filedesc died, so there is no possibility | 3588 | * the last filedesc died, so there is no possibility |
| 3462 | * to trigger the AB-BA case. | 3589 | * to trigger the AB-BA case. |
| 3463 | */ | 3590 | */ |
| 3464 | mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); | 3591 | ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING); |
| 3592 | WARN_ON_ONCE(ctx->parent_ctx); | ||
| 3465 | perf_remove_from_context(event, true); | 3593 | perf_remove_from_context(event, true); |
| 3466 | mutex_unlock(&ctx->mutex); | 3594 | mutex_unlock(&ctx->mutex); |
| 3467 | 3595 | ||
| @@ -3547,12 +3675,13 @@ static int perf_event_read_group(struct perf_event *event, | |||
| 3547 | u64 read_format, char __user *buf) | 3675 | u64 read_format, char __user *buf) |
| 3548 | { | 3676 | { |
| 3549 | struct perf_event *leader = event->group_leader, *sub; | 3677 | struct perf_event *leader = event->group_leader, *sub; |
| 3550 | int n = 0, size = 0, ret = -EFAULT; | ||
| 3551 | struct perf_event_context *ctx = leader->ctx; | 3678 | struct perf_event_context *ctx = leader->ctx; |
| 3552 | u64 values[5]; | 3679 | int n = 0, size = 0, ret; |
| 3553 | u64 count, enabled, running; | 3680 | u64 count, enabled, running; |
| 3681 | u64 values[5]; | ||
| 3682 | |||
| 3683 | lockdep_assert_held(&ctx->mutex); | ||
| 3554 | 3684 | ||
| 3555 | mutex_lock(&ctx->mutex); | ||
| 3556 | count = perf_event_read_value(leader, &enabled, &running); | 3685 | count = perf_event_read_value(leader, &enabled, &running); |
| 3557 | 3686 | ||
| 3558 | values[n++] = 1 + leader->nr_siblings; | 3687 | values[n++] = 1 + leader->nr_siblings; |
| @@ -3567,7 +3696,7 @@ static int perf_event_read_group(struct perf_event *event, | |||
| 3567 | size = n * sizeof(u64); | 3696 | size = n * sizeof(u64); |
| 3568 | 3697 | ||
| 3569 | if (copy_to_user(buf, values, size)) | 3698 | if (copy_to_user(buf, values, size)) |
| 3570 | goto unlock; | 3699 | return -EFAULT; |
| 3571 | 3700 | ||
| 3572 | ret = size; | 3701 | ret = size; |
| 3573 | 3702 | ||
| @@ -3581,14 +3710,11 @@ static int perf_event_read_group(struct perf_event *event, | |||
| 3581 | size = n * sizeof(u64); | 3710 | size = n * sizeof(u64); |
| 3582 | 3711 | ||
| 3583 | if (copy_to_user(buf + ret, values, size)) { | 3712 | if (copy_to_user(buf + ret, values, size)) { |
| 3584 | ret = -EFAULT; | 3713 | return -EFAULT; |
| 3585 | goto unlock; | ||
| 3586 | } | 3714 | } |
| 3587 | 3715 | ||
| 3588 | ret += size; | 3716 | ret += size; |
| 3589 | } | 3717 | } |
| 3590 | unlock: | ||
| 3591 | mutex_unlock(&ctx->mutex); | ||
| 3592 | 3718 | ||
| 3593 | return ret; | 3719 | return ret; |
| 3594 | } | 3720 | } |
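perf_event_read_group() now relies on its caller (perf_read(), a few hunks below) to hold ctx->mutex, and records that requirement with lockdep_assert_held() instead of locking again. The shape of the convention, as a hypothetical helper:

```c
/*
 * Hypothetical helper following the same convention: it does not take
 * ctx->mutex itself, it only documents and (with CONFIG_LOCKDEP enabled)
 * verifies that the caller already holds it.
 */
static u64 sketch_read_group_locked(struct perf_event_context *ctx)
{
	lockdep_assert_held(&ctx->mutex);
	/* ... walk the group under the caller's critical section ... */
	return 0;
}
```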
| @@ -3660,8 +3786,14 @@ static ssize_t | |||
| 3660 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | 3786 | perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) |
| 3661 | { | 3787 | { |
| 3662 | struct perf_event *event = file->private_data; | 3788 | struct perf_event *event = file->private_data; |
| 3789 | struct perf_event_context *ctx; | ||
| 3790 | int ret; | ||
| 3791 | |||
| 3792 | ctx = perf_event_ctx_lock(event); | ||
| 3793 | ret = perf_read_hw(event, buf, count); | ||
| 3794 | perf_event_ctx_unlock(event, ctx); | ||
| 3663 | 3795 | ||
| 3664 | return perf_read_hw(event, buf, count); | 3796 | return ret; |
| 3665 | } | 3797 | } |
| 3666 | 3798 | ||
| 3667 | static unsigned int perf_poll(struct file *file, poll_table *wait) | 3799 | static unsigned int perf_poll(struct file *file, poll_table *wait) |
| @@ -3687,7 +3819,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) | |||
| 3687 | return events; | 3819 | return events; |
| 3688 | } | 3820 | } |
| 3689 | 3821 | ||
| 3690 | static void perf_event_reset(struct perf_event *event) | 3822 | static void _perf_event_reset(struct perf_event *event) |
| 3691 | { | 3823 | { |
| 3692 | (void)perf_event_read(event); | 3824 | (void)perf_event_read(event); |
| 3693 | local64_set(&event->count, 0); | 3825 | local64_set(&event->count, 0); |
| @@ -3706,6 +3838,7 @@ static void perf_event_for_each_child(struct perf_event *event, | |||
| 3706 | struct perf_event *child; | 3838 | struct perf_event *child; |
| 3707 | 3839 | ||
| 3708 | WARN_ON_ONCE(event->ctx->parent_ctx); | 3840 | WARN_ON_ONCE(event->ctx->parent_ctx); |
| 3841 | |||
| 3709 | mutex_lock(&event->child_mutex); | 3842 | mutex_lock(&event->child_mutex); |
| 3710 | func(event); | 3843 | func(event); |
| 3711 | list_for_each_entry(child, &event->child_list, child_list) | 3844 | list_for_each_entry(child, &event->child_list, child_list) |
| @@ -3719,14 +3852,13 @@ static void perf_event_for_each(struct perf_event *event, | |||
| 3719 | struct perf_event_context *ctx = event->ctx; | 3852 | struct perf_event_context *ctx = event->ctx; |
| 3720 | struct perf_event *sibling; | 3853 | struct perf_event *sibling; |
| 3721 | 3854 | ||
| 3722 | WARN_ON_ONCE(ctx->parent_ctx); | 3855 | lockdep_assert_held(&ctx->mutex); |
| 3723 | mutex_lock(&ctx->mutex); | 3856 | |
| 3724 | event = event->group_leader; | 3857 | event = event->group_leader; |
| 3725 | 3858 | ||
| 3726 | perf_event_for_each_child(event, func); | 3859 | perf_event_for_each_child(event, func); |
| 3727 | list_for_each_entry(sibling, &event->sibling_list, group_entry) | 3860 | list_for_each_entry(sibling, &event->sibling_list, group_entry) |
| 3728 | perf_event_for_each_child(sibling, func); | 3861 | perf_event_for_each_child(sibling, func); |
| 3729 | mutex_unlock(&ctx->mutex); | ||
| 3730 | } | 3862 | } |
| 3731 | 3863 | ||
| 3732 | static int perf_event_period(struct perf_event *event, u64 __user *arg) | 3864 | static int perf_event_period(struct perf_event *event, u64 __user *arg) |
| @@ -3796,25 +3928,24 @@ static int perf_event_set_output(struct perf_event *event, | |||
| 3796 | struct perf_event *output_event); | 3928 | struct perf_event *output_event); |
| 3797 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); | 3929 | static int perf_event_set_filter(struct perf_event *event, void __user *arg); |
| 3798 | 3930 | ||
| 3799 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | 3931 | static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg) |
| 3800 | { | 3932 | { |
| 3801 | struct perf_event *event = file->private_data; | ||
| 3802 | void (*func)(struct perf_event *); | 3933 | void (*func)(struct perf_event *); |
| 3803 | u32 flags = arg; | 3934 | u32 flags = arg; |
| 3804 | 3935 | ||
| 3805 | switch (cmd) { | 3936 | switch (cmd) { |
| 3806 | case PERF_EVENT_IOC_ENABLE: | 3937 | case PERF_EVENT_IOC_ENABLE: |
| 3807 | func = perf_event_enable; | 3938 | func = _perf_event_enable; |
| 3808 | break; | 3939 | break; |
| 3809 | case PERF_EVENT_IOC_DISABLE: | 3940 | case PERF_EVENT_IOC_DISABLE: |
| 3810 | func = perf_event_disable; | 3941 | func = _perf_event_disable; |
| 3811 | break; | 3942 | break; |
| 3812 | case PERF_EVENT_IOC_RESET: | 3943 | case PERF_EVENT_IOC_RESET: |
| 3813 | func = perf_event_reset; | 3944 | func = _perf_event_reset; |
| 3814 | break; | 3945 | break; |
| 3815 | 3946 | ||
| 3816 | case PERF_EVENT_IOC_REFRESH: | 3947 | case PERF_EVENT_IOC_REFRESH: |
| 3817 | return perf_event_refresh(event, arg); | 3948 | return _perf_event_refresh(event, arg); |
| 3818 | 3949 | ||
| 3819 | case PERF_EVENT_IOC_PERIOD: | 3950 | case PERF_EVENT_IOC_PERIOD: |
| 3820 | return perf_event_period(event, (u64 __user *)arg); | 3951 | return perf_event_period(event, (u64 __user *)arg); |
| @@ -3861,6 +3992,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 3861 | return 0; | 3992 | return 0; |
| 3862 | } | 3993 | } |
| 3863 | 3994 | ||
| 3995 | static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
| 3996 | { | ||
| 3997 | struct perf_event *event = file->private_data; | ||
| 3998 | struct perf_event_context *ctx; | ||
| 3999 | long ret; | ||
| 4000 | |||
| 4001 | ctx = perf_event_ctx_lock(event); | ||
| 4002 | ret = _perf_ioctl(event, cmd, arg); | ||
| 4003 | perf_event_ctx_unlock(event, ctx); | ||
| 4004 | |||
| 4005 | return ret; | ||
| 4006 | } | ||
| 4007 | |||
| 3864 | #ifdef CONFIG_COMPAT | 4008 | #ifdef CONFIG_COMPAT |
| 3865 | static long perf_compat_ioctl(struct file *file, unsigned int cmd, | 4009 | static long perf_compat_ioctl(struct file *file, unsigned int cmd, |
| 3866 | unsigned long arg) | 4010 | unsigned long arg) |
| @@ -3883,11 +4027,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd, | |||
| 3883 | 4027 | ||
| 3884 | int perf_event_task_enable(void) | 4028 | int perf_event_task_enable(void) |
| 3885 | { | 4029 | { |
| 4030 | struct perf_event_context *ctx; | ||
| 3886 | struct perf_event *event; | 4031 | struct perf_event *event; |
| 3887 | 4032 | ||
| 3888 | mutex_lock(¤t->perf_event_mutex); | 4033 | mutex_lock(¤t->perf_event_mutex); |
| 3889 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) | 4034 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { |
| 3890 | perf_event_for_each_child(event, perf_event_enable); | 4035 | ctx = perf_event_ctx_lock(event); |
| 4036 | perf_event_for_each_child(event, _perf_event_enable); | ||
| 4037 | perf_event_ctx_unlock(event, ctx); | ||
| 4038 | } | ||
| 3891 | mutex_unlock(¤t->perf_event_mutex); | 4039 | mutex_unlock(¤t->perf_event_mutex); |
| 3892 | 4040 | ||
| 3893 | return 0; | 4041 | return 0; |
| @@ -3895,11 +4043,15 @@ int perf_event_task_enable(void) | |||
| 3895 | 4043 | ||
| 3896 | int perf_event_task_disable(void) | 4044 | int perf_event_task_disable(void) |
| 3897 | { | 4045 | { |
| 4046 | struct perf_event_context *ctx; | ||
| 3898 | struct perf_event *event; | 4047 | struct perf_event *event; |
| 3899 | 4048 | ||
| 3900 | mutex_lock(¤t->perf_event_mutex); | 4049 | mutex_lock(¤t->perf_event_mutex); |
| 3901 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) | 4050 | list_for_each_entry(event, ¤t->perf_event_list, owner_entry) { |
| 3902 | perf_event_for_each_child(event, perf_event_disable); | 4051 | ctx = perf_event_ctx_lock(event); |
| 4052 | perf_event_for_each_child(event, _perf_event_disable); | ||
| 4053 | perf_event_ctx_unlock(event, ctx); | ||
| 4054 | } | ||
| 3903 | mutex_unlock(¤t->perf_event_mutex); | 4055 | mutex_unlock(¤t->perf_event_mutex); |
| 3904 | 4056 | ||
| 3905 | return 0; | 4057 | return 0; |
| @@ -3949,7 +4101,8 @@ unlock: | |||
| 3949 | rcu_read_unlock(); | 4101 | rcu_read_unlock(); |
| 3950 | } | 4102 | } |
| 3951 | 4103 | ||
| 3952 | void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 4104 | void __weak arch_perf_update_userpage( |
| 4105 | struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) | ||
| 3953 | { | 4106 | { |
| 3954 | } | 4107 | } |
| 3955 | 4108 | ||
| @@ -3999,7 +4152,7 @@ void perf_event_update_userpage(struct perf_event *event) | |||
| 3999 | userpg->time_running = running + | 4152 | userpg->time_running = running + |
| 4000 | atomic64_read(&event->child_total_time_running); | 4153 | atomic64_read(&event->child_total_time_running); |
| 4001 | 4154 | ||
| 4002 | arch_perf_update_userpage(userpg, now); | 4155 | arch_perf_update_userpage(event, userpg, now); |
| 4003 | 4156 | ||
| 4004 | barrier(); | 4157 | barrier(); |
| 4005 | ++userpg->lock; | 4158 | ++userpg->lock; |
| @@ -4141,6 +4294,9 @@ static void perf_mmap_open(struct vm_area_struct *vma) | |||
| 4141 | 4294 | ||
| 4142 | atomic_inc(&event->mmap_count); | 4295 | atomic_inc(&event->mmap_count); |
| 4143 | atomic_inc(&event->rb->mmap_count); | 4296 | atomic_inc(&event->rb->mmap_count); |
| 4297 | |||
| 4298 | if (event->pmu->event_mapped) | ||
| 4299 | event->pmu->event_mapped(event); | ||
| 4144 | } | 4300 | } |
| 4145 | 4301 | ||
| 4146 | /* | 4302 | /* |
| @@ -4160,6 +4316,9 @@ static void perf_mmap_close(struct vm_area_struct *vma) | |||
| 4160 | int mmap_locked = rb->mmap_locked; | 4316 | int mmap_locked = rb->mmap_locked; |
| 4161 | unsigned long size = perf_data_size(rb); | 4317 | unsigned long size = perf_data_size(rb); |
| 4162 | 4318 | ||
| 4319 | if (event->pmu->event_unmapped) | ||
| 4320 | event->pmu->event_unmapped(event); | ||
| 4321 | |||
| 4163 | atomic_dec(&rb->mmap_count); | 4322 | atomic_dec(&rb->mmap_count); |
| 4164 | 4323 | ||
| 4165 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) | 4324 | if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) |
| @@ -4361,6 +4520,9 @@ unlock: | |||
| 4361 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; | 4520 | vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP; |
| 4362 | vma->vm_ops = &perf_mmap_vmops; | 4521 | vma->vm_ops = &perf_mmap_vmops; |
| 4363 | 4522 | ||
| 4523 | if (event->pmu->event_mapped) | ||
| 4524 | event->pmu->event_mapped(event); | ||
| 4525 | |||
| 4364 | return ret; | 4526 | return ret; |
| 4365 | } | 4527 | } |
| 4366 | 4528 | ||
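The new event_mapped/event_unmapped hooks let a PMU react when one of its events is mmap()ed or unmapped. A hypothetical PMU-side sketch; the callback names and signatures come from this patch, everything else is invented:

```c
static atomic_t sketch_pmu_mmap_count;	/* hypothetical per-PMU counter */

static void sketch_pmu_event_mapped(struct perf_event *event)
{
	/* called from perf_mmap()/perf_mmap_open() once the mapping exists */
	atomic_inc(&sketch_pmu_mmap_count);
}

static void sketch_pmu_event_unmapped(struct perf_event *event)
{
	/* called from perf_mmap_close() before the buffer goes away */
	atomic_dec(&sketch_pmu_mmap_count);
}

static struct pmu sketch_pmu = {
	/* ... the usual pmu callbacks ... */
	.event_mapped	= sketch_pmu_event_mapped,
	.event_unmapped	= sketch_pmu_event_unmapped,
};
```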
| @@ -5889,6 +6051,8 @@ end: | |||
| 5889 | rcu_read_unlock(); | 6051 | rcu_read_unlock(); |
| 5890 | } | 6052 | } |
| 5891 | 6053 | ||
| 6054 | DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]); | ||
| 6055 | |||
| 5892 | int perf_swevent_get_recursion_context(void) | 6056 | int perf_swevent_get_recursion_context(void) |
| 5893 | { | 6057 | { |
| 5894 | struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); | 6058 | struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); |
| @@ -5904,21 +6068,30 @@ inline void perf_swevent_put_recursion_context(int rctx) | |||
| 5904 | put_recursion_context(swhash->recursion, rctx); | 6068 | put_recursion_context(swhash->recursion, rctx); |
| 5905 | } | 6069 | } |
| 5906 | 6070 | ||
| 5907 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | 6071 | void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) |
| 5908 | { | 6072 | { |
| 5909 | struct perf_sample_data data; | 6073 | struct perf_sample_data data; |
| 5910 | int rctx; | ||
| 5911 | 6074 | ||
| 5912 | preempt_disable_notrace(); | 6075 | if (WARN_ON_ONCE(!regs)) |
| 5913 | rctx = perf_swevent_get_recursion_context(); | ||
| 5914 | if (rctx < 0) | ||
| 5915 | return; | 6076 | return; |
| 5916 | 6077 | ||
| 5917 | perf_sample_data_init(&data, addr, 0); | 6078 | perf_sample_data_init(&data, addr, 0); |
| 5918 | |||
| 5919 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); | 6079 | do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); |
| 6080 | } | ||
| 6081 | |||
| 6082 | void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) | ||
| 6083 | { | ||
| 6084 | int rctx; | ||
| 6085 | |||
| 6086 | preempt_disable_notrace(); | ||
| 6087 | rctx = perf_swevent_get_recursion_context(); | ||
| 6088 | if (unlikely(rctx < 0)) | ||
| 6089 | goto fail; | ||
| 6090 | |||
| 6091 | ___perf_sw_event(event_id, nr, regs, addr); | ||
| 5920 | 6092 | ||
| 5921 | perf_swevent_put_recursion_context(rctx); | 6093 | perf_swevent_put_recursion_context(rctx); |
| 6094 | fail: | ||
| 5922 | preempt_enable_notrace(); | 6095 | preempt_enable_notrace(); |
| 5923 | } | 6096 | } |
| 5924 | 6097 | ||
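___perf_sw_event() is the body of __perf_sw_event() with the preemption and recursion bookkeeping stripped out, presumably so callers that are already non-preemptible and cannot recurse can skip that overhead. A hypothetical call site:

```c
/*
 * Hypothetical caller that already runs with preemption disabled and
 * cannot re-enter the swevent code: it may use the bare variant.
 * Everyone else keeps using __perf_sw_event(), which still wraps the
 * same body in preempt_disable_notrace() and a recursion context.
 */
static void sketch_emit_sw_event(struct pt_regs *regs)
{
	___perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, regs, 0);
}
```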
| @@ -6780,7 +6953,6 @@ skip_type: | |||
| 6780 | 6953 | ||
| 6781 | __perf_cpu_hrtimer_init(cpuctx, cpu); | 6954 | __perf_cpu_hrtimer_init(cpuctx, cpu); |
| 6782 | 6955 | ||
| 6783 | INIT_LIST_HEAD(&cpuctx->rotation_list); | ||
| 6784 | cpuctx->unique_pmu = pmu; | 6956 | cpuctx->unique_pmu = pmu; |
| 6785 | } | 6957 | } |
| 6786 | 6958 | ||
| @@ -6853,6 +7025,20 @@ void perf_pmu_unregister(struct pmu *pmu) | |||
| 6853 | } | 7025 | } |
| 6854 | EXPORT_SYMBOL_GPL(perf_pmu_unregister); | 7026 | EXPORT_SYMBOL_GPL(perf_pmu_unregister); |
| 6855 | 7027 | ||
| 7028 | static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) | ||
| 7029 | { | ||
| 7030 | int ret; | ||
| 7031 | |||
| 7032 | if (!try_module_get(pmu->module)) | ||
| 7033 | return -ENODEV; | ||
| 7034 | event->pmu = pmu; | ||
| 7035 | ret = pmu->event_init(event); | ||
| 7036 | if (ret) | ||
| 7037 | module_put(pmu->module); | ||
| 7038 | |||
| 7039 | return ret; | ||
| 7040 | } | ||
| 7041 | |||
| 6856 | struct pmu *perf_init_event(struct perf_event *event) | 7042 | struct pmu *perf_init_event(struct perf_event *event) |
| 6857 | { | 7043 | { |
| 6858 | struct pmu *pmu = NULL; | 7044 | struct pmu *pmu = NULL; |
| @@ -6865,24 +7051,14 @@ struct pmu *perf_init_event(struct perf_event *event) | |||
| 6865 | pmu = idr_find(&pmu_idr, event->attr.type); | 7051 | pmu = idr_find(&pmu_idr, event->attr.type); |
| 6866 | rcu_read_unlock(); | 7052 | rcu_read_unlock(); |
| 6867 | if (pmu) { | 7053 | if (pmu) { |
| 6868 | if (!try_module_get(pmu->module)) { | 7054 | ret = perf_try_init_event(pmu, event); |
| 6869 | pmu = ERR_PTR(-ENODEV); | ||
| 6870 | goto unlock; | ||
| 6871 | } | ||
| 6872 | event->pmu = pmu; | ||
| 6873 | ret = pmu->event_init(event); | ||
| 6874 | if (ret) | 7055 | if (ret) |
| 6875 | pmu = ERR_PTR(ret); | 7056 | pmu = ERR_PTR(ret); |
| 6876 | goto unlock; | 7057 | goto unlock; |
| 6877 | } | 7058 | } |
| 6878 | 7059 | ||
| 6879 | list_for_each_entry_rcu(pmu, &pmus, entry) { | 7060 | list_for_each_entry_rcu(pmu, &pmus, entry) { |
| 6880 | if (!try_module_get(pmu->module)) { | 7061 | ret = perf_try_init_event(pmu, event); |
| 6881 | pmu = ERR_PTR(-ENODEV); | ||
| 6882 | goto unlock; | ||
| 6883 | } | ||
| 6884 | event->pmu = pmu; | ||
| 6885 | ret = pmu->event_init(event); | ||
| 6886 | if (!ret) | 7062 | if (!ret) |
| 6887 | goto unlock; | 7063 | goto unlock; |
| 6888 | 7064 | ||
| @@ -7246,6 +7422,15 @@ out: | |||
| 7246 | return ret; | 7422 | return ret; |
| 7247 | } | 7423 | } |
| 7248 | 7424 | ||
| 7425 | static void mutex_lock_double(struct mutex *a, struct mutex *b) | ||
| 7426 | { | ||
| 7427 | if (b < a) | ||
| 7428 | swap(a, b); | ||
| 7429 | |||
| 7430 | mutex_lock(a); | ||
| 7431 | mutex_lock_nested(b, SINGLE_DEPTH_NESTING); | ||
| 7432 | } | ||
| 7433 | |||
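mutex_lock_double() avoids ABBA deadlock between the two context mutexes by always locking the lower address first, so it no longer matters which context is "old" and which is "new". A self-contained userspace analogue of the same ordering rule (names invented):

```c
#include <pthread.h>

/*
 * Lock two mutexes in a globally consistent order (by address), so two
 * threads locking the same pair "the other way around" cannot deadlock.
 */
static void lock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
	if (b < a) {			/* same trick as mutex_lock_double() */
		pthread_mutex_t *tmp = a;
		a = b;
		b = tmp;
	}
	pthread_mutex_lock(a);
	pthread_mutex_lock(b);		/* the kernel adds SINGLE_DEPTH_NESTING
					 * here purely to keep lockdep happy */
}

static void unlock_double(pthread_mutex_t *a, pthread_mutex_t *b)
{
	/* unlock order is irrelevant for deadlock avoidance */
	pthread_mutex_unlock(b);
	pthread_mutex_unlock(a);
}
```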
| 7249 | /** | 7434 | /** |
| 7250 | * sys_perf_event_open - open a performance event, associate it to a task/cpu | 7435 | * sys_perf_event_open - open a performance event, associate it to a task/cpu |
| 7251 | * | 7436 | * |
| @@ -7261,7 +7446,7 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7261 | struct perf_event *group_leader = NULL, *output_event = NULL; | 7446 | struct perf_event *group_leader = NULL, *output_event = NULL; |
| 7262 | struct perf_event *event, *sibling; | 7447 | struct perf_event *event, *sibling; |
| 7263 | struct perf_event_attr attr; | 7448 | struct perf_event_attr attr; |
| 7264 | struct perf_event_context *ctx; | 7449 | struct perf_event_context *ctx, *uninitialized_var(gctx); |
| 7265 | struct file *event_file = NULL; | 7450 | struct file *event_file = NULL; |
| 7266 | struct fd group = {NULL, 0}; | 7451 | struct fd group = {NULL, 0}; |
| 7267 | struct task_struct *task = NULL; | 7452 | struct task_struct *task = NULL; |
| @@ -7459,43 +7644,68 @@ SYSCALL_DEFINE5(perf_event_open, | |||
| 7459 | } | 7644 | } |
| 7460 | 7645 | ||
| 7461 | if (move_group) { | 7646 | if (move_group) { |
| 7462 | struct perf_event_context *gctx = group_leader->ctx; | 7647 | gctx = group_leader->ctx; |
| 7463 | |||
| 7464 | mutex_lock(&gctx->mutex); | ||
| 7465 | perf_remove_from_context(group_leader, false); | ||
| 7466 | 7648 | ||
| 7467 | /* | 7649 | /* |
| 7468 | * Removing from the context ends up with disabled | 7650 | * See perf_event_ctx_lock() for comments on the details |
| 7469 | * event. What we want here is event in the initial | 7651 | * of swizzling perf_event::ctx. |
| 7470 | * startup state, ready to be add into new context. | ||
| 7471 | */ | 7652 | */ |
| 7472 | perf_event__state_init(group_leader); | 7653 | mutex_lock_double(&gctx->mutex, &ctx->mutex); |
| 7654 | |||
| 7655 | perf_remove_from_context(group_leader, false); | ||
| 7656 | |||
| 7473 | list_for_each_entry(sibling, &group_leader->sibling_list, | 7657 | list_for_each_entry(sibling, &group_leader->sibling_list, |
| 7474 | group_entry) { | 7658 | group_entry) { |
| 7475 | perf_remove_from_context(sibling, false); | 7659 | perf_remove_from_context(sibling, false); |
| 7476 | perf_event__state_init(sibling); | ||
| 7477 | put_ctx(gctx); | 7660 | put_ctx(gctx); |
| 7478 | } | 7661 | } |
| 7479 | mutex_unlock(&gctx->mutex); | 7662 | } else { |
| 7480 | put_ctx(gctx); | 7663 | mutex_lock(&ctx->mutex); |
| 7481 | } | 7664 | } |
| 7482 | 7665 | ||
| 7483 | WARN_ON_ONCE(ctx->parent_ctx); | 7666 | WARN_ON_ONCE(ctx->parent_ctx); |
| 7484 | mutex_lock(&ctx->mutex); | ||
| 7485 | 7667 | ||
| 7486 | if (move_group) { | 7668 | if (move_group) { |
| 7669 | /* | ||
| 7670 | * Wait for everybody to stop referencing the events through | ||
| 7671 | * the old lists, before installing it on new lists. | ||
| 7672 | */ | ||
| 7487 | synchronize_rcu(); | 7673 | synchronize_rcu(); |
| 7488 | perf_install_in_context(ctx, group_leader, group_leader->cpu); | 7674 | |
| 7489 | get_ctx(ctx); | 7675 | /* |
| 7676 | * Install the group siblings before the group leader. | ||
| 7677 | * | ||
| 7678 | * Because a group leader will try and install the entire group | ||
| 7679 | * (through the sibling list, which is still intact), we can | ||
| 7680 | * end up with siblings installed in the wrong context. | ||
| 7681 | * | ||
| 7682 | * By installing siblings first we NO-OP because they're not | ||
| 7683 | * reachable through the group lists. | ||
| 7684 | */ | ||
| 7490 | list_for_each_entry(sibling, &group_leader->sibling_list, | 7685 | list_for_each_entry(sibling, &group_leader->sibling_list, |
| 7491 | group_entry) { | 7686 | group_entry) { |
| 7687 | perf_event__state_init(sibling); | ||
| 7492 | perf_install_in_context(ctx, sibling, sibling->cpu); | 7688 | perf_install_in_context(ctx, sibling, sibling->cpu); |
| 7493 | get_ctx(ctx); | 7689 | get_ctx(ctx); |
| 7494 | } | 7690 | } |
| 7691 | |||
| 7692 | /* | ||
| 7693 | * Removing from the context ends up with disabled | ||
| 7694 | * event. What we want here is event in the initial | ||
| 7695 | * startup state, ready to be added into the new context. | ||
| 7696 | */ | ||
| 7697 | perf_event__state_init(group_leader); | ||
| 7698 | perf_install_in_context(ctx, group_leader, group_leader->cpu); | ||
| 7699 | get_ctx(ctx); | ||
| 7495 | } | 7700 | } |
| 7496 | 7701 | ||
| 7497 | perf_install_in_context(ctx, event, event->cpu); | 7702 | perf_install_in_context(ctx, event, event->cpu); |
| 7498 | perf_unpin_context(ctx); | 7703 | perf_unpin_context(ctx); |
| 7704 | |||
| 7705 | if (move_group) { | ||
| 7706 | mutex_unlock(&gctx->mutex); | ||
| 7707 | put_ctx(gctx); | ||
| 7708 | } | ||
| 7499 | mutex_unlock(&ctx->mutex); | 7709 | mutex_unlock(&ctx->mutex); |
| 7500 | 7710 | ||
| 7501 | put_online_cpus(); | 7711 | put_online_cpus(); |
| @@ -7603,7 +7813,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
| 7603 | src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; | 7813 | src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; |
| 7604 | dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; | 7814 | dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; |
| 7605 | 7815 | ||
| 7606 | mutex_lock(&src_ctx->mutex); | 7816 | /* |
| 7817 | * See perf_event_ctx_lock() for comments on the details | ||
| 7818 | * of swizzling perf_event::ctx. | ||
| 7819 | */ | ||
| 7820 | mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex); | ||
| 7607 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, | 7821 | list_for_each_entry_safe(event, tmp, &src_ctx->event_list, |
| 7608 | event_entry) { | 7822 | event_entry) { |
| 7609 | perf_remove_from_context(event, false); | 7823 | perf_remove_from_context(event, false); |
| @@ -7611,11 +7825,36 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
| 7611 | put_ctx(src_ctx); | 7825 | put_ctx(src_ctx); |
| 7612 | list_add(&event->migrate_entry, &events); | 7826 | list_add(&event->migrate_entry, &events); |
| 7613 | } | 7827 | } |
| 7614 | mutex_unlock(&src_ctx->mutex); | ||
| 7615 | 7828 | ||
| 7829 | /* | ||
| 7830 | * Wait for the events to quiesce before re-instating them. | ||
| 7831 | */ | ||
| 7616 | synchronize_rcu(); | 7832 | synchronize_rcu(); |
| 7617 | 7833 | ||
| 7618 | mutex_lock(&dst_ctx->mutex); | 7834 | /* |
| 7835 | * Re-instate events in 2 passes. | ||
| 7836 | * | ||
| 7837 | * Skip over group leaders and only install siblings on this first | ||
| 7838 | * pass, siblings will not get enabled without a leader, however a | ||
| 7839 | * leader will enable its siblings, even if those are still on the old | ||
| 7840 | * context. | ||
| 7841 | */ | ||
| 7842 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { | ||
| 7843 | if (event->group_leader == event) | ||
| 7844 | continue; | ||
| 7845 | |||
| 7846 | list_del(&event->migrate_entry); | ||
| 7847 | if (event->state >= PERF_EVENT_STATE_OFF) | ||
| 7848 | event->state = PERF_EVENT_STATE_INACTIVE; | ||
| 7849 | account_event_cpu(event, dst_cpu); | ||
| 7850 | perf_install_in_context(dst_ctx, event, dst_cpu); | ||
| 7851 | get_ctx(dst_ctx); | ||
| 7852 | } | ||
| 7853 | |||
| 7854 | /* | ||
| 7855 | * Once all the siblings are setup properly, install the group leaders | ||
| 7856 | * to make it go. | ||
| 7857 | */ | ||
| 7619 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { | 7858 | list_for_each_entry_safe(event, tmp, &events, migrate_entry) { |
| 7620 | list_del(&event->migrate_entry); | 7859 | list_del(&event->migrate_entry); |
| 7621 | if (event->state >= PERF_EVENT_STATE_OFF) | 7860 | if (event->state >= PERF_EVENT_STATE_OFF) |
| @@ -7625,6 +7864,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu) | |||
| 7625 | get_ctx(dst_ctx); | 7864 | get_ctx(dst_ctx); |
| 7626 | } | 7865 | } |
| 7627 | mutex_unlock(&dst_ctx->mutex); | 7866 | mutex_unlock(&dst_ctx->mutex); |
| 7867 | mutex_unlock(&src_ctx->mutex); | ||
| 7628 | } | 7868 | } |
| 7629 | EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); | 7869 | EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); |
| 7630 | 7870 | ||
| @@ -7811,14 +8051,19 @@ static void perf_free_event(struct perf_event *event, | |||
| 7811 | 8051 | ||
| 7812 | put_event(parent); | 8052 | put_event(parent); |
| 7813 | 8053 | ||
| 8054 | raw_spin_lock_irq(&ctx->lock); | ||
| 7814 | perf_group_detach(event); | 8055 | perf_group_detach(event); |
| 7815 | list_del_event(event, ctx); | 8056 | list_del_event(event, ctx); |
| 8057 | raw_spin_unlock_irq(&ctx->lock); | ||
| 7816 | free_event(event); | 8058 | free_event(event); |
| 7817 | } | 8059 | } |
| 7818 | 8060 | ||
| 7819 | /* | 8061 | /* |
| 7820 | * free an unexposed, unused context as created by inheritance by | 8062 | * Free an unexposed, unused context as created by inheritance by |
| 7821 | * perf_event_init_task below, used by fork() in case of fail. | 8063 | * perf_event_init_task below, used by fork() in case of fail. |
| 8064 | * | ||
| 8065 | * Not all locks are strictly required, but take them anyway to be nice and | ||
| 8066 | * help out with the lockdep assertions. | ||
| 7822 | */ | 8067 | */ |
| 7823 | void perf_event_free_task(struct task_struct *task) | 8068 | void perf_event_free_task(struct task_struct *task) |
| 7824 | { | 8069 | { |
| @@ -8137,7 +8382,7 @@ static void __init perf_event_init_all_cpus(void) | |||
| 8137 | for_each_possible_cpu(cpu) { | 8382 | for_each_possible_cpu(cpu) { |
| 8138 | swhash = &per_cpu(swevent_htable, cpu); | 8383 | swhash = &per_cpu(swevent_htable, cpu); |
| 8139 | mutex_init(&swhash->hlist_mutex); | 8384 | mutex_init(&swhash->hlist_mutex); |
| 8140 | INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); | 8385 | INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu)); |
| 8141 | } | 8386 | } |
| 8142 | } | 8387 | } |
| 8143 | 8388 | ||
| @@ -8158,22 +8403,11 @@ static void perf_event_init_cpu(int cpu) | |||
| 8158 | } | 8403 | } |
| 8159 | 8404 | ||
| 8160 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC | 8405 | #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC |
| 8161 | static void perf_pmu_rotate_stop(struct pmu *pmu) | ||
| 8162 | { | ||
| 8163 | struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); | ||
| 8164 | |||
| 8165 | WARN_ON(!irqs_disabled()); | ||
| 8166 | |||
| 8167 | list_del_init(&cpuctx->rotation_list); | ||
| 8168 | } | ||
| 8169 | |||
| 8170 | static void __perf_event_exit_context(void *__info) | 8406 | static void __perf_event_exit_context(void *__info) |
| 8171 | { | 8407 | { |
| 8172 | struct remove_event re = { .detach_group = true }; | 8408 | struct remove_event re = { .detach_group = true }; |
| 8173 | struct perf_event_context *ctx = __info; | 8409 | struct perf_event_context *ctx = __info; |
| 8174 | 8410 | ||
| 8175 | perf_pmu_rotate_stop(ctx->pmu); | ||
| 8176 | |||
| 8177 | rcu_read_lock(); | 8411 | rcu_read_lock(); |
| 8178 | list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) | 8412 | list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) |
| 8179 | __perf_remove_from_context(&re); | 8413 | __perf_remove_from_context(&re); |
| @@ -8284,6 +8518,18 @@ void __init perf_event_init(void) | |||
| 8284 | != 1024); | 8518 | != 1024); |
| 8285 | } | 8519 | } |
| 8286 | 8520 | ||
| 8521 | ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, | ||
| 8522 | char *page) | ||
| 8523 | { | ||
| 8524 | struct perf_pmu_events_attr *pmu_attr = | ||
| 8525 | container_of(attr, struct perf_pmu_events_attr, attr); | ||
| 8526 | |||
| 8527 | if (pmu_attr->event_str) | ||
| 8528 | return sprintf(page, "%s\n", pmu_attr->event_str); | ||
| 8529 | |||
| 8530 | return 0; | ||
| 8531 | } | ||
| 8532 | |||
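perf_event_sysfs_show() gives PMU drivers a common show routine for event aliases carried as strings. A hypothetical driver-side attribute using it (the attribute name and event string are invented, and it assumes the event_str member added alongside this helper):

```c
/*
 * Hypothetical: when wired into a PMU's events attribute group, this
 * exposes /sys/.../events/sketch_cycles containing "event=0x3c".
 */
static struct perf_pmu_events_attr sketch_cycles_attr = {
	.attr		= __ATTR(sketch_cycles, 0444, perf_event_sysfs_show, NULL),
	.event_str	= "event=0x3c",
};
```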
| 8287 | static int __init perf_event_sysfs_init(void) | 8533 | static int __init perf_event_sysfs_init(void) |
| 8288 | { | 8534 | { |
| 8289 | struct pmu *pmu; | 8535 | struct pmu *pmu; |
