aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-09 18:43:55 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-09 18:43:55 -0500
commita4cbbf549a9be10b7583c44249efccd64839533d (patch)
tree4b4862e4513b629723b8853e379cd38fee08b095 /kernel
parent8308756f45a12e2ff4f7749c2694fc83cdef0be9 (diff)
parent2fde4f94e0a9531251e706fa57131b51b0df042e (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar: "Kernel side changes: - AMD range breakpoints support: Extend breakpoint tools and core to support address range through perf event with initial backend support for AMD extended breakpoints. The syntax is: perf record -e mem:addr/len:type For example set write breakpoint from 0x1000 to 0x1200 (0x1000 + 512) perf record -e mem:0x1000/512:w - event throttling/rotating fixes - various event group handling fixes, cleanups and general paranoia code to be more robust against bugs in the future. - kernel stack overhead fixes User-visible tooling side changes: - Show precise number of samples in at the end of a 'record' session, if processing build ids, since we will then traverse the whole perf.data file and see all the PERF_RECORD_SAMPLE records, otherwise stop showing the previous off-base heuristicly counted number of "samples" (Namhyung Kim). - Support to read compressed module from build-id cache (Namhyung Kim) - Enable sampling loads and stores simultaneously in 'perf mem' (Stephane Eranian) - 'perf diff' output improvements (Namhyung Kim) - Fix error reporting for evsel pgfault constructor (Arnaldo Carvalho de Melo) Tooling side infrastructure changes: - Cache eh/debug frame offset for dwarf unwind (Namhyung Kim) - Support parsing parameterized events (Cody P Schafer) - Add support for IP address formats in libtraceevent (David Ahern) Plus other misc fixes" * 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (48 commits) perf: Decouple unthrottling and rotating perf: Drop module reference on event init failure perf: Use POLLIN instead of POLL_IN for perf poll data in flag perf: Fix put_event() ctx lock perf: Fix move_group() order perf: Fix event->ctx locking perf: Add a bit of paranoia perf symbols: Convert lseek + read to pread perf tools: Use perf_data_file__fd() consistently perf symbols: Support to read compressed module from build-id cache perf evsel: Set attr.task bit for a tracking event perf header: Set header version correctly perf record: Show precise number of samples perf tools: Do not use __perf_session__process_events() directly perf callchain: Cache eh/debug frame offset for dwarf unwind perf tools: Provide stub for missing pthread_attr_setaffinity_np perf evsel: Don't rely on malloc working for sz 0 tools lib traceevent: Add support for IP address formats perf ui/tui: Show fatal error message only if exists perf tests: Fix typo in sample-parsing.c ...
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c464
-rw-r--r--kernel/events/ring_buffer.c3
-rw-r--r--kernel/sched/core.c2
-rw-r--r--kernel/trace/trace_event_perf.c4
-rw-r--r--kernel/trace/trace_kprobe.c4
-rw-r--r--kernel/trace/trace_syscalls.c4
-rw-r--r--kernel/trace/trace_uprobe.c2
7 files changed, 355 insertions, 128 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 19efcf13375a..7f2fbb8b5069 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -872,22 +872,32 @@ void perf_pmu_enable(struct pmu *pmu)
872 pmu->pmu_enable(pmu); 872 pmu->pmu_enable(pmu);
873} 873}
874 874
875static DEFINE_PER_CPU(struct list_head, rotation_list); 875static DEFINE_PER_CPU(struct list_head, active_ctx_list);
876 876
877/* 877/*
878 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized 878 * perf_event_ctx_activate(), perf_event_ctx_deactivate(), and
879 * because they're strictly cpu affine and rotate_start is called with IRQs 879 * perf_event_task_tick() are fully serialized because they're strictly cpu
880 * disabled, while rotate_context is called from IRQ context. 880 * affine and perf_event_ctx{activate,deactivate} are called with IRQs
881 * disabled, while perf_event_task_tick is called from IRQ context.
881 */ 882 */
882static void perf_pmu_rotate_start(struct pmu *pmu) 883static void perf_event_ctx_activate(struct perf_event_context *ctx)
883{ 884{
884 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); 885 struct list_head *head = this_cpu_ptr(&active_ctx_list);
885 struct list_head *head = this_cpu_ptr(&rotation_list);
886 886
887 WARN_ON(!irqs_disabled()); 887 WARN_ON(!irqs_disabled());
888 888
889 if (list_empty(&cpuctx->rotation_list)) 889 WARN_ON(!list_empty(&ctx->active_ctx_list));
890 list_add(&cpuctx->rotation_list, head); 890
891 list_add(&ctx->active_ctx_list, head);
892}
893
894static void perf_event_ctx_deactivate(struct perf_event_context *ctx)
895{
896 WARN_ON(!irqs_disabled());
897
898 WARN_ON(list_empty(&ctx->active_ctx_list));
899
900 list_del_init(&ctx->active_ctx_list);
891} 901}
892 902
893static void get_ctx(struct perf_event_context *ctx) 903static void get_ctx(struct perf_event_context *ctx)
@@ -907,6 +917,84 @@ static void put_ctx(struct perf_event_context *ctx)
907} 917}
908 918
909/* 919/*
920 * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
921 * perf_pmu_migrate_context() we need some magic.
922 *
923 * Those places that change perf_event::ctx will hold both
924 * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
925 *
926 * Lock ordering is by mutex address. There is one other site where
927 * perf_event_context::mutex nests and that is put_event(). But remember that
928 * that is a parent<->child context relation, and migration does not affect
929 * children, therefore these two orderings should not interact.
930 *
931 * The change in perf_event::ctx does not affect children (as claimed above)
932 * because the sys_perf_event_open() case will install a new event and break
933 * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
934 * concerned with cpuctx and that doesn't have children.
935 *
936 * The places that change perf_event::ctx will issue:
937 *
938 * perf_remove_from_context();
939 * synchronize_rcu();
940 * perf_install_in_context();
941 *
942 * to affect the change. The remove_from_context() + synchronize_rcu() should
943 * quiesce the event, after which we can install it in the new location. This
944 * means that only external vectors (perf_fops, prctl) can perturb the event
945 * while in transit. Therefore all such accessors should also acquire
946 * perf_event_context::mutex to serialize against this.
947 *
948 * However; because event->ctx can change while we're waiting to acquire
949 * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
950 * function.
951 *
952 * Lock order:
953 * task_struct::perf_event_mutex
954 * perf_event_context::mutex
955 * perf_event_context::lock
956 * perf_event::child_mutex;
957 * perf_event::mmap_mutex
958 * mmap_sem
959 */
960static struct perf_event_context *
961perf_event_ctx_lock_nested(struct perf_event *event, int nesting)
962{
963 struct perf_event_context *ctx;
964
965again:
966 rcu_read_lock();
967 ctx = ACCESS_ONCE(event->ctx);
968 if (!atomic_inc_not_zero(&ctx->refcount)) {
969 rcu_read_unlock();
970 goto again;
971 }
972 rcu_read_unlock();
973
974 mutex_lock_nested(&ctx->mutex, nesting);
975 if (event->ctx != ctx) {
976 mutex_unlock(&ctx->mutex);
977 put_ctx(ctx);
978 goto again;
979 }
980
981 return ctx;
982}
983
984static inline struct perf_event_context *
985perf_event_ctx_lock(struct perf_event *event)
986{
987 return perf_event_ctx_lock_nested(event, 0);
988}
989
990static void perf_event_ctx_unlock(struct perf_event *event,
991 struct perf_event_context *ctx)
992{
993 mutex_unlock(&ctx->mutex);
994 put_ctx(ctx);
995}
996
997/*
910 * This must be done under the ctx->lock, such as to serialize against 998 * This must be done under the ctx->lock, such as to serialize against
911 * context_equiv(), therefore we cannot call put_ctx() since that might end up 999 * context_equiv(), therefore we cannot call put_ctx() since that might end up
912 * calling scheduler related locks and ctx->lock nests inside those. 1000 * calling scheduler related locks and ctx->lock nests inside those.
@@ -1155,8 +1243,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
1155 ctx->nr_branch_stack++; 1243 ctx->nr_branch_stack++;
1156 1244
1157 list_add_rcu(&event->event_entry, &ctx->event_list); 1245 list_add_rcu(&event->event_entry, &ctx->event_list);
1158 if (!ctx->nr_events)
1159 perf_pmu_rotate_start(ctx->pmu);
1160 ctx->nr_events++; 1246 ctx->nr_events++;
1161 if (event->attr.inherit_stat) 1247 if (event->attr.inherit_stat)
1162 ctx->nr_stat++; 1248 ctx->nr_stat++;
@@ -1275,6 +1361,8 @@ static void perf_group_attach(struct perf_event *event)
1275 if (group_leader == event) 1361 if (group_leader == event)
1276 return; 1362 return;
1277 1363
1364 WARN_ON_ONCE(group_leader->ctx != event->ctx);
1365
1278 if (group_leader->group_flags & PERF_GROUP_SOFTWARE && 1366 if (group_leader->group_flags & PERF_GROUP_SOFTWARE &&
1279 !is_software_event(event)) 1367 !is_software_event(event))
1280 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE; 1368 group_leader->group_flags &= ~PERF_GROUP_SOFTWARE;
@@ -1296,6 +1384,10 @@ static void
1296list_del_event(struct perf_event *event, struct perf_event_context *ctx) 1384list_del_event(struct perf_event *event, struct perf_event_context *ctx)
1297{ 1385{
1298 struct perf_cpu_context *cpuctx; 1386 struct perf_cpu_context *cpuctx;
1387
1388 WARN_ON_ONCE(event->ctx != ctx);
1389 lockdep_assert_held(&ctx->lock);
1390
1299 /* 1391 /*
1300 * We can have double detach due to exit/hot-unplug + close. 1392 * We can have double detach due to exit/hot-unplug + close.
1301 */ 1393 */
@@ -1380,6 +1472,8 @@ static void perf_group_detach(struct perf_event *event)
1380 1472
1381 /* Inherit group flags from the previous leader */ 1473 /* Inherit group flags from the previous leader */
1382 sibling->group_flags = event->group_flags; 1474 sibling->group_flags = event->group_flags;
1475
1476 WARN_ON_ONCE(sibling->ctx != event->ctx);
1383 } 1477 }
1384 1478
1385out: 1479out:
@@ -1442,6 +1536,10 @@ event_sched_out(struct perf_event *event,
1442{ 1536{
1443 u64 tstamp = perf_event_time(event); 1537 u64 tstamp = perf_event_time(event);
1444 u64 delta; 1538 u64 delta;
1539
1540 WARN_ON_ONCE(event->ctx != ctx);
1541 lockdep_assert_held(&ctx->lock);
1542
1445 /* 1543 /*
1446 * An event which could not be activated because of 1544 * An event which could not be activated because of
1447 * filter mismatch still needs to have its timings 1545 * filter mismatch still needs to have its timings
@@ -1471,7 +1569,8 @@ event_sched_out(struct perf_event *event,
1471 1569
1472 if (!is_software_event(event)) 1570 if (!is_software_event(event))
1473 cpuctx->active_oncpu--; 1571 cpuctx->active_oncpu--;
1474 ctx->nr_active--; 1572 if (!--ctx->nr_active)
1573 perf_event_ctx_deactivate(ctx);
1475 if (event->attr.freq && event->attr.sample_freq) 1574 if (event->attr.freq && event->attr.sample_freq)
1476 ctx->nr_freq--; 1575 ctx->nr_freq--;
1477 if (event->attr.exclusive || !cpuctx->active_oncpu) 1576 if (event->attr.exclusive || !cpuctx->active_oncpu)
@@ -1654,7 +1753,7 @@ int __perf_event_disable(void *info)
1654 * is the current context on this CPU and preemption is disabled, 1753 * is the current context on this CPU and preemption is disabled,
1655 * hence we can't get into perf_event_task_sched_out for this context. 1754 * hence we can't get into perf_event_task_sched_out for this context.
1656 */ 1755 */
1657void perf_event_disable(struct perf_event *event) 1756static void _perf_event_disable(struct perf_event *event)
1658{ 1757{
1659 struct perf_event_context *ctx = event->ctx; 1758 struct perf_event_context *ctx = event->ctx;
1660 struct task_struct *task = ctx->task; 1759 struct task_struct *task = ctx->task;
@@ -1695,6 +1794,19 @@ retry:
1695 } 1794 }
1696 raw_spin_unlock_irq(&ctx->lock); 1795 raw_spin_unlock_irq(&ctx->lock);
1697} 1796}
1797
1798/*
1799 * Strictly speaking kernel users cannot create groups and therefore this
1800 * interface does not need the perf_event_ctx_lock() magic.
1801 */
1802void perf_event_disable(struct perf_event *event)
1803{
1804 struct perf_event_context *ctx;
1805
1806 ctx = perf_event_ctx_lock(event);
1807 _perf_event_disable(event);
1808 perf_event_ctx_unlock(event, ctx);
1809}
1698EXPORT_SYMBOL_GPL(perf_event_disable); 1810EXPORT_SYMBOL_GPL(perf_event_disable);
1699 1811
1700static void perf_set_shadow_time(struct perf_event *event, 1812static void perf_set_shadow_time(struct perf_event *event,
@@ -1782,7 +1894,8 @@ event_sched_in(struct perf_event *event,
1782 1894
1783 if (!is_software_event(event)) 1895 if (!is_software_event(event))
1784 cpuctx->active_oncpu++; 1896 cpuctx->active_oncpu++;
1785 ctx->nr_active++; 1897 if (!ctx->nr_active++)
1898 perf_event_ctx_activate(ctx);
1786 if (event->attr.freq && event->attr.sample_freq) 1899 if (event->attr.freq && event->attr.sample_freq)
1787 ctx->nr_freq++; 1900 ctx->nr_freq++;
1788 1901
@@ -2158,7 +2271,7 @@ unlock:
2158 * perf_event_for_each_child or perf_event_for_each as described 2271 * perf_event_for_each_child or perf_event_for_each as described
2159 * for perf_event_disable. 2272 * for perf_event_disable.
2160 */ 2273 */
2161void perf_event_enable(struct perf_event *event) 2274static void _perf_event_enable(struct perf_event *event)
2162{ 2275{
2163 struct perf_event_context *ctx = event->ctx; 2276 struct perf_event_context *ctx = event->ctx;
2164 struct task_struct *task = ctx->task; 2277 struct task_struct *task = ctx->task;
@@ -2214,9 +2327,21 @@ retry:
2214out: 2327out:
2215 raw_spin_unlock_irq(&ctx->lock); 2328 raw_spin_unlock_irq(&ctx->lock);
2216} 2329}
2330
2331/*
2332 * See perf_event_disable();
2333 */
2334void perf_event_enable(struct perf_event *event)
2335{
2336 struct perf_event_context *ctx;
2337
2338 ctx = perf_event_ctx_lock(event);
2339 _perf_event_enable(event);
2340 perf_event_ctx_unlock(event, ctx);
2341}
2217EXPORT_SYMBOL_GPL(perf_event_enable); 2342EXPORT_SYMBOL_GPL(perf_event_enable);
2218 2343
2219int perf_event_refresh(struct perf_event *event, int refresh) 2344static int _perf_event_refresh(struct perf_event *event, int refresh)
2220{ 2345{
2221 /* 2346 /*
2222 * not supported on inherited events 2347 * not supported on inherited events
@@ -2225,10 +2350,25 @@ int perf_event_refresh(struct perf_event *event, int refresh)
2225 return -EINVAL; 2350 return -EINVAL;
2226 2351
2227 atomic_add(refresh, &event->event_limit); 2352 atomic_add(refresh, &event->event_limit);
2228 perf_event_enable(event); 2353 _perf_event_enable(event);
2229 2354
2230 return 0; 2355 return 0;
2231} 2356}
2357
2358/*
2359 * See perf_event_disable()
2360 */
2361int perf_event_refresh(struct perf_event *event, int refresh)
2362{
2363 struct perf_event_context *ctx;
2364 int ret;
2365
2366 ctx = perf_event_ctx_lock(event);
2367 ret = _perf_event_refresh(event, refresh);
2368 perf_event_ctx_unlock(event, ctx);
2369
2370 return ret;
2371}
2232EXPORT_SYMBOL_GPL(perf_event_refresh); 2372EXPORT_SYMBOL_GPL(perf_event_refresh);
2233 2373
2234static void ctx_sched_out(struct perf_event_context *ctx, 2374static void ctx_sched_out(struct perf_event_context *ctx,
@@ -2612,12 +2752,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
2612 2752
2613 perf_pmu_enable(ctx->pmu); 2753 perf_pmu_enable(ctx->pmu);
2614 perf_ctx_unlock(cpuctx, ctx); 2754 perf_ctx_unlock(cpuctx, ctx);
2615
2616 /*
2617 * Since these rotations are per-cpu, we need to ensure the
2618 * cpu-context we got scheduled on is actually rotating.
2619 */
2620 perf_pmu_rotate_start(ctx->pmu);
2621} 2755}
2622 2756
2623/* 2757/*
@@ -2905,25 +3039,18 @@ static void rotate_ctx(struct perf_event_context *ctx)
2905 list_rotate_left(&ctx->flexible_groups); 3039 list_rotate_left(&ctx->flexible_groups);
2906} 3040}
2907 3041
2908/*
2909 * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
2910 * because they're strictly cpu affine and rotate_start is called with IRQs
2911 * disabled, while rotate_context is called from IRQ context.
2912 */
2913static int perf_rotate_context(struct perf_cpu_context *cpuctx) 3042static int perf_rotate_context(struct perf_cpu_context *cpuctx)
2914{ 3043{
2915 struct perf_event_context *ctx = NULL; 3044 struct perf_event_context *ctx = NULL;
2916 int rotate = 0, remove = 1; 3045 int rotate = 0;
2917 3046
2918 if (cpuctx->ctx.nr_events) { 3047 if (cpuctx->ctx.nr_events) {
2919 remove = 0;
2920 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active) 3048 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
2921 rotate = 1; 3049 rotate = 1;
2922 } 3050 }
2923 3051
2924 ctx = cpuctx->task_ctx; 3052 ctx = cpuctx->task_ctx;
2925 if (ctx && ctx->nr_events) { 3053 if (ctx && ctx->nr_events) {
2926 remove = 0;
2927 if (ctx->nr_events != ctx->nr_active) 3054 if (ctx->nr_events != ctx->nr_active)
2928 rotate = 1; 3055 rotate = 1;
2929 } 3056 }
@@ -2947,8 +3074,6 @@ static int perf_rotate_context(struct perf_cpu_context *cpuctx)
2947 perf_pmu_enable(cpuctx->ctx.pmu); 3074 perf_pmu_enable(cpuctx->ctx.pmu);
2948 perf_ctx_unlock(cpuctx, cpuctx->task_ctx); 3075 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2949done: 3076done:
2950 if (remove)
2951 list_del_init(&cpuctx->rotation_list);
2952 3077
2953 return rotate; 3078 return rotate;
2954} 3079}
@@ -2966,9 +3091,8 @@ bool perf_event_can_stop_tick(void)
2966 3091
2967void perf_event_task_tick(void) 3092void perf_event_task_tick(void)
2968{ 3093{
2969 struct list_head *head = this_cpu_ptr(&rotation_list); 3094 struct list_head *head = this_cpu_ptr(&active_ctx_list);
2970 struct perf_cpu_context *cpuctx, *tmp; 3095 struct perf_event_context *ctx, *tmp;
2971 struct perf_event_context *ctx;
2972 int throttled; 3096 int throttled;
2973 3097
2974 WARN_ON(!irqs_disabled()); 3098 WARN_ON(!irqs_disabled());
@@ -2976,14 +3100,8 @@ void perf_event_task_tick(void)
2976 __this_cpu_inc(perf_throttled_seq); 3100 __this_cpu_inc(perf_throttled_seq);
2977 throttled = __this_cpu_xchg(perf_throttled_count, 0); 3101 throttled = __this_cpu_xchg(perf_throttled_count, 0);
2978 3102
2979 list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) { 3103 list_for_each_entry_safe(ctx, tmp, head, active_ctx_list)
2980 ctx = &cpuctx->ctx;
2981 perf_adjust_freq_unthr_context(ctx, throttled); 3104 perf_adjust_freq_unthr_context(ctx, throttled);
2982
2983 ctx = cpuctx->task_ctx;
2984 if (ctx)
2985 perf_adjust_freq_unthr_context(ctx, throttled);
2986 }
2987} 3105}
2988 3106
2989static int event_enable_on_exec(struct perf_event *event, 3107static int event_enable_on_exec(struct perf_event *event,
@@ -3142,6 +3260,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx)
3142{ 3260{
3143 raw_spin_lock_init(&ctx->lock); 3261 raw_spin_lock_init(&ctx->lock);
3144 mutex_init(&ctx->mutex); 3262 mutex_init(&ctx->mutex);
3263 INIT_LIST_HEAD(&ctx->active_ctx_list);
3145 INIT_LIST_HEAD(&ctx->pinned_groups); 3264 INIT_LIST_HEAD(&ctx->pinned_groups);
3146 INIT_LIST_HEAD(&ctx->flexible_groups); 3265 INIT_LIST_HEAD(&ctx->flexible_groups);
3147 INIT_LIST_HEAD(&ctx->event_list); 3266 INIT_LIST_HEAD(&ctx->event_list);
@@ -3421,7 +3540,16 @@ static void perf_remove_from_owner(struct perf_event *event)
3421 rcu_read_unlock(); 3540 rcu_read_unlock();
3422 3541
3423 if (owner) { 3542 if (owner) {
3424 mutex_lock(&owner->perf_event_mutex); 3543 /*
3544 * If we're here through perf_event_exit_task() we're already
3545 * holding ctx->mutex which would be an inversion wrt. the
3546 * normal lock order.
3547 *
3548 * However we can safely take this lock because its the child
3549 * ctx->mutex.
3550 */
3551 mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
3552
3425 /* 3553 /*
3426 * We have to re-check the event->owner field, if it is cleared 3554 * We have to re-check the event->owner field, if it is cleared
3427 * we raced with perf_event_exit_task(), acquiring the mutex 3555 * we raced with perf_event_exit_task(), acquiring the mutex
@@ -3440,7 +3568,7 @@ static void perf_remove_from_owner(struct perf_event *event)
3440 */ 3568 */
3441static void put_event(struct perf_event *event) 3569static void put_event(struct perf_event *event)
3442{ 3570{
3443 struct perf_event_context *ctx = event->ctx; 3571 struct perf_event_context *ctx;
3444 3572
3445 if (!atomic_long_dec_and_test(&event->refcount)) 3573 if (!atomic_long_dec_and_test(&event->refcount))
3446 return; 3574 return;
@@ -3448,7 +3576,6 @@ static void put_event(struct perf_event *event)
3448 if (!is_kernel_event(event)) 3576 if (!is_kernel_event(event))
3449 perf_remove_from_owner(event); 3577 perf_remove_from_owner(event);
3450 3578
3451 WARN_ON_ONCE(ctx->parent_ctx);
3452 /* 3579 /*
3453 * There are two ways this annotation is useful: 3580 * There are two ways this annotation is useful:
3454 * 3581 *
@@ -3461,7 +3588,8 @@ static void put_event(struct perf_event *event)
3461 * the last filedesc died, so there is no possibility 3588 * the last filedesc died, so there is no possibility
3462 * to trigger the AB-BA case. 3589 * to trigger the AB-BA case.
3463 */ 3590 */
3464 mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); 3591 ctx = perf_event_ctx_lock_nested(event, SINGLE_DEPTH_NESTING);
3592 WARN_ON_ONCE(ctx->parent_ctx);
3465 perf_remove_from_context(event, true); 3593 perf_remove_from_context(event, true);
3466 mutex_unlock(&ctx->mutex); 3594 mutex_unlock(&ctx->mutex);
3467 3595
@@ -3547,12 +3675,13 @@ static int perf_event_read_group(struct perf_event *event,
3547 u64 read_format, char __user *buf) 3675 u64 read_format, char __user *buf)
3548{ 3676{
3549 struct perf_event *leader = event->group_leader, *sub; 3677 struct perf_event *leader = event->group_leader, *sub;
3550 int n = 0, size = 0, ret = -EFAULT;
3551 struct perf_event_context *ctx = leader->ctx; 3678 struct perf_event_context *ctx = leader->ctx;
3552 u64 values[5]; 3679 int n = 0, size = 0, ret;
3553 u64 count, enabled, running; 3680 u64 count, enabled, running;
3681 u64 values[5];
3682
3683 lockdep_assert_held(&ctx->mutex);
3554 3684
3555 mutex_lock(&ctx->mutex);
3556 count = perf_event_read_value(leader, &enabled, &running); 3685 count = perf_event_read_value(leader, &enabled, &running);
3557 3686
3558 values[n++] = 1 + leader->nr_siblings; 3687 values[n++] = 1 + leader->nr_siblings;
@@ -3567,7 +3696,7 @@ static int perf_event_read_group(struct perf_event *event,
3567 size = n * sizeof(u64); 3696 size = n * sizeof(u64);
3568 3697
3569 if (copy_to_user(buf, values, size)) 3698 if (copy_to_user(buf, values, size))
3570 goto unlock; 3699 return -EFAULT;
3571 3700
3572 ret = size; 3701 ret = size;
3573 3702
@@ -3581,14 +3710,11 @@ static int perf_event_read_group(struct perf_event *event,
3581 size = n * sizeof(u64); 3710 size = n * sizeof(u64);
3582 3711
3583 if (copy_to_user(buf + ret, values, size)) { 3712 if (copy_to_user(buf + ret, values, size)) {
3584 ret = -EFAULT; 3713 return -EFAULT;
3585 goto unlock;
3586 } 3714 }
3587 3715
3588 ret += size; 3716 ret += size;
3589 } 3717 }
3590unlock:
3591 mutex_unlock(&ctx->mutex);
3592 3718
3593 return ret; 3719 return ret;
3594} 3720}
@@ -3660,8 +3786,14 @@ static ssize_t
3660perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) 3786perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
3661{ 3787{
3662 struct perf_event *event = file->private_data; 3788 struct perf_event *event = file->private_data;
3789 struct perf_event_context *ctx;
3790 int ret;
3663 3791
3664 return perf_read_hw(event, buf, count); 3792 ctx = perf_event_ctx_lock(event);
3793 ret = perf_read_hw(event, buf, count);
3794 perf_event_ctx_unlock(event, ctx);
3795
3796 return ret;
3665} 3797}
3666 3798
3667static unsigned int perf_poll(struct file *file, poll_table *wait) 3799static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -3687,7 +3819,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
3687 return events; 3819 return events;
3688} 3820}
3689 3821
3690static void perf_event_reset(struct perf_event *event) 3822static void _perf_event_reset(struct perf_event *event)
3691{ 3823{
3692 (void)perf_event_read(event); 3824 (void)perf_event_read(event);
3693 local64_set(&event->count, 0); 3825 local64_set(&event->count, 0);
@@ -3706,6 +3838,7 @@ static void perf_event_for_each_child(struct perf_event *event,
3706 struct perf_event *child; 3838 struct perf_event *child;
3707 3839
3708 WARN_ON_ONCE(event->ctx->parent_ctx); 3840 WARN_ON_ONCE(event->ctx->parent_ctx);
3841
3709 mutex_lock(&event->child_mutex); 3842 mutex_lock(&event->child_mutex);
3710 func(event); 3843 func(event);
3711 list_for_each_entry(child, &event->child_list, child_list) 3844 list_for_each_entry(child, &event->child_list, child_list)
@@ -3719,14 +3852,13 @@ static void perf_event_for_each(struct perf_event *event,
3719 struct perf_event_context *ctx = event->ctx; 3852 struct perf_event_context *ctx = event->ctx;
3720 struct perf_event *sibling; 3853 struct perf_event *sibling;
3721 3854
3722 WARN_ON_ONCE(ctx->parent_ctx); 3855 lockdep_assert_held(&ctx->mutex);
3723 mutex_lock(&ctx->mutex); 3856
3724 event = event->group_leader; 3857 event = event->group_leader;
3725 3858
3726 perf_event_for_each_child(event, func); 3859 perf_event_for_each_child(event, func);
3727 list_for_each_entry(sibling, &event->sibling_list, group_entry) 3860 list_for_each_entry(sibling, &event->sibling_list, group_entry)
3728 perf_event_for_each_child(sibling, func); 3861 perf_event_for_each_child(sibling, func);
3729 mutex_unlock(&ctx->mutex);
3730} 3862}
3731 3863
3732static int perf_event_period(struct perf_event *event, u64 __user *arg) 3864static int perf_event_period(struct perf_event *event, u64 __user *arg)
@@ -3796,25 +3928,24 @@ static int perf_event_set_output(struct perf_event *event,
3796 struct perf_event *output_event); 3928 struct perf_event *output_event);
3797static int perf_event_set_filter(struct perf_event *event, void __user *arg); 3929static int perf_event_set_filter(struct perf_event *event, void __user *arg);
3798 3930
3799static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 3931static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
3800{ 3932{
3801 struct perf_event *event = file->private_data;
3802 void (*func)(struct perf_event *); 3933 void (*func)(struct perf_event *);
3803 u32 flags = arg; 3934 u32 flags = arg;
3804 3935
3805 switch (cmd) { 3936 switch (cmd) {
3806 case PERF_EVENT_IOC_ENABLE: 3937 case PERF_EVENT_IOC_ENABLE:
3807 func = perf_event_enable; 3938 func = _perf_event_enable;
3808 break; 3939 break;
3809 case PERF_EVENT_IOC_DISABLE: 3940 case PERF_EVENT_IOC_DISABLE:
3810 func = perf_event_disable; 3941 func = _perf_event_disable;
3811 break; 3942 break;
3812 case PERF_EVENT_IOC_RESET: 3943 case PERF_EVENT_IOC_RESET:
3813 func = perf_event_reset; 3944 func = _perf_event_reset;
3814 break; 3945 break;
3815 3946
3816 case PERF_EVENT_IOC_REFRESH: 3947 case PERF_EVENT_IOC_REFRESH:
3817 return perf_event_refresh(event, arg); 3948 return _perf_event_refresh(event, arg);
3818 3949
3819 case PERF_EVENT_IOC_PERIOD: 3950 case PERF_EVENT_IOC_PERIOD:
3820 return perf_event_period(event, (u64 __user *)arg); 3951 return perf_event_period(event, (u64 __user *)arg);
@@ -3861,6 +3992,19 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3861 return 0; 3992 return 0;
3862} 3993}
3863 3994
3995static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3996{
3997 struct perf_event *event = file->private_data;
3998 struct perf_event_context *ctx;
3999 long ret;
4000
4001 ctx = perf_event_ctx_lock(event);
4002 ret = _perf_ioctl(event, cmd, arg);
4003 perf_event_ctx_unlock(event, ctx);
4004
4005 return ret;
4006}
4007
3864#ifdef CONFIG_COMPAT 4008#ifdef CONFIG_COMPAT
3865static long perf_compat_ioctl(struct file *file, unsigned int cmd, 4009static long perf_compat_ioctl(struct file *file, unsigned int cmd,
3866 unsigned long arg) 4010 unsigned long arg)
@@ -3883,11 +4027,15 @@ static long perf_compat_ioctl(struct file *file, unsigned int cmd,
3883 4027
3884int perf_event_task_enable(void) 4028int perf_event_task_enable(void)
3885{ 4029{
4030 struct perf_event_context *ctx;
3886 struct perf_event *event; 4031 struct perf_event *event;
3887 4032
3888 mutex_lock(&current->perf_event_mutex); 4033 mutex_lock(&current->perf_event_mutex);
3889 list_for_each_entry(event, &current->perf_event_list, owner_entry) 4034 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
3890 perf_event_for_each_child(event, perf_event_enable); 4035 ctx = perf_event_ctx_lock(event);
4036 perf_event_for_each_child(event, _perf_event_enable);
4037 perf_event_ctx_unlock(event, ctx);
4038 }
3891 mutex_unlock(&current->perf_event_mutex); 4039 mutex_unlock(&current->perf_event_mutex);
3892 4040
3893 return 0; 4041 return 0;
@@ -3895,11 +4043,15 @@ int perf_event_task_enable(void)
3895 4043
3896int perf_event_task_disable(void) 4044int perf_event_task_disable(void)
3897{ 4045{
4046 struct perf_event_context *ctx;
3898 struct perf_event *event; 4047 struct perf_event *event;
3899 4048
3900 mutex_lock(&current->perf_event_mutex); 4049 mutex_lock(&current->perf_event_mutex);
3901 list_for_each_entry(event, &current->perf_event_list, owner_entry) 4050 list_for_each_entry(event, &current->perf_event_list, owner_entry) {
3902 perf_event_for_each_child(event, perf_event_disable); 4051 ctx = perf_event_ctx_lock(event);
4052 perf_event_for_each_child(event, _perf_event_disable);
4053 perf_event_ctx_unlock(event, ctx);
4054 }
3903 mutex_unlock(&current->perf_event_mutex); 4055 mutex_unlock(&current->perf_event_mutex);
3904 4056
3905 return 0; 4057 return 0;
@@ -5889,6 +6041,8 @@ end:
5889 rcu_read_unlock(); 6041 rcu_read_unlock();
5890} 6042}
5891 6043
6044DEFINE_PER_CPU(struct pt_regs, __perf_regs[4]);
6045
5892int perf_swevent_get_recursion_context(void) 6046int perf_swevent_get_recursion_context(void)
5893{ 6047{
5894 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); 6048 struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable);
@@ -5904,21 +6058,30 @@ inline void perf_swevent_put_recursion_context(int rctx)
5904 put_recursion_context(swhash->recursion, rctx); 6058 put_recursion_context(swhash->recursion, rctx);
5905} 6059}
5906 6060
5907void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr) 6061void ___perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
5908{ 6062{
5909 struct perf_sample_data data; 6063 struct perf_sample_data data;
5910 int rctx;
5911 6064
5912 preempt_disable_notrace(); 6065 if (WARN_ON_ONCE(!regs))
5913 rctx = perf_swevent_get_recursion_context();
5914 if (rctx < 0)
5915 return; 6066 return;
5916 6067
5917 perf_sample_data_init(&data, addr, 0); 6068 perf_sample_data_init(&data, addr, 0);
5918
5919 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs); 6069 do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, &data, regs);
6070}
6071
6072void __perf_sw_event(u32 event_id, u64 nr, struct pt_regs *regs, u64 addr)
6073{
6074 int rctx;
6075
6076 preempt_disable_notrace();
6077 rctx = perf_swevent_get_recursion_context();
6078 if (unlikely(rctx < 0))
6079 goto fail;
6080
6081 ___perf_sw_event(event_id, nr, regs, addr);
5920 6082
5921 perf_swevent_put_recursion_context(rctx); 6083 perf_swevent_put_recursion_context(rctx);
6084fail:
5922 preempt_enable_notrace(); 6085 preempt_enable_notrace();
5923} 6086}
5924 6087
@@ -6780,7 +6943,6 @@ skip_type:
6780 6943
6781 __perf_cpu_hrtimer_init(cpuctx, cpu); 6944 __perf_cpu_hrtimer_init(cpuctx, cpu);
6782 6945
6783 INIT_LIST_HEAD(&cpuctx->rotation_list);
6784 cpuctx->unique_pmu = pmu; 6946 cpuctx->unique_pmu = pmu;
6785 } 6947 }
6786 6948
@@ -6853,6 +7015,20 @@ void perf_pmu_unregister(struct pmu *pmu)
6853} 7015}
6854EXPORT_SYMBOL_GPL(perf_pmu_unregister); 7016EXPORT_SYMBOL_GPL(perf_pmu_unregister);
6855 7017
7018static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
7019{
7020 int ret;
7021
7022 if (!try_module_get(pmu->module))
7023 return -ENODEV;
7024 event->pmu = pmu;
7025 ret = pmu->event_init(event);
7026 if (ret)
7027 module_put(pmu->module);
7028
7029 return ret;
7030}
7031
6856struct pmu *perf_init_event(struct perf_event *event) 7032struct pmu *perf_init_event(struct perf_event *event)
6857{ 7033{
6858 struct pmu *pmu = NULL; 7034 struct pmu *pmu = NULL;
@@ -6865,24 +7041,14 @@ struct pmu *perf_init_event(struct perf_event *event)
6865 pmu = idr_find(&pmu_idr, event->attr.type); 7041 pmu = idr_find(&pmu_idr, event->attr.type);
6866 rcu_read_unlock(); 7042 rcu_read_unlock();
6867 if (pmu) { 7043 if (pmu) {
6868 if (!try_module_get(pmu->module)) { 7044 ret = perf_try_init_event(pmu, event);
6869 pmu = ERR_PTR(-ENODEV);
6870 goto unlock;
6871 }
6872 event->pmu = pmu;
6873 ret = pmu->event_init(event);
6874 if (ret) 7045 if (ret)
6875 pmu = ERR_PTR(ret); 7046 pmu = ERR_PTR(ret);
6876 goto unlock; 7047 goto unlock;
6877 } 7048 }
6878 7049
6879 list_for_each_entry_rcu(pmu, &pmus, entry) { 7050 list_for_each_entry_rcu(pmu, &pmus, entry) {
6880 if (!try_module_get(pmu->module)) { 7051 ret = perf_try_init_event(pmu, event);
6881 pmu = ERR_PTR(-ENODEV);
6882 goto unlock;
6883 }
6884 event->pmu = pmu;
6885 ret = pmu->event_init(event);
6886 if (!ret) 7052 if (!ret)
6887 goto unlock; 7053 goto unlock;
6888 7054
@@ -7246,6 +7412,15 @@ out:
7246 return ret; 7412 return ret;
7247} 7413}
7248 7414
7415static void mutex_lock_double(struct mutex *a, struct mutex *b)
7416{
7417 if (b < a)
7418 swap(a, b);
7419
7420 mutex_lock(a);
7421 mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
7422}
7423
7249/** 7424/**
7250 * sys_perf_event_open - open a performance event, associate it to a task/cpu 7425 * sys_perf_event_open - open a performance event, associate it to a task/cpu
7251 * 7426 *
@@ -7261,7 +7436,7 @@ SYSCALL_DEFINE5(perf_event_open,
7261 struct perf_event *group_leader = NULL, *output_event = NULL; 7436 struct perf_event *group_leader = NULL, *output_event = NULL;
7262 struct perf_event *event, *sibling; 7437 struct perf_event *event, *sibling;
7263 struct perf_event_attr attr; 7438 struct perf_event_attr attr;
7264 struct perf_event_context *ctx; 7439 struct perf_event_context *ctx, *uninitialized_var(gctx);
7265 struct file *event_file = NULL; 7440 struct file *event_file = NULL;
7266 struct fd group = {NULL, 0}; 7441 struct fd group = {NULL, 0};
7267 struct task_struct *task = NULL; 7442 struct task_struct *task = NULL;
@@ -7459,43 +7634,68 @@ SYSCALL_DEFINE5(perf_event_open,
7459 } 7634 }
7460 7635
7461 if (move_group) { 7636 if (move_group) {
7462 struct perf_event_context *gctx = group_leader->ctx; 7637 gctx = group_leader->ctx;
7463
7464 mutex_lock(&gctx->mutex);
7465 perf_remove_from_context(group_leader, false);
7466 7638
7467 /* 7639 /*
7468 * Removing from the context ends up with disabled 7640 * See perf_event_ctx_lock() for comments on the details
7469 * event. What we want here is event in the initial 7641 * of swizzling perf_event::ctx.
7470 * startup state, ready to be add into new context.
7471 */ 7642 */
7472 perf_event__state_init(group_leader); 7643 mutex_lock_double(&gctx->mutex, &ctx->mutex);
7644
7645 perf_remove_from_context(group_leader, false);
7646
7473 list_for_each_entry(sibling, &group_leader->sibling_list, 7647 list_for_each_entry(sibling, &group_leader->sibling_list,
7474 group_entry) { 7648 group_entry) {
7475 perf_remove_from_context(sibling, false); 7649 perf_remove_from_context(sibling, false);
7476 perf_event__state_init(sibling);
7477 put_ctx(gctx); 7650 put_ctx(gctx);
7478 } 7651 }
7479 mutex_unlock(&gctx->mutex); 7652 } else {
7480 put_ctx(gctx); 7653 mutex_lock(&ctx->mutex);
7481 } 7654 }
7482 7655
7483 WARN_ON_ONCE(ctx->parent_ctx); 7656 WARN_ON_ONCE(ctx->parent_ctx);
7484 mutex_lock(&ctx->mutex);
7485 7657
7486 if (move_group) { 7658 if (move_group) {
7659 /*
7660 * Wait for everybody to stop referencing the events through
7661 * the old lists, before installing it on new lists.
7662 */
7487 synchronize_rcu(); 7663 synchronize_rcu();
7488 perf_install_in_context(ctx, group_leader, group_leader->cpu); 7664
7489 get_ctx(ctx); 7665 /*
7666 * Install the group siblings before the group leader.
7667 *
7668 * Because a group leader will try and install the entire group
7669 * (through the sibling list, which is still in-tact), we can
7670 * end up with siblings installed in the wrong context.
7671 *
7672 * By installing siblings first we NO-OP because they're not
7673 * reachable through the group lists.
7674 */
7490 list_for_each_entry(sibling, &group_leader->sibling_list, 7675 list_for_each_entry(sibling, &group_leader->sibling_list,
7491 group_entry) { 7676 group_entry) {
7677 perf_event__state_init(sibling);
7492 perf_install_in_context(ctx, sibling, sibling->cpu); 7678 perf_install_in_context(ctx, sibling, sibling->cpu);
7493 get_ctx(ctx); 7679 get_ctx(ctx);
7494 } 7680 }
7681
7682 /*
7683 * Removing from the context ends up with disabled
7684 * event. What we want here is event in the initial
7685 * startup state, ready to be add into new context.
7686 */
7687 perf_event__state_init(group_leader);
7688 perf_install_in_context(ctx, group_leader, group_leader->cpu);
7689 get_ctx(ctx);
7495 } 7690 }
7496 7691
7497 perf_install_in_context(ctx, event, event->cpu); 7692 perf_install_in_context(ctx, event, event->cpu);
7498 perf_unpin_context(ctx); 7693 perf_unpin_context(ctx);
7694
7695 if (move_group) {
7696 mutex_unlock(&gctx->mutex);
7697 put_ctx(gctx);
7698 }
7499 mutex_unlock(&ctx->mutex); 7699 mutex_unlock(&ctx->mutex);
7500 7700
7501 put_online_cpus(); 7701 put_online_cpus();
@@ -7603,7 +7803,11 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7603 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx; 7803 src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
7604 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx; 7804 dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
7605 7805
7606 mutex_lock(&src_ctx->mutex); 7806 /*
7807 * See perf_event_ctx_lock() for comments on the details
7808 * of swizzling perf_event::ctx.
7809 */
7810 mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
7607 list_for_each_entry_safe(event, tmp, &src_ctx->event_list, 7811 list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
7608 event_entry) { 7812 event_entry) {
7609 perf_remove_from_context(event, false); 7813 perf_remove_from_context(event, false);
@@ -7611,11 +7815,36 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7611 put_ctx(src_ctx); 7815 put_ctx(src_ctx);
7612 list_add(&event->migrate_entry, &events); 7816 list_add(&event->migrate_entry, &events);
7613 } 7817 }
7614 mutex_unlock(&src_ctx->mutex);
7615 7818
7819 /*
7820 * Wait for the events to quiesce before re-instating them.
7821 */
7616 synchronize_rcu(); 7822 synchronize_rcu();
7617 7823
7618 mutex_lock(&dst_ctx->mutex); 7824 /*
7825 * Re-instate events in 2 passes.
7826 *
7827 * Skip over group leaders and only install siblings on this first
7828 * pass, siblings will not get enabled without a leader, however a
7829 * leader will enable its siblings, even if those are still on the old
7830 * context.
7831 */
7832 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
7833 if (event->group_leader == event)
7834 continue;
7835
7836 list_del(&event->migrate_entry);
7837 if (event->state >= PERF_EVENT_STATE_OFF)
7838 event->state = PERF_EVENT_STATE_INACTIVE;
7839 account_event_cpu(event, dst_cpu);
7840 perf_install_in_context(dst_ctx, event, dst_cpu);
7841 get_ctx(dst_ctx);
7842 }
7843
7844 /*
7845 * Once all the siblings are setup properly, install the group leaders
7846 * to make it go.
7847 */
7619 list_for_each_entry_safe(event, tmp, &events, migrate_entry) { 7848 list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
7620 list_del(&event->migrate_entry); 7849 list_del(&event->migrate_entry);
7621 if (event->state >= PERF_EVENT_STATE_OFF) 7850 if (event->state >= PERF_EVENT_STATE_OFF)
@@ -7625,6 +7854,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
7625 get_ctx(dst_ctx); 7854 get_ctx(dst_ctx);
7626 } 7855 }
7627 mutex_unlock(&dst_ctx->mutex); 7856 mutex_unlock(&dst_ctx->mutex);
7857 mutex_unlock(&src_ctx->mutex);
7628} 7858}
7629EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); 7859EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
7630 7860
@@ -7811,14 +8041,19 @@ static void perf_free_event(struct perf_event *event,
7811 8041
7812 put_event(parent); 8042 put_event(parent);
7813 8043
8044 raw_spin_lock_irq(&ctx->lock);
7814 perf_group_detach(event); 8045 perf_group_detach(event);
7815 list_del_event(event, ctx); 8046 list_del_event(event, ctx);
8047 raw_spin_unlock_irq(&ctx->lock);
7816 free_event(event); 8048 free_event(event);
7817} 8049}
7818 8050
7819/* 8051/*
7820 * free an unexposed, unused context as created by inheritance by 8052 * Free an unexposed, unused context as created by inheritance by
7821 * perf_event_init_task below, used by fork() in case of fail. 8053 * perf_event_init_task below, used by fork() in case of fail.
8054 *
8055 * Not all locks are strictly required, but take them anyway to be nice and
8056 * help out with the lockdep assertions.
7822 */ 8057 */
7823void perf_event_free_task(struct task_struct *task) 8058void perf_event_free_task(struct task_struct *task)
7824{ 8059{
@@ -8137,7 +8372,7 @@ static void __init perf_event_init_all_cpus(void)
8137 for_each_possible_cpu(cpu) { 8372 for_each_possible_cpu(cpu) {
8138 swhash = &per_cpu(swevent_htable, cpu); 8373 swhash = &per_cpu(swevent_htable, cpu);
8139 mutex_init(&swhash->hlist_mutex); 8374 mutex_init(&swhash->hlist_mutex);
8140 INIT_LIST_HEAD(&per_cpu(rotation_list, cpu)); 8375 INIT_LIST_HEAD(&per_cpu(active_ctx_list, cpu));
8141 } 8376 }
8142} 8377}
8143 8378
@@ -8158,22 +8393,11 @@ static void perf_event_init_cpu(int cpu)
8158} 8393}
8159 8394
8160#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC 8395#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
8161static void perf_pmu_rotate_stop(struct pmu *pmu)
8162{
8163 struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
8164
8165 WARN_ON(!irqs_disabled());
8166
8167 list_del_init(&cpuctx->rotation_list);
8168}
8169
8170static void __perf_event_exit_context(void *__info) 8396static void __perf_event_exit_context(void *__info)
8171{ 8397{
8172 struct remove_event re = { .detach_group = true }; 8398 struct remove_event re = { .detach_group = true };
8173 struct perf_event_context *ctx = __info; 8399 struct perf_event_context *ctx = __info;
8174 8400
8175 perf_pmu_rotate_stop(ctx->pmu);
8176
8177 rcu_read_lock(); 8401 rcu_read_lock();
8178 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry) 8402 list_for_each_entry_rcu(re.event, &ctx->event_list, event_entry)
8179 __perf_remove_from_context(&re); 8403 __perf_remove_from_context(&re);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 146a5792b1d2..eadb95ce7aac 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -13,12 +13,13 @@
13#include <linux/vmalloc.h> 13#include <linux/vmalloc.h>
14#include <linux/slab.h> 14#include <linux/slab.h>
15#include <linux/circ_buf.h> 15#include <linux/circ_buf.h>
16#include <linux/poll.h>
16 17
17#include "internal.h" 18#include "internal.h"
18 19
19static void perf_output_wakeup(struct perf_output_handle *handle) 20static void perf_output_wakeup(struct perf_output_handle *handle)
20{ 21{
21 atomic_set(&handle->rb->poll, POLL_IN); 22 atomic_set(&handle->rb->poll, POLLIN);
22 23
23 handle->event->pending_wakeup = 1; 24 handle->event->pending_wakeup = 1;
24 irq_work_queue(&handle->event->pending); 25 irq_work_queue(&handle->event->pending);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5eab11d4b747..1612578a5b7a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1082,7 +1082,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
1082 if (p->sched_class->migrate_task_rq) 1082 if (p->sched_class->migrate_task_rq)
1083 p->sched_class->migrate_task_rq(p, new_cpu); 1083 p->sched_class->migrate_task_rq(p, new_cpu);
1084 p->se.nr_migrations++; 1084 p->se.nr_migrations++;
1085 perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, NULL, 0); 1085 perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
1086 } 1086 }
1087 1087
1088 __set_task_cpu(p, new_cpu); 1088 __set_task_cpu(p, new_cpu);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 4b9c114ee9de..6fa484de2ba1 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -261,7 +261,7 @@ void perf_trace_del(struct perf_event *p_event, int flags)
261} 261}
262 262
263void *perf_trace_buf_prepare(int size, unsigned short type, 263void *perf_trace_buf_prepare(int size, unsigned short type,
264 struct pt_regs *regs, int *rctxp) 264 struct pt_regs **regs, int *rctxp)
265{ 265{
266 struct trace_entry *entry; 266 struct trace_entry *entry;
267 unsigned long flags; 267 unsigned long flags;
@@ -280,6 +280,8 @@ void *perf_trace_buf_prepare(int size, unsigned short type,
280 if (*rctxp < 0) 280 if (*rctxp < 0)
281 return NULL; 281 return NULL;
282 282
283 if (regs)
284 *regs = this_cpu_ptr(&__perf_regs[*rctxp]);
283 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]); 285 raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);
284 286
285 /* zero the dead bytes from align to not leak stack to user */ 287 /* zero the dead bytes from align to not leak stack to user */
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5edb518be345..296079ae6583 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1148,7 +1148,7 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
1148 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1148 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1149 size -= sizeof(u32); 1149 size -= sizeof(u32);
1150 1150
1151 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1151 entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
1152 if (!entry) 1152 if (!entry)
1153 return; 1153 return;
1154 1154
@@ -1179,7 +1179,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
1179 size = ALIGN(__size + sizeof(u32), sizeof(u64)); 1179 size = ALIGN(__size + sizeof(u32), sizeof(u64));
1180 size -= sizeof(u32); 1180 size -= sizeof(u32);
1181 1181
1182 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1182 entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
1183 if (!entry) 1183 if (!entry)
1184 return; 1184 return;
1185 1185
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index c6ee36fcbf90..f97f6e3a676c 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -574,7 +574,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
574 size -= sizeof(u32); 574 size -= sizeof(u32);
575 575
576 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size, 576 rec = (struct syscall_trace_enter *)perf_trace_buf_prepare(size,
577 sys_data->enter_event->event.type, regs, &rctx); 577 sys_data->enter_event->event.type, NULL, &rctx);
578 if (!rec) 578 if (!rec)
579 return; 579 return;
580 580
@@ -647,7 +647,7 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
647 size -= sizeof(u32); 647 size -= sizeof(u32);
648 648
649 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size, 649 rec = (struct syscall_trace_exit *)perf_trace_buf_prepare(size,
650 sys_data->exit_event->event.type, regs, &rctx); 650 sys_data->exit_event->event.type, NULL, &rctx);
651 if (!rec) 651 if (!rec)
652 return; 652 return;
653 653
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 8520acc34b18..b11441321e7a 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1111,7 +1111,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
1111 if (hlist_empty(head)) 1111 if (hlist_empty(head))
1112 goto out; 1112 goto out;
1113 1113
1114 entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx); 1114 entry = perf_trace_buf_prepare(size, call->event.type, NULL, &rctx);
1115 if (!entry) 1115 if (!entry)
1116 goto out; 1116 goto out;
1117 1117