path: root/kernel/perf_event.c
author     Peter Zijlstra <a.p.zijlstra@chello.nl>  2011-02-02 07:19:09 -0500
committer  Ingo Molnar <mingo@elte.hu>              2011-02-03 06:14:43 -0500
commit     fe4b04fa31a6dcf4358aa84cf81e5a7fd079469b (patch)
tree       20a6db874d0db2a2f2e38e3ff77df4bdaa5f1cfe /kernel/perf_event.c
parent     b84defe6036e6dea782d41b80a4590e54f249671 (diff)
perf: Cure task_oncpu_function_call() races
Oleg reported that on architectures with __ARCH_WANT_INTERRUPTS_ON_CTXSW the IPI from task_oncpu_function_call() can land before perf_event_task_sched_in() and cause interesting situations for eg. perf_install_in_context().

This patch reworks the task_oncpu_function_call() interface to give a more usable primitive as well as rework all its users to hopefully be more obvious as well as remove the races.

While looking at the code I also found a number of races against perf_event_task_sched_out() which can flip contexts between tasks so plug those too.

Reported-and-reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
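For reference, the core of the reworked primitive is small: the cross-call payload re-checks, on the target CPU, that the task is still current there before invoking the callback, and reports -EAGAIN otherwise so the caller can retry under ctx->lock. The sketch below is condensed from the hunks that follow; it is not a standalone build and assumes the usual kernel headers (linux/smp.h, linux/sched.h) and helpers such as task_cpu()/task_curr().

struct remote_function_call {
	struct task_struct *p;		/* NULL for a plain CPU call */
	int (*func)(void *info);
	void *info;
	int ret;
};

/* Runs on the target CPU via smp_call_function_single(). */
static void remote_function(void *data)
{
	struct remote_function_call *tfc = data;
	struct task_struct *p = tfc->p;

	if (p) {
		tfc->ret = -EAGAIN;
		/* The task migrated or was scheduled out: let the caller retry. */
		if (task_cpu(p) != smp_processor_id() || !task_curr(p))
			return;
	}

	tfc->ret = tfc->func(tfc->info);
}

static int
task_function_call(struct task_struct *p, int (*func)(void *info), void *info)
{
	struct remote_function_call data = {
		.p    = p,
		.func = func,
		.info = info,
		.ret  = -ESRCH,	/* no such (running) process */
	};

	if (task_curr(p))
		smp_call_function_single(task_cpu(p), remote_function, &data, 1);

	return data.ret;
}

Callers such as perf_remove_from_context() treat any non-zero return as "the task was not running when we looked": they take ctx->lock, re-check ctx->is_active, and either retry the cross-call or perform the operation locally, since holding ctx->lock keeps the context from being scheduled in.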
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c  260
1 file changed, 168 insertions(+), 92 deletions(-)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 126a302c481c..7d3faa25e136 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,79 @@
 
 #include <asm/irq_regs.h>
 
+struct remote_function_call {
+	struct task_struct *p;
+	int (*func)(void *info);
+	void *info;
+	int ret;
+};
+
+static void remote_function(void *data)
+{
+	struct remote_function_call *tfc = data;
+	struct task_struct *p = tfc->p;
+
+	if (p) {
+		tfc->ret = -EAGAIN;
+		if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+			return;
+	}
+
+	tfc->ret = tfc->func(tfc->info);
+}
+
+/**
+ * task_function_call - call a function on the cpu on which a task runs
+ * @p: the task to evaluate
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ *
+ * returns: @func return value, or
+ *	    -ESRCH  - when the process isn't running
+ *	    -EAGAIN - when the process moved away
+ */
+static int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+{
+	struct remote_function_call data = {
+		.p = p,
+		.func = func,
+		.info = info,
+		.ret = -ESRCH, /* No such (running) process */
+	};
+
+	if (task_curr(p))
+		smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+
+	return data.ret;
+}
+
+/**
+ * cpu_function_call - call a function on the cpu
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func on the remote cpu.
+ *
+ * returns: @func return value or -ENXIO when the cpu is offline
+ */
+static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+{
+	struct remote_function_call data = {
+		.p = NULL,
+		.func = func,
+		.info = info,
+		.ret = -ENXIO, /* No such CPU */
+	};
+
+	smp_call_function_single(cpu, remote_function, &data, 1);
+
+	return data.ret;
+}
+
 enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
@@ -254,7 +327,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 	raw_spin_lock_irqsave(&ctx->lock, flags);
 	--ctx->pin_count;
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-	put_ctx(ctx);
 }
 
 /*
@@ -618,35 +690,24 @@ __get_cpu_context(struct perf_event_context *ctx)
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static void __perf_event_remove_from_context(void *info)
+static int __perf_remove_from_context(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
-
 	raw_spin_lock(&ctx->lock);
-
 	event_sched_out(event, cpuctx, ctx);
-
 	list_del_event(event, ctx);
-
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * Must be called with ctx->mutex held.
- *
  * CPU events are removed with a smp call. For task events we only
  * call when the task is on a CPU.
  *
@@ -657,49 +718,48 @@ static void __perf_event_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_event_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
 
+	lockdep_assert_held(&ctx->mutex);
+
 	if (!task) {
 		/*
 		 * Per cpu events are removed via an smp call and
 		 * the removal is always successful.
 		 */
-		smp_call_function_single(event->cpu,
-					 __perf_event_remove_from_context,
-					 event, 1);
+		cpu_function_call(event->cpu, __perf_remove_from_context, event);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_event_remove_from_context,
-				 event);
+	if (!task_function_call(task, __perf_remove_from_context, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * If the context is active we need to retry the smp call.
+	 * If we failed to find a running task, but find the context active now
+	 * that we've acquired the ctx->lock, retry.
 	 */
-	if (ctx->nr_active && !list_empty(&event->group_entry)) {
+	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
 	/*
-	 * The lock prevents that this context is scheduled in so we
-	 * can remove the event safely, if the call above did not
-	 * succeed.
+	 * Since the task isn't running, its safe to remove the event, us
+	 * holding the ctx->lock ensures the task won't get scheduled in.
 	 */
-	if (!list_empty(&event->group_entry))
-		list_del_event(event, ctx);
+	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-static void __perf_event_disable(void *info)
+static int __perf_event_disable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -708,9 +768,12 @@ static void __perf_event_disable(void *info)
 	/*
 	 * If this is a per-task event, need to check whether this
 	 * event's task is the current task on this cpu.
+	 *
+	 * Can trigger due to concurrent perf_event_context_sched_out()
+	 * flipping contexts around.
 	 */
 	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+		return -EINVAL;
 
 	raw_spin_lock(&ctx->lock);
 
@@ -729,6 +792,8 @@ static void __perf_event_disable(void *info)
 	}
 
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 /*
@@ -753,13 +818,13 @@ void perf_event_disable(struct perf_event *event)
 		/*
 		 * Disable the event on the cpu that it's on
 		 */
-		smp_call_function_single(event->cpu, __perf_event_disable,
-					 event, 1);
+		cpu_function_call(event->cpu, __perf_event_disable, event);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_event_disable, event);
+	if (!task_function_call(task, __perf_event_disable, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
@@ -767,6 +832,11 @@ retry:
 	 */
 	if (event->state == PERF_EVENT_STATE_ACTIVE) {
 		raw_spin_unlock_irq(&ctx->lock);
+		/*
+		 * Reload the task pointer, it might have been changed by
+		 * a concurrent perf_event_context_sched_out().
+		 */
+		task = ctx->task;
 		goto retry;
 	}
 
@@ -778,7 +848,6 @@ retry:
 		update_group_times(event);
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -928,12 +997,14 @@ static void add_event_to_ctx(struct perf_event *event,
 	event->tstamp_stopped = tstamp;
 }
 
+static void perf_event_context_sched_in(struct perf_event_context *ctx);
+
 /*
  * Cross CPU call to install and enable a performance event
  *
  * Must be called with ctx->mutex held
  */
-static void __perf_install_in_context(void *info)
+static int __perf_install_in_context(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -942,17 +1013,12 @@ static void __perf_install_in_context(void *info)
 	int err;
 
 	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 * Or possibly this is the right context but it isn't
-	 * on this cpu because it had no events.
+	 * In case we're installing a new context to an already running task,
+	 * could also happen before perf_event_task_sched_in() on architectures
+	 * which do context switches with IRQs enabled.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
-			return;
-		cpuctx->task_ctx = ctx;
-	}
+	if (ctx->task && !cpuctx->task_ctx)
+		perf_event_context_sched_in(ctx);
 
 	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
@@ -997,6 +1063,8 @@ static void __perf_install_in_context(void *info)
 
 unlock:
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 /*
@@ -1008,8 +1076,6 @@ unlock:
  * If the event is attached to a task which is on a CPU we use a smp
  * call to enable it in the task context. The task might have been
  * scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
@@ -1018,6 +1084,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 {
 	struct task_struct *task = ctx->task;
 
+	lockdep_assert_held(&ctx->mutex);
+
 	event->ctx = ctx;
 
 	if (!task) {
@@ -1025,31 +1093,29 @@ perf_install_in_context(struct perf_event_context *ctx,
 		 * Per cpu events are installed via an smp call and
 		 * the install is always successful.
 		 */
-		smp_call_function_single(cpu, __perf_install_in_context,
-					 event, 1);
+		cpu_function_call(cpu, __perf_install_in_context, event);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_install_in_context,
-				 event);
+	if (!task_function_call(task, __perf_install_in_context, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * we need to retry the smp call.
+	 * If we failed to find a running task, but find the context active now
+	 * that we've acquired the ctx->lock, retry.
 	 */
-	if (ctx->is_active && list_empty(&event->group_entry)) {
+	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
 	/*
-	 * The lock prevents that this context is scheduled in so we
-	 * can add the event safely, if it the call above did not
-	 * succeed.
+	 * Since the task isn't running, its safe to add the event, us holding
+	 * the ctx->lock ensures the task won't get scheduled in.
 	 */
-	if (list_empty(&event->group_entry))
-		add_event_to_ctx(event, ctx);
+	add_event_to_ctx(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -1078,7 +1144,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
 /*
  * Cross CPU call to enable a performance event
  */
-static void __perf_event_enable(void *info)
+static int __perf_event_enable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -1086,18 +1152,10 @@ static void __perf_event_enable(void *info)
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	int err;
 
-	/*
-	 * If this is a per-task event, need to check whether this
-	 * event's task is the current task on this cpu.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
-			return;
-		cpuctx->task_ctx = ctx;
-	}
+	if (WARN_ON_ONCE(!ctx->is_active))
+		return -EINVAL;
 
 	raw_spin_lock(&ctx->lock);
-	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	if (event->state >= PERF_EVENT_STATE_INACTIVE)
@@ -1138,6 +1196,8 @@ static void __perf_event_enable(void *info)
 
 unlock:
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 /*
@@ -1158,8 +1218,7 @@ void perf_event_enable(struct perf_event *event)
 		/*
 		 * Enable the event on the cpu that it's on
 		 */
-		smp_call_function_single(event->cpu, __perf_event_enable,
-					 event, 1);
+		cpu_function_call(event->cpu, __perf_event_enable, event);
 		return;
 	}
 
@@ -1178,8 +1237,15 @@ void perf_event_enable(struct perf_event *event)
 		event->state = PERF_EVENT_STATE_OFF;
 
 retry:
+	if (!ctx->is_active) {
+		__perf_event_mark_enabled(event, ctx);
+		goto out;
+	}
+
 	raw_spin_unlock_irq(&ctx->lock);
-	task_oncpu_function_call(task, __perf_event_enable, event);
+
+	if (!task_function_call(task, __perf_event_enable, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 
@@ -1187,15 +1253,14 @@ retry:
 	 * If the context is active and the event is still off,
 	 * we need to retry the cross-call.
 	 */
-	if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
+	if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
+		/*
+		 * task could have been flipped by a concurrent
+		 * perf_event_context_sched_out()
+		 */
+		task = ctx->task;
 		goto retry;
-
-	/*
-	 * Since we have the lock this context can't be scheduled
-	 * in, so we can change the state safely.
-	 */
-	if (event->state == PERF_EVENT_STATE_OFF)
-		__perf_event_mark_enabled(event, ctx);
+	}
 
 out:
 	raw_spin_unlock_irq(&ctx->lock);
@@ -1339,8 +1404,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
 	}
 }
 
-void perf_event_context_sched_out(struct task_struct *task, int ctxn,
-				  struct task_struct *next)
+static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+					 struct task_struct *next)
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
@@ -1533,7 +1598,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
 {
 	struct perf_cpu_context *cpuctx;
 
-	cpuctx = __get_cpu_context(ctx);
+	cpuctx = __get_cpu_context(ctx);
 	if (cpuctx->task_ctx == ctx)
 		return;
 
@@ -1541,7 +1606,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
 	cpuctx->task_ctx = ctx;
 }
 
-void perf_event_context_sched_in(struct perf_event_context *ctx)
+static void perf_event_context_sched_in(struct perf_event_context *ctx)
 {
 	struct perf_cpu_context *cpuctx;
 
@@ -1627,7 +1692,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
  * Reduce accuracy by one bit such that @a and @b converge
  * to a similar magnitude.
  */
-#define REDUCE_FLS(a, b) \
+#define REDUCE_FLS(a, b) \
 do { \
 	if (a##_fls > b##_fls) { \
 		a >>= 1; \
@@ -2213,6 +2278,9 @@ errout:
 
 }
 
+/*
+ * Returns a matching context with refcount and pincount.
+ */
 static struct perf_event_context *
 find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 {
@@ -2237,6 +2305,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 		get_ctx(ctx);
+		++ctx->pin_count;
 
 		return ctx;
 	}
@@ -2250,6 +2319,7 @@ retry:
 	ctx = perf_lock_task_context(task, ctxn, &flags);
 	if (ctx) {
 		unclone_ctx(ctx);
+		++ctx->pin_count;
 		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	}
 
@@ -2271,8 +2341,10 @@ retry:
 		err = -ESRCH;
 	else if (task->perf_event_ctxp[ctxn])
 		err = -EAGAIN;
-	else
+	else {
+		++ctx->pin_count;
 		rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+	}
 	mutex_unlock(&task->perf_event_mutex);
 
 	if (unlikely(err)) {
@@ -5950,10 +6022,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		struct perf_event_context *gctx = group_leader->ctx;
 
 		mutex_lock(&gctx->mutex);
-		perf_event_remove_from_context(group_leader);
+		perf_remove_from_context(group_leader);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_event_remove_from_context(sibling);
+			perf_remove_from_context(sibling);
 			put_ctx(gctx);
 		}
 		mutex_unlock(&gctx->mutex);
@@ -5976,6 +6048,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	perf_install_in_context(ctx, event, cpu);
 	++ctx->generation;
+	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
 	event->owner = current;
@@ -6001,6 +6074,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	return event_fd;
 
 err_context:
+	perf_unpin_context(ctx);
 	put_ctx(ctx);
 err_alloc:
 	free_event(event);
@@ -6051,6 +6125,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
 	++ctx->generation;
+	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
 	return event;
@@ -6104,7 +6179,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 {
 	struct perf_event *parent_event;
 
-	perf_event_remove_from_context(child_event);
+	perf_remove_from_context(child_event);
 
 	parent_event = child_event->parent;
 	/*
@@ -6411,7 +6486,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
 		return 0;
 	}
 
-	child_ctx = child->perf_event_ctxp[ctxn];
+	child_ctx = child->perf_event_ctxp[ctxn];
 	if (!child_ctx) {
 		/*
 		 * This is executed from the parent task context, so
@@ -6526,6 +6601,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	mutex_unlock(&parent_ctx->mutex);
 
 	perf_unpin_context(parent_ctx);
+	put_ctx(parent_ctx);
 
 	return ret;
 }
@@ -6595,9 +6671,9 @@ static void __perf_event_exit_context(void *__info)
 	perf_pmu_rotate_stop(ctx->pmu);
 
 	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
-		__perf_event_remove_from_context(event);
+		__perf_remove_from_context(event);
 	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
-		__perf_event_remove_from_context(event);
+		__perf_remove_from_context(event);
 }
 
 static void perf_event_exit_cpu_context(int cpu)