author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2011-02-02 07:19:09 -0500
committer  Ingo Molnar <mingo@elte.hu>               2011-02-03 06:14:43 -0500
commit     fe4b04fa31a6dcf4358aa84cf81e5a7fd079469b (patch)
tree       20a6db874d0db2a2f2e38e3ff77df4bdaa5f1cfe /kernel/perf_event.c
parent     b84defe6036e6dea782d41b80a4590e54f249671 (diff)
perf: Cure task_oncpu_function_call() races
Oleg reported that on architectures with __ARCH_WANT_INTERRUPTS_ON_CTXSW
the IPI from task_oncpu_function_call() can land before
perf_event_task_sched_in() and cause interesting situations for, e.g.,
perf_install_in_context().

This patch reworks the task_oncpu_function_call() interface into a more
usable primitive and reworks all its users to hopefully be more obvious
and to remove the races.

While looking at the code I also found a number of races against
perf_event_task_sched_out(), which can flip contexts between tasks; plug
those too.
Reported-and-reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--  kernel/perf_event.c | 260
1 file changed, 168 insertions(+), 92 deletions(-)
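The rework replaces the fire-and-forget task_oncpu_function_call() with task_function_call()/cpu_function_call(), which report whether the cross-call actually ran on the task's CPU. Each caller then follows the same retry pattern: attempt the IPI, and if the task was not running, take ctx->lock, re-check ctx->is_active, and either retry or perform the update locally. The condensed sketch below is modelled on the new perf_remove_from_context() from this patch (kerneldoc and some comments trimmed); it is an illustration of the pattern, not additional code:

```c
static void perf_remove_from_context(struct perf_event *event)
{
	struct perf_event_context *ctx = event->ctx;
	struct task_struct *task = ctx->task;

	lockdep_assert_held(&ctx->mutex);

	if (!task) {
		/* Per-cpu event: the cross-call always succeeds. */
		cpu_function_call(event->cpu, __perf_remove_from_context, event);
		return;
	}

retry:
	/* 0 means __perf_remove_from_context() ran on the task's CPU. */
	if (!task_function_call(task, __perf_remove_from_context, event))
		return;

	raw_spin_lock_irq(&ctx->lock);
	/*
	 * The task wasn't running when the IPI was attempted; if the
	 * context became active meanwhile, retry the cross-call.
	 */
	if (ctx->is_active) {
		raw_spin_unlock_irq(&ctx->lock);
		goto retry;
	}

	/* Task not running and ctx->lock held: safe to update locally. */
	list_del_event(event, ctx);
	raw_spin_unlock_irq(&ctx->lock);
}
```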
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 126a302c481c..7d3faa25e136 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -38,6 +38,79 @@
 
 #include <asm/irq_regs.h>
 
+struct remote_function_call {
+	struct task_struct *p;
+	int (*func)(void *info);
+	void *info;
+	int ret;
+};
+
+static void remote_function(void *data)
+{
+	struct remote_function_call *tfc = data;
+	struct task_struct *p = tfc->p;
+
+	if (p) {
+		tfc->ret = -EAGAIN;
+		if (task_cpu(p) != smp_processor_id() || !task_curr(p))
+			return;
+	}
+
+	tfc->ret = tfc->func(tfc->info);
+}
+
+/**
+ * task_function_call - call a function on the cpu on which a task runs
+ * @p: the task to evaluate
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func when the task is currently running. This might
+ * be on the current CPU, which just calls the function directly
+ *
+ * returns: @func return value, or
+ *	    -ESRCH  - when the process isn't running
+ *	    -EAGAIN - when the process moved away
+ */
+static int
+task_function_call(struct task_struct *p, int (*func) (void *info), void *info)
+{
+	struct remote_function_call data = {
+		.p = p,
+		.func = func,
+		.info = info,
+		.ret = -ESRCH, /* No such (running) process */
+	};
+
+	if (task_curr(p))
+		smp_call_function_single(task_cpu(p), remote_function, &data, 1);
+
+	return data.ret;
+}
+
+/**
+ * cpu_function_call - call a function on the cpu
+ * @func: the function to be called
+ * @info: the function call argument
+ *
+ * Calls the function @func on the remote cpu.
+ *
+ * returns: @func return value or -ENXIO when the cpu is offline
+ */
+static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
+{
+	struct remote_function_call data = {
+		.p = NULL,
+		.func = func,
+		.info = info,
+		.ret = -ENXIO, /* No such CPU */
+	};
+
+	smp_call_function_single(cpu, remote_function, &data, 1);
+
+	return data.ret;
+}
+
 enum event_type_t {
 	EVENT_FLEXIBLE = 0x1,
 	EVENT_PINNED = 0x2,
@@ -254,7 +327,6 @@ static void perf_unpin_context(struct perf_event_context *ctx)
 	raw_spin_lock_irqsave(&ctx->lock, flags);
 	--ctx->pin_count;
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
-	put_ctx(ctx);
 }
 
 /*
@@ -618,35 +690,24 @@ __get_cpu_context(struct perf_event_context *ctx)
  * We disable the event on the hardware level first. After that we
  * remove it from the context list.
  */
-static void __perf_event_remove_from_context(void *info)
+static int __perf_remove_from_context(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 
-	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
-
 	raw_spin_lock(&ctx->lock);
-
 	event_sched_out(event, cpuctx, ctx);
-
 	list_del_event(event, ctx);
-
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 
 /*
  * Remove the event from a task's (or a CPU's) list of events.
  *
- * Must be called with ctx->mutex held.
- *
  * CPU events are removed with a smp call. For task events we only
  * call when the task is on a CPU.
  *
@@ -657,49 +718,48 @@ static void __perf_event_remove_from_context(void *info)
  * When called from perf_event_exit_task, it's OK because the
  * context has been detached from its task.
  */
-static void perf_event_remove_from_context(struct perf_event *event)
+static void perf_remove_from_context(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 	struct task_struct *task = ctx->task;
 
+	lockdep_assert_held(&ctx->mutex);
+
 	if (!task) {
 		/*
 		 * Per cpu events are removed via an smp call and
 		 * the removal is always successful.
 		 */
-		smp_call_function_single(event->cpu,
-					 __perf_event_remove_from_context,
-					 event, 1);
+		cpu_function_call(event->cpu, __perf_remove_from_context, event);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_event_remove_from_context,
-				 event);
+	if (!task_function_call(task, __perf_remove_from_context, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * If the context is active we need to retry the smp call.
+	 * If we failed to find a running task, but find the context active now
+	 * that we've acquired the ctx->lock, retry.
 	 */
-	if (ctx->nr_active && !list_empty(&event->group_entry)) {
+	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
 	/*
-	 * The lock prevents that this context is scheduled in so we
-	 * can remove the event safely, if the call above did not
-	 * succeed.
+	 * Since the task isn't running, its safe to remove the event, us
+	 * holding the ctx->lock ensures the task won't get scheduled in.
 	 */
-	if (!list_empty(&event->group_entry))
-		list_del_event(event, ctx);
+	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
 /*
  * Cross CPU call to disable a performance event
  */
-static void __perf_event_disable(void *info)
+static int __perf_event_disable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -708,9 +768,12 @@ static void __perf_event_disable(void *info)
 	/*
 	 * If this is a per-task event, need to check whether this
 	 * event's task is the current task on this cpu.
+	 *
+	 * Can trigger due to concurrent perf_event_context_sched_out()
+	 * flipping contexts around.
 	 */
 	if (ctx->task && cpuctx->task_ctx != ctx)
-		return;
+		return -EINVAL;
 
 	raw_spin_lock(&ctx->lock);
 
@@ -729,6 +792,8 @@ static void __perf_event_disable(void *info)
 	}
 
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 /*
@@ -753,13 +818,13 @@ void perf_event_disable(struct perf_event *event)
 		/*
 		 * Disable the event on the cpu that it's on
 		 */
-		smp_call_function_single(event->cpu, __perf_event_disable,
-					 event, 1);
+		cpu_function_call(event->cpu, __perf_event_disable, event);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_event_disable, event);
+	if (!task_function_call(task, __perf_event_disable, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
@@ -767,6 +832,11 @@ retry:
 	 */
 	if (event->state == PERF_EVENT_STATE_ACTIVE) {
 		raw_spin_unlock_irq(&ctx->lock);
+		/*
+		 * Reload the task pointer, it might have been changed by
+		 * a concurrent perf_event_context_sched_out().
+		 */
+		task = ctx->task;
 		goto retry;
 	}
 
@@ -778,7 +848,6 @@ retry:
 		update_group_times(event);
 		event->state = PERF_EVENT_STATE_OFF;
 	}
-
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -928,12 +997,14 @@ static void add_event_to_ctx(struct perf_event *event,
 	event->tstamp_stopped = tstamp;
 }
 
+static void perf_event_context_sched_in(struct perf_event_context *ctx);
+
 /*
  * Cross CPU call to install and enable a performance event
  *
  * Must be called with ctx->mutex held
  */
-static void __perf_install_in_context(void *info)
+static int __perf_install_in_context(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -942,17 +1013,12 @@ static void __perf_install_in_context(void *info)
 	int err;
 
 	/*
-	 * If this is a task context, we need to check whether it is
-	 * the current task context of this cpu. If not it has been
-	 * scheduled out before the smp call arrived.
-	 * Or possibly this is the right context but it isn't
-	 * on this cpu because it had no events.
+	 * In case we're installing a new context to an already running task,
+	 * could also happen before perf_event_task_sched_in() on architectures
+	 * which do context switches with IRQs enabled.
 	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
-			return;
-		cpuctx->task_ctx = ctx;
-	}
+	if (ctx->task && !cpuctx->task_ctx)
+		perf_event_context_sched_in(ctx);
 
 	raw_spin_lock(&ctx->lock);
 	ctx->is_active = 1;
@@ -997,6 +1063,8 @@ static void __perf_install_in_context(void *info)
 
 unlock:
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 /*
@@ -1008,8 +1076,6 @@ unlock:
  * If the event is attached to a task which is on a CPU we use a smp
  * call to enable it in the task context. The task might have been
  * scheduled away, but we check this in the smp call again.
- *
- * Must be called with ctx->mutex held.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
@@ -1018,6 +1084,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 {
 	struct task_struct *task = ctx->task;
 
+	lockdep_assert_held(&ctx->mutex);
+
 	event->ctx = ctx;
 
 	if (!task) {
@@ -1025,31 +1093,29 @@ perf_install_in_context(struct perf_event_context *ctx,
 		 * Per cpu events are installed via an smp call and
 		 * the install is always successful.
 		 */
-		smp_call_function_single(cpu, __perf_install_in_context,
-					 event, 1);
+		cpu_function_call(cpu, __perf_install_in_context, event);
 		return;
 	}
 
 retry:
-	task_oncpu_function_call(task, __perf_install_in_context,
-				 event);
+	if (!task_function_call(task, __perf_install_in_context, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 	/*
-	 * we need to retry the smp call.
+	 * If we failed to find a running task, but find the context active now
+	 * that we've acquired the ctx->lock, retry.
 	 */
-	if (ctx->is_active && list_empty(&event->group_entry)) {
+	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
 		goto retry;
 	}
 
 	/*
-	 * The lock prevents that this context is scheduled in so we
-	 * can add the event safely, if it the call above did not
-	 * succeed.
+	 * Since the task isn't running, its safe to add the event, us holding
+	 * the ctx->lock ensures the task won't get scheduled in.
 	 */
-	if (list_empty(&event->group_entry))
-		add_event_to_ctx(event, ctx);
+	add_event_to_ctx(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
 }
 
@@ -1078,7 +1144,7 @@ static void __perf_event_mark_enabled(struct perf_event *event,
 /*
  * Cross CPU call to enable a performance event
  */
-static void __perf_event_enable(void *info)
+static int __perf_event_enable(void *info)
 {
 	struct perf_event *event = info;
 	struct perf_event_context *ctx = event->ctx;
@@ -1086,18 +1152,10 @@ static void __perf_event_enable(void *info)
 	struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
 	int err;
 
-	/*
-	 * If this is a per-task event, need to check whether this
-	 * event's task is the current task on this cpu.
-	 */
-	if (ctx->task && cpuctx->task_ctx != ctx) {
-		if (cpuctx->task_ctx || ctx->task != current)
-			return;
-		cpuctx->task_ctx = ctx;
-	}
+	if (WARN_ON_ONCE(!ctx->is_active))
+		return -EINVAL;
 
 	raw_spin_lock(&ctx->lock);
-	ctx->is_active = 1;
 	update_context_time(ctx);
 
 	if (event->state >= PERF_EVENT_STATE_INACTIVE)
@@ -1138,6 +1196,8 @@ static void __perf_event_enable(void *info)
 
 unlock:
 	raw_spin_unlock(&ctx->lock);
+
+	return 0;
 }
 
 /*
@@ -1158,8 +1218,7 @@ void perf_event_enable(struct perf_event *event)
 		/*
 		 * Enable the event on the cpu that it's on
 		 */
-		smp_call_function_single(event->cpu, __perf_event_enable,
-					 event, 1);
+		cpu_function_call(event->cpu, __perf_event_enable, event);
 		return;
 	}
 
@@ -1178,8 +1237,15 @@ void perf_event_enable(struct perf_event *event)
 		event->state = PERF_EVENT_STATE_OFF;
 
 retry:
+	if (!ctx->is_active) {
+		__perf_event_mark_enabled(event, ctx);
+		goto out;
+	}
+
 	raw_spin_unlock_irq(&ctx->lock);
-	task_oncpu_function_call(task, __perf_event_enable, event);
+
+	if (!task_function_call(task, __perf_event_enable, event))
+		return;
 
 	raw_spin_lock_irq(&ctx->lock);
 
@@ -1187,15 +1253,14 @@ retry:
 	 * If the context is active and the event is still off,
 	 * we need to retry the cross-call.
 	 */
-	if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF)
+	if (ctx->is_active && event->state == PERF_EVENT_STATE_OFF) {
+		/*
+		 * task could have been flipped by a concurrent
+		 * perf_event_context_sched_out()
+		 */
+		task = ctx->task;
 		goto retry;
-
-	/*
-	 * Since we have the lock this context can't be scheduled
-	 * in, so we can change the state safely.
-	 */
-	if (event->state == PERF_EVENT_STATE_OFF)
-		__perf_event_mark_enabled(event, ctx);
+	}
 
 out:
 	raw_spin_unlock_irq(&ctx->lock);
@@ -1339,8 +1404,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
 	}
 }
 
-void perf_event_context_sched_out(struct task_struct *task, int ctxn,
-				  struct task_struct *next)
+static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+					 struct task_struct *next)
 {
 	struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
 	struct perf_event_context *next_ctx;
@@ -1533,7 +1598,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
 {
 	struct perf_cpu_context *cpuctx;
 
-       cpuctx = __get_cpu_context(ctx);
+	cpuctx = __get_cpu_context(ctx);
 	if (cpuctx->task_ctx == ctx)
 		return;
 
@@ -1541,7 +1606,7 @@ static void task_ctx_sched_in(struct perf_event_context *ctx,
 	cpuctx->task_ctx = ctx;
 }
 
-void perf_event_context_sched_in(struct perf_event_context *ctx)
+static void perf_event_context_sched_in(struct perf_event_context *ctx)
 {
 	struct perf_cpu_context *cpuctx;
 
@@ -1627,7 +1692,7 @@ static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
  * Reduce accuracy by one bit such that @a and @b converge
  * to a similar magnitude.
  */
-#define REDUCE_FLS(a, b) \
+#define REDUCE_FLS(a, b)		\
 do { \
 	if (a##_fls > b##_fls) { \
 		a >>= 1; \
@@ -2213,6 +2278,9 @@ errout:
 
 }
 
+/*
+ * Returns a matching context with refcount and pincount.
+ */
 static struct perf_event_context *
 find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 {
@@ -2237,6 +2305,7 @@ find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 		ctx = &cpuctx->ctx;
 		get_ctx(ctx);
+		++ctx->pin_count;
 
 		return ctx;
 	}
@@ -2250,6 +2319,7 @@ retry:
 	ctx = perf_lock_task_context(task, ctxn, &flags);
 	if (ctx) {
 		unclone_ctx(ctx);
+		++ctx->pin_count;
 		raw_spin_unlock_irqrestore(&ctx->lock, flags);
 	}
 
@@ -2271,8 +2341,10 @@ retry:
 		err = -ESRCH;
 	else if (task->perf_event_ctxp[ctxn])
 		err = -EAGAIN;
-	else
+	else {
+		++ctx->pin_count;
 		rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+	}
 	mutex_unlock(&task->perf_event_mutex);
 
 	if (unlikely(err)) {
@@ -5950,10 +6022,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		struct perf_event_context *gctx = group_leader->ctx;
 
 		mutex_lock(&gctx->mutex);
-		perf_event_remove_from_context(group_leader);
+		perf_remove_from_context(group_leader);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_event_remove_from_context(sibling);
+			perf_remove_from_context(sibling);
 			put_ctx(gctx);
 		}
 		mutex_unlock(&gctx->mutex);
@@ -5976,6 +6048,7 @@ SYSCALL_DEFINE5(perf_event_open,
 
 	perf_install_in_context(ctx, event, cpu);
 	++ctx->generation;
+	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
 	event->owner = current;
@@ -6001,6 +6074,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	return event_fd;
 
 err_context:
+	perf_unpin_context(ctx);
 	put_ctx(ctx);
 err_alloc:
 	free_event(event);
@@ -6051,6 +6125,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	mutex_lock(&ctx->mutex);
 	perf_install_in_context(ctx, event, cpu);
 	++ctx->generation;
+	perf_unpin_context(ctx);
 	mutex_unlock(&ctx->mutex);
 
 	return event;
@@ -6104,7 +6179,7 @@ __perf_event_exit_task(struct perf_event *child_event,
 {
 	struct perf_event *parent_event;
 
-	perf_event_remove_from_context(child_event);
+	perf_remove_from_context(child_event);
 
 	parent_event = child_event->parent;
 	/*
@@ -6411,7 +6486,7 @@ inherit_task_group(struct perf_event *event, struct task_struct *parent,
 		return 0;
 	}
 
-       child_ctx = child->perf_event_ctxp[ctxn];
+	child_ctx = child->perf_event_ctxp[ctxn];
 	if (!child_ctx) {
 		/*
 		 * This is executed from the parent task context, so
@@ -6526,6 +6601,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	mutex_unlock(&parent_ctx->mutex);
 
 	perf_unpin_context(parent_ctx);
+	put_ctx(parent_ctx);
 
 	return ret;
 }
@@ -6595,9 +6671,9 @@ static void __perf_event_exit_context(void *__info)
 	perf_pmu_rotate_stop(ctx->pmu);
 
 	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry)
-		__perf_event_remove_from_context(event);
+		__perf_remove_from_context(event);
 	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry)
-		__perf_event_remove_from_context(event);
+		__perf_remove_from_context(event);
 }
 
 static void perf_event_exit_cpu_context(int cpu)