author     Peter Zijlstra <peterz@infradead.org>   2016-02-24 12:45:50 -0500
committer  Ingo Molnar <mingo@kernel.org>          2016-02-25 02:44:29 -0500
commit     a096309bc4677f60caa8e93fcc613a55073c51d4
tree       eb1a27d19c23f93adc746c74c8763f0ae6d7b901 /kernel
parent     bd2afa49d194c6412c333e9fdd48bc5d06bb465d
perf: Fix scaling vs. perf_install_in_context()
Completely reworks perf_install_in_context() (again!) in order to
ensure that there will be no ctx time hole between add_event_to_ctx()
and any potential ctx_sched_in().
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dvyukov@google.com
Cc: eranian@google.com
Cc: oleg@redhat.com
Cc: panand@redhat.com
Cc: sasha.levin@oracle.com
Cc: vince@deater.net
Link: http://lkml.kernel.org/r/20160224174948.279399438@infradead.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--   kernel/events/core.c | 115
1 file changed, 70 insertions(+), 45 deletions(-)
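The heart of the fix, visible in the __perf_install_in_context() hunk below, is the ordering enforced when the context is activated: ctx_sched_out(ctx, cpuctx, EVENT_TIME) stops the context clock, add_event_to_ctx() then adds the event against that frozen time, and ctx_resched() restarts everything, so a newly installed event can never be charged for context time that elapsed before it was on the list. The stand-alone sketch below only models that clock ordering; model_ctx, freeze_time(), resched() and the fake clock are invented for illustration and are not kernel API.

#include <stdio.h>

/* toy stand-ins for perf_event_context time accounting */
struct model_ctx {
        unsigned long long time;        /* accumulated context time */
        unsigned long long stamp;       /* last point the clock was folded in */
        int running;                    /* is the clock ticking? */
};

static unsigned long long now;          /* fake clock source */

static void fold_time(struct model_ctx *ctx)
{
        if (ctx->running)
                ctx->time += now - ctx->stamp;
        ctx->stamp = now;
}

/* stands in for ctx_sched_out(ctx, cpuctx, EVENT_TIME): freeze the clock */
static void freeze_time(struct model_ctx *ctx)
{
        fold_time(ctx);
        ctx->running = 0;
}

/* stands in for ctx_resched(): restart the clock and schedule back in */
static void resched(struct model_ctx *ctx)
{
        ctx->stamp = now;
        ctx->running = 1;
}

int main(void)
{
        struct model_ctx ctx = { .time = 0, .stamp = 0, .running = 1 };
        unsigned long long enabled_at;

        now = 100;
        freeze_time(&ctx);      /* 1: clock stops at ctx->time == 100 */
        enabled_at = ctx.time;  /* 2: add_event_to_ctx() sees a consistent time */
        now = 105;              /* delay between add and sched_in ... */
        resched(&ctx);          /* 3: ... is not accounted: no time hole */
        now = 110;
        fold_time(&ctx);

        printf("event enabled at %llu, ctx time %llu\n", enabled_at, ctx.time);
        return 0;
}

Compiled and run, the model reports the event as enabled at time 100 while the context clock only resumes once the (modelled) reschedule has happened, which is exactly the property the commit message describes.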
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 57c25faecfa5..25edabd207de 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -276,10 +276,10 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
                 return;
         }
 
-again:
         if (task == TASK_TOMBSTONE)
                 return;
 
+again:
         if (!task_function_call(task, event_function, &efs))
                 return;
 
@@ -289,13 +289,15 @@ again:
          * a concurrent perf_event_context_sched_out().
          */
         task = ctx->task;
-        if (task != TASK_TOMBSTONE) {
-                if (ctx->is_active) {
-                        raw_spin_unlock_irq(&ctx->lock);
-                        goto again;
-                }
-                func(event, NULL, ctx, data);
+        if (task == TASK_TOMBSTONE) {
+                raw_spin_unlock_irq(&ctx->lock);
+                return;
+        }
+        if (ctx->is_active) {
+                raw_spin_unlock_irq(&ctx->lock);
+                goto again;
         }
+        func(event, NULL, ctx, data);
         raw_spin_unlock_irq(&ctx->lock);
 }
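The two hunks above straighten out event_function_call()'s slow path: the one-time TASK_TOMBSTONE check moves ahead of the again: label, and the re-check taken under ctx->lock after a missed IPI becomes a flat ladder of early exits (task gone: unlock and return; context still active: unlock and retry the IPI; otherwise run func() locally under the lock). The short sketch below models only that ladder; model_ctx, after_missed_ipi() and its fields are hypothetical names, not the kernel's.

#include <stdio.h>
#include <stdbool.h>

enum remote_result { DONE, RETRY_IPI };

struct model_ctx {
        bool task_dead;         /* stands in for task == TASK_TOMBSTONE */
        bool is_active;         /* context currently scheduled in */
};

/* the decision ladder taken under ctx->lock once the IPI has missed */
static enum remote_result after_missed_ipi(struct model_ctx *ctx)
{
        if (ctx->task_dead)
                return DONE;            /* unlock and give up, nothing to do */
        if (ctx->is_active)
                return RETRY_IPI;       /* unlock and send the IPI again */
        printf("func() runs locally under ctx->lock\n");
        return DONE;
}

int main(void)
{
        struct model_ctx ctx = { .task_dead = false, .is_active = true };

        /* first miss: the context is still active elsewhere, so retry */
        if (after_missed_ipi(&ctx) == RETRY_IPI)
                ctx.is_active = false;  /* pretend it scheduled out meanwhile */
        /* second miss: now it is safe to run the function right here */
        after_missed_ipi(&ctx);
        return 0;
}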
@@ -2116,49 +2118,68 @@ static void ctx_resched(struct perf_cpu_context *cpuctx,
 /*
  * Cross CPU call to install and enable a performance event
  *
- * Must be called with ctx->mutex held
+ * Very similar to remote_function() + event_function() but cannot assume that
+ * things like ctx->is_active and cpuctx->task_ctx are set.
  */
 static int __perf_install_in_context(void *info)
 {
-        struct perf_event_context *ctx = info;
+        struct perf_event *event = info;
+        struct perf_event_context *ctx = event->ctx;
         struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
         struct perf_event_context *task_ctx = cpuctx->task_ctx;
+        bool activate = true;
+        int ret = 0;
 
         raw_spin_lock(&cpuctx->ctx.lock);
         if (ctx->task) {
                 raw_spin_lock(&ctx->lock);
-                /*
-                 * If we hit the 'wrong' task, we've since scheduled and
-                 * everything should be sorted, nothing to do!
-                 */
                 task_ctx = ctx;
-                if (ctx->task != current)
+
+                /* If we're on the wrong CPU, try again */
+                if (task_cpu(ctx->task) != smp_processor_id()) {
+                        ret = -ESRCH;
                         goto unlock;
+                }
 
                 /*
-                 * If task_ctx is set, it had better be to us.
+                 * If we're on the right CPU, see if the task we target is
+                 * current, if not we don't have to activate the ctx, a future
+                 * context switch will do that for us.
                  */
-                WARN_ON_ONCE(cpuctx->task_ctx != ctx && cpuctx->task_ctx);
+                if (ctx->task != current)
+                        activate = false;
+                else
+                        WARN_ON_ONCE(cpuctx->task_ctx && cpuctx->task_ctx != ctx);
+
         } else if (task_ctx) {
                 raw_spin_lock(&task_ctx->lock);
         }
 
-        ctx_resched(cpuctx, task_ctx);
+        if (activate) {
+                ctx_sched_out(ctx, cpuctx, EVENT_TIME);
+                add_event_to_ctx(event, ctx);
+                ctx_resched(cpuctx, task_ctx);
+        } else {
+                add_event_to_ctx(event, ctx);
+        }
+
 unlock:
         perf_ctx_unlock(cpuctx, task_ctx);
 
-        return 0;
+        return ret;
 }
 
 /*
- * Attach a performance event to a context
+ * Attach a performance event to a context.
+ *
+ * Very similar to event_function_call, see comment there.
  */
 static void
 perf_install_in_context(struct perf_event_context *ctx,
                         struct perf_event *event,
                         int cpu)
 {
-        struct task_struct *task = NULL;
+        struct task_struct *task = READ_ONCE(ctx->task);
 
         lockdep_assert_held(&ctx->mutex);
 
@@ -2166,42 +2187,46 @@ perf_install_in_context(struct perf_event_context *ctx,
         if (event->cpu != -1)
                 event->cpu = cpu;
 
+        if (!task) {
+                cpu_function_call(cpu, __perf_install_in_context, event);
+                return;
+        }
+
+        /*
+         * Should not happen, we validate the ctx is still alive before calling.
+         */
+        if (WARN_ON_ONCE(task == TASK_TOMBSTONE))
+                return;
+
         /*
          * Installing events is tricky because we cannot rely on ctx->is_active
          * to be set in case this is the nr_events 0 -> 1 transition.
-         *
-         * So what we do is we add the event to the list here, which will allow
-         * a future context switch to DTRT and then send a racy IPI. If the IPI
-         * fails to hit the right task, this means a context switch must have
-         * happened and that will have taken care of business.
          */
-        raw_spin_lock_irq(&ctx->lock);
-        task = ctx->task;
-
+again:
         /*
-         * If between ctx = find_get_context() and mutex_lock(&ctx->mutex) the
-         * ctx gets destroyed, we must not install an event into it.
-         *
-         * This is normally tested for after we acquire the mutex, so this is
-         * a sanity check.
+         * Cannot use task_function_call() because we need to run on the task's
+         * CPU regardless of whether its current or not.
          */
+        if (!cpu_function_call(task_cpu(task), __perf_install_in_context, event))
+                return;
+
+        raw_spin_lock_irq(&ctx->lock);
+        task = ctx->task;
         if (WARN_ON_ONCE(task == TASK_TOMBSTONE)) {
+                /*
+                 * Cannot happen because we already checked above (which also
+                 * cannot happen), and we hold ctx->mutex, which serializes us
+                 * against perf_event_exit_task_context().
+                 */
                 raw_spin_unlock_irq(&ctx->lock);
                 return;
         }
-
-        if (ctx->is_active) {
-                update_context_time(ctx);
-                update_cgrp_time_from_event(event);
-        }
-
-        add_event_to_ctx(event, ctx);
         raw_spin_unlock_irq(&ctx->lock);
-
-        if (task)
-                task_function_call(task, __perf_install_in_context, ctx);
-        else
-                cpu_function_call(cpu, __perf_install_in_context, ctx);
+        /*
+         * Since !ctx->is_active doesn't mean anything, we must IPI
+         * unconditionally.
+         */
+        goto again;
 }
 
 /*
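The final hunks drop the old "add the event under ctx->lock, then fire a racy IPI" approach entirely: perf_install_in_context() now loops, aiming cpu_function_call() at task_cpu(task) every time, and __perf_install_in_context() answers -ESRCH whenever it finds the task has migrated to another CPU, which fails the cross-call and sends the caller around the again: loop. A minimal user-space sketch of that retry contract follows; remote_install(), install_with_retry() and the single task_cpu_now variable are made up to model the control flow only, with none of the real locking.

#include <stdio.h>

#define ESRCH 3

/* where the (single) modelled task is currently running */
static int task_cpu_now = 0;

/*
 * Remote side, loosely modelling __perf_install_in_context(): refuse the
 * install when the task has migrated off the CPU the call was aimed at.
 */
static int remote_install(int target_cpu)
{
        if (task_cpu_now != target_cpu)
                return -ESRCH;          /* wrong CPU, caller retries */
        printf("event installed on CPU %d\n", target_cpu);
        return 0;
}

/* Caller side, loosely modelling perf_install_in_context()'s IPI loop. */
static void install_with_retry(void)
{
        int cpu;

        do {
                cpu = task_cpu_now;     /* snapshot, may already be stale */
                if (cpu == 0)
                        task_cpu_now = 1;       /* simulate a racing migration */
        } while (remote_install(cpu) == -ESRCH);
}

int main(void)
{
        install_with_retry();   /* first attempt misses CPU 0, retry hits CPU 1 */
        return 0;
}

In the sketch the first attempt is deliberately made to miss (the task "migrates" while the call is in flight), so the loop demonstrates exactly one -ESRCH round trip before the install lands, mirroring how the patch retries until the IPI and the task meet on the same CPU.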