author    Peter Zijlstra <peterz@infradead.org>  2016-07-07 13:37:52 -0400
committer Ingo Molnar <mingo@kernel.org>         2016-08-10 07:13:27 -0400
commit    3e2c1a67d616dbc1034bc39448cd5f4aa3bd3cca (patch)
tree      d7724d36646c8d2614a49a59518254abdaf0fa71
parent    a5dcff628a678b9f4535155662f81c5cda066bc7 (diff)
perf/x86/intel: Clean up LBR state tracking
The lbr_context logic confused me; it appears to me to try and do the
same thing the pmu::sched_task() callback does now, but limited to
per-task events. So rip it out.

Afaict this should also improve performance, because I think the
current code can end up doing lbr_reset() twice, once from the
pmu::add() and then again from pmu::sched_task(), and MSR writes
(all 3*16 of them) are expensive!!

While thinking through the cases that need the reset it occurred to me
the first install of an event in an active context needs to reset the
LBR (who knows what crap is in there), but detecting this case is
somewhat hard.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
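The "somewhat hard" detection described in the changelog reduces to a single
test in the new intel_pmu_lbr_add(). Below is a minimal stand-alone model of
that test, for illustration only: cpu_state, pevent and lbr_add_needs_reset()
are made-up stand-ins rather than the kernel's types, and only the condition
mirrors the code this patch adds.

/* Illustrative model only; cpu_state and pevent are stand-ins, not kernel types. */
#include <stdbool.h>
#include <stdio.h>

struct cpu_state { int lbr_users; };                          /* stands in for cpu_hw_events */
struct pevent { unsigned long long total_time_running; };     /* stands in for perf_event    */

/* Mirrors the condition added to intel_pmu_lbr_add(). */
static bool lbr_add_needs_reset(struct cpu_state *cpuc, struct pevent *event)
{
	/*
	 * Wipe the LBR only for the first LBR user on this CPU, and only
	 * if the event has never run, i.e. no pmu::sched_task() callback
	 * can have handled it yet.
	 */
	return !cpuc->lbr_users++ && !event->total_time_running;
}

int main(void)
{
	struct cpu_state cpuc = { 0 };
	struct pevent fresh = { 0 };       /* new event, perf_install_in_context() path */
	struct pevent ran   = { 123456 };  /* has accrued runtime, cannot be 'new'      */

	printf("fresh event, first LBR user : reset=%d\n", lbr_add_needs_reset(&cpuc, &fresh));
	printf("old event, second LBR user  : reset=%d\n", lbr_add_needs_reset(&cpuc, &ran));
	return 0;
}

An event that has accrued runtime was necessarily scheduled in at least once,
so a pmu::sched_task() callback has already taken care of the LBR for it; only
a never-run event installed into an already-running context via
perf_install_in_context() needs the explicit wipe, which is why the second
case above does not reset.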
-rw-r--r--	arch/x86/events/intel/lbr.c	57
-rw-r--r--	arch/x86/events/perf_event.h	1
2 files changed, 29 insertions, 29 deletions
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 439b09d33856..fc6cf21c535e 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -380,7 +380,6 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
 	/*
@@ -390,31 +389,21 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 	 */
 	task_ctx = ctx ? ctx->task_ctx_data : NULL;
 	if (task_ctx) {
-		if (sched_in) {
+		if (sched_in)
 			__intel_pmu_lbr_restore(task_ctx);
-			cpuc->lbr_context = ctx;
-		} else {
+		else
 			__intel_pmu_lbr_save(task_ctx);
-		}
 		return;
 	}
 
 	/*
-	 * When sampling the branck stack in system-wide, it may be
-	 * necessary to flush the stack on context switch. This happens
-	 * when the branch stack does not tag its entries with the pid
-	 * of the current task. Otherwise it becomes impossible to
-	 * associate a branch entry with a task. This ambiguity is more
-	 * likely to appear when the branch stack supports priv level
-	 * filtering and the user sets it to monitor only at the user
-	 * level (which could be a useful measurement in system-wide
-	 * mode). In that case, the risk is high of having a branch
-	 * stack with branch from multiple tasks.
-	 */
-	if (sched_in) {
+	 * Since a context switch can flip the address space and LBR entries
+	 * are not tagged with an identifier, we need to wipe the LBR, even for
+	 * per-cpu events. You simply cannot resolve the branches from the old
+	 * address space.
+	 */
+	if (sched_in)
 		intel_pmu_lbr_reset();
-		cpuc->lbr_context = ctx;
-	}
 }
 
 static inline bool branch_user_callstack(unsigned br_sel)
@@ -430,14 +419,6 @@ void intel_pmu_lbr_add(struct perf_event *event)
 	if (!x86_pmu.lbr_nr)
 		return;
 
-	/*
-	 * Reset the LBR stack if we changed task context to
-	 * avoid data leaks.
-	 */
-	if (event->ctx->task && cpuc->lbr_context != event->ctx) {
-		intel_pmu_lbr_reset();
-		cpuc->lbr_context = event->ctx;
-	}
 	cpuc->br_sel = event->hw.branch_reg.reg;
 
 	if (branch_user_callstack(cpuc->br_sel) && event->ctx->task_ctx_data) {
@@ -445,8 +426,28 @@ void intel_pmu_lbr_add(struct perf_event *event)
 		task_ctx->lbr_callstack_users++;
 	}
 
-	cpuc->lbr_users++;
+	/*
+	 * Request pmu::sched_task() callback, which will fire inside the
+	 * regular perf event scheduling, so that call will:
+	 *
+	 *  - restore or wipe; when LBR-callstack,
+	 *  - wipe; otherwise,
+	 *
+	 * when this is from __perf_event_task_sched_in().
+	 *
+	 * However, if this is from perf_install_in_context(), no such callback
+	 * will follow and we'll need to reset the LBR here if this is the
+	 * first LBR event.
+	 *
+	 * The problem is, we cannot tell these cases apart... but we can
+	 * exclude the biggest chunk of cases by looking at
+	 * event->total_time_running. An event that has accrued runtime cannot
+	 * be 'new'. Conversely, a new event can get installed through the
+	 * context switch path for the first time.
+	 */
 	perf_sched_cb_inc(event->ctx->pmu);
+	if (!cpuc->lbr_users++ && !event->total_time_running)
+		intel_pmu_lbr_reset();
 }
 
 void intel_pmu_lbr_del(struct perf_event *event)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index aa6ea5a84240..5874d8de1f8d 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -201,7 +201,6 @@ struct cpu_hw_events {
 	 * Intel LBR bits
 	 */
 	int				lbr_users;
-	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 	struct er_account		*lbr_sel;