aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndi Kleen <ak@linux.intel.com>2015-05-28 00:13:18 -0400
committerIngo Molnar <mingo@kernel.org>2015-08-04 04:16:59 -0400
commit90405aa02247c1a6313c33e2253f9fd2299ae60b (patch)
tree2515d1a1ac3844ac12555b301df2c2e797b81d6c
parente0573364b8c5b17401569ef581f1625803210f4d (diff)
perf/x86/intel/lbr: Limit LBR accesses to TOS in callstack mode
In callstack mode the LBR is not a ring buffer, but a stack that grows up and down. This means in this case we don't need to access all LBRs, only the ones up to TOS. Do this optimization for the normal LBR read, and the context switch save/restore code. For save/restore it can be done unconditionally, as it only runs when call stack mode is active. This recovers some of the cost of going to 32 LBRs on Skylake. Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: acme@kernel.org Cc: eranian@google.com Cc: jolsa@redhat.com Link: http://lkml.kernel.org/r/1432786398-23861-6-git-send-email-andi@firstfloor.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel_lbr.c  10
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index a5bc424569b9..b2c9475b7ff2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -240,7 +240,7 @@ static void __intel_pmu_lbr_restore(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		wrmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		wrmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
@@ -263,7 +263,7 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
 
 	mask = x86_pmu.lbr_nr - 1;
 	tos = intel_pmu_lbr_tos();
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	for (i = 0; i < tos; i++) {
 		lbr_idx = (tos - i) & mask;
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, task_ctx->lbr_from[i]);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, task_ctx->lbr_to[i]);
@@ -425,8 +425,12 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 	int out = 0;
+	int num = x86_pmu.lbr_nr;
 
-	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+	if (cpuc->lbr_sel->config & LBR_CALL_STACK)
+		num = tos;
+
+	for (i = 0; i < num; i++) {
 		unsigned long lbr_idx = (tos - i) & mask;
 		u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
 		int skip = 0;