author		Yan, Zheng <zheng.z.yan@intel.com>	2014-11-04 21:55:57 -0500
committer	Ingo Molnar <mingo@kernel.org>		2015-02-18 11:16:01 -0500
commit		27ac905b8f88d28779b0661809286b5ba2817d37 (patch)
tree		fba88a53448a59d56df83895a087ebff5d032620 /arch
parent		c796b205b88c775fd220c1a63390bac6a8cdda3f (diff)
perf/x86/intel: Reduce lbr_sel_map[] size
The index into lbr_sel_map[] is the bit value of a perf branch_sample_type.
PERF_SAMPLE_BRANCH_MAX is 1024 at present, so each lbr_sel_map[] uses 4096
bytes. By using the bit shift as the index instead, lbr_sel_map[] shrinks to
40 bytes. This patch defines a 'bit shift' for each branch type and uses
those shifts to define the lbr_sel_maps.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Stephane Eranian <eranian@google.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: jolsa@redhat.com
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/1415156173-10035-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
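To make the size arithmetic above concrete, here is a minimal user-space
sketch of the two indexing schemes. The constants are local stand-ins that
mirror the perf ABI of this era (PERF_SAMPLE_BRANCH_MAX == 1 << 10); they are
assumptions for illustration, not the kernel's definitions:

	#include <stdio.h>

	/* Stand-in constants (assumption): 10 branch-type bits, as at the
	 * time of this patch. Not the kernel's PERF_SAMPLE_BRANCH_* values. */
	enum {
		SAMPLE_BRANCH_MAX_SHIFT	= 10,
		SAMPLE_BRANCH_MAX	= 1 << SAMPLE_BRANCH_MAX_SHIFT, /* 1024 */
	};

	/* Old scheme: indexed by the bit value, so the array must span every
	 * value below SAMPLE_BRANCH_MAX even though only 10 slots are used. */
	static const int sel_map_by_value[SAMPLE_BRANCH_MAX];
	/* New scheme: indexed by the bit shift, one slot per branch-type bit. */
	static const int sel_map_by_shift[SAMPLE_BRANCH_MAX_SHIFT];

	int main(void)
	{
		/* With 4-byte ints: 1024 * 4 = 4096 vs 10 * 4 = 40 bytes. */
		printf("by bit value: %zu bytes\n", sizeof(sel_map_by_value));
		printf("by bit shift: %zu bytes\n", sizeof(sel_map_by_shift));
		return 0;
	}

The filter-setup loop changes accordingly: instead of walking the bit values
(m <<= 1) and indexing with m, it walks the shifts (i++), tests
br_type & (1ULL << i), and indexes with i, as the second hunk below shows.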
Diffstat (limited to 'arch')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h		|  4
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_lbr.c	| 54
2 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index df525d2be1e8..0c45b22495dc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -515,6 +515,10 @@ struct x86_pmu {
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+enum {
+	PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT,
+};
+
 #define x86_add_quirk(func_)						\
 do {									\
 	static struct x86_pmu_quirk __quirk __initdata = {		\
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 58f1a94beaf0..8bc078f43a82 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -69,10 +69,6 @@ static enum {
 #define LBR_FROM_FLAG_IN_TX	(1ULL << 62)
 #define LBR_FROM_FLAG_ABORT	(1ULL << 61)
 
-#define for_each_branch_sample_type(x) \
-	for ((x) = PERF_SAMPLE_BRANCH_USER; \
-	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
-
 /*
  * x86control flow change classification
  * x86control flow changes include branches, interrupts, traps, faults
@@ -403,14 +399,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 {
 	struct hw_perf_event_extra *reg;
 	u64 br_type = event->attr.branch_sample_type;
-	u64 mask = 0, m;
-	u64 v;
+	u64 mask = 0, v;
+	int i;
 
-	for_each_branch_sample_type(m) {
-		if (!(br_type & m))
+	for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) {
+		if (!(br_type & (1ULL << i)))
 			continue;
 
-		v = x86_pmu.lbr_sel_map[m];
+		v = x86_pmu.lbr_sel_map[i];
 		if (v == LBR_NOT_SUPP)
 			return -EOPNOTSUPP;
 
@@ -678,35 +674,35 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 /*
  * Map interface branch filters onto LBR filters
  */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
-					| LBR_IND_JMP | LBR_FAR,
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
+						| LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
 	 */
-	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
 	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
-	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
-	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 };
 
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
-	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
-					| LBR_FAR,
-	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
-	[PERF_SAMPLE_BRANCH_COND]	= LBR_JCC,
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
 };
 
 /* core */