diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2014-11-04 21:55:57 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-02-18 11:16:01 -0500 |
commit | 27ac905b8f88d28779b0661809286b5ba2817d37 (patch) | |
tree | fba88a53448a59d56df83895a087ebff5d032620 /arch | |
parent | c796b205b88c775fd220c1a63390bac6a8cdda3f (diff) |
perf/x86/intel: Reduce lbr_sel_map[] size
The index of lbr_sel_map is the bit value of the perf branch_sample_type.
PERF_SAMPLE_BRANCH_MAX is 1024 at present, so each lbr_sel_map uses
4096 bytes. By using the bit shift as the index, we can reduce the
lbr_sel_map size to 40 bytes. This patch defines a 'bit shift' for each
branch type, and uses the 'bit shift' to define the lbr_sel_maps.
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Stephane Eranian <eranian@google.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: jolsa@redhat.com
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/1415156173-10035-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_lbr.c | 54 |
2 files changed, 29 insertions, 29 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index df525d2be1e8..0c45b22495dc 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -515,6 +515,10 @@ struct x86_pmu { | |||
515 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); | 515 | struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr); |
516 | }; | 516 | }; |
517 | 517 | ||
518 | enum { | ||
519 | PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT, | ||
520 | }; | ||
521 | |||
518 | #define x86_add_quirk(func_) \ | 522 | #define x86_add_quirk(func_) \ |
519 | do { \ | 523 | do { \ |
520 | static struct x86_pmu_quirk __quirk __initdata = { \ | 524 | static struct x86_pmu_quirk __quirk __initdata = { \ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 58f1a94beaf0..8bc078f43a82 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c | |||
@@ -69,10 +69,6 @@ static enum { | |||
69 | #define LBR_FROM_FLAG_IN_TX (1ULL << 62) | 69 | #define LBR_FROM_FLAG_IN_TX (1ULL << 62) |
70 | #define LBR_FROM_FLAG_ABORT (1ULL << 61) | 70 | #define LBR_FROM_FLAG_ABORT (1ULL << 61) |
71 | 71 | ||
72 | #define for_each_branch_sample_type(x) \ | ||
73 | for ((x) = PERF_SAMPLE_BRANCH_USER; \ | ||
74 | (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1) | ||
75 | |||
76 | /* | 72 | /* |
77 | * x86control flow change classification | 73 | * x86control flow change classification |
78 | * x86control flow changes include branches, interrupts, traps, faults | 74 | * x86control flow changes include branches, interrupts, traps, faults |
@@ -403,14 +399,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event) | |||
403 | { | 399 | { |
404 | struct hw_perf_event_extra *reg; | 400 | struct hw_perf_event_extra *reg; |
405 | u64 br_type = event->attr.branch_sample_type; | 401 | u64 br_type = event->attr.branch_sample_type; |
406 | u64 mask = 0, m; | 402 | u64 mask = 0, v; |
407 | u64 v; | 403 | int i; |
408 | 404 | ||
409 | for_each_branch_sample_type(m) { | 405 | for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) { |
410 | if (!(br_type & m)) | 406 | if (!(br_type & (1ULL << i))) |
411 | continue; | 407 | continue; |
412 | 408 | ||
413 | v = x86_pmu.lbr_sel_map[m]; | 409 | v = x86_pmu.lbr_sel_map[i]; |
414 | if (v == LBR_NOT_SUPP) | 410 | if (v == LBR_NOT_SUPP) |
415 | return -EOPNOTSUPP; | 411 | return -EOPNOTSUPP; |
416 | 412 | ||
@@ -678,35 +674,35 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc) | |||
678 | /* | 674 | /* |
679 | * Map interface branch filters onto LBR filters | 675 | * Map interface branch filters onto LBR filters |
680 | */ | 676 | */ |
681 | static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | 677 | static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { |
682 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | 678 | [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, |
683 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | 679 | [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, |
684 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | 680 | [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, |
685 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | 681 | [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, |
686 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP | 682 | [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_REL_JMP |
687 | | LBR_IND_JMP | LBR_FAR, | 683 | | LBR_IND_JMP | LBR_FAR, |
688 | /* | 684 | /* |
689 | * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches | 685 | * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches |
690 | */ | 686 | */ |
691 | [PERF_SAMPLE_BRANCH_ANY_CALL] = | 687 | [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = |
692 | LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, | 688 | LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR, |
693 | /* | 689 | /* |
694 | * NHM/WSM erratum: must include IND_JMP to capture IND_CALL | 690 | * NHM/WSM erratum: must include IND_JMP to capture IND_CALL |
695 | */ | 691 | */ |
696 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP, | 692 | [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP, |
697 | [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, | 693 | [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, |
698 | }; | 694 | }; |
699 | 695 | ||
700 | static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = { | 696 | static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = { |
701 | [PERF_SAMPLE_BRANCH_ANY] = LBR_ANY, | 697 | [PERF_SAMPLE_BRANCH_ANY_SHIFT] = LBR_ANY, |
702 | [PERF_SAMPLE_BRANCH_USER] = LBR_USER, | 698 | [PERF_SAMPLE_BRANCH_USER_SHIFT] = LBR_USER, |
703 | [PERF_SAMPLE_BRANCH_KERNEL] = LBR_KERNEL, | 699 | [PERF_SAMPLE_BRANCH_KERNEL_SHIFT] = LBR_KERNEL, |
704 | [PERF_SAMPLE_BRANCH_HV] = LBR_IGN, | 700 | [PERF_SAMPLE_BRANCH_HV_SHIFT] = LBR_IGN, |
705 | [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR, | 701 | [PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT] = LBR_RETURN | LBR_FAR, |
706 | [PERF_SAMPLE_BRANCH_ANY_CALL] = LBR_REL_CALL | LBR_IND_CALL | 702 | [PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] = LBR_REL_CALL | LBR_IND_CALL |
707 | | LBR_FAR, | 703 | | LBR_FAR, |
708 | [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL, | 704 | [PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL, |
709 | [PERF_SAMPLE_BRANCH_COND] = LBR_JCC, | 705 | [PERF_SAMPLE_BRANCH_COND_SHIFT] = LBR_JCC, |
710 | }; | 706 | }; |
711 | 707 | ||
712 | /* core */ | 708 | /* core */ |