author    Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-03-03 06:02:30 -0500
committer Ingo Molnar <mingo@elte.hu>                2010-03-10 07:23:32 -0500
commit    caff2befffe899e63df5cc760b7ed01cfd902685 (patch)
tree      fe07f997bd67d1e5ae3122db789d7e7361ddca28
parent    69fef0d2e2c2c049ef4207a52e78b50d527bd85a (diff)
perf, x86: Implement simple LBR support
Implement simple support for the Intel Last-Branch-Record (LBR). It
supports all hardware that implements FREEZE_LBRS_ON_PMI, but does not
(yet) implement the LBR config register.
The Intel LBR is a FIFO of (From, To) address pairs describing the last
few branches the hardware took.

This patch does not add a perf interface to the LBR; it merely provides
an interface for internal use.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.544191154@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--    arch/x86/kernel/cpu/perf_event.c              18
-rw-r--r--    arch/x86/kernel/cpu/perf_event_intel.c        13
-rw-r--r--    arch/x86/kernel/cpu/perf_event_intel_lbr.c   228
-rw-r--r--    include/linux/perf_event.h                    11
4 files changed, 270 insertions, 0 deletions
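
To see how the pieces below fit together, here is a minimal sketch of a
hypothetical in-kernel caller of the internal interface. The functions and
the lbr_stack/lbr_entries fields are the ones added by this patch; the
calling sequence and the lbr_sketch() wrapper itself are illustrative only,
not part of the patch (in the patch, the read happens from the PMI handler
after FREEZE_LBRS_ON_PMI has stopped the hardware):

    /* Hypothetical user of the internal LBR interface -- illustration only. */
    static void lbr_sketch(struct perf_event *event)
    {
            struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
            int i;

            intel_pmu_lbr_enable(event);    /* take a ref; may reset the stack */

            /*
             * intel_pmu_lbr_enable_all() runs when the PMU is enabled and
             * sets DEBUGCTL.LBR; branches are recorded from that point on.
             */

            intel_pmu_lbr_read();           /* drain MSRs into cpuc->lbr_stack */

            for (i = 0; i < cpuc->lbr_stack.nr; i++)
                    pr_debug("branch %llx -> %llx, flags %llx\n",
                             cpuc->lbr_entries[i].from,
                             cpuc->lbr_entries[i].to,
                             cpuc->lbr_entries[i].flags);

            intel_pmu_lbr_disable(event);   /* drop the ref */
    }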
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0c03d5c1671f..1badff6b6b28 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -48,6 +48,8 @@ struct amd_nb {
         struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES 16
+
 struct cpu_hw_events {
         /*
          * Generic x86 PMC bits
@@ -70,6 +72,14 @@ struct cpu_hw_events {
         u64 pebs_enabled;
 
         /*
+         * Intel LBR bits
+         */
+        int lbr_users;
+        void *lbr_context;
+        struct perf_branch_stack lbr_stack;
+        struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
+
+        /*
          * AMD specific bits
          */
         struct amd_nb *amd_nb;
@@ -159,6 +169,13 @@ struct x86_pmu {
         int pebs_record_size;
         void (*drain_pebs)(struct pt_regs *regs);
         struct event_constraint *pebs_constraints;
+
+        /*
+         * Intel LBR
+         */
+        unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+        int lbr_nr;                              /* hardware stack size */
+        int lbr_format;                          /* hardware format     */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -1237,6 +1254,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 11446412e4c7..44f6ed42a934 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
                 intel_pmu_disable_bts();
 
         intel_pmu_pebs_disable_all();
+        intel_pmu_lbr_disable_all();
 }
 
 static void intel_pmu_enable_all(void)
@@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
         }
 
         intel_pmu_pebs_enable_all();
+        intel_pmu_lbr_enable_all();
 }
 
 static inline u64 intel_pmu_get_status(void)
@@ -674,6 +676,8 @@ again:
         inc_irq_stat(apic_perf_irqs);
         ack = status;
 
+        intel_pmu_lbr_read();
+
         /*
          * PEBS overflow sets bit 62 in the global status register
          */
@@ -848,6 +852,8 @@ static __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_core();
+
                 x86_pmu.event_constraints = intel_core2_event_constraints;
                 pr_cont("Core2 events, ");
                 break;
@@ -857,13 +863,18 @@ static __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_nhm();
+
                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
                 pr_cont("Nehalem/Corei7 events, ");
                 break;
+
         case 28: /* Atom */
                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_atom();
+
                 x86_pmu.event_constraints = intel_gen_event_constraints;
                 pr_cont("Atom events, ");
                 break;
@@ -873,6 +884,8 @@ static __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_nhm();
+
                 x86_pmu.event_constraints = intel_westmere_event_constraints;
                 pr_cont("Westmere events, ");
                 break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..ea3e99ed82ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,228 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+enum {
+        LBR_FORMAT_32        = 0x00,
+        LBR_FORMAT_LIP       = 0x01,
+        LBR_FORMAT_EIP       = 0x02,
+        LBR_FORMAT_EIP_FLAGS = 0x03,
+};
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
+ * otherwise it becomes near impossible to get a reliable stack.
+ */
+
+#define X86_DEBUGCTL_LBR                (1 << 0)
+#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11)
+
+static void __intel_pmu_lbr_enable(void)
+{
+        u64 debugctl;
+
+        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+        debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+        u64 debugctl;
+
+        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+        debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++)
+                wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++) {
+                wrmsrl(x86_pmu.lbr_from + i, 0);
+                wrmsrl(x86_pmu.lbr_to + i, 0);
+        }
+}
+
+static void intel_pmu_lbr_reset(void)
+{
+        if (x86_pmu.lbr_format == LBR_FORMAT_32)
+                intel_pmu_lbr_reset_32();
+        else
+                intel_pmu_lbr_reset_64();
+}
+
+static void intel_pmu_lbr_enable(struct perf_event *event)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (!x86_pmu.lbr_nr)
+                return;
+
+        WARN_ON(cpuc->enabled);
+
+        /*
+         * Reset the LBR stack if this is the first LBR user or
+         * we changed task context so as to avoid data leaks.
+         */
+
+        if (!cpuc->lbr_users ||
+            (event->ctx->task && cpuc->lbr_context != event->ctx)) {
+                intel_pmu_lbr_reset();
+                cpuc->lbr_context = event->ctx;
+        }
+
+        cpuc->lbr_users++;
+}
+
+static void intel_pmu_lbr_disable(struct perf_event *event)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (!x86_pmu.lbr_nr)
+                return;
+
+        cpuc->lbr_users--;
+
+        BUG_ON(cpuc->lbr_users < 0);
+        WARN_ON(cpuc->enabled);
+}
+
+static void intel_pmu_lbr_enable_all(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (cpuc->lbr_users)
+                __intel_pmu_lbr_enable();
+}
+
+static void intel_pmu_lbr_disable_all(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (cpuc->lbr_users)
+                __intel_pmu_lbr_disable();
+}
+
+static inline u64 intel_pmu_lbr_tos(void)
+{
+        u64 tos;
+
+        rdmsrl(x86_pmu.lbr_tos, tos);
+
+        return tos;
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+        unsigned long mask = x86_pmu.lbr_nr - 1;
+        u64 tos = intel_pmu_lbr_tos();
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) {
+                unsigned long lbr_idx = (tos - i) & mask;
+                union {
+                        struct {
+                                u32 from;
+                                u32 to;
+                        };
+                        u64 lbr;
+                } msr_lastbranch;
+
+                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+                cpuc->lbr_entries[i].from  = msr_lastbranch.from;
+                cpuc->lbr_entries[i].to    = msr_lastbranch.to;
+                cpuc->lbr_entries[i].flags = 0;
+        }
+        cpuc->lbr_stack.nr = i;
+}
+
+#define LBR_FROM_FLAG_MISPRED (1ULL << 63)
+
+/*
+ * Due to lack of segmentation in Linux the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+        unsigned long mask = x86_pmu.lbr_nr - 1;
+        u64 tos = intel_pmu_lbr_tos();
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) {
+                unsigned long lbr_idx = (tos - i) & mask;
+                u64 from, to, flags = 0;
+
+                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+                rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
+
+                if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
+                        flags = !!(from & LBR_FROM_FLAG_MISPRED);
+                        from = (u64)((((s64)from) << 1) >> 1);
+                }
+
+                cpuc->lbr_entries[i].from  = from;
+                cpuc->lbr_entries[i].to    = to;
+                cpuc->lbr_entries[i].flags = flags;
+        }
+        cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_lbr_read(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (!cpuc->lbr_users)
+                return;
+
+        if (x86_pmu.lbr_format == LBR_FORMAT_32)
+                intel_pmu_lbr_read_32(cpuc);
+        else
+                intel_pmu_lbr_read_64(cpuc);
+}
+
+static int intel_pmu_lbr_format(void)
+{
+        u64 capabilities;
+
+        rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+        return capabilities & 0x1f;
+}
+
+static void intel_pmu_lbr_init_core(void)
+{
+        x86_pmu.lbr_format = intel_pmu_lbr_format();
+        x86_pmu.lbr_nr     = 4;
+        x86_pmu.lbr_tos    = 0x01c9;
+        x86_pmu.lbr_from   = 0x40;
+        x86_pmu.lbr_to     = 0x60;
+}
+
+static void intel_pmu_lbr_init_nhm(void)
+{
+        x86_pmu.lbr_format = intel_pmu_lbr_format();
+        x86_pmu.lbr_nr     = 16;
+        x86_pmu.lbr_tos    = 0x01c9;
+        x86_pmu.lbr_from   = 0x680;
+        x86_pmu.lbr_to     = 0x6c0;
+}
+
+static void intel_pmu_lbr_init_atom(void)
+{
+        x86_pmu.lbr_format = intel_pmu_lbr_format();
+        x86_pmu.lbr_nr     = 8;
+        x86_pmu.lbr_tos    = 0x01c9;
+        x86_pmu.lbr_from   = 0x40;
+        x86_pmu.lbr_to     = 0x60;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
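
Two details in the file above are easy to misread: the "& mask" arithmetic
treats the lbr_nr MSR pairs as a power-of-two ring buffer, so an index
computed downward from the top-of-stack wraps cleanly past zero, and the
"((s64)from << 1) >> 1" arithmetic shift strips the mispredict flag out of
bit 63 while re-extending the sign from bit 62, keeping 64-bit kernel
addresses canonical. A standalone sketch with hypothetical values (plain
userspace C, not patch code):

    #include <stdio.h>
    #include <stdint.h>

    #define LBR_NR                16            /* Nehalem depth, power of two */
    #define LBR_FROM_FLAG_MISPRED (1ULL << 63)

    int main(void)
    {
            uint64_t tos = 2;                   /* hypothetical top-of-stack */
            unsigned long mask = LBR_NR - 1;
            /* FROM value for a predicted branch from a kernel address:
             * bits 62:0 hold the address, bit 63 holds the flag (clear). */
            uint64_t from = 0x7fffffff81000000ULL;
            int mispred, i;

            /* Indices wrap below zero: prints 2, 1, 0, 15, 14. */
            for (i = 0; i < 5; i++)
                    printf("index %lu\n", (unsigned long)((tos - i) & mask));

            mispred = !!(from & LBR_FROM_FLAG_MISPRED);
            /* Shift the flag out of bit 63; the arithmetic right shift then
             * copies bit 62 back over it, restoring 0xffffffff81000000. */
            from = (uint64_t)(((int64_t)from << 1) >> 1);

            printf("mispred=%d from=%#llx\n", mispred, (unsigned long long)from);
            return 0;
    }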
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42307b50c787..ab4fd9ede264 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,6 +467,17 @@ struct perf_raw_record {
         void *data;
 };
 
+struct perf_branch_entry {
+        __u64 from;
+        __u64 to;
+        __u64 flags;
+};
+
+struct perf_branch_stack {
+        __u64 nr;
+        struct perf_branch_entry entries[0];
+};
+
 struct task_struct;
 
 /**
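
One layout detail worth noting: entries[0] in perf_branch_stack is the
zero-length-array idiom, declaring a header whose entry storage is expected
to follow it directly in memory; cpu_hw_events relies on exactly this by
placing lbr_entries[MAX_LBR_ENTRIES] immediately after lbr_stack. A sketch
of the idiom in isolation (hypothetical userspace code; branch_stack_alloc
is invented for illustration and is not part of the patch):

    #include <stdlib.h>
    #include <string.h>

    /* Userspace mirrors of the structs added to perf_event.h. */
    struct perf_branch_entry {
            unsigned long long from, to, flags;
    };

    struct perf_branch_stack {
            unsigned long long nr;
            struct perf_branch_entry entries[0];  /* storage follows header */
    };

    /* Allocate the header plus room for n trailing entries in one block. */
    static struct perf_branch_stack *branch_stack_alloc(unsigned long long n)
    {
            struct perf_branch_stack *bs;

            bs = malloc(sizeof(*bs) + n * sizeof(bs->entries[0]));
            if (bs) {
                    bs->nr = 0;
                    memset(bs->entries, 0, n * sizeof(bs->entries[0]));
            }
            return bs;
    }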