 arch/x86/kernel/cpu/perf_event.c           |  18 ++++
 arch/x86/kernel/cpu/perf_event_intel.c     |  13 +++
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 228 ++++++++++++++++++++++++++++
 include/linux/perf_event.h                 |  11 +++
 4 files changed, 270 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0c03d5c1671f..1badff6b6b28 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -48,6 +48,8 @@ struct amd_nb {
         struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES         16
+
 struct cpu_hw_events {
         /*
          * Generic x86 PMC bits
@@ -70,6 +72,14 @@ struct cpu_hw_events {
         u64                     pebs_enabled;
 
         /*
+         * Intel LBR bits
+         */
+        int                             lbr_users;
+        void                            *lbr_context;
+        struct perf_branch_stack        lbr_stack;
+        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
+
+        /*
          * AMD specific bits
          */
         struct amd_nb           *amd_nb;
@@ -159,6 +169,13 @@ struct x86_pmu {
         int             pebs_record_size;
         void            (*drain_pebs)(struct pt_regs *regs);
         struct event_constraint *pebs_constraints;
+
+        /*
+         * Intel LBR
+         */
+        unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+        int             lbr_nr;                    /* hardware stack size */
+        int             lbr_format;                /* hardware format     */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -1237,6 +1254,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
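The new x86_pmu fields describe the whole LBR stack with just three MSR base addresses and a depth: entry i of the ring lives at lbr_from + i and lbr_to + i, and because lbr_nr is a power of two the top-of-stack index wraps with a simple mask. A minimal user-space sketch of that addressing follows (illustration only, not part of the patch; the Nehalem constants are the ones intel_pmu_lbr_init_nhm() sets further down):

#include <stdio.h>

/* Mirrors the three MSR base values plus depth added to struct x86_pmu. */
struct lbr_layout {
        unsigned long tos, from, to;    /* MSR addresses */
        int nr;                         /* stack depth, power of two */
};

int main(void)
{
        struct lbr_layout nhm = { 0x01c9, 0x680, 0x6c0, 16 };
        int tos = 5;    /* pretend the TOS MSR read back 5 */
        int i;

        for (i = 0; i < nhm.nr; i++) {
                /* same wrap-around indexing as intel_pmu_lbr_read_64() */
                int idx = (tos - i) & (nhm.nr - 1);
                printf("entry %2d: FROM msr %#lx, TO msr %#lx\n",
                       i, nhm.from + idx, nhm.to + idx);
        }
        return 0;
}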
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 11446412e4c7..44f6ed42a934 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
                 intel_pmu_disable_bts();
 
         intel_pmu_pebs_disable_all();
+        intel_pmu_lbr_disable_all();
 }
 
 static void intel_pmu_enable_all(void)
@@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
         }
 
         intel_pmu_pebs_enable_all();
+        intel_pmu_lbr_enable_all();
 }
 
 static inline u64 intel_pmu_get_status(void)
@@ -674,6 +676,8 @@ again:
         inc_irq_stat(apic_perf_irqs);
         ack = status;
 
+        intel_pmu_lbr_read();
+
         /*
          * PEBS overflow sets bit 62 in the global status register
          */
@@ -848,6 +852,8 @@ static __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_core();
+
                 x86_pmu.event_constraints = intel_core2_event_constraints;
                 pr_cont("Core2 events, ");
                 break;
@@ -857,13 +863,18 @@ static __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_nhm();
+
                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
                 pr_cont("Nehalem/Corei7 events, ");
                 break;
+
         case 28: /* Atom */
                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_atom();
+
                 x86_pmu.event_constraints = intel_gen_event_constraints;
                 pr_cont("Atom events, ");
                 break;
@@ -873,6 +884,8 @@ static __init int intel_pmu_init(void)
                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
                        sizeof(hw_cache_event_ids));
 
+                intel_pmu_lbr_init_nhm();
+
                 x86_pmu.event_constraints = intel_westmere_event_constraints;
                 pr_cont("Westmere events, ");
                 break;
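Both hooks added to intel_pmu_disable_all()/intel_pmu_enable_all() are cheap no-ops unless some event on this CPU asked for branch records: the per-CPU lbr_users count gates the DEBUGCTL writes, and intel_pmu_lbr_read() in the PMI handler likewise returns immediately with no users. A toy model of that gating pattern (an assumed simplification, not kernel code):

#include <stdio.h>

static int lbr_users;           /* per-CPU in the real code */
static unsigned long debugctl;  /* stands in for MSR_IA32_DEBUGCTLMSR */

#define DEBUGCTL_LBR                    (1UL << 0)
#define DEBUGCTL_FREEZE_LBRS_ON_PMI     (1UL << 11)

static void lbr_enable_all(void)
{
        if (lbr_users)  /* only touch hardware when someone cares */
                debugctl |= DEBUGCTL_LBR | DEBUGCTL_FREEZE_LBRS_ON_PMI;
}

static void lbr_disable_all(void)
{
        if (lbr_users)
                debugctl &= ~(DEBUGCTL_LBR | DEBUGCTL_FREEZE_LBRS_ON_PMI);
}

int main(void)
{
        lbr_enable_all();
        printf("no users: debugctl=%#lx\n", debugctl);
        lbr_users = 1;
        lbr_enable_all();
        printf("one user: debugctl=%#lx\n", debugctl);
        lbr_disable_all();
        printf("disabled: debugctl=%#lx\n", debugctl);
        return 0;
}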
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..ea3e99ed82ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,228 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+enum {
+        LBR_FORMAT_32           = 0x00,
+        LBR_FORMAT_LIP          = 0x01,
+        LBR_FORMAT_EIP          = 0x02,
+        LBR_FORMAT_EIP_FLAGS    = 0x03,
+};
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
+ * otherwise it becomes nearly impossible to get a reliable stack.
+ */
+
+#define X86_DEBUGCTL_LBR                        (1 << 0)
+#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI         (1 << 11)
+
+static void __intel_pmu_lbr_enable(void)
+{
+        u64 debugctl;
+
+        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+        debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+        u64 debugctl;
+
+        rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+        debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+        wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++)
+                wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++) {
+                wrmsrl(x86_pmu.lbr_from + i, 0);
+                wrmsrl(x86_pmu.lbr_to + i, 0);
+        }
+}
+
+static void intel_pmu_lbr_reset(void)
+{
+        if (x86_pmu.lbr_format == LBR_FORMAT_32)
+                intel_pmu_lbr_reset_32();
+        else
+                intel_pmu_lbr_reset_64();
+}
+
+static void intel_pmu_lbr_enable(struct perf_event *event)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (!x86_pmu.lbr_nr)
+                return;
+
+        WARN_ON(cpuc->enabled);
+
+        /*
+         * Reset the LBR stack if this is the first LBR user or
+         * we changed task context so as to avoid data leaks.
+         */
+
+        if (!cpuc->lbr_users ||
+            (event->ctx->task && cpuc->lbr_context != event->ctx)) {
+                intel_pmu_lbr_reset();
+                cpuc->lbr_context = event->ctx;
+        }
+
+        cpuc->lbr_users++;
+}
+
+static void intel_pmu_lbr_disable(struct perf_event *event)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (!x86_pmu.lbr_nr)
+                return;
+
+        cpuc->lbr_users--;
+
+        BUG_ON(cpuc->lbr_users < 0);
+        WARN_ON(cpuc->enabled);
+}
+
+static void intel_pmu_lbr_enable_all(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (cpuc->lbr_users)
+                __intel_pmu_lbr_enable();
+}
+
+static void intel_pmu_lbr_disable_all(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (cpuc->lbr_users)
+                __intel_pmu_lbr_disable();
+}
+
+static inline u64 intel_pmu_lbr_tos(void)
+{
+        u64 tos;
+
+        rdmsrl(x86_pmu.lbr_tos, tos);
+
+        return tos;
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+        unsigned long mask = x86_pmu.lbr_nr - 1;
+        u64 tos = intel_pmu_lbr_tos();
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++) {
+                unsigned long lbr_idx = (tos - i) & mask;
+                union {
+                        struct {
+                                u32 from;
+                                u32 to;
+                        };
+                        u64     lbr;
+                } msr_lastbranch;
+
+                rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+                cpuc->lbr_entries[i].from  = msr_lastbranch.from;
+                cpuc->lbr_entries[i].to    = msr_lastbranch.to;
+                cpuc->lbr_entries[i].flags = 0;
+        }
+        cpuc->lbr_stack.nr = i;
+}
+
+#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)
+
+/*
+ * Due to the lack of segmentation in Linux, the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+        unsigned long mask = x86_pmu.lbr_nr - 1;
+        u64 tos = intel_pmu_lbr_tos();
+        int i;
+
+        for (i = 0; i < x86_pmu.lbr_nr; i++) {
+                unsigned long lbr_idx = (tos - i) & mask;
+                u64 from, to, flags = 0;
+
+                rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+                rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
+
+                if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
+                        flags = !!(from & LBR_FROM_FLAG_MISPRED);
+                        from = (u64)((((s64)from) << 1) >> 1);
+                }
+
+                cpuc->lbr_entries[i].from  = from;
+                cpuc->lbr_entries[i].to    = to;
+                cpuc->lbr_entries[i].flags = flags;
+        }
+        cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_lbr_read(void)
+{
+        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+        if (!cpuc->lbr_users)
+                return;
+
+        if (x86_pmu.lbr_format == LBR_FORMAT_32)
+                intel_pmu_lbr_read_32(cpuc);
+        else
+                intel_pmu_lbr_read_64(cpuc);
+}
+
+static int intel_pmu_lbr_format(void)
+{
+        u64 capabilities;
+
+        rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+        return capabilities & 0x1f;
+}
+
+static void intel_pmu_lbr_init_core(void)
+{
+        x86_pmu.lbr_format = intel_pmu_lbr_format();
+        x86_pmu.lbr_nr     = 4;
+        x86_pmu.lbr_tos    = 0x01c9;
+        x86_pmu.lbr_from   = 0x40;
+        x86_pmu.lbr_to     = 0x60;
+}
+
+static void intel_pmu_lbr_init_nhm(void)
+{
+        x86_pmu.lbr_format = intel_pmu_lbr_format();
+        x86_pmu.lbr_nr     = 16;
+        x86_pmu.lbr_tos    = 0x01c9;
+        x86_pmu.lbr_from   = 0x680;
+        x86_pmu.lbr_to     = 0x6c0;
+}
+
+static void intel_pmu_lbr_init_atom(void)
+{
+        x86_pmu.lbr_format = intel_pmu_lbr_format();
+        x86_pmu.lbr_nr     = 8;
+        x86_pmu.lbr_tos    = 0x01c9;
+        x86_pmu.lbr_from   = 0x40;
+        x86_pmu.lbr_to     = 0x60;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
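The subtlest part of the new file is the LBR_FORMAT_EIP_FLAGS decode in intel_pmu_lbr_read_64(): hardware stores the mispredict flag in bit 63 of the FROM value, and the arithmetic-shift pair drops that flag while sign-extending bit 62, which regenerates the canonical upper bits of kernel addresses. A stand-alone demonstration with made-up addresses (the encoding shown is a reading of the code above, not an authoritative statement of the hardware format):

#include <stdint.h>
#include <stdio.h>

#define LBR_FROM_FLAG_MISPRED   (1ULL << 63)

/* Same decode as intel_pmu_lbr_read_64() for LBR_FORMAT_EIP_FLAGS;
 * shifting left on the unsigned value first avoids signed overflow. */
static uint64_t decode(uint64_t raw, uint64_t *flags)
{
        *flags = !!(raw & LBR_FROM_FLAG_MISPRED);
        return (uint64_t)((int64_t)(raw << 1) >> 1);    /* sign-extend bit 62 */
}

int main(void)
{
        uint64_t from, flags;

        /* user-space branch source, mispredicted: bit 63 is pure flag */
        from = decode(0x00007f0012345678ULL | LBR_FROM_FLAG_MISPRED, &flags);
        printf("user:   from=%#018llx mispred=%llu\n",
               (unsigned long long)from, (unsigned long long)flags);

        /* kernel branch source, predicted: bit 62 regenerates bit 63 */
        from = decode(0x7fffffff81000123ULL, &flags);
        printf("kernel: from=%#018llx mispred=%llu\n",
               (unsigned long long)from, (unsigned long long)flags);
        return 0;
}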
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42307b50c787..ab4fd9ede264 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,6 +467,17 @@ struct perf_raw_record {
         void                            *data;
 };
 
+struct perf_branch_entry {
+        __u64                           from;
+        __u64                           to;
+        __u64                           flags;
+};
+
+struct perf_branch_stack {
+        __u64                           nr;
+        struct perf_branch_entry        entries[0];
+};
+
 struct task_struct;
 
 /**
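Finally, perf_branch_stack declares entries[0], the pre-C99 kernel idiom for a flexible array member: the struct is a header followed directly by room for nr entries, which is why cpu_hw_events pairs its lbr_stack with a fixed lbr_entries[MAX_LBR_ENTRIES] backing store. A hypothetical consumer sketch, using the standard C99 entries[] spelling (illustration only; nothing in this patch exposes the stack to user space yet):

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long u64;

struct perf_branch_entry {
        u64 from;
        u64 to;
        u64 flags;
};

struct perf_branch_stack {
        u64 nr;
        struct perf_branch_entry entries[];     /* entries[0] in the patch */
};

int main(void)
{
        struct perf_branch_stack *bs;
        u64 i, n = 2;

        /* header plus room for n entries directly behind it */
        bs = calloc(1, sizeof(*bs) + n * sizeof(bs->entries[0]));
        if (!bs)
                return 1;
        bs->nr = n;
        bs->entries[0] = (struct perf_branch_entry){ 0x400123, 0x400456, 1 };
        bs->entries[1] = (struct perf_branch_entry){ 0x400789, 0x400100, 0 };

        for (i = 0; i < bs->nr; i++)
                printf("%llu: %#llx -> %#llx%s\n", i,
                       bs->entries[i].from, bs->entries[i].to,
                       bs->entries[i].flags ? "  (mispredicted)" : "");
        free(bs);
        return 0;
}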