Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 2298
1 files changed, 2298 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
new file mode 100644
index 000000000000..0d03629fb1a5
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -0,0 +1,2298 @@
1 | /* | ||
2 | * Performance events x86 architecture code | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
10 | * | ||
11 | * For licensing details see kernel-base/COPYING | ||
12 | */ | ||
13 | |||
14 | #include <linux/perf_event.h> | ||
15 | #include <linux/capability.h> | ||
16 | #include <linux/notifier.h> | ||
17 | #include <linux/hardirq.h> | ||
18 | #include <linux/kprobes.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kdebug.h> | ||
21 | #include <linux/sched.h> | ||
22 | #include <linux/uaccess.h> | ||
23 | #include <linux/highmem.h> | ||
24 | #include <linux/cpu.h> | ||
25 | |||
26 | #include <asm/apic.h> | ||
27 | #include <asm/stacktrace.h> | ||
28 | #include <asm/nmi.h> | ||
29 | |||
30 | static u64 perf_event_mask __read_mostly; | ||
31 | |||
32 | /* The maximal number of PEBS events: */ | ||
33 | #define MAX_PEBS_EVENTS 4 | ||
34 | |||
35 | /* The size of a BTS record in bytes: */ | ||
36 | #define BTS_RECORD_SIZE 24 | ||
37 | |||
38 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | ||
40 | |||
41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | ||
43 | |||
44 | |||
45 | /* | ||
46 | * Bits in the debugctlmsr controlling branch tracing. | ||
47 | */ | ||
48 | #define X86_DEBUGCTL_TR (1 << 6) | ||
49 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
50 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
51 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
52 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
53 | |||
54 | /* | ||
55 | * A debug store configuration. | ||
56 | * | ||
57 | * We only support architectures that use 64-bit fields. | ||
58 | */ | ||
59 | struct debug_store { | ||
60 | u64 bts_buffer_base; | ||
61 | u64 bts_index; | ||
62 | u64 bts_absolute_maximum; | ||
63 | u64 bts_interrupt_threshold; | ||
64 | u64 pebs_buffer_base; | ||
65 | u64 pebs_index; | ||
66 | u64 pebs_absolute_maximum; | ||
67 | u64 pebs_interrupt_threshold; | ||
68 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
69 | }; | ||
70 | |||
71 | struct cpu_hw_events { | ||
72 | struct perf_event *events[X86_PMC_IDX_MAX]; | ||
73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
75 | unsigned long interrupts; | ||
76 | int enabled; | ||
77 | struct debug_store *ds; | ||
78 | }; | ||
79 | |||
80 | /* | ||
81 | * struct x86_pmu - generic x86 pmu | ||
82 | */ | ||
83 | struct x86_pmu { | ||
84 | const char *name; | ||
85 | int version; | ||
86 | int (*handle_irq)(struct pt_regs *); | ||
87 | void (*disable_all)(void); | ||
88 | void (*enable_all)(void); | ||
89 | void (*enable)(struct hw_perf_event *, int); | ||
90 | void (*disable)(struct hw_perf_event *, int); | ||
91 | unsigned eventsel; | ||
92 | unsigned perfctr; | ||
93 | u64 (*event_map)(int); | ||
94 | u64 (*raw_event)(u64); | ||
95 | int max_events; | ||
96 | int num_events; | ||
97 | int num_events_fixed; | ||
98 | int event_bits; | ||
99 | u64 event_mask; | ||
100 | int apic; | ||
101 | u64 max_period; | ||
102 | u64 intel_ctrl; | ||
103 | void (*enable_bts)(u64 config); | ||
104 | void (*disable_bts)(void); | ||
105 | }; | ||
106 | |||
107 | static struct x86_pmu x86_pmu __read_mostly; | ||
108 | |||
109 | static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { | ||
110 | .enabled = 1, | ||
111 | }; | ||
112 | |||
113 | /* | ||
114 | * Not sure about some of these | ||
115 | */ | ||
116 | static const u64 p6_perfmon_event_map[] = | ||
117 | { | ||
118 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
119 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
120 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
121 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
122 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
123 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
124 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
125 | }; | ||
126 | |||
127 | static u64 p6_pmu_event_map(int hw_event) | ||
128 | { | ||
129 | return p6_perfmon_event_map[hw_event]; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Event setting that is specified not to count anything. | ||
134 | * We use this to effectively disable a counter. | ||
135 | * | ||
136 | * L2_RQSTS with 0 MESI unit mask. | ||
137 | */ | ||
138 | #define P6_NOP_EVENT 0x0000002EULL | ||
139 | |||
140 | static u64 p6_pmu_raw_event(u64 hw_event) | ||
141 | { | ||
142 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
143 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
144 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
145 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
146 | #define P6_EVNTSEL_REG_MASK 0xFF000000ULL | ||
147 | |||
148 | #define P6_EVNTSEL_MASK \ | ||
149 | (P6_EVNTSEL_EVENT_MASK | \ | ||
150 | P6_EVNTSEL_UNIT_MASK | \ | ||
151 | P6_EVNTSEL_EDGE_MASK | \ | ||
152 | P6_EVNTSEL_INV_MASK | \ | ||
153 | P6_EVNTSEL_REG_MASK) | ||
154 | |||
155 | return hw_event & P6_EVNTSEL_MASK; | ||
156 | } | ||
157 | |||
158 | |||
159 | /* | ||
160 | * Intel PerfMon v3. Used on Core2 and later. | ||
161 | */ | ||
162 | static const u64 intel_perfmon_event_map[] = | ||
163 | { | ||
164 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
165 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
166 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
167 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
168 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
169 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
170 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
171 | }; | ||
172 | |||
173 | static u64 intel_pmu_event_map(int hw_event) | ||
174 | { | ||
175 | return intel_perfmon_event_map[hw_event]; | ||
176 | } | ||
177 | |||
178 | /* | ||
179 | * Generalized hw caching related hw_event table, filled | ||
180 | * in on a per model basis. A value of 0 means | ||
181 | * 'not supported', -1 means 'hw_event makes no sense on | ||
182 | * this CPU', any other value means the raw hw_event | ||
183 | * ID. | ||
184 | */ | ||
185 | |||
186 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
187 | |||
188 | static u64 __read_mostly hw_cache_event_ids | ||
189 | [PERF_COUNT_HW_CACHE_MAX] | ||
190 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
191 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
192 | |||
193 | static const u64 nehalem_hw_cache_event_ids | ||
194 | [PERF_COUNT_HW_CACHE_MAX] | ||
195 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
196 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
197 | { | ||
198 | [ C(L1D) ] = { | ||
199 | [ C(OP_READ) ] = { | ||
200 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
201 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
202 | }, | ||
203 | [ C(OP_WRITE) ] = { | ||
204 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
205 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
206 | }, | ||
207 | [ C(OP_PREFETCH) ] = { | ||
208 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
209 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
210 | }, | ||
211 | }, | ||
212 | [ C(L1I ) ] = { | ||
213 | [ C(OP_READ) ] = { | ||
214 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
215 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
216 | }, | ||
217 | [ C(OP_WRITE) ] = { | ||
218 | [ C(RESULT_ACCESS) ] = -1, | ||
219 | [ C(RESULT_MISS) ] = -1, | ||
220 | }, | ||
221 | [ C(OP_PREFETCH) ] = { | ||
222 | [ C(RESULT_ACCESS) ] = 0x0, | ||
223 | [ C(RESULT_MISS) ] = 0x0, | ||
224 | }, | ||
225 | }, | ||
226 | [ C(LL ) ] = { | ||
227 | [ C(OP_READ) ] = { | ||
228 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
229 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
230 | }, | ||
231 | [ C(OP_WRITE) ] = { | ||
232 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
233 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
234 | }, | ||
235 | [ C(OP_PREFETCH) ] = { | ||
236 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
237 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
238 | }, | ||
239 | }, | ||
240 | [ C(DTLB) ] = { | ||
241 | [ C(OP_READ) ] = { | ||
242 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
243 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
244 | }, | ||
245 | [ C(OP_WRITE) ] = { | ||
246 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
247 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
248 | }, | ||
249 | [ C(OP_PREFETCH) ] = { | ||
250 | [ C(RESULT_ACCESS) ] = 0x0, | ||
251 | [ C(RESULT_MISS) ] = 0x0, | ||
252 | }, | ||
253 | }, | ||
254 | [ C(ITLB) ] = { | ||
255 | [ C(OP_READ) ] = { | ||
256 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
257 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
258 | }, | ||
259 | [ C(OP_WRITE) ] = { | ||
260 | [ C(RESULT_ACCESS) ] = -1, | ||
261 | [ C(RESULT_MISS) ] = -1, | ||
262 | }, | ||
263 | [ C(OP_PREFETCH) ] = { | ||
264 | [ C(RESULT_ACCESS) ] = -1, | ||
265 | [ C(RESULT_MISS) ] = -1, | ||
266 | }, | ||
267 | }, | ||
268 | [ C(BPU ) ] = { | ||
269 | [ C(OP_READ) ] = { | ||
270 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
271 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
272 | }, | ||
273 | [ C(OP_WRITE) ] = { | ||
274 | [ C(RESULT_ACCESS) ] = -1, | ||
275 | [ C(RESULT_MISS) ] = -1, | ||
276 | }, | ||
277 | [ C(OP_PREFETCH) ] = { | ||
278 | [ C(RESULT_ACCESS) ] = -1, | ||
279 | [ C(RESULT_MISS) ] = -1, | ||
280 | }, | ||
281 | }, | ||
282 | }; | ||
283 | |||
284 | static const u64 core2_hw_cache_event_ids | ||
285 | [PERF_COUNT_HW_CACHE_MAX] | ||
286 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
287 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
288 | { | ||
289 | [ C(L1D) ] = { | ||
290 | [ C(OP_READ) ] = { | ||
291 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
292 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
293 | }, | ||
294 | [ C(OP_WRITE) ] = { | ||
295 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
296 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
297 | }, | ||
298 | [ C(OP_PREFETCH) ] = { | ||
299 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
300 | [ C(RESULT_MISS) ] = 0, | ||
301 | }, | ||
302 | }, | ||
303 | [ C(L1I ) ] = { | ||
304 | [ C(OP_READ) ] = { | ||
305 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
306 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
307 | }, | ||
308 | [ C(OP_WRITE) ] = { | ||
309 | [ C(RESULT_ACCESS) ] = -1, | ||
310 | [ C(RESULT_MISS) ] = -1, | ||
311 | }, | ||
312 | [ C(OP_PREFETCH) ] = { | ||
313 | [ C(RESULT_ACCESS) ] = 0, | ||
314 | [ C(RESULT_MISS) ] = 0, | ||
315 | }, | ||
316 | }, | ||
317 | [ C(LL ) ] = { | ||
318 | [ C(OP_READ) ] = { | ||
319 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
320 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
321 | }, | ||
322 | [ C(OP_WRITE) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
324 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
325 | }, | ||
326 | [ C(OP_PREFETCH) ] = { | ||
327 | [ C(RESULT_ACCESS) ] = 0, | ||
328 | [ C(RESULT_MISS) ] = 0, | ||
329 | }, | ||
330 | }, | ||
331 | [ C(DTLB) ] = { | ||
332 | [ C(OP_READ) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
334 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
335 | }, | ||
336 | [ C(OP_WRITE) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
338 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
339 | }, | ||
340 | [ C(OP_PREFETCH) ] = { | ||
341 | [ C(RESULT_ACCESS) ] = 0, | ||
342 | [ C(RESULT_MISS) ] = 0, | ||
343 | }, | ||
344 | }, | ||
345 | [ C(ITLB) ] = { | ||
346 | [ C(OP_READ) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
348 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
349 | }, | ||
350 | [ C(OP_WRITE) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = -1, | ||
352 | [ C(RESULT_MISS) ] = -1, | ||
353 | }, | ||
354 | [ C(OP_PREFETCH) ] = { | ||
355 | [ C(RESULT_ACCESS) ] = -1, | ||
356 | [ C(RESULT_MISS) ] = -1, | ||
357 | }, | ||
358 | }, | ||
359 | [ C(BPU ) ] = { | ||
360 | [ C(OP_READ) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
362 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
363 | }, | ||
364 | [ C(OP_WRITE) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = -1, | ||
366 | [ C(RESULT_MISS) ] = -1, | ||
367 | }, | ||
368 | [ C(OP_PREFETCH) ] = { | ||
369 | [ C(RESULT_ACCESS) ] = -1, | ||
370 | [ C(RESULT_MISS) ] = -1, | ||
371 | }, | ||
372 | }, | ||
373 | }; | ||
374 | |||
375 | static const u64 atom_hw_cache_event_ids | ||
376 | [PERF_COUNT_HW_CACHE_MAX] | ||
377 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
378 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
379 | { | ||
380 | [ C(L1D) ] = { | ||
381 | [ C(OP_READ) ] = { | ||
382 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
383 | [ C(RESULT_MISS) ] = 0, | ||
384 | }, | ||
385 | [ C(OP_WRITE) ] = { | ||
386 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
387 | [ C(RESULT_MISS) ] = 0, | ||
388 | }, | ||
389 | [ C(OP_PREFETCH) ] = { | ||
390 | [ C(RESULT_ACCESS) ] = 0x0, | ||
391 | [ C(RESULT_MISS) ] = 0, | ||
392 | }, | ||
393 | }, | ||
394 | [ C(L1I ) ] = { | ||
395 | [ C(OP_READ) ] = { | ||
396 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
397 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
398 | }, | ||
399 | [ C(OP_WRITE) ] = { | ||
400 | [ C(RESULT_ACCESS) ] = -1, | ||
401 | [ C(RESULT_MISS) ] = -1, | ||
402 | }, | ||
403 | [ C(OP_PREFETCH) ] = { | ||
404 | [ C(RESULT_ACCESS) ] = 0, | ||
405 | [ C(RESULT_MISS) ] = 0, | ||
406 | }, | ||
407 | }, | ||
408 | [ C(LL ) ] = { | ||
409 | [ C(OP_READ) ] = { | ||
410 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
411 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
412 | }, | ||
413 | [ C(OP_WRITE) ] = { | ||
414 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
415 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
416 | }, | ||
417 | [ C(OP_PREFETCH) ] = { | ||
418 | [ C(RESULT_ACCESS) ] = 0, | ||
419 | [ C(RESULT_MISS) ] = 0, | ||
420 | }, | ||
421 | }, | ||
422 | [ C(DTLB) ] = { | ||
423 | [ C(OP_READ) ] = { | ||
424 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
425 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
426 | }, | ||
427 | [ C(OP_WRITE) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
429 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
430 | }, | ||
431 | [ C(OP_PREFETCH) ] = { | ||
432 | [ C(RESULT_ACCESS) ] = 0, | ||
433 | [ C(RESULT_MISS) ] = 0, | ||
434 | }, | ||
435 | }, | ||
436 | [ C(ITLB) ] = { | ||
437 | [ C(OP_READ) ] = { | ||
438 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
439 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
440 | }, | ||
441 | [ C(OP_WRITE) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = -1, | ||
443 | [ C(RESULT_MISS) ] = -1, | ||
444 | }, | ||
445 | [ C(OP_PREFETCH) ] = { | ||
446 | [ C(RESULT_ACCESS) ] = -1, | ||
447 | [ C(RESULT_MISS) ] = -1, | ||
448 | }, | ||
449 | }, | ||
450 | [ C(BPU ) ] = { | ||
451 | [ C(OP_READ) ] = { | ||
452 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
453 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
454 | }, | ||
455 | [ C(OP_WRITE) ] = { | ||
456 | [ C(RESULT_ACCESS) ] = -1, | ||
457 | [ C(RESULT_MISS) ] = -1, | ||
458 | }, | ||
459 | [ C(OP_PREFETCH) ] = { | ||
460 | [ C(RESULT_ACCESS) ] = -1, | ||
461 | [ C(RESULT_MISS) ] = -1, | ||
462 | }, | ||
463 | }, | ||
464 | }; | ||
465 | |||
466 | static u64 intel_pmu_raw_event(u64 hw_event) | ||
467 | { | ||
468 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
469 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
470 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
471 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
472 | #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL | ||
473 | |||
474 | #define CORE_EVNTSEL_MASK \ | ||
475 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
476 | CORE_EVNTSEL_UNIT_MASK | \ | ||
477 | CORE_EVNTSEL_EDGE_MASK | \ | ||
478 | CORE_EVNTSEL_INV_MASK | \ | ||
479 | CORE_EVNTSEL_REG_MASK) | ||
480 | |||
481 | return hw_event & CORE_EVNTSEL_MASK; | ||
482 | } | ||
483 | |||
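The CORE_EVNTSEL_* masks above keep only the event-select (bits 0-7), unit-mask (bits 8-15), edge, invert and counter-mask fields of a raw config; everything else (enable, USR/OS, INT) is supplied by the core code. A minimal standalone sketch of that masking, using the LLC-reference encoding 0x4f2e from the event map above (illustrative user-space code, not part of this file):

#include <stdint.h>
#include <stdio.h>

/* Mirror of the CORE_EVNTSEL_* masks above; illustrative only. */
#define EVNTSEL_EVENT_MASK	0x000000FFULL
#define EVNTSEL_UNIT_MASK	0x0000FF00ULL
#define EVNTSEL_EDGE_MASK	0x00040000ULL
#define EVNTSEL_INV_MASK	0x00800000ULL
#define EVNTSEL_REG_MASK	0xFF000000ULL
#define EVNTSEL_MASK	(EVNTSEL_EVENT_MASK | EVNTSEL_UNIT_MASK | \
			 EVNTSEL_EDGE_MASK | EVNTSEL_INV_MASK | EVNTSEL_REG_MASK)

int main(void)
{
	/* LLC references: unit mask 0x4f, event select 0x2e. */
	uint64_t raw = (0x4fULL << 8) | 0x2eULL;

	/* Bits outside the mask, e.g. an enable bit at position 22, are stripped: */
	uint64_t dirty = raw | (1ULL << 22);

	printf("raw=%#llx masked=%#llx\n",
	       (unsigned long long)dirty,
	       (unsigned long long)(dirty & EVNTSEL_MASK));
	return 0;
}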
484 | static const u64 amd_hw_cache_event_ids | ||
485 | [PERF_COUNT_HW_CACHE_MAX] | ||
486 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
487 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
488 | { | ||
489 | [ C(L1D) ] = { | ||
490 | [ C(OP_READ) ] = { | ||
491 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
492 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
493 | }, | ||
494 | [ C(OP_WRITE) ] = { | ||
495 | [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ | ||
496 | [ C(RESULT_MISS) ] = 0, | ||
497 | }, | ||
498 | [ C(OP_PREFETCH) ] = { | ||
499 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
500 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
501 | }, | ||
502 | }, | ||
503 | [ C(L1I ) ] = { | ||
504 | [ C(OP_READ) ] = { | ||
505 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
506 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
507 | }, | ||
508 | [ C(OP_WRITE) ] = { | ||
509 | [ C(RESULT_ACCESS) ] = -1, | ||
510 | [ C(RESULT_MISS) ] = -1, | ||
511 | }, | ||
512 | [ C(OP_PREFETCH) ] = { | ||
513 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
514 | [ C(RESULT_MISS) ] = 0, | ||
515 | }, | ||
516 | }, | ||
517 | [ C(LL ) ] = { | ||
518 | [ C(OP_READ) ] = { | ||
519 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
520 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
521 | }, | ||
522 | [ C(OP_WRITE) ] = { | ||
523 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
524 | [ C(RESULT_MISS) ] = 0, | ||
525 | }, | ||
526 | [ C(OP_PREFETCH) ] = { | ||
527 | [ C(RESULT_ACCESS) ] = 0, | ||
528 | [ C(RESULT_MISS) ] = 0, | ||
529 | }, | ||
530 | }, | ||
531 | [ C(DTLB) ] = { | ||
532 | [ C(OP_READ) ] = { | ||
533 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
534 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */ | ||
535 | }, | ||
536 | [ C(OP_WRITE) ] = { | ||
537 | [ C(RESULT_ACCESS) ] = 0, | ||
538 | [ C(RESULT_MISS) ] = 0, | ||
539 | }, | ||
540 | [ C(OP_PREFETCH) ] = { | ||
541 | [ C(RESULT_ACCESS) ] = 0, | ||
542 | [ C(RESULT_MISS) ] = 0, | ||
543 | }, | ||
544 | }, | ||
545 | [ C(ITLB) ] = { | ||
546 | [ C(OP_READ) ] = { | ||
547 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */ | ||
548 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
549 | }, | ||
550 | [ C(OP_WRITE) ] = { | ||
551 | [ C(RESULT_ACCESS) ] = -1, | ||
552 | [ C(RESULT_MISS) ] = -1, | ||
553 | }, | ||
554 | [ C(OP_PREFETCH) ] = { | ||
555 | [ C(RESULT_ACCESS) ] = -1, | ||
556 | [ C(RESULT_MISS) ] = -1, | ||
557 | }, | ||
558 | }, | ||
559 | [ C(BPU ) ] = { | ||
560 | [ C(OP_READ) ] = { | ||
561 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
562 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
563 | }, | ||
564 | [ C(OP_WRITE) ] = { | ||
565 | [ C(RESULT_ACCESS) ] = -1, | ||
566 | [ C(RESULT_MISS) ] = -1, | ||
567 | }, | ||
568 | [ C(OP_PREFETCH) ] = { | ||
569 | [ C(RESULT_ACCESS) ] = -1, | ||
570 | [ C(RESULT_MISS) ] = -1, | ||
571 | }, | ||
572 | }, | ||
573 | }; | ||
574 | |||
575 | /* | ||
576 | * AMD Performance Monitor K7 and later. | ||
577 | */ | ||
578 | static const u64 amd_perfmon_event_map[] = | ||
579 | { | ||
580 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
581 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
582 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
583 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
584 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
585 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
586 | }; | ||
587 | |||
588 | static u64 amd_pmu_event_map(int hw_event) | ||
589 | { | ||
590 | return amd_perfmon_event_map[hw_event]; | ||
591 | } | ||
592 | |||
593 | static u64 amd_pmu_raw_event(u64 hw_event) | ||
594 | { | ||
595 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
596 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
597 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
598 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
599 | #define K7_EVNTSEL_REG_MASK 0x0FF000000ULL | ||
600 | |||
601 | #define K7_EVNTSEL_MASK \ | ||
602 | (K7_EVNTSEL_EVENT_MASK | \ | ||
603 | K7_EVNTSEL_UNIT_MASK | \ | ||
604 | K7_EVNTSEL_EDGE_MASK | \ | ||
605 | K7_EVNTSEL_INV_MASK | \ | ||
606 | K7_EVNTSEL_REG_MASK) | ||
607 | |||
608 | return hw_event & K7_EVNTSEL_MASK; | ||
609 | } | ||
610 | |||
611 | /* | ||
612 | * Propagate event elapsed time into the generic event. | ||
613 | * Can only be executed on the CPU where the event is active. | ||
614 | * Returns the delta events processed. | ||
615 | */ | ||
616 | static u64 | ||
617 | x86_perf_event_update(struct perf_event *event, | ||
618 | struct hw_perf_event *hwc, int idx) | ||
619 | { | ||
620 | int shift = 64 - x86_pmu.event_bits; | ||
621 | u64 prev_raw_count, new_raw_count; | ||
622 | s64 delta; | ||
623 | |||
624 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
625 | return 0; | ||
626 | |||
627 | /* | ||
628 | * Careful: an NMI might modify the previous event value. | ||
629 | * | ||
630 | * Our tactic to handle this is to first atomically read and | ||
631 | * exchange a new raw count - then add that new-prev delta | ||
632 | * count to the generic event atomically: | ||
633 | */ | ||
634 | again: | ||
635 | prev_raw_count = atomic64_read(&hwc->prev_count); | ||
636 | rdmsrl(hwc->event_base + idx, new_raw_count); | ||
637 | |||
638 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
639 | new_raw_count) != prev_raw_count) | ||
640 | goto again; | ||
641 | |||
642 | /* | ||
643 | * Now we have the new raw value and have updated the prev | ||
644 | * timestamp already. We can now calculate the elapsed delta | ||
645 | * (event-)time and add that to the generic event. | ||
646 | * | ||
647 | * Careful, not all hw sign-extends above the physical width | ||
648 | * of the count. | ||
649 | */ | ||
650 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
651 | delta >>= shift; | ||
652 | |||
653 | atomic64_add(delta, &event->count); | ||
654 | atomic64_sub(delta, &hwc->period_left); | ||
655 | |||
656 | return new_raw_count; | ||
657 | } | ||
658 | |||
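The shift pair in x86_perf_event_update() sign-extends counters that are narrower than 64 bits, so the new-minus-previous delta comes out right even when the raw value has wrapped. A standalone sketch of the same arithmetic, assuming a hypothetical 40-bit counter width:

#include <stdint.h>
#include <stdio.h>

/* Compute the delta between two raw reads of an `event_bits`-wide counter,
 * using the same shift trick as x86_perf_event_update(). */
static int64_t counter_delta(uint64_t prev, uint64_t now, int event_bits)
{
	int shift = 64 - event_bits;
	int64_t delta = (now << shift) - (prev << shift);

	return delta >> shift;	/* arithmetic shift restores the sign/width */
}

int main(void)
{
	/* 40-bit counter wrapping from near the top back to a small value: */
	uint64_t prev = (1ULL << 40) - 5;
	uint64_t now  = 10;

	printf("delta = %lld\n", (long long)counter_delta(prev, now, 40));	/* 15 */
	return 0;
}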
659 | static atomic_t active_events; | ||
660 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
661 | |||
662 | static bool reserve_pmc_hardware(void) | ||
663 | { | ||
664 | #ifdef CONFIG_X86_LOCAL_APIC | ||
665 | int i; | ||
666 | |||
667 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
668 | disable_lapic_nmi_watchdog(); | ||
669 | |||
670 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
671 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | ||
672 | goto perfctr_fail; | ||
673 | } | ||
674 | |||
675 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
676 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | ||
677 | goto eventsel_fail; | ||
678 | } | ||
679 | #endif | ||
680 | |||
681 | return true; | ||
682 | |||
683 | #ifdef CONFIG_X86_LOCAL_APIC | ||
684 | eventsel_fail: | ||
685 | for (i--; i >= 0; i--) | ||
686 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
687 | |||
688 | i = x86_pmu.num_events; | ||
689 | |||
690 | perfctr_fail: | ||
691 | for (i--; i >= 0; i--) | ||
692 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
693 | |||
694 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
695 | enable_lapic_nmi_watchdog(); | ||
696 | |||
697 | return false; | ||
698 | #endif | ||
699 | } | ||
700 | |||
701 | static void release_pmc_hardware(void) | ||
702 | { | ||
703 | #ifdef CONFIG_X86_LOCAL_APIC | ||
704 | int i; | ||
705 | |||
706 | for (i = 0; i < x86_pmu.num_events; i++) { | ||
707 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
708 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
709 | } | ||
710 | |||
711 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
712 | enable_lapic_nmi_watchdog(); | ||
713 | #endif | ||
714 | } | ||
715 | |||
716 | static inline bool bts_available(void) | ||
717 | { | ||
718 | return x86_pmu.enable_bts != NULL; | ||
719 | } | ||
720 | |||
721 | static inline void init_debug_store_on_cpu(int cpu) | ||
722 | { | ||
723 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
724 | |||
725 | if (!ds) | ||
726 | return; | ||
727 | |||
728 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
729 | (u32)((u64)(unsigned long)ds), | ||
730 | (u32)((u64)(unsigned long)ds >> 32)); | ||
731 | } | ||
732 | |||
733 | static inline void fini_debug_store_on_cpu(int cpu) | ||
734 | { | ||
735 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
736 | return; | ||
737 | |||
738 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
739 | } | ||
740 | |||
741 | static void release_bts_hardware(void) | ||
742 | { | ||
743 | int cpu; | ||
744 | |||
745 | if (!bts_available()) | ||
746 | return; | ||
747 | |||
748 | get_online_cpus(); | ||
749 | |||
750 | for_each_online_cpu(cpu) | ||
751 | fini_debug_store_on_cpu(cpu); | ||
752 | |||
753 | for_each_possible_cpu(cpu) { | ||
754 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
755 | |||
756 | if (!ds) | ||
757 | continue; | ||
758 | |||
759 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
760 | |||
761 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
762 | kfree(ds); | ||
763 | } | ||
764 | |||
765 | put_online_cpus(); | ||
766 | } | ||
767 | |||
768 | static int reserve_bts_hardware(void) | ||
769 | { | ||
770 | int cpu, err = 0; | ||
771 | |||
772 | if (!bts_available()) | ||
773 | return 0; | ||
774 | |||
775 | get_online_cpus(); | ||
776 | |||
777 | for_each_possible_cpu(cpu) { | ||
778 | struct debug_store *ds; | ||
779 | void *buffer; | ||
780 | |||
781 | err = -ENOMEM; | ||
782 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
783 | if (unlikely(!buffer)) | ||
784 | break; | ||
785 | |||
786 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
787 | if (unlikely(!ds)) { | ||
788 | kfree(buffer); | ||
789 | break; | ||
790 | } | ||
791 | |||
792 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
793 | ds->bts_index = ds->bts_buffer_base; | ||
794 | ds->bts_absolute_maximum = | ||
795 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
796 | ds->bts_interrupt_threshold = | ||
797 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
798 | |||
799 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
800 | err = 0; | ||
801 | } | ||
802 | |||
803 | if (err) | ||
804 | release_bts_hardware(); | ||
805 | else { | ||
806 | for_each_online_cpu(cpu) | ||
807 | init_debug_store_on_cpu(cpu); | ||
808 | } | ||
809 | |||
810 | put_online_cpus(); | ||
811 | |||
812 | return err; | ||
813 | } | ||
814 | |||
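reserve_bts_hardware() lays the buffer out so that bts_absolute_maximum sits BTS_BUFFER_SIZE bytes past the base and the interrupt threshold BTS_OVFL_TH bytes before that maximum, i.e. the hardware raises the interrupt 128 records before the 2048-record buffer fills. A quick standalone check of that arithmetic (the base address is made up for illustration):

#include <stdint.h>
#include <stdio.h>

#define BTS_RECORD_SIZE	24			/* from, to, flags: 3 * u64 */
#define BTS_BUFFER_SIZE	(BTS_RECORD_SIZE * 2048)
#define BTS_OVFL_TH	(BTS_RECORD_SIZE * 128)

int main(void)
{
	uint64_t base		= 0x100000;	/* made-up bts_buffer_base */
	uint64_t absolute_max	= base + BTS_BUFFER_SIZE;
	uint64_t threshold	= absolute_max - BTS_OVFL_TH;

	printf("%d records per buffer, interrupt %d records before the end\n",
	       BTS_BUFFER_SIZE / BTS_RECORD_SIZE, BTS_OVFL_TH / BTS_RECORD_SIZE);
	printf("base=%#llx max=%#llx threshold=%#llx\n",
	       (unsigned long long)base, (unsigned long long)absolute_max,
	       (unsigned long long)threshold);
	return 0;
}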
815 | static void hw_perf_event_destroy(struct perf_event *event) | ||
816 | { | ||
817 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | ||
818 | release_pmc_hardware(); | ||
819 | release_bts_hardware(); | ||
820 | mutex_unlock(&pmc_reserve_mutex); | ||
821 | } | ||
822 | } | ||
823 | |||
824 | static inline int x86_pmu_initialized(void) | ||
825 | { | ||
826 | return x86_pmu.handle_irq != NULL; | ||
827 | } | ||
828 | |||
829 | static inline int | ||
830 | set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) | ||
831 | { | ||
832 | unsigned int cache_type, cache_op, cache_result; | ||
833 | u64 config, val; | ||
834 | |||
835 | config = attr->config; | ||
836 | |||
837 | cache_type = (config >> 0) & 0xff; | ||
838 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
839 | return -EINVAL; | ||
840 | |||
841 | cache_op = (config >> 8) & 0xff; | ||
842 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
843 | return -EINVAL; | ||
844 | |||
845 | cache_result = (config >> 16) & 0xff; | ||
846 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
847 | return -EINVAL; | ||
848 | |||
849 | val = hw_cache_event_ids[cache_type][cache_op][cache_result]; | ||
850 | |||
851 | if (val == 0) | ||
852 | return -ENOENT; | ||
853 | |||
854 | if (val == -1) | ||
855 | return -EINVAL; | ||
856 | |||
857 | hwc->config |= val; | ||
858 | |||
859 | return 0; | ||
860 | } | ||
861 | |||
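set_ext_hw_attr() unpacks a PERF_TYPE_HW_CACHE config as cache type in bits 0-7, operation in bits 8-15 and result in bits 16-23, then looks the triple up in hw_cache_event_ids. A hedged user-space sketch of how such a config would be packed on the other side of the ABI (assumes the enums and struct from <linux/perf_event.h> are available):

#include <linux/perf_event.h>
#include <stdio.h>

/* Pack a hardware-cache event config the way set_ext_hw_attr() unpacks it. */
static __u64 cache_config(int type, int op, int result)
{
	return (__u64)type | ((__u64)op << 8) | ((__u64)result << 16);
}

int main(void)
{
	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HW_CACHE,
		.size	= sizeof(attr),
		.config	= cache_config(PERF_COUNT_HW_CACHE_L1D,
				       PERF_COUNT_HW_CACHE_OP_READ,
				       PERF_COUNT_HW_CACHE_RESULT_MISS),
	};

	printf("L1D read misses -> config %#llx\n",
	       (unsigned long long)attr.config);
	return 0;
}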
862 | static void intel_pmu_enable_bts(u64 config) | ||
863 | { | ||
864 | unsigned long debugctlmsr; | ||
865 | |||
866 | debugctlmsr = get_debugctlmsr(); | ||
867 | |||
868 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
869 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
870 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
871 | |||
872 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
873 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
874 | |||
875 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
876 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
877 | |||
878 | update_debugctlmsr(debugctlmsr); | ||
879 | } | ||
880 | |||
881 | static void intel_pmu_disable_bts(void) | ||
882 | { | ||
883 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
884 | unsigned long debugctlmsr; | ||
885 | |||
886 | if (!cpuc->ds) | ||
887 | return; | ||
888 | |||
889 | debugctlmsr = get_debugctlmsr(); | ||
890 | |||
891 | debugctlmsr &= | ||
892 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
893 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
894 | |||
895 | update_debugctlmsr(debugctlmsr); | ||
896 | } | ||
897 | |||
898 | /* | ||
899 | * Set up the hardware configuration for a given attr_type | ||
900 | */ | ||
901 | static int __hw_perf_event_init(struct perf_event *event) | ||
902 | { | ||
903 | struct perf_event_attr *attr = &event->attr; | ||
904 | struct hw_perf_event *hwc = &event->hw; | ||
905 | u64 config; | ||
906 | int err; | ||
907 | |||
908 | if (!x86_pmu_initialized()) | ||
909 | return -ENODEV; | ||
910 | |||
911 | err = 0; | ||
912 | if (!atomic_inc_not_zero(&active_events)) { | ||
913 | mutex_lock(&pmc_reserve_mutex); | ||
914 | if (atomic_read(&active_events) == 0) { | ||
915 | if (!reserve_pmc_hardware()) | ||
916 | err = -EBUSY; | ||
917 | else | ||
918 | err = reserve_bts_hardware(); | ||
919 | } | ||
920 | if (!err) | ||
921 | atomic_inc(&active_events); | ||
922 | mutex_unlock(&pmc_reserve_mutex); | ||
923 | } | ||
924 | if (err) | ||
925 | return err; | ||
926 | |||
927 | event->destroy = hw_perf_event_destroy; | ||
928 | |||
929 | /* | ||
930 | * Generate PMC IRQs: | ||
931 | * (keep 'enabled' bit clear for now) | ||
932 | */ | ||
933 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
934 | |||
935 | /* | ||
936 | * Count user and OS events unless requested not to. | ||
937 | */ | ||
938 | if (!attr->exclude_user) | ||
939 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
940 | if (!attr->exclude_kernel) | ||
941 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
942 | |||
943 | if (!hwc->sample_period) { | ||
944 | hwc->sample_period = x86_pmu.max_period; | ||
945 | hwc->last_period = hwc->sample_period; | ||
946 | atomic64_set(&hwc->period_left, hwc->sample_period); | ||
947 | } else { | ||
948 | /* | ||
949 | * If we have a PMU initialized but no APIC | ||
950 | * interrupts, we cannot sample hardware | ||
951 | * events (user-space has to fall back and | ||
952 | * sample via a hrtimer based software event): | ||
953 | */ | ||
954 | if (!x86_pmu.apic) | ||
955 | return -EOPNOTSUPP; | ||
956 | } | ||
957 | |||
958 | /* | ||
959 | * Raw hw_event types provide the config in the hw_event structure | ||
960 | */ | ||
961 | if (attr->type == PERF_TYPE_RAW) { | ||
962 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
963 | return 0; | ||
964 | } | ||
965 | |||
966 | if (attr->type == PERF_TYPE_HW_CACHE) | ||
967 | return set_ext_hw_attr(hwc, attr); | ||
968 | |||
969 | if (attr->config >= x86_pmu.max_events) | ||
970 | return -EINVAL; | ||
971 | |||
972 | /* | ||
973 | * The generic map: | ||
974 | */ | ||
975 | config = x86_pmu.event_map(attr->config); | ||
976 | |||
977 | if (config == 0) | ||
978 | return -ENOENT; | ||
979 | |||
980 | if (config == -1LL) | ||
981 | return -EINVAL; | ||
982 | |||
983 | /* | ||
984 | * Branch tracing: | ||
985 | */ | ||
986 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
987 | (hwc->sample_period == 1)) { | ||
988 | /* BTS is not supported by this architecture. */ | ||
989 | if (!bts_available()) | ||
990 | return -EOPNOTSUPP; | ||
991 | |||
992 | /* BTS is currently only allowed for user-mode. */ | ||
993 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
994 | return -EOPNOTSUPP; | ||
995 | } | ||
996 | |||
997 | hwc->config |= config; | ||
998 | |||
999 | return 0; | ||
1000 | } | ||
1001 | |||
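The branch-tracing check above only considers BTS for the generic branch-instructions event with a sample period of exactly 1 and kernel counting excluded. A sketch of a perf_event_attr that satisfies those conditions (field names from <linux/perf_event.h>; whether BTS is actually used still depends on bts_available() and on counter scheduling):

#include <linux/perf_event.h>
#include <stdio.h>
#include <string.h>

/* Fill in a perf_event_attr that matches the BTS conditions checked above:
 * branch instructions, sample_period == 1, kernel counting excluded. */
static void bts_attr(struct perf_event_attr *attr)
{
	memset(attr, 0, sizeof(*attr));
	attr->type		= PERF_TYPE_HARDWARE;
	attr->size		= sizeof(*attr);
	attr->config		= PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
	attr->sample_period	= 1;			/* every branch */
	attr->exclude_kernel	= 1;			/* BTS here is user-mode only */
	attr->sample_type	= PERF_SAMPLE_IP | PERF_SAMPLE_ADDR;
}

int main(void)
{
	struct perf_event_attr attr;

	bts_attr(&attr);
	printf("config=%llu period=%llu exclude_kernel=%u\n",
	       (unsigned long long)attr.config,
	       (unsigned long long)attr.sample_period,
	       (unsigned)attr.exclude_kernel);
	return 0;
}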
1002 | static void p6_pmu_disable_all(void) | ||
1003 | { | ||
1004 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1005 | u64 val; | ||
1006 | |||
1007 | if (!cpuc->enabled) | ||
1008 | return; | ||
1009 | |||
1010 | cpuc->enabled = 0; | ||
1011 | barrier(); | ||
1012 | |||
1013 | /* p6 only has one enable register */ | ||
1014 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1015 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1016 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1017 | } | ||
1018 | |||
1019 | static void intel_pmu_disable_all(void) | ||
1020 | { | ||
1021 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1022 | |||
1023 | if (!cpuc->enabled) | ||
1024 | return; | ||
1025 | |||
1026 | cpuc->enabled = 0; | ||
1027 | barrier(); | ||
1028 | |||
1029 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
1030 | |||
1031 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1032 | intel_pmu_disable_bts(); | ||
1033 | } | ||
1034 | |||
1035 | static void amd_pmu_disable_all(void) | ||
1036 | { | ||
1037 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1038 | int idx; | ||
1039 | |||
1040 | if (!cpuc->enabled) | ||
1041 | return; | ||
1042 | |||
1043 | cpuc->enabled = 0; | ||
1044 | /* | ||
1045 | * ensure we write the disable before we start disabling the | ||
1046 | * events proper, so that amd_pmu_enable_event() does the | ||
1047 | * right thing. | ||
1048 | */ | ||
1049 | barrier(); | ||
1050 | |||
1051 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1052 | u64 val; | ||
1053 | |||
1054 | if (!test_bit(idx, cpuc->active_mask)) | ||
1055 | continue; | ||
1056 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
1057 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | ||
1058 | continue; | ||
1059 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1060 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
1061 | } | ||
1062 | } | ||
1063 | |||
1064 | void hw_perf_disable(void) | ||
1065 | { | ||
1066 | if (!x86_pmu_initialized()) | ||
1067 | return; | ||
1068 | return x86_pmu.disable_all(); | ||
1069 | } | ||
1070 | |||
1071 | static void p6_pmu_enable_all(void) | ||
1072 | { | ||
1073 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1074 | unsigned long val; | ||
1075 | |||
1076 | if (cpuc->enabled) | ||
1077 | return; | ||
1078 | |||
1079 | cpuc->enabled = 1; | ||
1080 | barrier(); | ||
1081 | |||
1082 | /* p6 only has one enable register */ | ||
1083 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
1084 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1085 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
1086 | } | ||
1087 | |||
1088 | static void intel_pmu_enable_all(void) | ||
1089 | { | ||
1090 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1091 | |||
1092 | if (cpuc->enabled) | ||
1093 | return; | ||
1094 | |||
1095 | cpuc->enabled = 1; | ||
1096 | barrier(); | ||
1097 | |||
1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1099 | |||
1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1101 | struct perf_event *event = | ||
1102 | cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1103 | |||
1104 | if (WARN_ON_ONCE(!event)) | ||
1105 | return; | ||
1106 | |||
1107 | intel_pmu_enable_bts(event->hw.config); | ||
1108 | } | ||
1109 | } | ||
1110 | |||
1111 | static void amd_pmu_enable_all(void) | ||
1112 | { | ||
1113 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1114 | int idx; | ||
1115 | |||
1116 | if (cpuc->enabled) | ||
1117 | return; | ||
1118 | |||
1119 | cpuc->enabled = 1; | ||
1120 | barrier(); | ||
1121 | |||
1122 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1123 | struct perf_event *event = cpuc->events[idx]; | ||
1124 | u64 val; | ||
1125 | |||
1126 | if (!test_bit(idx, cpuc->active_mask)) | ||
1127 | continue; | ||
1128 | |||
1129 | val = event->hw.config; | ||
1130 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1131 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
1132 | } | ||
1133 | } | ||
1134 | |||
1135 | void hw_perf_enable(void) | ||
1136 | { | ||
1137 | if (!x86_pmu_initialized()) | ||
1138 | return; | ||
1139 | x86_pmu.enable_all(); | ||
1140 | } | ||
1141 | |||
1142 | static inline u64 intel_pmu_get_status(void) | ||
1143 | { | ||
1144 | u64 status; | ||
1145 | |||
1146 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1147 | |||
1148 | return status; | ||
1149 | } | ||
1150 | |||
1151 | static inline void intel_pmu_ack_status(u64 ack) | ||
1152 | { | ||
1153 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
1154 | } | ||
1155 | |||
1156 | static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1157 | { | ||
1158 | (void)checking_wrmsrl(hwc->config_base + idx, | ||
1159 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
1160 | } | ||
1161 | |||
1162 | static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1163 | { | ||
1164 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); | ||
1165 | } | ||
1166 | |||
1167 | static inline void | ||
1168 | intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1169 | { | ||
1170 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1171 | u64 ctrl_val, mask; | ||
1172 | |||
1173 | mask = 0xfULL << (idx * 4); | ||
1174 | |||
1175 | rdmsrl(hwc->config_base, ctrl_val); | ||
1176 | ctrl_val &= ~mask; | ||
1177 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1178 | } | ||
1179 | |||
1180 | static inline void | ||
1181 | p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1182 | { | ||
1183 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1184 | u64 val = P6_NOP_EVENT; | ||
1185 | |||
1186 | if (cpuc->enabled) | ||
1187 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1188 | |||
1189 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1190 | } | ||
1191 | |||
1192 | static inline void | ||
1193 | intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1194 | { | ||
1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1196 | intel_pmu_disable_bts(); | ||
1197 | return; | ||
1198 | } | ||
1199 | |||
1200 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1201 | intel_pmu_disable_fixed(hwc, idx); | ||
1202 | return; | ||
1203 | } | ||
1204 | |||
1205 | x86_pmu_disable_event(hwc, idx); | ||
1206 | } | ||
1207 | |||
1208 | static inline void | ||
1209 | amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) | ||
1210 | { | ||
1211 | x86_pmu_disable_event(hwc, idx); | ||
1212 | } | ||
1213 | |||
1214 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); | ||
1215 | |||
1216 | /* | ||
1217 | * Set the next IRQ period, based on the hwc->period_left value. | ||
1218 | * To be called with the event disabled in hw: | ||
1219 | */ | ||
1220 | static int | ||
1221 | x86_perf_event_set_period(struct perf_event *event, | ||
1222 | struct hw_perf_event *hwc, int idx) | ||
1223 | { | ||
1224 | s64 left = atomic64_read(&hwc->period_left); | ||
1225 | s64 period = hwc->sample_period; | ||
1226 | int err, ret = 0; | ||
1227 | |||
1228 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
1229 | return 0; | ||
1230 | |||
1231 | /* | ||
1232 | * If we are way outside a reasonable range then just skip forward: | ||
1233 | */ | ||
1234 | if (unlikely(left <= -period)) { | ||
1235 | left = period; | ||
1236 | atomic64_set(&hwc->period_left, left); | ||
1237 | hwc->last_period = period; | ||
1238 | ret = 1; | ||
1239 | } | ||
1240 | |||
1241 | if (unlikely(left <= 0)) { | ||
1242 | left += period; | ||
1243 | atomic64_set(&hwc->period_left, left); | ||
1244 | hwc->last_period = period; | ||
1245 | ret = 1; | ||
1246 | } | ||
1247 | /* | ||
1248 | * Quirk: certain CPUs don't like it if just 1 hw_event is left: | ||
1249 | */ | ||
1250 | if (unlikely(left < 2)) | ||
1251 | left = 2; | ||
1252 | |||
1253 | if (left > x86_pmu.max_period) | ||
1254 | left = x86_pmu.max_period; | ||
1255 | |||
1256 | per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; | ||
1257 | |||
1258 | /* | ||
1259 | * The hw event starts counting from this event offset, | ||
1260 | * mark it to be able to extract future deltas: | ||
1261 | */ | ||
1262 | atomic64_set(&hwc->prev_count, (u64)-left); | ||
1263 | |||
1264 | err = checking_wrmsrl(hwc->event_base + idx, | ||
1265 | (u64)(-left) & x86_pmu.event_mask); | ||
1266 | |||
1267 | perf_event_update_userpage(event); | ||
1268 | |||
1269 | return ret; | ||
1270 | } | ||
1271 | |||
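Writing (u64)-left (clipped by event_mask) makes the counter count up from -left, so it overflows and raises the PMI after exactly `left` more events. A standalone sketch of that arithmetic, assuming a 48-bit counter width for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t event_mask = (1ULL << 48) - 1;	/* pretend 48-bit counters */
	int64_t  left	    = 100000;		/* events until the next interrupt */

	/* What x86_perf_event_set_period() writes into the counter: */
	uint64_t programmed = (uint64_t)(-left) & event_mask;

	/* The counter overflows when it wraps past event_mask: */
	uint64_t events_to_overflow = event_mask - programmed + 1;

	printf("programmed=%#llx, overflows after %llu events\n",
	       (unsigned long long)programmed,
	       (unsigned long long)events_to_overflow);	/* 100000 */
	return 0;
}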
1272 | static inline void | ||
1273 | intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) | ||
1274 | { | ||
1275 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
1276 | u64 ctrl_val, bits, mask; | ||
1277 | int err; | ||
1278 | |||
1279 | /* | ||
1280 | * Enable IRQ generation (0x8), | ||
1281 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
1282 | * if requested: | ||
1283 | */ | ||
1284 | bits = 0x8ULL; | ||
1285 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
1286 | bits |= 0x2; | ||
1287 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
1288 | bits |= 0x1; | ||
1289 | bits <<= (idx * 4); | ||
1290 | mask = 0xfULL << (idx * 4); | ||
1291 | |||
1292 | rdmsrl(hwc->config_base, ctrl_val); | ||
1293 | ctrl_val &= ~mask; | ||
1294 | ctrl_val |= bits; | ||
1295 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
1296 | } | ||
1297 | |||
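Each fixed counter owns a 4-bit field in MSR_ARCH_PERFMON_FIXED_CTR_CTRL; within that field bit 0 enables ring-0 counting, bit 1 ring-3 counting and bit 3 the PMI, exactly as the bits/mask computation above encodes. The same read-modify-write on a plain variable, as a sketch:

#include <stdint.h>
#include <stdio.h>

/* Update the 4-bit control field of fixed counter `idx`, as
 * intel_pmu_enable_fixed() does on the real MSR value. */
static uint64_t set_fixed_ctrl(uint64_t ctrl_val, int idx, int usr, int os)
{
	uint64_t bits = 0x8;			/* PMI on overflow */
	uint64_t mask = 0xfULL << (idx * 4);

	if (usr)
		bits |= 0x2;			/* count ring 3 */
	if (os)
		bits |= 0x1;			/* count ring 0 */

	return (ctrl_val & ~mask) | (bits << (idx * 4));
}

int main(void)
{
	/* Enable fixed counter 1 for user and kernel counting: */
	printf("%#llx\n", (unsigned long long)set_fixed_ctrl(0, 1, 1, 1));	/* 0xb0 */
	return 0;
}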
1298 | static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1299 | { | ||
1300 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1301 | u64 val; | ||
1302 | |||
1303 | val = hwc->config; | ||
1304 | if (cpuc->enabled) | ||
1305 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1306 | |||
1307 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1308 | } | ||
1309 | |||
1310 | |||
1311 | static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1312 | { | ||
1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1314 | if (!__get_cpu_var(cpu_hw_events).enabled) | ||
1315 | return; | ||
1316 | |||
1317 | intel_pmu_enable_bts(hwc->config); | ||
1318 | return; | ||
1319 | } | ||
1320 | |||
1321 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
1322 | intel_pmu_enable_fixed(hwc, idx); | ||
1323 | return; | ||
1324 | } | ||
1325 | |||
1326 | x86_pmu_enable_event(hwc, idx); | ||
1327 | } | ||
1328 | |||
1329 | static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) | ||
1330 | { | ||
1331 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1332 | |||
1333 | if (cpuc->enabled) | ||
1334 | x86_pmu_enable_event(hwc, idx); | ||
1335 | } | ||
1336 | |||
1337 | static int | ||
1338 | fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc) | ||
1339 | { | ||
1340 | unsigned int hw_event; | ||
1341 | |||
1342 | hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1343 | |||
1344 | if (unlikely((hw_event == | ||
1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
1346 | (hwc->sample_period == 1))) | ||
1347 | return X86_PMC_IDX_FIXED_BTS; | ||
1348 | |||
1349 | if (!x86_pmu.num_events_fixed) | ||
1350 | return -1; | ||
1351 | |||
1352 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | ||
1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
1354 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | ||
1355 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
1356 | if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | ||
1357 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
1358 | |||
1359 | return -1; | ||
1360 | } | ||
1361 | |||
1362 | /* | ||
1363 | * Find a PMC slot for the freshly enabled / scheduled in event: | ||
1364 | */ | ||
1365 | static int x86_pmu_enable(struct perf_event *event) | ||
1366 | { | ||
1367 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1368 | struct hw_perf_event *hwc = &event->hw; | ||
1369 | int idx; | ||
1370 | |||
1371 | idx = fixed_mode_idx(event, hwc); | ||
1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { | ||
1373 | /* BTS is already occupied. */ | ||
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1375 | return -EAGAIN; | ||
1376 | |||
1377 | hwc->config_base = 0; | ||
1378 | hwc->event_base = 0; | ||
1379 | hwc->idx = idx; | ||
1380 | } else if (idx >= 0) { | ||
1381 | /* | ||
1382 | * Try to get the fixed event, if that is already taken | ||
1383 | * then try to get a generic event: | ||
1384 | */ | ||
1385 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1386 | goto try_generic; | ||
1387 | |||
1388 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
1389 | /* | ||
1390 | * We set it so that event_base + idx in wrmsr/rdmsr maps to | ||
1391 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
1392 | */ | ||
1393 | hwc->event_base = | ||
1394 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
1395 | hwc->idx = idx; | ||
1396 | } else { | ||
1397 | idx = hwc->idx; | ||
1398 | /* Try to get the previous generic event again */ | ||
1399 | if (test_and_set_bit(idx, cpuc->used_mask)) { | ||
1400 | try_generic: | ||
1401 | idx = find_first_zero_bit(cpuc->used_mask, | ||
1402 | x86_pmu.num_events); | ||
1403 | if (idx == x86_pmu.num_events) | ||
1404 | return -EAGAIN; | ||
1405 | |||
1406 | set_bit(idx, cpuc->used_mask); | ||
1407 | hwc->idx = idx; | ||
1408 | } | ||
1409 | hwc->config_base = x86_pmu.eventsel; | ||
1410 | hwc->event_base = x86_pmu.perfctr; | ||
1411 | } | ||
1412 | |||
1413 | perf_events_lapic_init(); | ||
1414 | |||
1415 | x86_pmu.disable(hwc, idx); | ||
1416 | |||
1417 | cpuc->events[idx] = event; | ||
1418 | set_bit(idx, cpuc->active_mask); | ||
1419 | |||
1420 | x86_perf_event_set_period(event, hwc, idx); | ||
1421 | x86_pmu.enable(hwc, idx); | ||
1422 | |||
1423 | perf_event_update_userpage(event); | ||
1424 | |||
1425 | return 0; | ||
1426 | } | ||
1427 | |||
1428 | static void x86_pmu_unthrottle(struct perf_event *event) | ||
1429 | { | ||
1430 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1431 | struct hw_perf_event *hwc = &event->hw; | ||
1432 | |||
1433 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | ||
1434 | cpuc->events[hwc->idx] != event)) | ||
1435 | return; | ||
1436 | |||
1437 | x86_pmu.enable(hwc, hwc->idx); | ||
1438 | } | ||
1439 | |||
1440 | void perf_event_print_debug(void) | ||
1441 | { | ||
1442 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | ||
1443 | struct cpu_hw_events *cpuc; | ||
1444 | unsigned long flags; | ||
1445 | int cpu, idx; | ||
1446 | |||
1447 | if (!x86_pmu.num_events) | ||
1448 | return; | ||
1449 | |||
1450 | local_irq_save(flags); | ||
1451 | |||
1452 | cpu = smp_processor_id(); | ||
1453 | cpuc = &per_cpu(cpu_hw_events, cpu); | ||
1454 | |||
1455 | if (x86_pmu.version >= 2) { | ||
1456 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | ||
1457 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1458 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | ||
1459 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | ||
1460 | |||
1461 | pr_info("\n"); | ||
1462 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | ||
1463 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | ||
1464 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | ||
1465 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | ||
1466 | } | ||
1467 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | ||
1468 | |||
1469 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1470 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | ||
1471 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | ||
1472 | |||
1473 | prev_left = per_cpu(pmc_prev_left[idx], cpu); | ||
1474 | |||
1475 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | ||
1476 | cpu, idx, pmc_ctrl); | ||
1477 | pr_info("CPU#%d: gen-PMC%d count: %016llx\n", | ||
1478 | cpu, idx, pmc_count); | ||
1479 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | ||
1480 | cpu, idx, prev_left); | ||
1481 | } | ||
1482 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
1483 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | ||
1484 | |||
1485 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | ||
1486 | cpu, idx, pmc_count); | ||
1487 | } | ||
1488 | local_irq_restore(flags); | ||
1489 | } | ||
1490 | |||
1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) | ||
1492 | { | ||
1493 | struct debug_store *ds = cpuc->ds; | ||
1494 | struct bts_record { | ||
1495 | u64 from; | ||
1496 | u64 to; | ||
1497 | u64 flags; | ||
1498 | }; | ||
1499 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
1500 | struct bts_record *at, *top; | ||
1501 | struct perf_output_handle handle; | ||
1502 | struct perf_event_header header; | ||
1503 | struct perf_sample_data data; | ||
1504 | struct pt_regs regs; | ||
1505 | |||
1506 | if (!event) | ||
1507 | return; | ||
1508 | |||
1509 | if (!ds) | ||
1510 | return; | ||
1511 | |||
1512 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1513 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1514 | |||
1515 | if (top <= at) | ||
1516 | return; | ||
1517 | |||
1518 | ds->bts_index = ds->bts_buffer_base; | ||
1519 | |||
1520 | |||
1521 | data.period = event->hw.last_period; | ||
1522 | data.addr = 0; | ||
1523 | regs.ip = 0; | ||
1524 | |||
1525 | /* | ||
1526 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
1527 | * We will overwrite the from and to address before we output | ||
1528 | * the sample. | ||
1529 | */ | ||
1530 | perf_prepare_sample(&header, &data, event, ®s); | ||
1531 | |||
1532 | if (perf_output_begin(&handle, event, | ||
1533 | header.size * (top - at), 1, 1)) | ||
1534 | return; | ||
1535 | |||
1536 | for (; at < top; at++) { | ||
1537 | data.ip = at->from; | ||
1538 | data.addr = at->to; | ||
1539 | |||
1540 | perf_output_sample(&handle, &header, &data, event); | ||
1541 | } | ||
1542 | |||
1543 | perf_output_end(&handle); | ||
1544 | |||
1545 | /* There's new data available. */ | ||
1546 | event->hw.interrupts++; | ||
1547 | event->pending_kill = POLL_IN; | ||
1548 | } | ||
1549 | |||
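The local struct bts_record (from, to, flags) is three 64-bit words, which is where the BTS_RECORD_SIZE of 24 bytes near the top of the file comes from (the debug_store comment already restricts this code to the 64-bit DS format). A standalone compile-time check of that relationship:

#include <stdint.h>

#define BTS_RECORD_SIZE	24	/* must match the constant at the top of the file */

struct bts_record {
	uint64_t from;		/* branch source address */
	uint64_t to;		/* branch target address */
	uint64_t flags;
};

/* Fails to compile if the layout and the size constant ever drift apart. */
typedef char bts_record_size_check[sizeof(struct bts_record) == BTS_RECORD_SIZE ? 1 : -1];

int main(void)
{
	return 0;
}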
1550 | static void x86_pmu_disable(struct perf_event *event) | ||
1551 | { | ||
1552 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
1553 | struct hw_perf_event *hwc = &event->hw; | ||
1554 | int idx = hwc->idx; | ||
1555 | |||
1556 | /* | ||
1557 | * Must be done before we disable, otherwise the nmi handler | ||
1558 | * could reenable again: | ||
1559 | */ | ||
1560 | clear_bit(idx, cpuc->active_mask); | ||
1561 | x86_pmu.disable(hwc, idx); | ||
1562 | |||
1563 | /* | ||
1564 | * Make sure the cleared pointer becomes visible before we | ||
1565 | * (potentially) free the event: | ||
1566 | */ | ||
1567 | barrier(); | ||
1568 | |||
1569 | /* | ||
1570 | * Drain the remaining delta count out of an event | ||
1571 | * that we are disabling: | ||
1572 | */ | ||
1573 | x86_perf_event_update(event, hwc, idx); | ||
1574 | |||
1575 | /* Drain the remaining BTS records. */ | ||
1576 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) | ||
1577 | intel_pmu_drain_bts_buffer(cpuc); | ||
1578 | |||
1579 | cpuc->events[idx] = NULL; | ||
1580 | clear_bit(idx, cpuc->used_mask); | ||
1581 | |||
1582 | perf_event_update_userpage(event); | ||
1583 | } | ||
1584 | |||
1585 | /* | ||
1586 | * Save and restart an expired event. Called by NMI contexts, | ||
1587 | * so it has to be careful about preempting normal event ops: | ||
1588 | */ | ||
1589 | static int intel_pmu_save_and_restart(struct perf_event *event) | ||
1590 | { | ||
1591 | struct hw_perf_event *hwc = &event->hw; | ||
1592 | int idx = hwc->idx; | ||
1593 | int ret; | ||
1594 | |||
1595 | x86_perf_event_update(event, hwc, idx); | ||
1596 | ret = x86_perf_event_set_period(event, hwc, idx); | ||
1597 | |||
1598 | if (event->state == PERF_EVENT_STATE_ACTIVE) | ||
1599 | intel_pmu_enable_event(hwc, idx); | ||
1600 | |||
1601 | return ret; | ||
1602 | } | ||
1603 | |||
1604 | static void intel_pmu_reset(void) | ||
1605 | { | ||
1606 | struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; | ||
1607 | unsigned long flags; | ||
1608 | int idx; | ||
1609 | |||
1610 | if (!x86_pmu.num_events) | ||
1611 | return; | ||
1612 | |||
1613 | local_irq_save(flags); | ||
1614 | |||
1615 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
1616 | |||
1617 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1618 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
1619 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
1620 | } | ||
1621 | for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { | ||
1622 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
1623 | } | ||
1624 | if (ds) | ||
1625 | ds->bts_index = ds->bts_buffer_base; | ||
1626 | |||
1627 | local_irq_restore(flags); | ||
1628 | } | ||
1629 | |||
1630 | static int p6_pmu_handle_irq(struct pt_regs *regs) | ||
1631 | { | ||
1632 | struct perf_sample_data data; | ||
1633 | struct cpu_hw_events *cpuc; | ||
1634 | struct perf_event *event; | ||
1635 | struct hw_perf_event *hwc; | ||
1636 | int idx, handled = 0; | ||
1637 | u64 val; | ||
1638 | |||
1639 | data.addr = 0; | ||
1640 | |||
1641 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1642 | |||
1643 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1644 | if (!test_bit(idx, cpuc->active_mask)) | ||
1645 | continue; | ||
1646 | |||
1647 | event = cpuc->events[idx]; | ||
1648 | hwc = &event->hw; | ||
1649 | |||
1650 | val = x86_perf_event_update(event, hwc, idx); | ||
1651 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | ||
1652 | continue; | ||
1653 | |||
1654 | /* | ||
1655 | * event overflow | ||
1656 | */ | ||
1657 | handled = 1; | ||
1658 | data.period = event->hw.last_period; | ||
1659 | |||
1660 | if (!x86_perf_event_set_period(event, hwc, idx)) | ||
1661 | continue; | ||
1662 | |||
1663 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1664 | p6_pmu_disable_event(hwc, idx); | ||
1665 | } | ||
1666 | |||
1667 | if (handled) | ||
1668 | inc_irq_stat(apic_perf_irqs); | ||
1669 | |||
1670 | return handled; | ||
1671 | } | ||
1672 | |||
1673 | /* | ||
1674 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
1675 | * rules apply: | ||
1676 | */ | ||
1677 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
1678 | { | ||
1679 | struct perf_sample_data data; | ||
1680 | struct cpu_hw_events *cpuc; | ||
1681 | int bit, loops; | ||
1682 | u64 ack, status; | ||
1683 | |||
1684 | data.addr = 0; | ||
1685 | |||
1686 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1687 | |||
1688 | perf_disable(); | ||
1689 | intel_pmu_drain_bts_buffer(cpuc); | ||
1690 | status = intel_pmu_get_status(); | ||
1691 | if (!status) { | ||
1692 | perf_enable(); | ||
1693 | return 0; | ||
1694 | } | ||
1695 | |||
1696 | loops = 0; | ||
1697 | again: | ||
1698 | if (++loops > 100) { | ||
1699 | WARN_ONCE(1, "perfevents: irq loop stuck!\n"); | ||
1700 | perf_event_print_debug(); | ||
1701 | intel_pmu_reset(); | ||
1702 | perf_enable(); | ||
1703 | return 1; | ||
1704 | } | ||
1705 | |||
1706 | inc_irq_stat(apic_perf_irqs); | ||
1707 | ack = status; | ||
1708 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
1709 | struct perf_event *event = cpuc->events[bit]; | ||
1710 | |||
1711 | clear_bit(bit, (unsigned long *) &status); | ||
1712 | if (!test_bit(bit, cpuc->active_mask)) | ||
1713 | continue; | ||
1714 | |||
1715 | if (!intel_pmu_save_and_restart(event)) | ||
1716 | continue; | ||
1717 | |||
1718 | data.period = event->hw.last_period; | ||
1719 | |||
1720 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1721 | intel_pmu_disable_event(&event->hw, bit); | ||
1722 | } | ||
1723 | |||
1724 | intel_pmu_ack_status(ack); | ||
1725 | |||
1726 | /* | ||
1727 | * Repeat if there is more work to be done: | ||
1728 | */ | ||
1729 | status = intel_pmu_get_status(); | ||
1730 | if (status) | ||
1731 | goto again; | ||
1732 | |||
1733 | perf_enable(); | ||
1734 | |||
1735 | return 1; | ||
1736 | } | ||
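intel_pmu_get_status() and intel_pmu_ack_status() are defined earlier in this file. A hedged sketch of what the loop above assumes they do, namely wrap the architectural perfmon v2 global status and overflow-control MSRs (kernel context only; the sketch_ names are hypothetical):

	#include <linux/types.h>
	#include <asm/msr.h>			/* rdmsrl()/wrmsrl() */

	/* Sketch only; the real helpers live earlier in this file. */
	static inline u64 sketch_intel_pmu_get_status(void)
	{
		u64 status;

		/* One bit per overflowed counter; fixed counters sit above bit 31. */
		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
		return status;
	}

	static inline void sketch_intel_pmu_ack_status(u64 ack)
	{
		/* Writing a bit here clears the corresponding overflow condition. */
		wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
	}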
1737 | |||
1738 | static int amd_pmu_handle_irq(struct pt_regs *regs) | ||
1739 | { | ||
1740 | struct perf_sample_data data; | ||
1741 | struct cpu_hw_events *cpuc; | ||
1742 | struct perf_event *event; | ||
1743 | struct hw_perf_event *hwc; | ||
1744 | int idx, handled = 0; | ||
1745 | u64 val; | ||
1746 | |||
1747 | data.addr = 0; | ||
1748 | |||
1749 | cpuc = &__get_cpu_var(cpu_hw_events); | ||
1750 | |||
1751 | for (idx = 0; idx < x86_pmu.num_events; idx++) { | ||
1752 | if (!test_bit(idx, cpuc->active_mask)) | ||
1753 | continue; | ||
1754 | |||
1755 | event = cpuc->events[idx]; | ||
1756 | hwc = &event->hw; | ||
1757 | |||
1758 | val = x86_perf_event_update(event, hwc, idx); | ||
1759 | if (val & (1ULL << (x86_pmu.event_bits - 1))) | ||
1760 | continue; | ||
1761 | |||
1762 | /* | ||
1763 | * event overflow | ||
1764 | */ | ||
1765 | handled = 1; | ||
1766 | data.period = event->hw.last_period; | ||
1767 | |||
1768 | if (!x86_perf_event_set_period(event, hwc, idx)) | ||
1769 | continue; | ||
1770 | |||
1771 | if (perf_event_overflow(event, 1, &data, regs)) | ||
1772 | amd_pmu_disable_event(hwc, idx); | ||
1773 | } | ||
1774 | |||
1775 | if (handled) | ||
1776 | inc_irq_stat(apic_perf_irqs); | ||
1777 | |||
1778 | return handled; | ||
1779 | } | ||
1780 | |||
1781 | void smp_perf_pending_interrupt(struct pt_regs *regs) | ||
1782 | { | ||
1783 | irq_enter(); | ||
1784 | ack_APIC_irq(); | ||
1785 | inc_irq_stat(apic_pending_irqs); | ||
1786 | perf_event_do_pending(); | ||
1787 | irq_exit(); | ||
1788 | } | ||
1789 | |||
1790 | void set_perf_event_pending(void) | ||
1791 | { | ||
1792 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1793 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | ||
1794 | #endif | ||
1795 | } | ||
1796 | |||
1797 | void perf_events_lapic_init(void) | ||
1798 | { | ||
1799 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1800 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1801 | return; | ||
1802 | |||
1803 | /* | ||
1804 | * Always use NMI for PMU | ||
1805 | */ | ||
1806 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1807 | #endif | ||
1808 | } | ||
1809 | |||
1810 | static int __kprobes | ||
1811 | perf_event_nmi_handler(struct notifier_block *self, | ||
1812 | unsigned long cmd, void *__args) | ||
1813 | { | ||
1814 | struct die_args *args = __args; | ||
1815 | struct pt_regs *regs; | ||
1816 | |||
1817 | if (!atomic_read(&active_events)) | ||
1818 | return NOTIFY_DONE; | ||
1819 | |||
1820 | switch (cmd) { | ||
1821 | case DIE_NMI: | ||
1822 | case DIE_NMI_IPI: | ||
1823 | break; | ||
1824 | |||
1825 | default: | ||
1826 | return NOTIFY_DONE; | ||
1827 | } | ||
1828 | |||
1829 | regs = args->regs; | ||
1830 | |||
1831 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1832 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1833 | #endif | ||
1834 | /* | ||
1835 | * Can't rely on the handled return value to say whether it was our NMI: | ||
1836 | * two events could trigger 'simultaneously', raising two back-to-back NMIs. | ||
1837 | * | ||
1838 | * If the first NMI handles both, the second will find nothing left to do | ||
1839 | * and daze the CPU. | ||
1840 | */ | ||
1841 | x86_pmu.handle_irq(regs); | ||
1842 | |||
1843 | return NOTIFY_STOP; | ||
1844 | } | ||
1845 | |||
1846 | static __read_mostly struct notifier_block perf_event_nmi_notifier = { | ||
1847 | .notifier_call = perf_event_nmi_handler, | ||
1848 | .next = NULL, | ||
1849 | .priority = 1 | ||
1850 | }; | ||
1851 | |||
1852 | static struct x86_pmu p6_pmu = { | ||
1853 | .name = "p6", | ||
1854 | .handle_irq = p6_pmu_handle_irq, | ||
1855 | .disable_all = p6_pmu_disable_all, | ||
1856 | .enable_all = p6_pmu_enable_all, | ||
1857 | .enable = p6_pmu_enable_event, | ||
1858 | .disable = p6_pmu_disable_event, | ||
1859 | .eventsel = MSR_P6_EVNTSEL0, | ||
1860 | .perfctr = MSR_P6_PERFCTR0, | ||
1861 | .event_map = p6_pmu_event_map, | ||
1862 | .raw_event = p6_pmu_raw_event, | ||
1863 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
1864 | .apic = 1, | ||
1865 | .max_period = (1ULL << 31) - 1, | ||
1866 | .version = 0, | ||
1867 | .num_events = 2, | ||
1868 | /* | ||
1869 | * Events have 40 bits implemented. However, they are designed such | ||
1870 | * that bits [32-39] are sign extensions of bit 31. As such, the | ||
1871 | * effective width of an event for a P6-like PMU is only 32 bits. | ||
1872 | * | ||
1873 | * See the IA-32 Intel Architecture Software Developer's Manual, Vol. 3B. | ||
1874 | */ | ||
1875 | .event_bits = 32, | ||
1876 | .event_mask = (1ULL << 32) - 1, | ||
1877 | }; | ||
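The 32-bit effective width claimed in the comment above can be modelled with a small standalone calculation: bits 32-39 only mirror bit 31, so the upper byte carries no independent state (this mirrors the manual text the comment cites; the helper below is illustrative only):

	#include <stdio.h>
	#include <stdint.h>

	/* Model the documented behaviour: bits 32-39 mirror bit 31. */
	static uint64_t p6_visible_counter(uint32_t low32)
	{
		uint64_t val = low32;

		if (low32 & (1U << 31))
			val |= 0xffULL << 32;		/* sign-extend bit 31 into 32-39 */
		return val & ((1ULL << 40) - 1);	/* 40 implemented bits */
	}

	int main(void)
	{
		printf("%#llx\n", (unsigned long long)p6_visible_counter(0x80000000u));
		/* prints 0xff80000000: the upper byte is not independent state */
		return 0;
	}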
1878 | |||
1879 | static struct x86_pmu intel_pmu = { | ||
1880 | .name = "Intel", | ||
1881 | .handle_irq = intel_pmu_handle_irq, | ||
1882 | .disable_all = intel_pmu_disable_all, | ||
1883 | .enable_all = intel_pmu_enable_all, | ||
1884 | .enable = intel_pmu_enable_event, | ||
1885 | .disable = intel_pmu_disable_event, | ||
1886 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
1887 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
1888 | .event_map = intel_pmu_event_map, | ||
1889 | .raw_event = intel_pmu_raw_event, | ||
1890 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
1891 | .apic = 1, | ||
1892 | /* | ||
1893 | * Intel PMCs cannot be accessed sanely above 32-bit width, | ||
1894 | * so we install an artificial 1<<31 period regardless of | ||
1895 | * the generic event period: | ||
1896 | */ | ||
1897 | .max_period = (1ULL << 31) - 1, | ||
1898 | .enable_bts = intel_pmu_enable_bts, | ||
1899 | .disable_bts = intel_pmu_disable_bts, | ||
1900 | }; | ||
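A small standalone check of the period cap above, on the assumption (taken from the comment) that re-arming writes the negated period and only the low 32 bits can be written reliably:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const uint64_t max_period = (1ULL << 31) - 1;
		const uint32_t armed = (uint32_t)(0 - max_period);	/* value written to the PMC */

		printf("period=%#llx -> armed=%#x\n",
		       (unsigned long long)max_period, armed);
		/* 0x7fffffff -> 0x80000001: still representable in 32 bits */
		return 0;
	}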
1901 | |||
1902 | static struct x86_pmu amd_pmu = { | ||
1903 | .name = "AMD", | ||
1904 | .handle_irq = amd_pmu_handle_irq, | ||
1905 | .disable_all = amd_pmu_disable_all, | ||
1906 | .enable_all = amd_pmu_enable_all, | ||
1907 | .enable = amd_pmu_enable_event, | ||
1908 | .disable = amd_pmu_disable_event, | ||
1909 | .eventsel = MSR_K7_EVNTSEL0, | ||
1910 | .perfctr = MSR_K7_PERFCTR0, | ||
1911 | .event_map = amd_pmu_event_map, | ||
1912 | .raw_event = amd_pmu_raw_event, | ||
1913 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
1914 | .num_events = 4, | ||
1915 | .event_bits = 48, | ||
1916 | .event_mask = (1ULL << 48) - 1, | ||
1917 | .apic = 1, | ||
1918 | /* use highest bit to detect overflow */ | ||
1919 | .max_period = (1ULL << 47) - 1, | ||
1920 | }; | ||
1921 | |||
1922 | static int p6_pmu_init(void) | ||
1923 | { | ||
1924 | switch (boot_cpu_data.x86_model) { | ||
1925 | case 1: | ||
1926 | case 3: /* Pentium Pro */ | ||
1927 | case 5: | ||
1928 | case 6: /* Pentium II */ | ||
1929 | case 7: | ||
1930 | case 8: | ||
1931 | case 11: /* Pentium III */ | ||
1932 | break; | ||
1933 | case 9: | ||
1934 | case 13: | ||
1935 | /* Pentium M */ | ||
1936 | break; | ||
1937 | default: | ||
1938 | pr_cont("unsupported p6 CPU model %d ", | ||
1939 | boot_cpu_data.x86_model); | ||
1940 | return -ENODEV; | ||
1941 | } | ||
1942 | |||
1943 | x86_pmu = p6_pmu; | ||
1944 | |||
1945 | if (!cpu_has_apic) { | ||
1946 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | ||
1947 | pr_info("no hardware sampling interrupt available.\n"); | ||
1948 | x86_pmu.apic = 0; | ||
1949 | } | ||
1950 | |||
1951 | return 0; | ||
1952 | } | ||
1953 | |||
1954 | static int intel_pmu_init(void) | ||
1955 | { | ||
1956 | union cpuid10_edx edx; | ||
1957 | union cpuid10_eax eax; | ||
1958 | unsigned int unused; | ||
1959 | unsigned int ebx; | ||
1960 | int version; | ||
1961 | |||
1962 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
1963 | /* check for P6 processor family */ | ||
1964 | if (boot_cpu_data.x86 == 6) { | ||
1965 | return p6_pmu_init(); | ||
1966 | } else { | ||
1967 | return -ENODEV; | ||
1968 | } | ||
1969 | } | ||
1970 | |||
1971 | /* | ||
1972 | * Check whether the Architectural PerfMon supports | ||
1973 | * Branch Misses Retired hw_event or not. | ||
1974 | */ | ||
1975 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
1976 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
1977 | return -ENODEV; | ||
1978 | |||
1979 | version = eax.split.version_id; | ||
1980 | if (version < 2) | ||
1981 | return -ENODEV; | ||
1982 | |||
1983 | x86_pmu = intel_pmu; | ||
1984 | x86_pmu.version = version; | ||
1985 | x86_pmu.num_events = eax.split.num_events; | ||
1986 | x86_pmu.event_bits = eax.split.bit_width; | ||
1987 | x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; | ||
1988 | |||
1989 | /* | ||
1990 | * Quirk: v2 perfmon does not report fixed-purpose events, so | ||
1991 | * assume at least 3 events: | ||
1992 | */ | ||
1993 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | ||
1994 | |||
1995 | /* | ||
1996 | * Install the hw-cache-events table: | ||
1997 | */ | ||
1998 | switch (boot_cpu_data.x86_model) { | ||
1999 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
2000 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
2001 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
2002 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
2003 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
2004 | sizeof(hw_cache_event_ids)); | ||
2005 | |||
2006 | pr_cont("Core2 events, "); | ||
2007 | break; | ||
2008 | default: | ||
2009 | case 26: | ||
2010 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
2011 | sizeof(hw_cache_event_ids)); | ||
2012 | |||
2013 | pr_cont("Nehalem/Corei7 events, "); | ||
2014 | break; | ||
2015 | case 28: | ||
2016 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
2017 | sizeof(hw_cache_event_ids)); | ||
2018 | |||
2019 | pr_cont("Atom events, "); | ||
2020 | break; | ||
2021 | } | ||
2022 | return 0; | ||
2023 | } | ||
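The cpuid(10, ...) call above relies on the architectural layout of CPUID leaf 0xA that union cpuid10_eax decodes. A standalone sketch of the EAX bit-fields consumed here; the raw value is made up for illustration:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const uint32_t eax = 0x07300403;	/* hypothetical raw CPUID.0xA EAX */

		printf("version_id  = %u\n", eax & 0xff);		/* bits  7:0  */
		printf("num_events  = %u\n", (eax >> 8) & 0xff);	/* bits 15:8  */
		printf("bit_width   = %u\n", (eax >> 16) & 0xff);	/* bits 23:16 */
		printf("mask_length = %u\n", (eax >> 24) & 0xff);	/* bits 31:24 */
		/* -> version 3, 4 counters, 48 bits wide, 7 event-mask bits */
		return 0;
	}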
2024 | |||
2025 | static int amd_pmu_init(void) | ||
2026 | { | ||
2027 | /* Performance-monitoring supported from K7 and later: */ | ||
2028 | if (boot_cpu_data.x86 < 6) | ||
2029 | return -ENODEV; | ||
2030 | |||
2031 | x86_pmu = amd_pmu; | ||
2032 | |||
2033 | /* Events are common for all AMDs */ | ||
2034 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
2035 | sizeof(hw_cache_event_ids)); | ||
2036 | |||
2037 | return 0; | ||
2038 | } | ||
2039 | |||
2040 | void __init init_hw_perf_events(void) | ||
2041 | { | ||
2042 | int err; | ||
2043 | |||
2044 | pr_info("Performance Events: "); | ||
2045 | |||
2046 | switch (boot_cpu_data.x86_vendor) { | ||
2047 | case X86_VENDOR_INTEL: | ||
2048 | err = intel_pmu_init(); | ||
2049 | break; | ||
2050 | case X86_VENDOR_AMD: | ||
2051 | err = amd_pmu_init(); | ||
2052 | break; | ||
2053 | default: | ||
2054 | return; | ||
2055 | } | ||
2056 | if (err != 0) { | ||
2057 | pr_cont("no PMU driver, software events only.\n"); | ||
2058 | return; | ||
2059 | } | ||
2060 | |||
2061 | pr_cont("%s PMU driver.\n", x86_pmu.name); | ||
2062 | |||
2063 | if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { | ||
2064 | WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", | ||
2065 | x86_pmu.num_events, X86_PMC_MAX_GENERIC); | ||
2066 | x86_pmu.num_events = X86_PMC_MAX_GENERIC; | ||
2067 | } | ||
2068 | perf_event_mask = (1 << x86_pmu.num_events) - 1; | ||
2069 | perf_max_events = x86_pmu.num_events; | ||
2070 | |||
2071 | if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { | ||
2072 | WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", | ||
2073 | x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); | ||
2074 | x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; | ||
2075 | } | ||
2076 | |||
2077 | perf_event_mask |= | ||
2078 | ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; | ||
2079 | x86_pmu.intel_ctrl = perf_event_mask; | ||
2080 | |||
2081 | perf_events_lapic_init(); | ||
2082 | register_die_notifier(&perf_event_nmi_notifier); | ||
2083 | |||
2084 | pr_info("... version: %d\n", x86_pmu.version); | ||
2085 | pr_info("... bit width: %d\n", x86_pmu.event_bits); | ||
2086 | pr_info("... generic events: %d\n", x86_pmu.num_events); | ||
2087 | pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); | ||
2088 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | ||
2089 | pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); | ||
2090 | pr_info("... event mask: %016Lx\n", perf_event_mask); | ||
2091 | } | ||
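A standalone recomputation of the perf_event_mask assembled above, assuming X86_PMC_IDX_FIXED is 32, i.e. generic counters occupy the low bits and fixed-purpose counters start at bit 32:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		const int num_events = 4, num_events_fixed = 3, idx_fixed = 32;
		uint64_t mask;

		mask  = (1ULL << num_events) - 1;			/* generic counters */
		mask |= ((1ULL << num_events_fixed) - 1) << idx_fixed;	/* fixed counters  */

		printf("perf_event_mask = %#llx\n", (unsigned long long)mask);
		/* -> 0x70000000f: bits 0-3 generic, bits 32-34 fixed */
		return 0;
	}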
2092 | |||
2093 | static inline void x86_pmu_read(struct perf_event *event) | ||
2094 | { | ||
2095 | x86_perf_event_update(event, &event->hw, event->hw.idx); | ||
2096 | } | ||
2097 | |||
2098 | static const struct pmu pmu = { | ||
2099 | .enable = x86_pmu_enable, | ||
2100 | .disable = x86_pmu_disable, | ||
2101 | .read = x86_pmu_read, | ||
2102 | .unthrottle = x86_pmu_unthrottle, | ||
2103 | }; | ||
2104 | |||
2105 | const struct pmu *hw_perf_event_init(struct perf_event *event) | ||
2106 | { | ||
2107 | int err; | ||
2108 | |||
2109 | err = __hw_perf_event_init(event); | ||
2110 | if (err) { | ||
2111 | if (event->destroy) | ||
2112 | event->destroy(event); | ||
2113 | return ERR_PTR(err); | ||
2114 | } | ||
2115 | |||
2116 | return &pmu; | ||
2117 | } | ||
2118 | |||
2119 | /* | ||
2120 | * callchain support | ||
2121 | */ | ||
2122 | |||
2123 | static inline | ||
2124 | void callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
2125 | { | ||
2126 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
2127 | entry->ip[entry->nr++] = ip; | ||
2128 | } | ||
2129 | |||
2130 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | ||
2131 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | ||
2132 | static DEFINE_PER_CPU(int, in_nmi_frame); | ||
2133 | |||
2134 | |||
2135 | static void | ||
2136 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
2137 | { | ||
2138 | /* Ignore warnings */ | ||
2139 | } | ||
2140 | |||
2141 | static void backtrace_warning(void *data, char *msg) | ||
2142 | { | ||
2143 | /* Ignore warnings */ | ||
2144 | } | ||
2145 | |||
2146 | static int backtrace_stack(void *data, char *name) | ||
2147 | { | ||
2148 | per_cpu(in_nmi_frame, smp_processor_id()) = | ||
2149 | x86_is_stack_id(NMI_STACK, name); | ||
2150 | |||
2151 | return 0; | ||
2152 | } | ||
2153 | |||
2154 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
2155 | { | ||
2156 | struct perf_callchain_entry *entry = data; | ||
2157 | |||
2158 | if (per_cpu(in_nmi_frame, smp_processor_id())) | ||
2159 | return; | ||
2160 | |||
2161 | if (reliable) | ||
2162 | callchain_store(entry, addr); | ||
2163 | } | ||
2164 | |||
2165 | static const struct stacktrace_ops backtrace_ops = { | ||
2166 | .warning = backtrace_warning, | ||
2167 | .warning_symbol = backtrace_warning_symbol, | ||
2168 | .stack = backtrace_stack, | ||
2169 | .address = backtrace_address, | ||
2170 | }; | ||
2171 | |||
2172 | #include "../dumpstack.h" | ||
2173 | |||
2174 | static void | ||
2175 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2176 | { | ||
2177 | callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
2178 | callchain_store(entry, regs->ip); | ||
2179 | |||
2180 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | ||
2181 | } | ||
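The entry produced above starts with a context marker, then the interrupted ip, then whatever return addresses dump_trace() reports through backtrace_address(). A standalone sketch of that ordering; the addresses are made up and the marker value of (u64)-128 for PERF_CONTEXT_KERNEL is an assumption stated here only for illustration:

	#include <stdio.h>
	#include <stdint.h>

	#define DEPTH 8
	#define CONTEXT_KERNEL ((uint64_t)-128)	/* assumed PERF_CONTEXT_KERNEL value */

	struct entry { uint64_t nr; uint64_t ip[DEPTH]; };

	static void store(struct entry *e, uint64_t ip)
	{
		if (e->nr < DEPTH)			/* same bound check as callchain_store() */
			e->ip[e->nr++] = ip;
	}

	int main(void)
	{
		struct entry e = { 0, { 0 } };
		uint64_t i;

		store(&e, CONTEXT_KERNEL);		/* 1) context marker */
		store(&e, 0xffffffff81000100ULL);	/* 2) interrupted regs->ip */
		store(&e, 0xffffffff81000200ULL);	/* 3) callers from the unwinder */

		for (i = 0; i < e.nr; i++)
			printf("ip[%llu] = %#llx\n", (unsigned long long)i,
			       (unsigned long long)e.ip[i]);
		return 0;
	}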
2182 | |||
2183 | /* | ||
2184 | * Best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context. | ||
2185 | */ | ||
2186 | static unsigned long | ||
2187 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
2188 | { | ||
2189 | unsigned long offset, addr = (unsigned long)from; | ||
2190 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
2191 | unsigned long size, len = 0; | ||
2192 | struct page *page; | ||
2193 | void *map; | ||
2194 | int ret; | ||
2195 | |||
2196 | do { | ||
2197 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
2198 | if (!ret) | ||
2199 | break; | ||
2200 | |||
2201 | offset = addr & (PAGE_SIZE - 1); | ||
2202 | size = min(PAGE_SIZE - offset, n - len); | ||
2203 | |||
2204 | map = kmap_atomic(page, type); | ||
2205 | memcpy(to, map+offset, size); | ||
2206 | kunmap_atomic(map, type); | ||
2207 | put_page(page); | ||
2208 | |||
2209 | len += size; | ||
2210 | to += size; | ||
2211 | addr += size; | ||
2212 | |||
2213 | } while (len < n); | ||
2214 | |||
2215 | return len; | ||
2216 | } | ||
2217 | |||
2218 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
2219 | { | ||
2220 | unsigned long bytes; | ||
2221 | |||
2222 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | ||
2223 | |||
2224 | return bytes == sizeof(*frame); | ||
2225 | } | ||
2226 | |||
2227 | static void | ||
2228 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2229 | { | ||
2230 | struct stack_frame frame; | ||
2231 | const void __user *fp; | ||
2232 | |||
2233 | if (!user_mode(regs)) | ||
2234 | regs = task_pt_regs(current); | ||
2235 | |||
2236 | fp = (void __user *)regs->bp; | ||
2237 | |||
2238 | callchain_store(entry, PERF_CONTEXT_USER); | ||
2239 | callchain_store(entry, regs->ip); | ||
2240 | |||
2241 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
2242 | frame.next_frame = NULL; | ||
2243 | frame.return_address = 0; | ||
2244 | |||
2245 | if (!copy_stack_frame(fp, &frame)) | ||
2246 | break; | ||
2247 | |||
2248 | if ((unsigned long)fp < regs->sp) | ||
2249 | break; | ||
2250 | |||
2251 | callchain_store(entry, frame.return_address); | ||
2252 | fp = frame.next_frame; | ||
2253 | } | ||
2254 | } | ||
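The loop above walks saved user frame pointers via struct stack_frame from ../dumpstack.h. A standalone sketch of that walk, assuming the usual two-field layout (saved frame pointer first, then return address) and using made-up addresses:

	#include <stdio.h>

	struct frame {
		struct frame *next_frame;	/* saved frame pointer of the caller */
		unsigned long return_address;	/* where this frame returns to */
	};

	int main(void)
	{
		/* Fake user stack, innermost frame last: outer <- middle <- inner. */
		struct frame outer  = { NULL,    0 };
		struct frame middle = { &outer,  0x400123 };
		struct frame inner  = { &middle, 0x400456 };
		const struct frame *fp = &inner;

		/* The kernel version also bounds-checks fp against regs->sp and
		 * copies each frame with copy_from_user_nmi(). */
		while (fp && fp->return_address) {
			printf("return address %#lx\n", fp->return_address);
			fp = fp->next_frame;
		}
		return 0;
	}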
2255 | |||
2256 | static void | ||
2257 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
2258 | { | ||
2259 | int is_user; | ||
2260 | |||
2261 | if (!regs) | ||
2262 | return; | ||
2263 | |||
2264 | is_user = user_mode(regs); | ||
2265 | |||
2266 | if (!current || current->pid == 0) | ||
2267 | return; | ||
2268 | |||
2269 | if (is_user && current->state != TASK_RUNNING) | ||
2270 | return; | ||
2271 | |||
2272 | if (!is_user) | ||
2273 | perf_callchain_kernel(regs, entry); | ||
2274 | |||
2275 | if (current->mm) | ||
2276 | perf_callchain_user(regs, entry); | ||
2277 | } | ||
2278 | |||
2279 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
2280 | { | ||
2281 | struct perf_callchain_entry *entry; | ||
2282 | |||
2283 | if (in_nmi()) | ||
2284 | entry = &__get_cpu_var(pmc_nmi_entry); | ||
2285 | else | ||
2286 | entry = &__get_cpu_var(pmc_irq_entry); | ||
2287 | |||
2288 | entry->nr = 0; | ||
2289 | |||
2290 | perf_do_callchain(regs, entry); | ||
2291 | |||
2292 | return entry; | ||
2293 | } | ||
2294 | |||
2295 | void hw_perf_event_setup_online(int cpu) | ||
2296 | { | ||
2297 | init_debug_store_on_cpu(cpu); | ||
2298 | } | ||