author	Ingo Molnar <mingo@elte.hu>	2009-09-21 06:02:48 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-09-21 08:28:04 -0400
commit	cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 (patch)
tree	81f98a3ab46c589792057fe2392c1e10f8ad7893 /arch/x86/kernel/cpu/perf_event.c
parent	dfc65094d0313cc48969fa60bcf33d693aeb05a7 (diff)
perf: Do the big rename: Performance Counters -> Performance Events
Bye-bye Performance Counters, welcome Performance Events!

In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility.

Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate.

All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion)

The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well.

Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename.

User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.)

This patch has been generated via the following script:

  FILES=$(find * -type f | grep -vE 'oprofile|[^K]config')

  sed -i \
    -e 's/PERF_EVENT_/PERF_RECORD_/g' \
    -e 's/PERF_COUNTER/PERF_EVENT/g' \
    -e 's/perf_counter/perf_event/g' \
    -e 's/nb_counters/nb_events/g' \
    -e 's/swcounter/swevent/g' \
    -e 's/tpcounter_event/tp_event/g' \
    $FILES

  for N in $(find . -name perf_counter.[ch]); do
    M=$(echo $N | sed 's/perf_counter/perf_event/g')
    mv $N $M
  done

  FILES=$(find . -name perf_event.*)

  sed -i \
    -e 's/COUNTER_MASK/REG_MASK/g' \
    -e 's/COUNTER/EVENT/g' \
    -e 's/\<event\>/event_id/g' \
    -e 's/counter/event/g' \
    -e 's/Counter/Event/g' \
    $FILES

... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window.

Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch.

( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. )

Suggested-by: Stephane Eranian <eranian@google.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Paul Mackerras <paulus@samba.org>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: David Howells <dhowells@redhat.com>
Cc: Kyle McMartin <kyle@mcmartin.ca>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <linux-arch@vger.kernel.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
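As a rough illustration of what the two sed passes above do (the helper below is hypothetical and not part of the patch), consider a fragment that starts out in what used to be arch/x86/kernel/cpu/perf_counter.c:

  static void note_overflow(struct perf_counter *counter)
  {
          atomic64_inc(&counter->count);
  }

The first, tree-wide pass rewrites perf_counter to perf_event, the file is then moved to perf_event.c, and the second pass (restricted to the renamed perf_event.* files) applies the looser counter -> event substitution:

  static void note_overflow(struct perf_event *event)
  {
          atomic64_inc(&event->count);
  }

Limiting the second sed list to perf_event.* is what keeps the aggressive 's/counter/event/g' rule from touching unrelated uses of the word elsewhere in the tree.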
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c | 2298
1 files changed, 2298 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
new file mode 100644
index 000000000000..0d03629fb1a5
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -0,0 +1,2298 @@
1/*
2 * Performance events x86 architecture code
3 *
4 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
6 * Copyright (C) 2009 Jaswinder Singh Rajput
7 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
8 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
9 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
10 *
11 * For licencing details see kernel-base/COPYING
12 */
13
14#include <linux/perf_event.h>
15#include <linux/capability.h>
16#include <linux/notifier.h>
17#include <linux/hardirq.h>
18#include <linux/kprobes.h>
19#include <linux/module.h>
20#include <linux/kdebug.h>
21#include <linux/sched.h>
22#include <linux/uaccess.h>
23#include <linux/highmem.h>
24#include <linux/cpu.h>
25
26#include <asm/apic.h>
27#include <asm/stacktrace.h>
28#include <asm/nmi.h>
29
30static u64 perf_event_mask __read_mostly;
31
32/* The maximal number of PEBS events: */
33#define MAX_PEBS_EVENTS 4
34
35/* The size of a BTS record in bytes: */
36#define BTS_RECORD_SIZE 24
37
38/* The size of a per-cpu BTS buffer in bytes: */
39#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048)
40
41/* The BTS overflow threshold in bytes from the end of the buffer: */
42#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128)
43
44
45/*
46 * Bits in the debugctlmsr controlling branch tracing.
47 */
48#define X86_DEBUGCTL_TR (1 << 6)
49#define X86_DEBUGCTL_BTS (1 << 7)
50#define X86_DEBUGCTL_BTINT (1 << 8)
51#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9)
52#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10)
53
54/*
55 * A debug store configuration.
56 *
57 * We only support architectures that use 64bit fields.
58 */
59struct debug_store {
60 u64 bts_buffer_base;
61 u64 bts_index;
62 u64 bts_absolute_maximum;
63 u64 bts_interrupt_threshold;
64 u64 pebs_buffer_base;
65 u64 pebs_index;
66 u64 pebs_absolute_maximum;
67 u64 pebs_interrupt_threshold;
68 u64 pebs_event_reset[MAX_PEBS_EVENTS];
69};
70
71struct cpu_hw_events {
72 struct perf_event *events[X86_PMC_IDX_MAX];
73 unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
74 unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
75 unsigned long interrupts;
76 int enabled;
77 struct debug_store *ds;
78};
79
80/*
81 * struct x86_pmu - generic x86 pmu
82 */
83struct x86_pmu {
84 const char *name;
85 int version;
86 int (*handle_irq)(struct pt_regs *);
87 void (*disable_all)(void);
88 void (*enable_all)(void);
89 void (*enable)(struct hw_perf_event *, int);
90 void (*disable)(struct hw_perf_event *, int);
91 unsigned eventsel;
92 unsigned perfctr;
93 u64 (*event_map)(int);
94 u64 (*raw_event)(u64);
95 int max_events;
96 int num_events;
97 int num_events_fixed;
98 int event_bits;
99 u64 event_mask;
100 int apic;
101 u64 max_period;
102 u64 intel_ctrl;
103 void (*enable_bts)(u64 config);
104 void (*disable_bts)(void);
105};
106
107static struct x86_pmu x86_pmu __read_mostly;
108
109static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = {
110 .enabled = 1,
111};
112
113/*
114 * Not sure about some of these
115 */
116static const u64 p6_perfmon_event_map[] =
117{
118 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
119 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
120 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
121 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
122 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
123 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
124 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
125};
126
127static u64 p6_pmu_event_map(int hw_event)
128{
129 return p6_perfmon_event_map[hw_event];
130}
131
132/*
133 * Event setting that is specified not to count anything.
134 * We use this to effectively disable a counter.
135 *
136 * L2_RQSTS with 0 MESI unit mask.
137 */
138#define P6_NOP_EVENT 0x0000002EULL
139
140static u64 p6_pmu_raw_event(u64 hw_event)
141{
142#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL
143#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL
144#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL
145#define P6_EVNTSEL_INV_MASK 0x00800000ULL
146#define P6_EVNTSEL_REG_MASK 0xFF000000ULL
147
148#define P6_EVNTSEL_MASK \
149 (P6_EVNTSEL_EVENT_MASK | \
150 P6_EVNTSEL_UNIT_MASK | \
151 P6_EVNTSEL_EDGE_MASK | \
152 P6_EVNTSEL_INV_MASK | \
153 P6_EVNTSEL_REG_MASK)
154
155 return hw_event & P6_EVNTSEL_MASK;
156}
157
158
159/*
160 * Intel PerfMon v3. Used on Core2 and later.
161 */
162static const u64 intel_perfmon_event_map[] =
163{
164 [PERF_COUNT_HW_CPU_CYCLES] = 0x003c,
165 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
166 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e,
167 [PERF_COUNT_HW_CACHE_MISSES] = 0x412e,
168 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
169 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
170 [PERF_COUNT_HW_BUS_CYCLES] = 0x013c,
171};
172
173static u64 intel_pmu_event_map(int hw_event)
174{
175 return intel_perfmon_event_map[hw_event];
176}
177
178/*
179 * Generalized hw caching related hw_event table, filled
180 * in on a per model basis. A value of 0 means
181 * 'not supported', -1 means 'hw_event makes no sense on
182 * this CPU', any other value means the raw hw_event
183 * ID.
184 */
185
186#define C(x) PERF_COUNT_HW_CACHE_##x
187
188static u64 __read_mostly hw_cache_event_ids
189 [PERF_COUNT_HW_CACHE_MAX]
190 [PERF_COUNT_HW_CACHE_OP_MAX]
191 [PERF_COUNT_HW_CACHE_RESULT_MAX];
192
193static const u64 nehalem_hw_cache_event_ids
194 [PERF_COUNT_HW_CACHE_MAX]
195 [PERF_COUNT_HW_CACHE_OP_MAX]
196 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
197{
198 [ C(L1D) ] = {
199 [ C(OP_READ) ] = {
200 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
201 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
202 },
203 [ C(OP_WRITE) ] = {
204 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
205 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
206 },
207 [ C(OP_PREFETCH) ] = {
208 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */
209 [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */
210 },
211 },
212 [ C(L1I ) ] = {
213 [ C(OP_READ) ] = {
214 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
215 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
216 },
217 [ C(OP_WRITE) ] = {
218 [ C(RESULT_ACCESS) ] = -1,
219 [ C(RESULT_MISS) ] = -1,
220 },
221 [ C(OP_PREFETCH) ] = {
222 [ C(RESULT_ACCESS) ] = 0x0,
223 [ C(RESULT_MISS) ] = 0x0,
224 },
225 },
226 [ C(LL ) ] = {
227 [ C(OP_READ) ] = {
228 [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */
229 [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */
230 },
231 [ C(OP_WRITE) ] = {
232 [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */
233 [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */
234 },
235 [ C(OP_PREFETCH) ] = {
236 [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */
237 [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */
238 },
239 },
240 [ C(DTLB) ] = {
241 [ C(OP_READ) ] = {
242 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
243 [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */
244 },
245 [ C(OP_WRITE) ] = {
246 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
247 [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */
248 },
249 [ C(OP_PREFETCH) ] = {
250 [ C(RESULT_ACCESS) ] = 0x0,
251 [ C(RESULT_MISS) ] = 0x0,
252 },
253 },
254 [ C(ITLB) ] = {
255 [ C(OP_READ) ] = {
256 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */
257 [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */
258 },
259 [ C(OP_WRITE) ] = {
260 [ C(RESULT_ACCESS) ] = -1,
261 [ C(RESULT_MISS) ] = -1,
262 },
263 [ C(OP_PREFETCH) ] = {
264 [ C(RESULT_ACCESS) ] = -1,
265 [ C(RESULT_MISS) ] = -1,
266 },
267 },
268 [ C(BPU ) ] = {
269 [ C(OP_READ) ] = {
270 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
271 [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */
272 },
273 [ C(OP_WRITE) ] = {
274 [ C(RESULT_ACCESS) ] = -1,
275 [ C(RESULT_MISS) ] = -1,
276 },
277 [ C(OP_PREFETCH) ] = {
278 [ C(RESULT_ACCESS) ] = -1,
279 [ C(RESULT_MISS) ] = -1,
280 },
281 },
282};
283
284static const u64 core2_hw_cache_event_ids
285 [PERF_COUNT_HW_CACHE_MAX]
286 [PERF_COUNT_HW_CACHE_OP_MAX]
287 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
288{
289 [ C(L1D) ] = {
290 [ C(OP_READ) ] = {
291 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */
292 [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */
293 },
294 [ C(OP_WRITE) ] = {
295 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */
296 [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */
297 },
298 [ C(OP_PREFETCH) ] = {
299 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */
300 [ C(RESULT_MISS) ] = 0,
301 },
302 },
303 [ C(L1I ) ] = {
304 [ C(OP_READ) ] = {
305 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */
306 [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */
307 },
308 [ C(OP_WRITE) ] = {
309 [ C(RESULT_ACCESS) ] = -1,
310 [ C(RESULT_MISS) ] = -1,
311 },
312 [ C(OP_PREFETCH) ] = {
313 [ C(RESULT_ACCESS) ] = 0,
314 [ C(RESULT_MISS) ] = 0,
315 },
316 },
317 [ C(LL ) ] = {
318 [ C(OP_READ) ] = {
319 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
320 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
321 },
322 [ C(OP_WRITE) ] = {
323 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
324 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
325 },
326 [ C(OP_PREFETCH) ] = {
327 [ C(RESULT_ACCESS) ] = 0,
328 [ C(RESULT_MISS) ] = 0,
329 },
330 },
331 [ C(DTLB) ] = {
332 [ C(OP_READ) ] = {
333 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */
334 [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */
335 },
336 [ C(OP_WRITE) ] = {
337 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */
338 [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */
339 },
340 [ C(OP_PREFETCH) ] = {
341 [ C(RESULT_ACCESS) ] = 0,
342 [ C(RESULT_MISS) ] = 0,
343 },
344 },
345 [ C(ITLB) ] = {
346 [ C(OP_READ) ] = {
347 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
348 [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */
349 },
350 [ C(OP_WRITE) ] = {
351 [ C(RESULT_ACCESS) ] = -1,
352 [ C(RESULT_MISS) ] = -1,
353 },
354 [ C(OP_PREFETCH) ] = {
355 [ C(RESULT_ACCESS) ] = -1,
356 [ C(RESULT_MISS) ] = -1,
357 },
358 },
359 [ C(BPU ) ] = {
360 [ C(OP_READ) ] = {
361 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
362 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
363 },
364 [ C(OP_WRITE) ] = {
365 [ C(RESULT_ACCESS) ] = -1,
366 [ C(RESULT_MISS) ] = -1,
367 },
368 [ C(OP_PREFETCH) ] = {
369 [ C(RESULT_ACCESS) ] = -1,
370 [ C(RESULT_MISS) ] = -1,
371 },
372 },
373};
374
375static const u64 atom_hw_cache_event_ids
376 [PERF_COUNT_HW_CACHE_MAX]
377 [PERF_COUNT_HW_CACHE_OP_MAX]
378 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
379{
380 [ C(L1D) ] = {
381 [ C(OP_READ) ] = {
382 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */
383 [ C(RESULT_MISS) ] = 0,
384 },
385 [ C(OP_WRITE) ] = {
386 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */
387 [ C(RESULT_MISS) ] = 0,
388 },
389 [ C(OP_PREFETCH) ] = {
390 [ C(RESULT_ACCESS) ] = 0x0,
391 [ C(RESULT_MISS) ] = 0,
392 },
393 },
394 [ C(L1I ) ] = {
395 [ C(OP_READ) ] = {
396 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */
397 [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */
398 },
399 [ C(OP_WRITE) ] = {
400 [ C(RESULT_ACCESS) ] = -1,
401 [ C(RESULT_MISS) ] = -1,
402 },
403 [ C(OP_PREFETCH) ] = {
404 [ C(RESULT_ACCESS) ] = 0,
405 [ C(RESULT_MISS) ] = 0,
406 },
407 },
408 [ C(LL ) ] = {
409 [ C(OP_READ) ] = {
410 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */
411 [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */
412 },
413 [ C(OP_WRITE) ] = {
414 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */
415 [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */
416 },
417 [ C(OP_PREFETCH) ] = {
418 [ C(RESULT_ACCESS) ] = 0,
419 [ C(RESULT_MISS) ] = 0,
420 },
421 },
422 [ C(DTLB) ] = {
423 [ C(OP_READ) ] = {
424 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */
425 [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */
426 },
427 [ C(OP_WRITE) ] = {
428 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */
429 [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */
430 },
431 [ C(OP_PREFETCH) ] = {
432 [ C(RESULT_ACCESS) ] = 0,
433 [ C(RESULT_MISS) ] = 0,
434 },
435 },
436 [ C(ITLB) ] = {
437 [ C(OP_READ) ] = {
438 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
439 [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */
440 },
441 [ C(OP_WRITE) ] = {
442 [ C(RESULT_ACCESS) ] = -1,
443 [ C(RESULT_MISS) ] = -1,
444 },
445 [ C(OP_PREFETCH) ] = {
446 [ C(RESULT_ACCESS) ] = -1,
447 [ C(RESULT_MISS) ] = -1,
448 },
449 },
450 [ C(BPU ) ] = {
451 [ C(OP_READ) ] = {
452 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
453 [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
454 },
455 [ C(OP_WRITE) ] = {
456 [ C(RESULT_ACCESS) ] = -1,
457 [ C(RESULT_MISS) ] = -1,
458 },
459 [ C(OP_PREFETCH) ] = {
460 [ C(RESULT_ACCESS) ] = -1,
461 [ C(RESULT_MISS) ] = -1,
462 },
463 },
464};
465
466static u64 intel_pmu_raw_event(u64 hw_event)
467{
468#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL
469#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL
470#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL
471#define CORE_EVNTSEL_INV_MASK 0x00800000ULL
472#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL
473
474#define CORE_EVNTSEL_MASK \
475 (CORE_EVNTSEL_EVENT_MASK | \
476 CORE_EVNTSEL_UNIT_MASK | \
477 CORE_EVNTSEL_EDGE_MASK | \
478 CORE_EVNTSEL_INV_MASK | \
479 CORE_EVNTSEL_REG_MASK)
480
481 return hw_event & CORE_EVNTSEL_MASK;
482}
483
484static const u64 amd_hw_cache_event_ids
485 [PERF_COUNT_HW_CACHE_MAX]
486 [PERF_COUNT_HW_CACHE_OP_MAX]
487 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
488{
489 [ C(L1D) ] = {
490 [ C(OP_READ) ] = {
491 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
492 [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */
493 },
494 [ C(OP_WRITE) ] = {
495 [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
496 [ C(RESULT_MISS) ] = 0,
497 },
498 [ C(OP_PREFETCH) ] = {
499 [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
500 [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */
501 },
502 },
503 [ C(L1I ) ] = {
504 [ C(OP_READ) ] = {
505 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
506 [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */
507 },
508 [ C(OP_WRITE) ] = {
509 [ C(RESULT_ACCESS) ] = -1,
510 [ C(RESULT_MISS) ] = -1,
511 },
512 [ C(OP_PREFETCH) ] = {
513 [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
514 [ C(RESULT_MISS) ] = 0,
515 },
516 },
517 [ C(LL ) ] = {
518 [ C(OP_READ) ] = {
519 [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
520 [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */
521 },
522 [ C(OP_WRITE) ] = {
523 [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
524 [ C(RESULT_MISS) ] = 0,
525 },
526 [ C(OP_PREFETCH) ] = {
527 [ C(RESULT_ACCESS) ] = 0,
528 [ C(RESULT_MISS) ] = 0,
529 },
530 },
531 [ C(DTLB) ] = {
532 [ C(OP_READ) ] = {
533 [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
534 [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */
535 },
536 [ C(OP_WRITE) ] = {
537 [ C(RESULT_ACCESS) ] = 0,
538 [ C(RESULT_MISS) ] = 0,
539 },
540 [ C(OP_PREFETCH) ] = {
541 [ C(RESULT_ACCESS) ] = 0,
542 [ C(RESULT_MISS) ] = 0,
543 },
544 },
545 [ C(ITLB) ] = {
546 [ C(OP_READ) ] = {
 547 [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
548 [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */
549 },
550 [ C(OP_WRITE) ] = {
551 [ C(RESULT_ACCESS) ] = -1,
552 [ C(RESULT_MISS) ] = -1,
553 },
554 [ C(OP_PREFETCH) ] = {
555 [ C(RESULT_ACCESS) ] = -1,
556 [ C(RESULT_MISS) ] = -1,
557 },
558 },
559 [ C(BPU ) ] = {
560 [ C(OP_READ) ] = {
561 [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
562 [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */
563 },
564 [ C(OP_WRITE) ] = {
565 [ C(RESULT_ACCESS) ] = -1,
566 [ C(RESULT_MISS) ] = -1,
567 },
568 [ C(OP_PREFETCH) ] = {
569 [ C(RESULT_ACCESS) ] = -1,
570 [ C(RESULT_MISS) ] = -1,
571 },
572 },
573};
574
575/*
576 * AMD Performance Monitor K7 and later.
577 */
578static const u64 amd_perfmon_event_map[] =
579{
580 [PERF_COUNT_HW_CPU_CYCLES] = 0x0076,
581 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
582 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080,
583 [PERF_COUNT_HW_CACHE_MISSES] = 0x0081,
584 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
585 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
586};
587
588static u64 amd_pmu_event_map(int hw_event)
589{
590 return amd_perfmon_event_map[hw_event];
591}
592
593static u64 amd_pmu_raw_event(u64 hw_event)
594{
595#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL
596#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL
597#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL
598#define K7_EVNTSEL_INV_MASK 0x000800000ULL
599#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL
600
601#define K7_EVNTSEL_MASK \
602 (K7_EVNTSEL_EVENT_MASK | \
603 K7_EVNTSEL_UNIT_MASK | \
604 K7_EVNTSEL_EDGE_MASK | \
605 K7_EVNTSEL_INV_MASK | \
606 K7_EVNTSEL_REG_MASK)
607
608 return hw_event & K7_EVNTSEL_MASK;
609}
610
611/*
612 * Propagate event elapsed time into the generic event.
613 * Can only be executed on the CPU where the event is active.
614 * Returns the delta events processed.
615 */
616static u64
617x86_perf_event_update(struct perf_event *event,
618 struct hw_perf_event *hwc, int idx)
619{
620 int shift = 64 - x86_pmu.event_bits;
621 u64 prev_raw_count, new_raw_count;
622 s64 delta;
623
624 if (idx == X86_PMC_IDX_FIXED_BTS)
625 return 0;
626
627 /*
628 * Careful: an NMI might modify the previous event value.
629 *
630 * Our tactic to handle this is to first atomically read and
631 * exchange a new raw count - then add that new-prev delta
632 * count to the generic event atomically:
633 */
634again:
635 prev_raw_count = atomic64_read(&hwc->prev_count);
636 rdmsrl(hwc->event_base + idx, new_raw_count);
637
638 if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
639 new_raw_count) != prev_raw_count)
640 goto again;
641
642 /*
643 * Now we have the new raw value and have updated the prev
644 * timestamp already. We can now calculate the elapsed delta
645 * (event-)time and add that to the generic event.
646 *
647 * Careful, not all hw sign-extends above the physical width
648 * of the count.
649 */
650 delta = (new_raw_count << shift) - (prev_raw_count << shift);
651 delta >>= shift;
652
653 atomic64_add(delta, &event->count);
654 atomic64_sub(delta, &hwc->period_left);
655
656 return new_raw_count;
657}
658
659static atomic_t active_events;
660static DEFINE_MUTEX(pmc_reserve_mutex);
661
662static bool reserve_pmc_hardware(void)
663{
664#ifdef CONFIG_X86_LOCAL_APIC
665 int i;
666
667 if (nmi_watchdog == NMI_LOCAL_APIC)
668 disable_lapic_nmi_watchdog();
669
670 for (i = 0; i < x86_pmu.num_events; i++) {
671 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
672 goto perfctr_fail;
673 }
674
675 for (i = 0; i < x86_pmu.num_events; i++) {
676 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
677 goto eventsel_fail;
678 }
679#endif
680
681 return true;
682
683#ifdef CONFIG_X86_LOCAL_APIC
684eventsel_fail:
685 for (i--; i >= 0; i--)
686 release_evntsel_nmi(x86_pmu.eventsel + i);
687
688 i = x86_pmu.num_events;
689
690perfctr_fail:
691 for (i--; i >= 0; i--)
692 release_perfctr_nmi(x86_pmu.perfctr + i);
693
694 if (nmi_watchdog == NMI_LOCAL_APIC)
695 enable_lapic_nmi_watchdog();
696
697 return false;
698#endif
699}
700
701static void release_pmc_hardware(void)
702{
703#ifdef CONFIG_X86_LOCAL_APIC
704 int i;
705
706 for (i = 0; i < x86_pmu.num_events; i++) {
707 release_perfctr_nmi(x86_pmu.perfctr + i);
708 release_evntsel_nmi(x86_pmu.eventsel + i);
709 }
710
711 if (nmi_watchdog == NMI_LOCAL_APIC)
712 enable_lapic_nmi_watchdog();
713#endif
714}
715
716static inline bool bts_available(void)
717{
718 return x86_pmu.enable_bts != NULL;
719}
720
721static inline void init_debug_store_on_cpu(int cpu)
722{
723 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
724
725 if (!ds)
726 return;
727
728 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
729 (u32)((u64)(unsigned long)ds),
730 (u32)((u64)(unsigned long)ds >> 32));
731}
732
733static inline void fini_debug_store_on_cpu(int cpu)
734{
735 if (!per_cpu(cpu_hw_events, cpu).ds)
736 return;
737
738 wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
739}
740
741static void release_bts_hardware(void)
742{
743 int cpu;
744
745 if (!bts_available())
746 return;
747
748 get_online_cpus();
749
750 for_each_online_cpu(cpu)
751 fini_debug_store_on_cpu(cpu);
752
753 for_each_possible_cpu(cpu) {
754 struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
755
756 if (!ds)
757 continue;
758
759 per_cpu(cpu_hw_events, cpu).ds = NULL;
760
761 kfree((void *)(unsigned long)ds->bts_buffer_base);
762 kfree(ds);
763 }
764
765 put_online_cpus();
766}
767
768static int reserve_bts_hardware(void)
769{
770 int cpu, err = 0;
771
772 if (!bts_available())
773 return 0;
774
775 get_online_cpus();
776
777 for_each_possible_cpu(cpu) {
778 struct debug_store *ds;
779 void *buffer;
780
781 err = -ENOMEM;
782 buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
783 if (unlikely(!buffer))
784 break;
785
786 ds = kzalloc(sizeof(*ds), GFP_KERNEL);
787 if (unlikely(!ds)) {
788 kfree(buffer);
789 break;
790 }
791
792 ds->bts_buffer_base = (u64)(unsigned long)buffer;
793 ds->bts_index = ds->bts_buffer_base;
794 ds->bts_absolute_maximum =
795 ds->bts_buffer_base + BTS_BUFFER_SIZE;
796 ds->bts_interrupt_threshold =
797 ds->bts_absolute_maximum - BTS_OVFL_TH;
798
799 per_cpu(cpu_hw_events, cpu).ds = ds;
800 err = 0;
801 }
802
803 if (err)
804 release_bts_hardware();
805 else {
806 for_each_online_cpu(cpu)
807 init_debug_store_on_cpu(cpu);
808 }
809
810 put_online_cpus();
811
812 return err;
813}
814
815static void hw_perf_event_destroy(struct perf_event *event)
816{
817 if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
818 release_pmc_hardware();
819 release_bts_hardware();
820 mutex_unlock(&pmc_reserve_mutex);
821 }
822}
823
824static inline int x86_pmu_initialized(void)
825{
826 return x86_pmu.handle_irq != NULL;
827}
828
829static inline int
830set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
831{
832 unsigned int cache_type, cache_op, cache_result;
833 u64 config, val;
834
835 config = attr->config;
836
837 cache_type = (config >> 0) & 0xff;
838 if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
839 return -EINVAL;
840
841 cache_op = (config >> 8) & 0xff;
842 if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
843 return -EINVAL;
844
845 cache_result = (config >> 16) & 0xff;
846 if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
847 return -EINVAL;
848
849 val = hw_cache_event_ids[cache_type][cache_op][cache_result];
850
851 if (val == 0)
852 return -ENOENT;
853
854 if (val == -1)
855 return -EINVAL;
856
857 hwc->config |= val;
858
859 return 0;
860}
861
862static void intel_pmu_enable_bts(u64 config)
863{
864 unsigned long debugctlmsr;
865
866 debugctlmsr = get_debugctlmsr();
867
868 debugctlmsr |= X86_DEBUGCTL_TR;
869 debugctlmsr |= X86_DEBUGCTL_BTS;
870 debugctlmsr |= X86_DEBUGCTL_BTINT;
871
872 if (!(config & ARCH_PERFMON_EVENTSEL_OS))
873 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
874
875 if (!(config & ARCH_PERFMON_EVENTSEL_USR))
876 debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
877
878 update_debugctlmsr(debugctlmsr);
879}
880
881static void intel_pmu_disable_bts(void)
882{
883 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
884 unsigned long debugctlmsr;
885
886 if (!cpuc->ds)
887 return;
888
889 debugctlmsr = get_debugctlmsr();
890
891 debugctlmsr &=
892 ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
893 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
894
895 update_debugctlmsr(debugctlmsr);
896}
897
898/*
899 * Setup the hardware configuration for a given attr_type
900 */
901static int __hw_perf_event_init(struct perf_event *event)
902{
903 struct perf_event_attr *attr = &event->attr;
904 struct hw_perf_event *hwc = &event->hw;
905 u64 config;
906 int err;
907
908 if (!x86_pmu_initialized())
909 return -ENODEV;
910
911 err = 0;
912 if (!atomic_inc_not_zero(&active_events)) {
913 mutex_lock(&pmc_reserve_mutex);
914 if (atomic_read(&active_events) == 0) {
915 if (!reserve_pmc_hardware())
916 err = -EBUSY;
917 else
918 err = reserve_bts_hardware();
919 }
920 if (!err)
921 atomic_inc(&active_events);
922 mutex_unlock(&pmc_reserve_mutex);
923 }
924 if (err)
925 return err;
926
927 event->destroy = hw_perf_event_destroy;
928
929 /*
930 * Generate PMC IRQs:
931 * (keep 'enabled' bit clear for now)
932 */
933 hwc->config = ARCH_PERFMON_EVENTSEL_INT;
934
935 /*
936 * Count user and OS events unless requested not to.
937 */
938 if (!attr->exclude_user)
939 hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
940 if (!attr->exclude_kernel)
941 hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
942
943 if (!hwc->sample_period) {
944 hwc->sample_period = x86_pmu.max_period;
945 hwc->last_period = hwc->sample_period;
946 atomic64_set(&hwc->period_left, hwc->sample_period);
947 } else {
948 /*
949 * If we have a PMU initialized but no APIC
950 * interrupts, we cannot sample hardware
951 * events (user-space has to fall back and
952 * sample via a hrtimer based software event):
953 */
954 if (!x86_pmu.apic)
955 return -EOPNOTSUPP;
956 }
957
958 /*
959 * Raw hw_event type provide the config in the hw_event structure
960 */
961 if (attr->type == PERF_TYPE_RAW) {
962 hwc->config |= x86_pmu.raw_event(attr->config);
963 return 0;
964 }
965
966 if (attr->type == PERF_TYPE_HW_CACHE)
967 return set_ext_hw_attr(hwc, attr);
968
969 if (attr->config >= x86_pmu.max_events)
970 return -EINVAL;
971
972 /*
973 * The generic map:
974 */
975 config = x86_pmu.event_map(attr->config);
976
977 if (config == 0)
978 return -ENOENT;
979
980 if (config == -1LL)
981 return -EINVAL;
982
983 /*
984 * Branch tracing:
985 */
986 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
987 (hwc->sample_period == 1)) {
988 /* BTS is not supported by this architecture. */
989 if (!bts_available())
990 return -EOPNOTSUPP;
991
992 /* BTS is currently only allowed for user-mode. */
993 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
994 return -EOPNOTSUPP;
995 }
996
997 hwc->config |= config;
998
999 return 0;
1000}
1001
1002static void p6_pmu_disable_all(void)
1003{
1004 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1005 u64 val;
1006
1007 if (!cpuc->enabled)
1008 return;
1009
1010 cpuc->enabled = 0;
1011 barrier();
1012
1013 /* p6 only has one enable register */
1014 rdmsrl(MSR_P6_EVNTSEL0, val);
1015 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1016 wrmsrl(MSR_P6_EVNTSEL0, val);
1017}
1018
1019static void intel_pmu_disable_all(void)
1020{
1021 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1022
1023 if (!cpuc->enabled)
1024 return;
1025
1026 cpuc->enabled = 0;
1027 barrier();
1028
1029 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1030
1031 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1032 intel_pmu_disable_bts();
1033}
1034
1035static void amd_pmu_disable_all(void)
1036{
1037 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1038 int idx;
1039
1040 if (!cpuc->enabled)
1041 return;
1042
1043 cpuc->enabled = 0;
1044 /*
1045 * ensure we write the disable before we start disabling the
1046 * events proper, so that amd_pmu_enable_event() does the
1047 * right thing.
1048 */
1049 barrier();
1050
1051 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1052 u64 val;
1053
1054 if (!test_bit(idx, cpuc->active_mask))
1055 continue;
1056 rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
1057 if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE))
1058 continue;
1059 val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
1060 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1061 }
1062}
1063
1064void hw_perf_disable(void)
1065{
1066 if (!x86_pmu_initialized())
1067 return;
1068 return x86_pmu.disable_all();
1069}
1070
1071static void p6_pmu_enable_all(void)
1072{
1073 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1074 unsigned long val;
1075
1076 if (cpuc->enabled)
1077 return;
1078
1079 cpuc->enabled = 1;
1080 barrier();
1081
1082 /* p6 only has one enable register */
1083 rdmsrl(MSR_P6_EVNTSEL0, val);
1084 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1085 wrmsrl(MSR_P6_EVNTSEL0, val);
1086}
1087
1088static void intel_pmu_enable_all(void)
1089{
1090 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1091
1092 if (cpuc->enabled)
1093 return;
1094
1095 cpuc->enabled = 1;
1096 barrier();
1097
1098 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
1099
1100 if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1101 struct perf_event *event =
1102 cpuc->events[X86_PMC_IDX_FIXED_BTS];
1103
1104 if (WARN_ON_ONCE(!event))
1105 return;
1106
1107 intel_pmu_enable_bts(event->hw.config);
1108 }
1109}
1110
1111static void amd_pmu_enable_all(void)
1112{
1113 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1114 int idx;
1115
1116 if (cpuc->enabled)
1117 return;
1118
1119 cpuc->enabled = 1;
1120 barrier();
1121
1122 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1123 struct perf_event *event = cpuc->events[idx];
1124 u64 val;
1125
1126 if (!test_bit(idx, cpuc->active_mask))
1127 continue;
1128
1129 val = event->hw.config;
1130 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1131 wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
1132 }
1133}
1134
1135void hw_perf_enable(void)
1136{
1137 if (!x86_pmu_initialized())
1138 return;
1139 x86_pmu.enable_all();
1140}
1141
1142static inline u64 intel_pmu_get_status(void)
1143{
1144 u64 status;
1145
1146 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1147
1148 return status;
1149}
1150
1151static inline void intel_pmu_ack_status(u64 ack)
1152{
1153 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
1154}
1155
1156static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1157{
1158 (void)checking_wrmsrl(hwc->config_base + idx,
1159 hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
1160}
1161
1162static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1163{
1164 (void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
1165}
1166
1167static inline void
1168intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx)
1169{
1170 int idx = __idx - X86_PMC_IDX_FIXED;
1171 u64 ctrl_val, mask;
1172
1173 mask = 0xfULL << (idx * 4);
1174
1175 rdmsrl(hwc->config_base, ctrl_val);
1176 ctrl_val &= ~mask;
1177 (void)checking_wrmsrl(hwc->config_base, ctrl_val);
1178}
1179
1180static inline void
1181p6_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1182{
1183 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1184 u64 val = P6_NOP_EVENT;
1185
1186 if (cpuc->enabled)
1187 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1188
1189 (void)checking_wrmsrl(hwc->config_base + idx, val);
1190}
1191
1192static inline void
1193intel_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1194{
1195 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1196 intel_pmu_disable_bts();
1197 return;
1198 }
1199
1200 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1201 intel_pmu_disable_fixed(hwc, idx);
1202 return;
1203 }
1204
1205 x86_pmu_disable_event(hwc, idx);
1206}
1207
1208static inline void
1209amd_pmu_disable_event(struct hw_perf_event *hwc, int idx)
1210{
1211 x86_pmu_disable_event(hwc, idx);
1212}
1213
1214static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1215
1216/*
1217 * Set the next IRQ period, based on the hwc->period_left value.
1218 * To be called with the event disabled in hw:
1219 */
1220static int
1221x86_perf_event_set_period(struct perf_event *event,
1222 struct hw_perf_event *hwc, int idx)
1223{
1224 s64 left = atomic64_read(&hwc->period_left);
1225 s64 period = hwc->sample_period;
1226 int err, ret = 0;
1227
1228 if (idx == X86_PMC_IDX_FIXED_BTS)
1229 return 0;
1230
1231 /*
 1232 * If we are way outside a reasonable range then just skip forward:
1233 */
1234 if (unlikely(left <= -period)) {
1235 left = period;
1236 atomic64_set(&hwc->period_left, left);
1237 hwc->last_period = period;
1238 ret = 1;
1239 }
1240
1241 if (unlikely(left <= 0)) {
1242 left += period;
1243 atomic64_set(&hwc->period_left, left);
1244 hwc->last_period = period;
1245 ret = 1;
1246 }
1247 /*
 1248 * Quirk: certain CPUs don't like it if just 1 hw_event is left:
1249 */
1250 if (unlikely(left < 2))
1251 left = 2;
1252
1253 if (left > x86_pmu.max_period)
1254 left = x86_pmu.max_period;
1255
1256 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1257
1258 /*
1259 * The hw event starts counting from this event offset,
 1260 * mark it to be able to extract future deltas:
1261 */
1262 atomic64_set(&hwc->prev_count, (u64)-left);
1263
1264 err = checking_wrmsrl(hwc->event_base + idx,
1265 (u64)(-left) & x86_pmu.event_mask);
1266
1267 perf_event_update_userpage(event);
1268
1269 return ret;
1270}
1271
1272static inline void
1273intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx)
1274{
1275 int idx = __idx - X86_PMC_IDX_FIXED;
1276 u64 ctrl_val, bits, mask;
1277 int err;
1278
1279 /*
1280 * Enable IRQ generation (0x8),
1281 * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
1282 * if requested:
1283 */
1284 bits = 0x8ULL;
1285 if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
1286 bits |= 0x2;
1287 if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
1288 bits |= 0x1;
1289 bits <<= (idx * 4);
1290 mask = 0xfULL << (idx * 4);
1291
1292 rdmsrl(hwc->config_base, ctrl_val);
1293 ctrl_val &= ~mask;
1294 ctrl_val |= bits;
1295 err = checking_wrmsrl(hwc->config_base, ctrl_val);
1296}
1297
1298static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1299{
1300 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1301 u64 val;
1302
1303 val = hwc->config;
1304 if (cpuc->enabled)
1305 val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
1306
1307 (void)checking_wrmsrl(hwc->config_base + idx, val);
1308}
1309
1310
1311static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1312{
1313 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) {
1314 if (!__get_cpu_var(cpu_hw_events).enabled)
1315 return;
1316
1317 intel_pmu_enable_bts(hwc->config);
1318 return;
1319 }
1320
1321 if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
1322 intel_pmu_enable_fixed(hwc, idx);
1323 return;
1324 }
1325
1326 x86_pmu_enable_event(hwc, idx);
1327}
1328
1329static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx)
1330{
1331 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1332
1333 if (cpuc->enabled)
1334 x86_pmu_enable_event(hwc, idx);
1335}
1336
1337static int
1338fixed_mode_idx(struct perf_event *event, struct hw_perf_event *hwc)
1339{
1340 unsigned int hw_event;
1341
1342 hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK;
1343
1344 if (unlikely((hw_event ==
1345 x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
1346 (hwc->sample_period == 1)))
1347 return X86_PMC_IDX_FIXED_BTS;
1348
1349 if (!x86_pmu.num_events_fixed)
1350 return -1;
1351
1352 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS)))
1353 return X86_PMC_IDX_FIXED_INSTRUCTIONS;
1354 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES)))
1355 return X86_PMC_IDX_FIXED_CPU_CYCLES;
1356 if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES)))
1357 return X86_PMC_IDX_FIXED_BUS_CYCLES;
1358
1359 return -1;
1360}
1361
1362/*
1363 * Find a PMC slot for the freshly enabled / scheduled in event:
1364 */
1365static int x86_pmu_enable(struct perf_event *event)
1366{
1367 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1368 struct hw_perf_event *hwc = &event->hw;
1369 int idx;
1370
1371 idx = fixed_mode_idx(event, hwc);
1372 if (idx == X86_PMC_IDX_FIXED_BTS) {
1373 /* BTS is already occupied. */
1374 if (test_and_set_bit(idx, cpuc->used_mask))
1375 return -EAGAIN;
1376
1377 hwc->config_base = 0;
1378 hwc->event_base = 0;
1379 hwc->idx = idx;
1380 } else if (idx >= 0) {
1381 /*
1382 * Try to get the fixed event, if that is already taken
1383 * then try to get a generic event:
1384 */
1385 if (test_and_set_bit(idx, cpuc->used_mask))
1386 goto try_generic;
1387
1388 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
1389 /*
1390 * We set it so that event_base + idx in wrmsr/rdmsr maps to
1391 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
1392 */
1393 hwc->event_base =
1394 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
1395 hwc->idx = idx;
1396 } else {
1397 idx = hwc->idx;
1398 /* Try to get the previous generic event again */
1399 if (test_and_set_bit(idx, cpuc->used_mask)) {
1400try_generic:
1401 idx = find_first_zero_bit(cpuc->used_mask,
1402 x86_pmu.num_events);
1403 if (idx == x86_pmu.num_events)
1404 return -EAGAIN;
1405
1406 set_bit(idx, cpuc->used_mask);
1407 hwc->idx = idx;
1408 }
1409 hwc->config_base = x86_pmu.eventsel;
1410 hwc->event_base = x86_pmu.perfctr;
1411 }
1412
1413 perf_events_lapic_init();
1414
1415 x86_pmu.disable(hwc, idx);
1416
1417 cpuc->events[idx] = event;
1418 set_bit(idx, cpuc->active_mask);
1419
1420 x86_perf_event_set_period(event, hwc, idx);
1421 x86_pmu.enable(hwc, idx);
1422
1423 perf_event_update_userpage(event);
1424
1425 return 0;
1426}
1427
1428static void x86_pmu_unthrottle(struct perf_event *event)
1429{
1430 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1431 struct hw_perf_event *hwc = &event->hw;
1432
1433 if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
1434 cpuc->events[hwc->idx] != event))
1435 return;
1436
1437 x86_pmu.enable(hwc, hwc->idx);
1438}
1439
1440void perf_event_print_debug(void)
1441{
1442 u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
1443 struct cpu_hw_events *cpuc;
1444 unsigned long flags;
1445 int cpu, idx;
1446
1447 if (!x86_pmu.num_events)
1448 return;
1449
1450 local_irq_save(flags);
1451
1452 cpu = smp_processor_id();
1453 cpuc = &per_cpu(cpu_hw_events, cpu);
1454
1455 if (x86_pmu.version >= 2) {
1456 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
1457 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
1458 rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
1459 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
1460
1461 pr_info("\n");
1462 pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
1463 pr_info("CPU#%d: status: %016llx\n", cpu, status);
1464 pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
1465 pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
1466 }
1467 pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask);
1468
1469 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1472
1473 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1474
1475 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1476 cpu, idx, pmc_ctrl);
1477 pr_info("CPU#%d: gen-PMC%d count: %016llx\n",
1478 cpu, idx, pmc_count);
1479 pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
1480 cpu, idx, prev_left);
1481 }
1482 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1483 rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
1484
1485 pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
1486 cpu, idx, pmc_count);
1487 }
1488 local_irq_restore(flags);
1489}
1490
1491static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc)
1492{
1493 struct debug_store *ds = cpuc->ds;
1494 struct bts_record {
1495 u64 from;
1496 u64 to;
1497 u64 flags;
1498 };
1499 struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
1500 struct bts_record *at, *top;
1501 struct perf_output_handle handle;
1502 struct perf_event_header header;
1503 struct perf_sample_data data;
1504 struct pt_regs regs;
1505
1506 if (!event)
1507 return;
1508
1509 if (!ds)
1510 return;
1511
1512 at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
1513 top = (struct bts_record *)(unsigned long)ds->bts_index;
1514
1515 if (top <= at)
1516 return;
1517
1518 ds->bts_index = ds->bts_buffer_base;
1519
1520
1521 data.period = event->hw.last_period;
1522 data.addr = 0;
1523 regs.ip = 0;
1524
1525 /*
1526 * Prepare a generic sample, i.e. fill in the invariant fields.
1527 * We will overwrite the from and to address before we output
1528 * the sample.
1529 */
1530 perf_prepare_sample(&header, &data, event, &regs);
1531
1532 if (perf_output_begin(&handle, event,
1533 header.size * (top - at), 1, 1))
1534 return;
1535
1536 for (; at < top; at++) {
1537 data.ip = at->from;
1538 data.addr = at->to;
1539
1540 perf_output_sample(&handle, &header, &data, event);
1541 }
1542
1543 perf_output_end(&handle);
1544
1545 /* There's new data available. */
1546 event->hw.interrupts++;
1547 event->pending_kill = POLL_IN;
1548}
1549
1550static void x86_pmu_disable(struct perf_event *event)
1551{
1552 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1553 struct hw_perf_event *hwc = &event->hw;
1554 int idx = hwc->idx;
1555
1556 /*
1557 * Must be done before we disable, otherwise the nmi handler
1558 * could reenable again:
1559 */
1560 clear_bit(idx, cpuc->active_mask);
1561 x86_pmu.disable(hwc, idx);
1562
1563 /*
1564 * Make sure the cleared pointer becomes visible before we
1565 * (potentially) free the event:
1566 */
1567 barrier();
1568
1569 /*
 1570 * Drain the remaining delta count out of an event
1571 * that we are disabling:
1572 */
1573 x86_perf_event_update(event, hwc, idx);
1574
1575 /* Drain the remaining BTS records. */
1576 if (unlikely(idx == X86_PMC_IDX_FIXED_BTS))
1577 intel_pmu_drain_bts_buffer(cpuc);
1578
1579 cpuc->events[idx] = NULL;
1580 clear_bit(idx, cpuc->used_mask);
1581
1582 perf_event_update_userpage(event);
1583}
1584
1585/*
1586 * Save and restart an expired event. Called by NMI contexts,
1587 * so it has to be careful about preempting normal event ops:
1588 */
1589static int intel_pmu_save_and_restart(struct perf_event *event)
1590{
1591 struct hw_perf_event *hwc = &event->hw;
1592 int idx = hwc->idx;
1593 int ret;
1594
1595 x86_perf_event_update(event, hwc, idx);
1596 ret = x86_perf_event_set_period(event, hwc, idx);
1597
1598 if (event->state == PERF_EVENT_STATE_ACTIVE)
1599 intel_pmu_enable_event(hwc, idx);
1600
1601 return ret;
1602}
1603
1604static void intel_pmu_reset(void)
1605{
1606 struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
1607 unsigned long flags;
1608 int idx;
1609
1610 if (!x86_pmu.num_events)
1611 return;
1612
1613 local_irq_save(flags);
1614
1615 printk("clearing PMU state on CPU#%d\n", smp_processor_id());
1616
1617 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1618 checking_wrmsrl(x86_pmu.eventsel + idx, 0ull);
1619 checking_wrmsrl(x86_pmu.perfctr + idx, 0ull);
1620 }
1621 for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
1622 checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
1623 }
1624 if (ds)
1625 ds->bts_index = ds->bts_buffer_base;
1626
1627 local_irq_restore(flags);
1628}
1629
1630static int p6_pmu_handle_irq(struct pt_regs *regs)
1631{
1632 struct perf_sample_data data;
1633 struct cpu_hw_events *cpuc;
1634 struct perf_event *event;
1635 struct hw_perf_event *hwc;
1636 int idx, handled = 0;
1637 u64 val;
1638
1639 data.addr = 0;
1640
1641 cpuc = &__get_cpu_var(cpu_hw_events);
1642
1643 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1644 if (!test_bit(idx, cpuc->active_mask))
1645 continue;
1646
1647 event = cpuc->events[idx];
1648 hwc = &event->hw;
1649
1650 val = x86_perf_event_update(event, hwc, idx);
1651 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1652 continue;
1653
1654 /*
1655 * event overflow
1656 */
1657 handled = 1;
1658 data.period = event->hw.last_period;
1659
1660 if (!x86_perf_event_set_period(event, hwc, idx))
1661 continue;
1662
1663 if (perf_event_overflow(event, 1, &data, regs))
1664 p6_pmu_disable_event(hwc, idx);
1665 }
1666
1667 if (handled)
1668 inc_irq_stat(apic_perf_irqs);
1669
1670 return handled;
1671}
1672
1673/*
1674 * This handler is triggered by the local APIC, so the APIC IRQ handling
1675 * rules apply:
1676 */
1677static int intel_pmu_handle_irq(struct pt_regs *regs)
1678{
1679 struct perf_sample_data data;
1680 struct cpu_hw_events *cpuc;
1681 int bit, loops;
1682 u64 ack, status;
1683
1684 data.addr = 0;
1685
1686 cpuc = &__get_cpu_var(cpu_hw_events);
1687
1688 perf_disable();
1689 intel_pmu_drain_bts_buffer(cpuc);
1690 status = intel_pmu_get_status();
1691 if (!status) {
1692 perf_enable();
1693 return 0;
1694 }
1695
1696 loops = 0;
1697again:
1698 if (++loops > 100) {
1699 WARN_ONCE(1, "perfevents: irq loop stuck!\n");
1700 perf_event_print_debug();
1701 intel_pmu_reset();
1702 perf_enable();
1703 return 1;
1704 }
1705
1706 inc_irq_stat(apic_perf_irqs);
1707 ack = status;
1708 for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
1709 struct perf_event *event = cpuc->events[bit];
1710
1711 clear_bit(bit, (unsigned long *) &status);
1712 if (!test_bit(bit, cpuc->active_mask))
1713 continue;
1714
1715 if (!intel_pmu_save_and_restart(event))
1716 continue;
1717
1718 data.period = event->hw.last_period;
1719
1720 if (perf_event_overflow(event, 1, &data, regs))
1721 intel_pmu_disable_event(&event->hw, bit);
1722 }
1723
1724 intel_pmu_ack_status(ack);
1725
1726 /*
1727 * Repeat if there is more work to be done:
1728 */
1729 status = intel_pmu_get_status();
1730 if (status)
1731 goto again;
1732
1733 perf_enable();
1734
1735 return 1;
1736}
1737
1738static int amd_pmu_handle_irq(struct pt_regs *regs)
1739{
1740 struct perf_sample_data data;
1741 struct cpu_hw_events *cpuc;
1742 struct perf_event *event;
1743 struct hw_perf_event *hwc;
1744 int idx, handled = 0;
1745 u64 val;
1746
1747 data.addr = 0;
1748
1749 cpuc = &__get_cpu_var(cpu_hw_events);
1750
1751 for (idx = 0; idx < x86_pmu.num_events; idx++) {
1752 if (!test_bit(idx, cpuc->active_mask))
1753 continue;
1754
1755 event = cpuc->events[idx];
1756 hwc = &event->hw;
1757
1758 val = x86_perf_event_update(event, hwc, idx);
1759 if (val & (1ULL << (x86_pmu.event_bits - 1)))
1760 continue;
1761
1762 /*
1763 * event overflow
1764 */
1765 handled = 1;
1766 data.period = event->hw.last_period;
1767
1768 if (!x86_perf_event_set_period(event, hwc, idx))
1769 continue;
1770
1771 if (perf_event_overflow(event, 1, &data, regs))
1772 amd_pmu_disable_event(hwc, idx);
1773 }
1774
1775 if (handled)
1776 inc_irq_stat(apic_perf_irqs);
1777
1778 return handled;
1779}
1780
1781void smp_perf_pending_interrupt(struct pt_regs *regs)
1782{
1783 irq_enter();
1784 ack_APIC_irq();
1785 inc_irq_stat(apic_pending_irqs);
1786 perf_event_do_pending();
1787 irq_exit();
1788}
1789
1790void set_perf_event_pending(void)
1791{
1792#ifdef CONFIG_X86_LOCAL_APIC
1793 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1794#endif
1795}
1796
1797void perf_events_lapic_init(void)
1798{
1799#ifdef CONFIG_X86_LOCAL_APIC
1800 if (!x86_pmu.apic || !x86_pmu_initialized())
1801 return;
1802
1803 /*
1804 * Always use NMI for PMU
1805 */
1806 apic_write(APIC_LVTPC, APIC_DM_NMI);
1807#endif
1808}
1809
1810static int __kprobes
1811perf_event_nmi_handler(struct notifier_block *self,
1812 unsigned long cmd, void *__args)
1813{
1814 struct die_args *args = __args;
1815 struct pt_regs *regs;
1816
1817 if (!atomic_read(&active_events))
1818 return NOTIFY_DONE;
1819
1820 switch (cmd) {
1821 case DIE_NMI:
1822 case DIE_NMI_IPI:
1823 break;
1824
1825 default:
1826 return NOTIFY_DONE;
1827 }
1828
1829 regs = args->regs;
1830
1831#ifdef CONFIG_X86_LOCAL_APIC
1832 apic_write(APIC_LVTPC, APIC_DM_NMI);
1833#endif
1834 /*
1835 * Can't rely on the handled return value to say it was our NMI, two
1836 * events could trigger 'simultaneously' raising two back-to-back NMIs.
1837 *
1838 * If the first NMI handles both, the latter will be empty and daze
1839 * the CPU.
1840 */
1841 x86_pmu.handle_irq(regs);
1842
1843 return NOTIFY_STOP;
1844}
1845
1846static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1847 .notifier_call = perf_event_nmi_handler,
1848 .next = NULL,
1849 .priority = 1
1850};
1851
1852static struct x86_pmu p6_pmu = {
1853 .name = "p6",
1854 .handle_irq = p6_pmu_handle_irq,
1855 .disable_all = p6_pmu_disable_all,
1856 .enable_all = p6_pmu_enable_all,
1857 .enable = p6_pmu_enable_event,
1858 .disable = p6_pmu_disable_event,
1859 .eventsel = MSR_P6_EVNTSEL0,
1860 .perfctr = MSR_P6_PERFCTR0,
1861 .event_map = p6_pmu_event_map,
1862 .raw_event = p6_pmu_raw_event,
1863 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1864 .apic = 1,
1865 .max_period = (1ULL << 31) - 1,
1866 .version = 0,
1867 .num_events = 2,
1868 /*
1869 * Events have 40 bits implemented. However they are designed such
1870 * that bits [32-39] are sign extensions of bit 31. As such the
 1871 * effective width of an event for P6-like PMU is 32 bits only.
1872 *
1873 * See IA-32 Intel Architecture Software developer manual Vol 3B
1874 */
1875 .event_bits = 32,
1876 .event_mask = (1ULL << 32) - 1,
1877};
1878
1879static struct x86_pmu intel_pmu = {
1880 .name = "Intel",
1881 .handle_irq = intel_pmu_handle_irq,
1882 .disable_all = intel_pmu_disable_all,
1883 .enable_all = intel_pmu_enable_all,
1884 .enable = intel_pmu_enable_event,
1885 .disable = intel_pmu_disable_event,
1886 .eventsel = MSR_ARCH_PERFMON_EVENTSEL0,
1887 .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
1888 .event_map = intel_pmu_event_map,
1889 .raw_event = intel_pmu_raw_event,
1890 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1891 .apic = 1,
1892 /*
1893 * Intel PMCs cannot be accessed sanely above 32 bit width,
1894 * so we install an artificial 1<<31 period regardless of
1895 * the generic event period:
1896 */
1897 .max_period = (1ULL << 31) - 1,
1898 .enable_bts = intel_pmu_enable_bts,
1899 .disable_bts = intel_pmu_disable_bts,
1900};
1901
1902static struct x86_pmu amd_pmu = {
1903 .name = "AMD",
1904 .handle_irq = amd_pmu_handle_irq,
1905 .disable_all = amd_pmu_disable_all,
1906 .enable_all = amd_pmu_enable_all,
1907 .enable = amd_pmu_enable_event,
1908 .disable = amd_pmu_disable_event,
1909 .eventsel = MSR_K7_EVNTSEL0,
1910 .perfctr = MSR_K7_PERFCTR0,
1911 .event_map = amd_pmu_event_map,
1912 .raw_event = amd_pmu_raw_event,
1913 .max_events = ARRAY_SIZE(amd_perfmon_event_map),
1914 .num_events = 4,
1915 .event_bits = 48,
1916 .event_mask = (1ULL << 48) - 1,
1917 .apic = 1,
1918 /* use highest bit to detect overflow */
1919 .max_period = (1ULL << 47) - 1,
1920};
1921
1922static int p6_pmu_init(void)
1923{
1924 switch (boot_cpu_data.x86_model) {
1925 case 1:
1926 case 3: /* Pentium Pro */
1927 case 5:
1928 case 6: /* Pentium II */
1929 case 7:
1930 case 8:
1931 case 11: /* Pentium III */
1932 break;
1933 case 9:
1934 case 13:
1935 /* Pentium M */
1936 break;
1937 default:
1938 pr_cont("unsupported p6 CPU model %d ",
1939 boot_cpu_data.x86_model);
1940 return -ENODEV;
1941 }
1942
1943 x86_pmu = p6_pmu;
1944
1945 if (!cpu_has_apic) {
1946 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1947 pr_info("no hardware sampling interrupt available.\n");
1948 x86_pmu.apic = 0;
1949 }
1950
1951 return 0;
1952}
1953
1954static int intel_pmu_init(void)
1955{
1956 union cpuid10_edx edx;
1957 union cpuid10_eax eax;
1958 unsigned int unused;
1959 unsigned int ebx;
1960 int version;
1961
1962 if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
1963 /* check for P6 processor family */
1964 if (boot_cpu_data.x86 == 6) {
1965 return p6_pmu_init();
1966 } else {
1967 return -ENODEV;
1968 }
1969 }
1970
1971 /*
1972 * Check whether the Architectural PerfMon supports
1973 * Branch Misses Retired hw_event or not.
1974 */
1975 cpuid(10, &eax.full, &ebx, &unused, &edx.full);
1976 if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
1977 return -ENODEV;
1978
1979 version = eax.split.version_id;
1980 if (version < 2)
1981 return -ENODEV;
1982
1983 x86_pmu = intel_pmu;
1984 x86_pmu.version = version;
1985 x86_pmu.num_events = eax.split.num_events;
1986 x86_pmu.event_bits = eax.split.bit_width;
1987 x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1;
1988
1989 /*
1990 * Quirk: v2 perfmon does not report fixed-purpose events, so
1991 * assume at least 3 events:
1992 */
1993 x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
1994
1995 /*
1996 * Install the hw-cache-events table:
1997 */
1998 switch (boot_cpu_data.x86_model) {
1999 case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
2000 case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
2001 case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
2002 case 29: /* six-core 45 nm xeon "Dunnington" */
2003 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
2004 sizeof(hw_cache_event_ids));
2005
2006 pr_cont("Core2 events, ");
2007 break;
2008 default:
2009 case 26:
2010 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
2011 sizeof(hw_cache_event_ids));
2012
2013 pr_cont("Nehalem/Corei7 events, ");
2014 break;
2015 case 28:
2016 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
2017 sizeof(hw_cache_event_ids));
2018
2019 pr_cont("Atom events, ");
2020 break;
2021 }
2022 return 0;
2023}
2024
2025static int amd_pmu_init(void)
2026{
2027 /* Performance-monitoring supported from K7 and later: */
2028 if (boot_cpu_data.x86 < 6)
2029 return -ENODEV;
2030
2031 x86_pmu = amd_pmu;
2032
2033 /* Events are common for all AMDs */
2034 memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
2035 sizeof(hw_cache_event_ids));
2036
2037 return 0;
2038}
2039
2040void __init init_hw_perf_events(void)
2041{
2042 int err;
2043
2044 pr_info("Performance Events: ");
2045
2046 switch (boot_cpu_data.x86_vendor) {
2047 case X86_VENDOR_INTEL:
2048 err = intel_pmu_init();
2049 break;
2050 case X86_VENDOR_AMD:
2051 err = amd_pmu_init();
2052 break;
2053 default:
2054 return;
2055 }
2056 if (err != 0) {
2057 pr_cont("no PMU driver, software events only.\n");
2058 return;
2059 }
2060
2061 pr_cont("%s PMU driver.\n", x86_pmu.name);
2062
2063 if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
2064 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
2065 x86_pmu.num_events, X86_PMC_MAX_GENERIC);
2066 x86_pmu.num_events = X86_PMC_MAX_GENERIC;
2067 }
2068 perf_event_mask = (1 << x86_pmu.num_events) - 1;
2069 perf_max_events = x86_pmu.num_events;
2070
2071 if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
2072 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
2073 x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
2074 x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
2075 }
2076
2077 perf_event_mask |=
2078 ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
2079 x86_pmu.intel_ctrl = perf_event_mask;
2080
2081 perf_events_lapic_init();
2082 register_die_notifier(&perf_event_nmi_notifier);
2083
2084 pr_info("... version: %d\n", x86_pmu.version);
2085 pr_info("... bit width: %d\n", x86_pmu.event_bits);
2086 pr_info("... generic events: %d\n", x86_pmu.num_events);
2087 pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
2088 pr_info("... max period: %016Lx\n", x86_pmu.max_period);
2089 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
2090 pr_info("... event mask: %016Lx\n", perf_event_mask);
2091}
2092
2093static inline void x86_pmu_read(struct perf_event *event)
2094{
2095 x86_perf_event_update(event, &event->hw, event->hw.idx);
2096}
2097
2098static const struct pmu pmu = {
2099 .enable = x86_pmu_enable,
2100 .disable = x86_pmu_disable,
2101 .read = x86_pmu_read,
2102 .unthrottle = x86_pmu_unthrottle,
2103};
2104
2105const struct pmu *hw_perf_event_init(struct perf_event *event)
2106{
2107 int err;
2108
2109 err = __hw_perf_event_init(event);
2110 if (err) {
2111 if (event->destroy)
2112 event->destroy(event);
2113 return ERR_PTR(err);
2114 }
2115
2116 return &pmu;
2117}
2118
2119/*
2120 * callchain support
2121 */
2122
2123static inline
2124void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2125{
2126 if (entry->nr < PERF_MAX_STACK_DEPTH)
2127 entry->ip[entry->nr++] = ip;
2128}
2129
2130static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2131static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2132static DEFINE_PER_CPU(int, in_nmi_frame);
2133
2134
2135static void
2136backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
2137{
2138 /* Ignore warnings */
2139}
2140
2141static void backtrace_warning(void *data, char *msg)
2142{
2143 /* Ignore warnings */
2144}
2145
2146static int backtrace_stack(void *data, char *name)
2147{
2148 per_cpu(in_nmi_frame, smp_processor_id()) =
2149 x86_is_stack_id(NMI_STACK, name);
2150
2151 return 0;
2152}
2153
2154static void backtrace_address(void *data, unsigned long addr, int reliable)
2155{
2156 struct perf_callchain_entry *entry = data;
2157
2158 if (per_cpu(in_nmi_frame, smp_processor_id()))
2159 return;
2160
2161 if (reliable)
2162 callchain_store(entry, addr);
2163}
2164
2165static const struct stacktrace_ops backtrace_ops = {
2166 .warning = backtrace_warning,
2167 .warning_symbol = backtrace_warning_symbol,
2168 .stack = backtrace_stack,
2169 .address = backtrace_address,
2170};
2171
2172#include "../dumpstack.h"
2173
2174static void
2175perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
2176{
2177 callchain_store(entry, PERF_CONTEXT_KERNEL);
2178 callchain_store(entry, regs->ip);
2179
2180 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
2181}
2182
2183/*
2184 * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
2185 */
2186static unsigned long
2187copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
2188{
2189 unsigned long offset, addr = (unsigned long)from;
2190 int type = in_nmi() ? KM_NMI : KM_IRQ0;
2191 unsigned long size, len = 0;
2192 struct page *page;
2193 void *map;
2194 int ret;
2195
2196 do {
2197 ret = __get_user_pages_fast(addr, 1, 0, &page);
2198 if (!ret)
2199 break;
2200
2201 offset = addr & (PAGE_SIZE - 1);
2202 size = min(PAGE_SIZE - offset, n - len);
2203
2204 map = kmap_atomic(page, type);
2205 memcpy(to, map+offset, size);
2206 kunmap_atomic(map, type);
2207 put_page(page);
2208
2209 len += size;
2210 to += size;
2211 addr += size;
2212
2213 } while (len < n);
2214
2215 return len;
2216}
2217
2218static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
2219{
2220 unsigned long bytes;
2221
2222 bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
2223
2224 return bytes == sizeof(*frame);
2225}
2226
2227static void
2228perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
2229{
2230 struct stack_frame frame;
2231 const void __user *fp;
2232
2233 if (!user_mode(regs))
2234 regs = task_pt_regs(current);
2235
2236 fp = (void __user *)regs->bp;
2237
2238 callchain_store(entry, PERF_CONTEXT_USER);
2239 callchain_store(entry, regs->ip);
2240
2241 while (entry->nr < PERF_MAX_STACK_DEPTH) {
2242 frame.next_frame = NULL;
2243 frame.return_address = 0;
2244
2245 if (!copy_stack_frame(fp, &frame))
2246 break;
2247
2248 if ((unsigned long)fp < regs->sp)
2249 break;
2250
2251 callchain_store(entry, frame.return_address);
2252 fp = frame.next_frame;
2253 }
2254}
2255
2256static void
2257perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
2258{
2259 int is_user;
2260
2261 if (!regs)
2262 return;
2263
2264 is_user = user_mode(regs);
2265
2266 if (!current || current->pid == 0)
2267 return;
2268
2269 if (is_user && current->state != TASK_RUNNING)
2270 return;
2271
2272 if (!is_user)
2273 perf_callchain_kernel(regs, entry);
2274
2275 if (current->mm)
2276 perf_callchain_user(regs, entry);
2277}
2278
2279struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2280{
2281 struct perf_callchain_entry *entry;
2282
2283 if (in_nmi())
2284 entry = &__get_cpu_var(pmc_nmi_entry);
2285 else
2286 entry = &__get_cpu_var(pmc_irq_entry);
2287
2288 entry->nr = 0;
2289
2290 perf_do_callchain(regs, entry);
2291
2292 return entry;
2293}
2294
2295void hw_perf_event_setup_online(int cpu)
2296{
2297 init_debug_store_on_cpu(cpu);
2298}