author		Linus Torvalds <torvalds@linux-foundation.org>	2011-10-26 11:03:38 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-10-26 11:03:38 -0400
commit		7115e3fcf45514db7525a05365b10454ff7f345e (patch)
tree		17450e6337d559cc35dae6a7a73abab01ac63f00 /arch/x86/kernel/cpu/perf_event.h
parent		1f6e05171bb5cc32a4d6437ab2269fc21d169ca7 (diff)
parent		c752d04066a36ae30b29795f3fa3f536292c1f8c (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (121 commits)
perf symbols: Increase symbol KSYM_NAME_LEN size
perf hists browser: Refuse 'a' hotkey on non symbolic views
perf ui browser: Use libslang to read keys
perf tools: Fix tracing info recording
perf hists browser: Elide DSO column when it is set to just one DSO, ditto for threads
perf hists: Don't consider filtered entries when calculating column widths
perf hists: Don't decay total_period for filtered entries
perf hists browser: Honour symbol_conf.show_{nr_samples,total_period}
perf hists browser: Do not exit on tab key with single event
perf annotate browser: Don't change selection line when returning from callq
perf tools: handle endianness of feature bitmap
perf tools: Add prelink suggestion to dso update message
perf script: Fix unknown feature comment
perf hists browser: Apply the dso and thread filters when merging new batches
perf hists: Move the dso and thread filters from hist_browser
perf ui browser: Honour the xterm colors
perf top tui: Give color hints just on the percentage, like on --stdio
perf ui browser: Make the colors configurable and change the defaults
perf tui: Remove unneeded call to newtCls on startup
perf hists: Don't format the percentage on hist_entry__snprintf
...
Fix up conflicts in arch/x86/kernel/kprobes.c manually.
Ingo's tree did the insane "add volatile to const array", which just
doesn't make sense ("volatile const"?). But we could remove the const
*and* make the array volatile to make doubly sure that gcc doesn't
optimize it away..
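As a minimal sketch of the two variants being weighed here (the array name
and type are hypothetical stand-ins for the actual declaration in
arch/x86/kernel/kprobes.c):

	/* Ingo's tree: "volatile const" - the qualifiers pull in opposite
	 * directions (promised never to change, yet reads must never be
	 * cached away): */
	static volatile const unsigned long boostable_map[4];

	/* The alternative described above: drop the const and keep the
	 * array volatile, so gcc must keep the array and every access
	 * to it: */
	static volatile unsigned long boostable_map[4];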
Also fix up kernel/trace/ring_buffer.c non-data-conflicts manually: the
reader_lock has been turned into a raw lock by the core locking merge,
and there was a new user of it introduced in this perf core merge. Make
sure that new use also uses the raw accessor functions.
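Schematically (an illustrative fragment, not the literal resolution; the
real call sites are in kernel/trace/ring_buffer.c, where reader_lock is a
field of struct ring_buffer_per_cpu and is now a raw_spinlock_t):

	unsigned long flags;

	/* No longer correct after the core locking merge - this is the
	 * sleeping-lock API: */
	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);

	/* What the new user must do instead - the raw accessors match
	 * the raw_spinlock_t type of reader_lock: */
	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
	/* ... access the reader page ... */
	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);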
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.h')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.h | 505
1 file changed, 505 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
new file mode 100644
index 000000000000..b9698d40ac4b
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -0,0 +1,505 @@
/*
 * Performance events x86 architecture header
 *
 * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de>
 * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar
 * Copyright (C) 2009 Jaswinder Singh Rajput
 * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter
 * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com>
 * Copyright (C) 2009 Google, Inc., Stephane Eranian
 *
 * For licencing details see kernel-base/COPYING
 */

#include <linux/perf_event.h>

/*
 *          |   NHM/WSM    |      SNB     |
 * register -------------------------------
 *          |  HT  | no HT |  HT  | no HT |
 *-----------------------------------------
 * offcore  | core | core  | cpu  | core  |
 * lbr_sel  | core | core  | cpu  | core  |
 * ld_lat   | cpu  | core  | cpu  | core  |
 *-----------------------------------------
 *
 * Given that there is a small number of shared regs,
 * we can pre-allocate their slot in the per-cpu
 * per-core reg tables.
 */
enum extra_reg_type {
        EXTRA_REG_NONE = -1,    /* not used */

        EXTRA_REG_RSP_0 = 0,    /* offcore_response_0 */
        EXTRA_REG_RSP_1 = 1,    /* offcore_response_1 */

        EXTRA_REG_MAX           /* number of entries needed */
};

struct event_constraint {
        union {
                unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
                u64 idxmsk64;
        };
        u64 code;
        u64 cmask;
        int weight;
};

struct amd_nb {
        int nb_id;      /* NorthBridge id */
        int refcnt;     /* reference count */
        struct perf_event *owners[X86_PMC_IDX_MAX];
        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 4

/*
 * A debug store configuration.
 *
 * We only support architectures that use 64bit fields.
 */
struct debug_store {
        u64 bts_buffer_base;
        u64 bts_index;
        u64 bts_absolute_maximum;
        u64 bts_interrupt_threshold;
        u64 pebs_buffer_base;
        u64 pebs_index;
        u64 pebs_absolute_maximum;
        u64 pebs_interrupt_threshold;
        u64 pebs_event_reset[MAX_PEBS_EVENTS];
};

/*
 * Per register state.
 */
struct er_account {
        raw_spinlock_t lock;    /* per-core: protect structure */
        u64 config;             /* extra MSR config */
        u64 reg;                /* extra MSR number */
        atomic_t ref;           /* reference count */
};

/*
 * Per core/cpu state
 *
 * Used to coordinate shared registers between HT threads or
 * among events on a single PMU.
 */
struct intel_shared_regs {
        struct er_account regs[EXTRA_REG_MAX];
        int refcnt;             /* per-core: #HT threads */
        unsigned core_id;       /* per-core: core id */
};

#define MAX_LBR_ENTRIES 16

struct cpu_hw_events {
        /*
         * Generic x86 PMC bits
         */
        struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */
        unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        int enabled;

        int n_events;
        int n_added;
        int n_txn;
        int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64 tags[X86_PMC_IDX_MAX];
        struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */

        unsigned int group_flag;

        /*
         * Intel DebugStore bits
         */
        struct debug_store *ds;
        u64 pebs_enabled;

        /*
         * Intel LBR bits
         */
        int lbr_users;
        void *lbr_context;
        struct perf_branch_stack lbr_stack;
        struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];

        /*
         * Intel host/guest exclude bits
         */
        u64 intel_ctrl_guest_mask;
        u64 intel_ctrl_host_mask;
        struct perf_guest_switch_msr guest_switch_msrs[X86_PMC_IDX_MAX];

        /*
         * manage shared (per-core, per-cpu) registers
         * used on Intel NHM/WSM/SNB
         */
        struct intel_shared_regs *shared_regs;

        /*
         * AMD specific bits
         */
        struct amd_nb *amd_nb;

        void *kfree_on_online;
};

#define __EVENT_CONSTRAINT(c, n, m, w) {\
        { .idxmsk64 = (n) },            \
        .code = (c),                    \
        .cmask = (m),                   \
        .weight = (w),                  \
}

#define EVENT_CONSTRAINT(c, n, m)       \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))

/*
 * Constraint on the Event code.
 */
#define INTEL_EVENT_CONSTRAINT(c, n)    \
        EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)

/*
 * Constraint on the Event code + UMask + fixed-mask
 *
 * filter mask to validate fixed counter events.
 * the following filters disqualify for fixed counters:
 *  - inv
 *  - edge
 *  - cnt-mask
 * The other filters are supported by fixed counters.
 * The any-thread option is supported starting with v3.
 */
#define FIXED_EVENT_CONSTRAINT(c, n)    \
        EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)

/*
 * Constraint on the Event code + UMask
 */
#define INTEL_UEVENT_CONSTRAINT(c, n)   \
        EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)

#define EVENT_CONSTRAINT_END            \
        EVENT_CONSTRAINT(0, 0, 0)

#define for_each_event_constraint(e, c) \
        for ((e) = (c); (e)->weight; (e)++)

/*
 * Extra registers for specific events.
 *
 * Some events need large masks and require external MSRs.
 * Those extra MSRs end up being shared for all events on
 * a PMU and sometimes between PMU of sibling HT threads.
 * In either case, the kernel needs to handle conflicting
 * accesses to those extra, shared, regs. The data structure
 * to manage those registers is stored in cpu_hw_event.
 */
struct extra_reg {
        unsigned int event;
        unsigned int msr;
        u64 config_mask;
        u64 valid_mask;
        int idx;        /* per_xxx->regs[] reg index */
};

#define EVENT_EXTRA_REG(e, ms, m, vm, i) {      \
        .event = (e),           \
        .msr = (ms),            \
        .config_mask = (m),     \
        .valid_mask = (vm),     \
        .idx = EXTRA_REG_##i    \
        }

#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx)      \
        EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)

#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)

union perf_capabilities {
        struct {
                u64 lbr_format:6;
                u64 pebs_trap:1;
                u64 pebs_arch_reg:1;
                u64 pebs_format:4;
                u64 smm_freeze:1;
        };
        u64 capabilities;
};

/*
 * struct x86_pmu - generic x86 pmu
 */
struct x86_pmu {
        /*
         * Generic x86 PMC bits
         */
        const char *name;
        int version;
        int (*handle_irq)(struct pt_regs *);
        void (*disable_all)(void);
        void (*enable_all)(int added);
        void (*enable)(struct perf_event *);
        void (*disable)(struct perf_event *);
        int (*hw_config)(struct perf_event *event);
        int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
        unsigned eventsel;
        unsigned perfctr;
        u64 (*event_map)(int);
        int max_events;
        int num_counters;
        int num_counters_fixed;
        int cntval_bits;
        u64 cntval_mask;
        int apic;
        u64 max_period;
        struct event_constraint *
                (*get_event_constraints)(struct cpu_hw_events *cpuc,
                                         struct perf_event *event);

        void (*put_event_constraints)(struct cpu_hw_events *cpuc,
                                      struct perf_event *event);
        struct event_constraint *event_constraints;
        void (*quirks)(void);
        int perfctr_second_write;

        int (*cpu_prepare)(int cpu);
        void (*cpu_starting)(int cpu);
        void (*cpu_dying)(int cpu);
        void (*cpu_dead)(int cpu);

        /*
         * Intel Arch Perfmon v2+
         */
        u64 intel_ctrl;
        union perf_capabilities intel_cap;

        /*
         * Intel DebugStore bits
         */
        int bts, pebs;
        int bts_active, pebs_active;
        int pebs_record_size;
        void (*drain_pebs)(struct pt_regs *regs);
        struct event_constraint *pebs_constraints;

        /*
         * Intel LBR
         */
        unsigned long lbr_tos, lbr_from, lbr_to;        /* MSR base regs */
        int lbr_nr;                                     /* hardware stack size */

        /*
         * Extra registers for events
         */
        struct extra_reg *extra_regs;
        unsigned int er_flags;

        /*
         * Intel host/guest support (KVM)
         */
        struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
};

#define ERF_NO_HT_SHARING       1
#define ERF_HAS_RSP_1           2

extern struct x86_pmu x86_pmu __read_mostly;

DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events);

int x86_perf_event_set_period(struct perf_event *event);

/*
 * Generalized hw caching related hw_event table, filled
 * in on a per model basis. A value of 0 means
 * 'not supported', -1 means 'hw_event makes no sense on
 * this CPU', any other value means the raw hw_event
 * ID.
 */

#define C(x) PERF_COUNT_HW_CACHE_##x

extern u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];
extern u64 __read_mostly hw_cache_extra_regs
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX];

u64 x86_perf_event_update(struct perf_event *event);

static inline int x86_pmu_addr_offset(int index)
{
        int offset;

        /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
        alternative_io(ASM_NOP2,
                       "shll $1, %%eax",
                       X86_FEATURE_PERFCTR_CORE,
                       "=a" (offset),
                       "a" (index));

        return offset;
}

static inline unsigned int x86_pmu_config_addr(int index)
{
        return x86_pmu.eventsel + x86_pmu_addr_offset(index);
}

static inline unsigned int x86_pmu_event_addr(int index)
{
        return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

int x86_setup_perfctr(struct perf_event *event);

int x86_pmu_hw_config(struct perf_event *event);

void x86_pmu_disable_all(void);

static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
                                          u64 enable_mask)
{
        if (hwc->extra_reg.reg)
                wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
        wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

void x86_pmu_enable_all(int added);

int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign);

void x86_pmu_stop(struct perf_event *event, int flags);

static inline void x86_pmu_disable_event(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        wrmsrl(hwc->config_base, hwc->config);
}

void x86_pmu_enable_event(struct perf_event *event);

int x86_pmu_handle_irq(struct pt_regs *regs);

extern struct event_constraint emptyconstraint;

extern struct event_constraint unconstrained;

#ifdef CONFIG_CPU_SUP_AMD

int amd_pmu_init(void);

#else /* CONFIG_CPU_SUP_AMD */

static inline int amd_pmu_init(void)
{
        return 0;
}

#endif /* CONFIG_CPU_SUP_AMD */

#ifdef CONFIG_CPU_SUP_INTEL

int intel_pmu_save_and_restart(struct perf_event *event);

struct event_constraint *
x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event);

struct intel_shared_regs *allocate_shared_regs(int cpu);

int intel_pmu_init(void);

void init_debug_store_on_cpu(int cpu);

void fini_debug_store_on_cpu(int cpu);

void release_ds_buffers(void);

void reserve_ds_buffers(void);

extern struct event_constraint bts_constraint;

void intel_pmu_enable_bts(u64 config);

void intel_pmu_disable_bts(void);

int intel_pmu_drain_bts_buffer(void);

extern struct event_constraint intel_core2_pebs_event_constraints[];

extern struct event_constraint intel_atom_pebs_event_constraints[];

extern struct event_constraint intel_nehalem_pebs_event_constraints[];

extern struct event_constraint intel_westmere_pebs_event_constraints[];

extern struct event_constraint intel_snb_pebs_event_constraints[];

struct event_constraint *intel_pebs_constraints(struct perf_event *event);

void intel_pmu_pebs_enable(struct perf_event *event);

void intel_pmu_pebs_disable(struct perf_event *event);

void intel_pmu_pebs_enable_all(void);

void intel_pmu_pebs_disable_all(void);

void intel_ds_init(void);

void intel_pmu_lbr_reset(void);

void intel_pmu_lbr_enable(struct perf_event *event);

void intel_pmu_lbr_disable(struct perf_event *event);

void intel_pmu_lbr_enable_all(void);

void intel_pmu_lbr_disable_all(void);

void intel_pmu_lbr_read(void);

void intel_pmu_lbr_init_core(void);

void intel_pmu_lbr_init_nhm(void);

void intel_pmu_lbr_init_atom(void);

int p4_pmu_init(void);

int p6_pmu_init(void);

#else /* CONFIG_CPU_SUP_INTEL */

static inline void reserve_ds_buffers(void)
{
}

static inline void release_ds_buffers(void)
{
}

static inline int intel_pmu_init(void)
{
        return 0;
}

static inline struct intel_shared_regs *allocate_shared_regs(int cpu)
{
        return NULL;
}

#endif /* CONFIG_CPU_SUP_INTEL */
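
To make the constraint machinery above concrete, here is a small standalone
userspace sketch (not kernel code) of how a PMU driver builds and walks a
constraint table with these macros. BITS_TO_LONGS, HWEIGHT and u64 are
re-created locally (HWEIGHT is approximated with __builtin_popcountll), and
the toy_constraints table with its event codes and counter masks is invented
purely for illustration:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define X86_PMC_IDX_MAX         64
#define BITS_PER_LONG           (8 * sizeof(unsigned long))
#define BITS_TO_LONGS(nr)       (((nr) + BITS_PER_LONG - 1) / BITS_PER_LONG)
#define HWEIGHT(x)              __builtin_popcountll(x) /* stand-in for the kernel macro */

/* Mirrors struct event_constraint from the header above. */
struct event_constraint {
        union {
                unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
                u64 idxmsk64;
        };
        u64 code;
        u64 cmask;
        int weight;
};

#define __EVENT_CONSTRAINT(c, n, m, w) {\
        { .idxmsk64 = (n) },            \
        .code = (c),                    \
        .cmask = (m),                   \
        .weight = (w),                  \
}

#define EVENT_CONSTRAINT(c, n, m)       \
        __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))

#define EVENT_CONSTRAINT_END EVENT_CONSTRAINT(0, 0, 0)

#define for_each_event_constraint(e, c) \
        for ((e) = (c); (e)->weight; (e)++)

int main(void)
{
        /* Toy table: event 0x3c may use counters 0-1, event 0xc0 only counter 0. */
        struct event_constraint toy_constraints[] = {
                EVENT_CONSTRAINT(0x3c, 0x3, 0xff),
                EVENT_CONSTRAINT(0xc0, 0x1, 0xff),
                EVENT_CONSTRAINT_END    /* weight 0 terminates the walk */
        };
        struct event_constraint *c;

        for_each_event_constraint(c, toy_constraints)
                printf("event %#llx: counter mask %#llx, weight %d\n",
                       (unsigned long long)c->code,
                       (unsigned long long)c->idxmsk64, c->weight);
        return 0;
}

The weight field (the population count of the counter index mask) is what
the scheduler in x86_schedule_events() keys on: it places the least flexible
events first, so that an event that can run on any counter does not occupy
the only counter a heavily constrained event is allowed to use.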