author		Andi Kleen <ak@linux.intel.com>		2011-03-02 21:34:47 -0500
committer	Ingo Molnar <mingo@elte.hu>		2011-03-04 05:32:53 -0500
commit		a7e3ed1e470116c9d12c2f778431a481a6be8ab6
tree		10e72043f3eb0d6a31fe27188f74267a5796dbcd	/arch/x86/kernel/cpu/perf_event.c
parent		17e3162972cbb9796035fff1e2fd30669b0eef65
perf: Add support for supplementary event registers
Changes against Andi's original version:
- Extends perf_event_attr:config to config{,1,2} (Peter Zijlstra)
- Fixed a major event scheduling issue: an event must not do ref++ a
  second time without calling put_constraint() in between.
  (Stephane Eranian)
- Use thread_cpumask for percore allocation. (Lin Ming)
- Use MSR names in the extra reg lists. (Lin Ming)
- Remove redundant "c = NULL" in intel_percore_constraints
- Fix comment of perf_event_attr::config1
Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.
Unfortunately, this event requires programming a mask in a separate
register. Worse, this separate register is per core, not per
CPU thread.
This patch:
- Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
The extra parameters are passed by user space in the
perf_event_attr::config1 field.
- Adds support to the Intel perf_event core to schedule per
core resources. This adds fairly generic infrastructure that
can be also used for other per core resources.
The basic code is patterned after the similar AMD northbridge
constraints code.
Thanks to Stephane Eranian who pointed out some problems
in the original version and suggested improvements.
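For illustration, a minimal user-space sketch of how perf_event_attr::config1
carries the extra OFFCORE_RESPONSE mask described above. The raw event code
(0x01b7) and the 0xffff mask are assumed, illustrative values, not taken from
this patch:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/*
 * Open a raw OFFCORE_RESPONSE-style event and pass the extra response
 * mask in perf_event_attr::config1.  0x01b7 and 0xffff are illustrative
 * placeholders; the real encodings depend on the CPU's event tables.
 */
static int open_offcore_event(pid_t pid, int cpu)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size    = sizeof(attr);
	attr.type    = PERF_TYPE_RAW;
	attr.config  = 0x01b7;	/* assumed OFFCORE_RESPONSE_0 encoding */
	attr.config1 = 0xffff;	/* extra mask, written to the offcore MSR */

	return syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
}

If config1 sets bits outside the event's valid mask, the syscall is expected
to fail with EINVAL, matching the check in x86_pmu_extra_regs() in the diff
below.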
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-2-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	64
1 file changed, 64 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ea03c725e465..ec6a6db07332 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,6 +93,8 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+struct intel_percore;
+
 #define MAX_LBR_ENTRIES		16
 
 struct cpu_hw_events {
@@ -128,6 +130,13 @@ struct cpu_hw_events {
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
 	/*
+	 * Intel percore register state.
+	 * Coordinate shared resources between HT threads.
+	 */
+	int				percore_used; /* Used by this CPU? */
+	struct intel_percore		*per_core;
+
+	/*
 	 * AMD specific bits
 	 */
 	struct amd_nb		*amd_nb;
@@ -177,6 +186,28 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ */
+struct extra_reg {
+	unsigned int		event;
+	unsigned int		msr;
+	u64			config_mask;
+	u64			valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm) {	\
+	.event = (e),		\
+	.msr = (ms),		\
+	.config_mask = (m),	\
+	.valid_mask = (vm),	\
+	}
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
+
 union perf_capabilities {
 	struct {
 		u64	lbr_format    : 6;
@@ -221,6 +252,7 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
 
@@ -249,6 +281,11 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
 	int		lbr_nr;			   /* hardware stack size */
+
+	/*
+	 * Extra registers for events
+	 */
+	struct extra_reg *extra_regs;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -341,6 +378,31 @@ static inline unsigned int x86_pmu_event_addr(int index)
 	return x86_pmu.perfctr + x86_pmu_addr_offset(index);
 }
 
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+	struct extra_reg *er;
+
+	event->hw.extra_reg = 0;
+	event->hw.extra_config = 0;
+
+	if (!x86_pmu.extra_regs)
+		return 0;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (config & er->config_mask))
+			continue;
+		if (event->attr.config1 & ~er->valid_mask)
+			return -EINVAL;
+		event->hw.extra_reg = er->msr;
+		event->hw.extra_config = event->attr.config1;
+		break;
+	}
+	return 0;
+}
+
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
@@ -665,6 +727,8 @@ static void x86_pmu_disable(struct pmu *pmu)
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
+	if (hwc->extra_reg)
+		wrmsrl(hwc->extra_reg, hwc->extra_config);
 	wrmsrl(hwc->config_base, hwc->config | enable_mask);
 }
 
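As a usage sketch of the new hooks (not part of this file's diff): a CPU model
would point x86_pmu.extra_regs at a table built with the macros above. The
entry below, mapping event code 0xb7 to MSR_OFFCORE_RSP_0 with a 16-bit valid
mask, is an assumed, Nehalem-style example patterned after the Intel-specific
part of this series:

/*
 * Hypothetical per-model table consumed by x86_pmu_extra_regs(): any
 * event whose EVENTSEL code matches 0xb7 may carry an extra mask in
 * config1, which __x86_pmu_enable_event() then writes to
 * MSR_OFFCORE_RSP_0.  The values are illustrative.
 */
static struct extra_reg intel_nehalem_extra_regs[] = {
	INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff),
	EVENT_EXTRA_END
};

/* Hooked up from the model-specific init path, e.g.:
 *	x86_pmu.extra_regs = intel_nehalem_extra_regs;
 */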