author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-03-03 11:07:40 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-10 07:23:33 -0500
commit		8db909a7e3c888b5d45aef7650d74ccebe3ce725 (patch)
tree		1f930572b6468fa212f599285e2de772aefdd361 /arch
parent		1676b8a077c352085d52578fb4f29350b58b6e74 (diff)
perf, x86: Clean up IA32_PERF_CAPABILITIES usage
Saner PERF_CAPABILITIES support, which also exposes pebs_trap. Use the
latter to make PEBS's use of the LBR conditional, since a fault-like PEBS
should already report the correct IP.
( As of this writing there is no known hardware that implements
!pebs_trap )
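( Background for the trap/fault distinction: trap-like PEBS records the IP
of the instruction *after* the one that triggered the event, which is why
the LBR-assisted fixup exists; fault-like PEBS records the IP of the
instruction itself. A minimal standalone sketch of that difference, with
a made-up address and instruction length purely for illustration:

#include <stdio.h>

int main(void)
{
	unsigned long insn_addr = 0x401000;	/* hypothetical sampled instruction */
	unsigned long insn_len  = 3;		/* hypothetical instruction length  */

	unsigned long trap_ip  = insn_addr + insn_len;	/* trap-like: points past it */
	unsigned long fault_ip = insn_addr;		/* fault-like: already exact */

	printf("trap-like IP:  %#lx (needs LBR-assisted fixup)\n", trap_ip);
	printf("fault-like IP: %#lx (no fixup needed)\n", fault_ip);
	return 0;
}
)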
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.770650663@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch')
 arch/x86/kernel/cpu/perf_event.c           | 15
 arch/x86/kernel/cpu/perf_event_intel.c     | 10
 arch/x86/kernel/cpu/perf_event_intel_ds.c  | 30
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 18
 4 files changed, 42 insertions(+), 31 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5cb4e8dcee4b..7b5430b2efe7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -154,6 +154,17 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->cmask; (e)++)
 
+union perf_capabilities {
+	struct {
+		u64	lbr_format    : 6;
+		u64	pebs_trap     : 1;
+		u64	pebs_arch_reg : 1;
+		u64	pebs_format   : 4;
+		u64	smm_freeze    : 1;
+	};
+	u64	capabilities;
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -195,7 +206,8 @@ struct x86_pmu {
 	/*
 	 * Intel Arch Perfmon v2+
 	 */
 	u64			intel_ctrl;
+	union perf_capabilities	intel_cap;
 
 	/*
 	 * Intel DebugStore bits
@@ -210,7 +222,6 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
 	int		lbr_nr;			   /* hardware stack size */
-	int		lbr_format;		   /* hardware format     */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
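The union above decodes MSR_IA32_PERF_CAPABILITIES by overlaying bitfields
on the raw 64-bit value, so a single read fills every field at once. A
standalone sketch of the same decoding (the raw value below is invented
for illustration; bitfields are allocated low-bits-first, as x86 GCC does):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the union the patch adds to perf_event.c. */
union perf_capabilities {
	struct {
		uint64_t lbr_format    : 6;
		uint64_t pebs_trap     : 1;
		uint64_t pebs_arch_reg : 1;
		uint64_t pebs_format   : 4;
		uint64_t smm_freeze    : 1;
	};
	uint64_t capabilities;
};

int main(void)
{
	/* Invented raw MSR value: lbr_format=2, pebs_trap=1, pebs_format=1. */
	union perf_capabilities cap = { .capabilities = 0x1c2 };

	printf("lbr_format=%u pebs_trap=%u pebs_arch_reg=%u pebs_format=%u\n",
	       (unsigned)cap.lbr_format, (unsigned)cap.pebs_trap,
	       (unsigned)cap.pebs_arch_reg, (unsigned)cap.pebs_format);
	return 0;
}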
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7eb78be3b229..246c07238823 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -835,6 +835,16 @@ static __init int intel_pmu_init(void)
 	if (version > 1)
 		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
 
+	/*
+	 * v2 and above have a perf capabilities MSR
+	 */
+	if (version > 1) {
+		u64 capabilities;
+
+		rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+		x86_pmu.intel_cap.capabilities = capabilities;
+	}
+
 	intel_ds_init();
 
 	/*
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 50e6ff3281fc..5e4029441b2d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -342,7 +342,8 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
 	val |= 1ULL << hwc->idx;
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
 
-	intel_pmu_lbr_enable(event);
+	if (x86_pmu.intel_cap.pebs_trap)
+		intel_pmu_lbr_enable(event);
 }
 
 static void intel_pmu_pebs_disable(struct perf_event *event)
@@ -356,7 +357,8 @@ static void intel_pmu_pebs_disable(struct perf_event *event)
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 
-	intel_pmu_lbr_disable(event);
+	if (x86_pmu.intel_cap.pebs_trap)
+		intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_pebs_enable_all(void)
@@ -395,6 +397,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
 	unsigned long ip = regs->ip;
 
+	/*
+	 * We don't need to fixup if the PEBS assist is fault like
+	 */
+	if (!x86_pmu.intel_cap.pebs_trap)
+		return 1;
+
 	if (!cpuc->lbr_stack.nr || !from || !to)
 		return 0;
 
@@ -589,34 +597,26 @@ static void intel_ds_init(void)
 	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
 	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
 	if (x86_pmu.pebs) {
-		int format = 0;
-
-		if (x86_pmu.version > 1) {
-			u64 capabilities;
-			/*
-			 * v2+ has a PEBS format field
-			 */
-			rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
-			format = (capabilities >> 8) & 0xf;
-		}
+		char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
+		int format = x86_pmu.intel_cap.pebs_format;
 
 		switch (format) {
 		case 0:
-			printk(KERN_CONT "PEBS v0, ");
+			printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
 			x86_pmu.pebs_constraints = intel_core_pebs_events;
 			break;
 
 		case 1:
-			printk(KERN_CONT "PEBS v1, ");
+			printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
 			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
 			break;
 
 		default:
-			printk(KERN_CONT "PEBS unknown format: %d, ", format);
+			printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
 			x86_pmu.pebs = 0;
 			break;
 		}
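With this change the boot message reports both the PEBS format and the
trap/fault property in one token, e.g. "PEBS fmt1+, " for a format-1 part
with pebs_trap set. A tiny sketch of the same formatting, using assumed
capability values for illustration:

#include <stdio.h>

int main(void)
{
	int pebs_trap = 1;	/* assumed: known hardware is trap-like      */
	int format = 1;		/* assumed: a Nehalem-class pebs_format      */
	char pebs_type = pebs_trap ? '+' : '-';

	/* Same format string intel_ds_init() now uses for the known cases. */
	printf("PEBS fmt%d%c, \n", format, pebs_type);	/* prints: PEBS fmt1+, */
	return 0;
}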
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index ea3e99ed82ce..4f3a124329c4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -53,7 +53,7 @@ static void intel_pmu_lbr_reset_64(void)
 
 static void intel_pmu_lbr_reset(void)
 {
-	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
 		intel_pmu_lbr_reset_32();
 	else
 		intel_pmu_lbr_reset_64();
@@ -155,6 +155,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 {
 	unsigned long mask = x86_pmu.lbr_nr - 1;
+	int lbr_format = x86_pmu.intel_cap.lbr_format;
 	u64 tos = intel_pmu_lbr_tos();
 	int i;
 
@@ -165,7 +166,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
 		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
 		rdmsrl(x86_pmu.lbr_to + lbr_idx, to);
 
-		if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
+		if (lbr_format == LBR_FORMAT_EIP_FLAGS) {
 			flags = !!(from & LBR_FROM_FLAG_MISPRED);
 			from = (u64)((((s64)from) << 1) >> 1);
 		}
@@ -184,23 +185,14 @@ static void intel_pmu_lbr_read(void)
 	if (!cpuc->lbr_users)
 		return;
 
-	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+	if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
 		intel_pmu_lbr_read_32(cpuc);
 	else
 		intel_pmu_lbr_read_64(cpuc);
 }
 
-static int intel_pmu_lbr_format(void)
-{
-	u64 capabilities;
-
-	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
-	return capabilities & 0x1f;
-}
-
 static void intel_pmu_lbr_init_core(void)
 {
-	x86_pmu.lbr_format = intel_pmu_lbr_format();
 	x86_pmu.lbr_nr = 4;
 	x86_pmu.lbr_tos = 0x01c9;
 	x86_pmu.lbr_from = 0x40;
@@ -209,7 +201,6 @@ static void intel_pmu_lbr_init_core(void)
 
 static void intel_pmu_lbr_init_nhm(void)
 {
-	x86_pmu.lbr_format = intel_pmu_lbr_format();
 	x86_pmu.lbr_nr = 16;
 	x86_pmu.lbr_tos = 0x01c9;
 	x86_pmu.lbr_from = 0x680;
@@ -218,7 +209,6 @@ static void intel_pmu_lbr_init_nhm(void)
 
 static void intel_pmu_lbr_init_atom(void)
 {
-	x86_pmu.lbr_format = intel_pmu_lbr_format();
 	x86_pmu.lbr_nr = 8;
 	x86_pmu.lbr_tos = 0x01c9;
 	x86_pmu.lbr_from = 0x40;
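One subtlety worth noting in intel_pmu_lbr_read_64(): with
LBR_FORMAT_EIP_FLAGS the MISPRED flag lives in bit 63 of the FROM value,
and (u64)((((s64)from) << 1) >> 1) strips it by shifting the flag out and
sign-extending bit 62 back into bit 63, restoring a canonical address. A
standalone sketch (the raw value is invented; like the kernel, this relies
on GCC's defined behavior for shifts of negative signed values):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented raw LBR_FROM value: MISPRED flag (bit 63) set on what is
	 * otherwise a 63-bit kernel-style address with bit 62 set. */
	uint64_t from = (1ULL << 63) | 0x7fffffff81000000ULL;

	int mispred = !!(from & (1ULL << 63));		/* extract the flag */
	from = (uint64_t)((((int64_t)from) << 1) >> 1);	/* drop bit 63, sign-extend bit 62 */

	/* Prints: mispred=1 from=0xffffffff81000000 */
	printf("mispred=%d from=%#llx\n", mispred, (unsigned long long)from);
	return 0;
}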