author		Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-03-02 13:52:12 -0500
committer	Ingo Molnar <mingo@elte.hu>	2010-03-10 07:23:31 -0500
commit		ca037701a025334e724e5c61b3b1082940c8b981 (patch)
tree		12e3651ae6b35e9a5df4b49f9f571a01fc5a42a4
parent		d4944a06666054707d23e11888e480af239e5abf (diff)
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling (PEBS),
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap-like snapshot of a subset of the machine registers.
This data is written to the Intel Debug Store (DS) area, which can be
programmed with a data threshold at which to raise a PMI.
Because the PEBS hardware assist is trap-like, the reported IP is always
one instruction after the instruction that actually triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
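For illustration only (not part of the patch), a minimal userspace sketch of
requesting this mode through perf_event_open() could look as follows, assuming
a linux/perf_event.h that already carries the precise bit added below; the
chosen event, sample period and sample type are arbitrary example values:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	/* there is no glibc wrapper for perf_event_open */
	static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
				    int cpu, int group_fd, unsigned long flags)
	{
		return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
	}

	int main(void)
	{
		struct perf_event_attr attr;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS; /* a PEBS-capable event */
		attr.sample_period = 100000;
		attr.sample_type = PERF_SAMPLE_IP;
		attr.precise = 1;	/* request the PEBS hardware assist */

		fd = perf_event_open(&attr, 0 /* self */, -1 /* any cpu */, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		/* ... mmap the ring buffer and read samples as usual ... */
		close(fd);
		return 0;
	}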
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	223
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel.c	150
-rw-r--r--	arch/x86/kernel/cpu/perf_event_intel_ds.c	557
-rw-r--r--	include/linux/perf_event.h	3
4 files changed, 671 insertions(+), 262 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d665a0b202c..0c03d5c1671f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,45 +31,6 @@ | |||
31 | 31 | ||
32 | static u64 perf_event_mask __read_mostly; | 32 | static u64 perf_event_mask __read_mostly; |
33 | 33 | ||
34 | /* The maximal number of PEBS events: */ | ||
35 | #define MAX_PEBS_EVENTS 4 | ||
36 | |||
37 | /* The size of a BTS record in bytes: */ | ||
38 | #define BTS_RECORD_SIZE 24 | ||
39 | |||
40 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
41 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | ||
42 | |||
43 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
44 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | ||
45 | |||
46 | |||
47 | /* | ||
48 | * Bits in the debugctlmsr controlling branch tracing. | ||
49 | */ | ||
50 | #define X86_DEBUGCTL_TR (1 << 6) | ||
51 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
52 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
53 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
54 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
55 | |||
56 | /* | ||
57 | * A debug store configuration. | ||
58 | * | ||
59 | * We only support architectures that use 64bit fields. | ||
60 | */ | ||
61 | struct debug_store { | ||
62 | u64 bts_buffer_base; | ||
63 | u64 bts_index; | ||
64 | u64 bts_absolute_maximum; | ||
65 | u64 bts_interrupt_threshold; | ||
66 | u64 pebs_buffer_base; | ||
67 | u64 pebs_index; | ||
68 | u64 pebs_absolute_maximum; | ||
69 | u64 pebs_interrupt_threshold; | ||
70 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
71 | }; | ||
72 | |||
73 | struct event_constraint { | 34 | struct event_constraint { |
74 | union { | 35 | union { |
75 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 36 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
@@ -88,17 +49,29 @@ struct amd_nb { | |||
88 | }; | 49 | }; |
89 | 50 | ||
90 | struct cpu_hw_events { | 51 | struct cpu_hw_events { |
52 | /* | ||
53 | * Generic x86 PMC bits | ||
54 | */ | ||
91 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 55 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
92 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 56 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
93 | unsigned long interrupts; | 57 | unsigned long interrupts; |
94 | int enabled; | 58 | int enabled; |
95 | struct debug_store *ds; | ||
96 | 59 | ||
97 | int n_events; | 60 | int n_events; |
98 | int n_added; | 61 | int n_added; |
99 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 62 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
100 | u64 tags[X86_PMC_IDX_MAX]; | 63 | u64 tags[X86_PMC_IDX_MAX]; |
101 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 64 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
65 | |||
66 | /* | ||
67 | * Intel DebugStore bits | ||
68 | */ | ||
69 | struct debug_store *ds; | ||
70 | u64 pebs_enabled; | ||
71 | |||
72 | /* | ||
73 | * AMD specific bits | ||
74 | */ | ||
102 | struct amd_nb *amd_nb; | 75 | struct amd_nb *amd_nb; |
103 | }; | 76 | }; |
104 | 77 | ||
@@ -112,12 +85,24 @@ struct cpu_hw_events { | |||
112 | #define EVENT_CONSTRAINT(c, n, m) \ | 85 | #define EVENT_CONSTRAINT(c, n, m) \ |
113 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 86 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
114 | 87 | ||
88 | /* | ||
89 | * Constraint on the Event code. | ||
90 | */ | ||
115 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 91 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
116 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 92 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) |
117 | 93 | ||
94 | /* | ||
95 | * Constraint on the Event code + UMask + fixed-mask | ||
96 | */ | ||
118 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 97 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
119 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 98 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) |
120 | 99 | ||
100 | /* | ||
101 | * Constraint on the Event code + UMask | ||
102 | */ | ||
103 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
104 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
105 | |||
121 | #define EVENT_CONSTRAINT_END \ | 106 | #define EVENT_CONSTRAINT_END \ |
122 | EVENT_CONSTRAINT(0, 0, 0) | 107 | EVENT_CONSTRAINT(0, 0, 0) |
123 | 108 | ||
@@ -128,6 +113,9 @@ struct cpu_hw_events { | |||
128 | * struct x86_pmu - generic x86 pmu | 113 | * struct x86_pmu - generic x86 pmu |
129 | */ | 114 | */ |
130 | struct x86_pmu { | 115 | struct x86_pmu { |
116 | /* | ||
117 | * Generic x86 PMC bits | ||
118 | */ | ||
131 | const char *name; | 119 | const char *name; |
132 | int version; | 120 | int version; |
133 | int (*handle_irq)(struct pt_regs *); | 121 | int (*handle_irq)(struct pt_regs *); |
@@ -146,10 +134,6 @@ struct x86_pmu { | |||
146 | u64 event_mask; | 134 | u64 event_mask; |
147 | int apic; | 135 | int apic; |
148 | u64 max_period; | 136 | u64 max_period; |
149 | u64 intel_ctrl; | ||
150 | void (*enable_bts)(u64 config); | ||
151 | void (*disable_bts)(void); | ||
152 | |||
153 | struct event_constraint * | 137 | struct event_constraint * |
154 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 138 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
155 | struct perf_event *event); | 139 | struct perf_event *event); |
@@ -162,6 +146,19 @@ struct x86_pmu { | |||
162 | void (*cpu_starting)(int cpu); | 146 | void (*cpu_starting)(int cpu); |
163 | void (*cpu_dying)(int cpu); | 147 | void (*cpu_dying)(int cpu); |
164 | void (*cpu_dead)(int cpu); | 148 | void (*cpu_dead)(int cpu); |
149 | |||
150 | /* | ||
151 | * Intel Arch Perfmon v2+ | ||
152 | */ | ||
153 | u64 intel_ctrl; | ||
154 | |||
155 | /* | ||
156 | * Intel DebugStore bits | ||
157 | */ | ||
158 | int bts, pebs; | ||
159 | int pebs_record_size; | ||
160 | void (*drain_pebs)(struct pt_regs *regs); | ||
161 | struct event_constraint *pebs_constraints; | ||
165 | }; | 162 | }; |
166 | 163 | ||
167 | static struct x86_pmu x86_pmu __read_mostly; | 164 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -293,110 +290,14 @@ static void release_pmc_hardware(void) | |||
293 | #endif | 290 | #endif |
294 | } | 291 | } |
295 | 292 | ||
296 | static inline bool bts_available(void) | 293 | static int reserve_ds_buffers(void); |
297 | { | 294 | static void release_ds_buffers(void); |
298 | return x86_pmu.enable_bts != NULL; | ||
299 | } | ||
300 | |||
301 | static void init_debug_store_on_cpu(int cpu) | ||
302 | { | ||
303 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
304 | |||
305 | if (!ds) | ||
306 | return; | ||
307 | |||
308 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
309 | (u32)((u64)(unsigned long)ds), | ||
310 | (u32)((u64)(unsigned long)ds >> 32)); | ||
311 | } | ||
312 | |||
313 | static void fini_debug_store_on_cpu(int cpu) | ||
314 | { | ||
315 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
316 | return; | ||
317 | |||
318 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
319 | } | ||
320 | |||
321 | static void release_bts_hardware(void) | ||
322 | { | ||
323 | int cpu; | ||
324 | |||
325 | if (!bts_available()) | ||
326 | return; | ||
327 | |||
328 | get_online_cpus(); | ||
329 | |||
330 | for_each_online_cpu(cpu) | ||
331 | fini_debug_store_on_cpu(cpu); | ||
332 | |||
333 | for_each_possible_cpu(cpu) { | ||
334 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
335 | |||
336 | if (!ds) | ||
337 | continue; | ||
338 | |||
339 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
340 | |||
341 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
342 | kfree(ds); | ||
343 | } | ||
344 | |||
345 | put_online_cpus(); | ||
346 | } | ||
347 | |||
348 | static int reserve_bts_hardware(void) | ||
349 | { | ||
350 | int cpu, err = 0; | ||
351 | |||
352 | if (!bts_available()) | ||
353 | return 0; | ||
354 | |||
355 | get_online_cpus(); | ||
356 | |||
357 | for_each_possible_cpu(cpu) { | ||
358 | struct debug_store *ds; | ||
359 | void *buffer; | ||
360 | |||
361 | err = -ENOMEM; | ||
362 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
363 | if (unlikely(!buffer)) | ||
364 | break; | ||
365 | |||
366 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
367 | if (unlikely(!ds)) { | ||
368 | kfree(buffer); | ||
369 | break; | ||
370 | } | ||
371 | |||
372 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
373 | ds->bts_index = ds->bts_buffer_base; | ||
374 | ds->bts_absolute_maximum = | ||
375 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
376 | ds->bts_interrupt_threshold = | ||
377 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
378 | |||
379 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
380 | err = 0; | ||
381 | } | ||
382 | |||
383 | if (err) | ||
384 | release_bts_hardware(); | ||
385 | else { | ||
386 | for_each_online_cpu(cpu) | ||
387 | init_debug_store_on_cpu(cpu); | ||
388 | } | ||
389 | |||
390 | put_online_cpus(); | ||
391 | |||
392 | return err; | ||
393 | } | ||
394 | 295 | ||
395 | static void hw_perf_event_destroy(struct perf_event *event) | 296 | static void hw_perf_event_destroy(struct perf_event *event) |
396 | { | 297 | { |
397 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 298 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
398 | release_pmc_hardware(); | 299 | release_pmc_hardware(); |
399 | release_bts_hardware(); | 300 | release_ds_buffers(); |
400 | mutex_unlock(&pmc_reserve_mutex); | 301 | mutex_unlock(&pmc_reserve_mutex); |
401 | } | 302 | } |
402 | } | 303 | } |
@@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
459 | if (!reserve_pmc_hardware()) | 360 | if (!reserve_pmc_hardware()) |
460 | err = -EBUSY; | 361 | err = -EBUSY; |
461 | else | 362 | else |
462 | err = reserve_bts_hardware(); | 363 | err = reserve_ds_buffers(); |
463 | } | 364 | } |
464 | if (!err) | 365 | if (!err) |
465 | atomic_inc(&active_events); | 366 | atomic_inc(&active_events); |
@@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
537 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 438 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
538 | (hwc->sample_period == 1)) { | 439 | (hwc->sample_period == 1)) { |
539 | /* BTS is not supported by this architecture. */ | 440 | /* BTS is not supported by this architecture. */ |
540 | if (!bts_available()) | 441 | if (!x86_pmu.bts) |
541 | return -EOPNOTSUPP; | 442 | return -EOPNOTSUPP; |
542 | 443 | ||
543 | /* BTS is currently only allowed for user-mode. */ | 444 | /* BTS is currently only allowed for user-mode. */ |
@@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
995 | void perf_event_print_debug(void) | 896 | void perf_event_print_debug(void) |
996 | { | 897 | { |
997 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 898 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
899 | u64 pebs; | ||
998 | struct cpu_hw_events *cpuc; | 900 | struct cpu_hw_events *cpuc; |
999 | unsigned long flags; | 901 | unsigned long flags; |
1000 | int cpu, idx; | 902 | int cpu, idx; |
@@ -1012,12 +914,14 @@ void perf_event_print_debug(void) | |||
1012 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 914 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
1013 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 915 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
1014 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 916 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
917 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
1015 | 918 | ||
1016 | pr_info("\n"); | 919 | pr_info("\n"); |
1017 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 920 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
1018 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 921 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
1019 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 922 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
1020 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 923 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
924 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
1021 | } | 925 | } |
1022 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 926 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
1023 | 927 | ||
@@ -1333,6 +1237,7 @@ undo: | |||
1333 | 1237 | ||
1334 | #include "perf_event_amd.c" | 1238 | #include "perf_event_amd.c" |
1335 | #include "perf_event_p6.c" | 1239 | #include "perf_event_p6.c" |
1240 | #include "perf_event_intel_ds.c" | ||
1336 | #include "perf_event_intel.c" | 1241 | #include "perf_event_intel.c" |
1337 | 1242 | ||
1338 | static int __cpuinit | 1243 | static int __cpuinit |
@@ -1465,6 +1370,32 @@ static const struct pmu pmu = { | |||
1465 | }; | 1370 | }; |
1466 | 1371 | ||
1467 | /* | 1372 | /* |
1373 | * validate that we can schedule this event | ||
1374 | */ | ||
1375 | static int validate_event(struct perf_event *event) | ||
1376 | { | ||
1377 | struct cpu_hw_events *fake_cpuc; | ||
1378 | struct event_constraint *c; | ||
1379 | int ret = 0; | ||
1380 | |||
1381 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
1382 | if (!fake_cpuc) | ||
1383 | return -ENOMEM; | ||
1384 | |||
1385 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
1386 | |||
1387 | if (!c || !c->weight) | ||
1388 | ret = -ENOSPC; | ||
1389 | |||
1390 | if (x86_pmu.put_event_constraints) | ||
1391 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
1392 | |||
1393 | kfree(fake_cpuc); | ||
1394 | |||
1395 | return ret; | ||
1396 | } | ||
1397 | |||
1398 | /* | ||
1468 | * validate a single event group | 1399 | * validate a single event group |
1469 | * | 1400 | * |
1470 | * validation include: | 1401 | * validation include: |
@@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
1529 | 1460 | ||
1530 | if (event->group_leader != event) | 1461 | if (event->group_leader != event) |
1531 | err = validate_group(event); | 1462 | err = validate_group(event); |
1463 | else | ||
1464 | err = validate_event(event); | ||
1532 | 1465 | ||
1533 | event->pmu = tmp; | 1466 | event->pmu = tmp; |
1534 | } | 1467 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 84bfde64a337..11446412e4c7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -470,42 +470,6 @@ static u64 intel_pmu_raw_event(u64 hw_event) | |||
470 | return hw_event & CORE_EVNTSEL_MASK; | 470 | return hw_event & CORE_EVNTSEL_MASK; |
471 | } | 471 | } |
472 | 472 | ||
473 | static void intel_pmu_enable_bts(u64 config) | ||
474 | { | ||
475 | unsigned long debugctlmsr; | ||
476 | |||
477 | debugctlmsr = get_debugctlmsr(); | ||
478 | |||
479 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
480 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
481 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
482 | |||
483 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
484 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
485 | |||
486 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
487 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
488 | |||
489 | update_debugctlmsr(debugctlmsr); | ||
490 | } | ||
491 | |||
492 | static void intel_pmu_disable_bts(void) | ||
493 | { | ||
494 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
495 | unsigned long debugctlmsr; | ||
496 | |||
497 | if (!cpuc->ds) | ||
498 | return; | ||
499 | |||
500 | debugctlmsr = get_debugctlmsr(); | ||
501 | |||
502 | debugctlmsr &= | ||
503 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
504 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
505 | |||
506 | update_debugctlmsr(debugctlmsr); | ||
507 | } | ||
508 | |||
509 | static void intel_pmu_disable_all(void) | 473 | static void intel_pmu_disable_all(void) |
510 | { | 474 | { |
511 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | 475 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); |
@@ -514,6 +478,8 @@ static void intel_pmu_disable_all(void) | |||
514 | 478 | ||
515 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | 479 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) |
516 | intel_pmu_disable_bts(); | 480 | intel_pmu_disable_bts(); |
481 | |||
482 | intel_pmu_pebs_disable_all(); | ||
517 | } | 483 | } |
518 | 484 | ||
519 | static void intel_pmu_enable_all(void) | 485 | static void intel_pmu_enable_all(void) |
@@ -531,6 +497,8 @@ static void intel_pmu_enable_all(void) | |||
531 | 497 | ||
532 | intel_pmu_enable_bts(event->hw.config); | 498 | intel_pmu_enable_bts(event->hw.config); |
533 | } | 499 | } |
500 | |||
501 | intel_pmu_pebs_enable_all(); | ||
534 | } | 502 | } |
535 | 503 | ||
536 | static inline u64 intel_pmu_get_status(void) | 504 | static inline u64 intel_pmu_get_status(void) |
@@ -547,8 +515,7 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
547 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | 515 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); |
548 | } | 516 | } |
549 | 517 | ||
550 | static inline void | 518 | static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) |
551 | intel_pmu_disable_fixed(struct hw_perf_event *hwc) | ||
552 | { | 519 | { |
553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 520 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
554 | u64 ctrl_val, mask; | 521 | u64 ctrl_val, mask; |
@@ -560,68 +527,7 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) | |||
560 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); | 527 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); |
561 | } | 528 | } |
562 | 529 | ||
563 | static void intel_pmu_drain_bts_buffer(void) | 530 | static void intel_pmu_disable_event(struct perf_event *event) |
564 | { | ||
565 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
566 | struct debug_store *ds = cpuc->ds; | ||
567 | struct bts_record { | ||
568 | u64 from; | ||
569 | u64 to; | ||
570 | u64 flags; | ||
571 | }; | ||
572 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
573 | struct bts_record *at, *top; | ||
574 | struct perf_output_handle handle; | ||
575 | struct perf_event_header header; | ||
576 | struct perf_sample_data data; | ||
577 | struct pt_regs regs; | ||
578 | |||
579 | if (!event) | ||
580 | return; | ||
581 | |||
582 | if (!ds) | ||
583 | return; | ||
584 | |||
585 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
586 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
587 | |||
588 | if (top <= at) | ||
589 | return; | ||
590 | |||
591 | ds->bts_index = ds->bts_buffer_base; | ||
592 | |||
593 | perf_sample_data_init(&data, 0); | ||
594 | |||
595 | data.period = event->hw.last_period; | ||
596 | regs.ip = 0; | ||
597 | |||
598 | /* | ||
599 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
600 | * We will overwrite the from and to address before we output | ||
601 | * the sample. | ||
602 | */ | ||
603 | perf_prepare_sample(&header, &data, event, ®s); | ||
604 | |||
605 | if (perf_output_begin(&handle, event, | ||
606 | header.size * (top - at), 1, 1)) | ||
607 | return; | ||
608 | |||
609 | for (; at < top; at++) { | ||
610 | data.ip = at->from; | ||
611 | data.addr = at->to; | ||
612 | |||
613 | perf_output_sample(&handle, &header, &data, event); | ||
614 | } | ||
615 | |||
616 | perf_output_end(&handle); | ||
617 | |||
618 | /* There's new data available. */ | ||
619 | event->hw.interrupts++; | ||
620 | event->pending_kill = POLL_IN; | ||
621 | } | ||
622 | |||
623 | static inline void | ||
624 | intel_pmu_disable_event(struct perf_event *event) | ||
625 | { | 531 | { |
626 | struct hw_perf_event *hwc = &event->hw; | 532 | struct hw_perf_event *hwc = &event->hw; |
627 | 533 | ||
@@ -637,10 +543,12 @@ intel_pmu_disable_event(struct perf_event *event) | |||
637 | } | 543 | } |
638 | 544 | ||
639 | x86_pmu_disable_event(event); | 545 | x86_pmu_disable_event(event); |
546 | |||
547 | if (unlikely(event->attr.precise)) | ||
548 | intel_pmu_pebs_disable(hwc); | ||
640 | } | 549 | } |
641 | 550 | ||
642 | static inline void | 551 | static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) |
643 | intel_pmu_enable_fixed(struct hw_perf_event *hwc) | ||
644 | { | 552 | { |
645 | int idx = hwc->idx - X86_PMC_IDX_FIXED; | 553 | int idx = hwc->idx - X86_PMC_IDX_FIXED; |
646 | u64 ctrl_val, bits, mask; | 554 | u64 ctrl_val, bits, mask; |
@@ -689,6 +597,9 @@ static void intel_pmu_enable_event(struct perf_event *event) | |||
689 | return; | 597 | return; |
690 | } | 598 | } |
691 | 599 | ||
600 | if (unlikely(event->attr.precise)) | ||
601 | intel_pmu_pebs_enable(hwc); | ||
602 | |||
692 | __x86_pmu_enable_event(hwc); | 603 | __x86_pmu_enable_event(hwc); |
693 | } | 604 | } |
694 | 605 | ||
@@ -762,6 +673,13 @@ again: | |||
762 | 673 | ||
763 | inc_irq_stat(apic_perf_irqs); | 674 | inc_irq_stat(apic_perf_irqs); |
764 | ack = status; | 675 | ack = status; |
676 | |||
677 | /* | ||
678 | * PEBS overflow sets bit 62 in the global status register | ||
679 | */ | ||
680 | if (__test_and_clear_bit(62, (unsigned long *)&status)) | ||
681 | x86_pmu.drain_pebs(regs); | ||
682 | |||
765 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | 683 | for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { |
766 | struct perf_event *event = cpuc->events[bit]; | 684 | struct perf_event *event = cpuc->events[bit]; |
767 | 685 | ||
@@ -791,22 +709,18 @@ done: | |||
791 | return 1; | 709 | return 1; |
792 | } | 710 | } |
793 | 711 | ||
794 | static struct event_constraint bts_constraint = | ||
795 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
796 | |||
797 | static struct event_constraint * | 712 | static struct event_constraint * |
798 | intel_special_constraints(struct perf_event *event) | 713 | intel_bts_constraints(struct perf_event *event) |
799 | { | 714 | { |
800 | unsigned int hw_event; | 715 | struct hw_perf_event *hwc = &event->hw; |
801 | 716 | unsigned int hw_event, bts_event; | |
802 | hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; | ||
803 | 717 | ||
804 | if (unlikely((hw_event == | 718 | hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; |
805 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | 719 | bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); |
806 | (event->hw.sample_period == 1))) { | ||
807 | 720 | ||
721 | if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) | ||
808 | return &bts_constraint; | 722 | return &bts_constraint; |
809 | } | 723 | |
810 | return NULL; | 724 | return NULL; |
811 | } | 725 | } |
812 | 726 | ||
@@ -815,7 +729,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event | |||
815 | { | 729 | { |
816 | struct event_constraint *c; | 730 | struct event_constraint *c; |
817 | 731 | ||
818 | c = intel_special_constraints(event); | 732 | c = intel_bts_constraints(event); |
733 | if (c) | ||
734 | return c; | ||
735 | |||
736 | c = intel_pebs_constraints(event); | ||
819 | if (c) | 737 | if (c) |
820 | return c; | 738 | return c; |
821 | 739 | ||
@@ -864,8 +782,6 @@ static __initconst struct x86_pmu intel_pmu = { | |||
864 | * the generic event period: | 782 | * the generic event period: |
865 | */ | 783 | */ |
866 | .max_period = (1ULL << 31) - 1, | 784 | .max_period = (1ULL << 31) - 1, |
867 | .enable_bts = intel_pmu_enable_bts, | ||
868 | .disable_bts = intel_pmu_disable_bts, | ||
869 | .get_event_constraints = intel_get_event_constraints, | 785 | .get_event_constraints = intel_get_event_constraints, |
870 | 786 | ||
871 | .cpu_starting = init_debug_store_on_cpu, | 787 | .cpu_starting = init_debug_store_on_cpu, |
@@ -915,6 +831,8 @@ static __init int intel_pmu_init(void) | |||
915 | if (version > 1) | 831 | if (version > 1) |
916 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); | 832 | x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); |
917 | 833 | ||
834 | intel_ds_init(); | ||
835 | |||
918 | /* | 836 | /* |
919 | * Install the hw-cache-events table: | 837 | * Install the hw-cache-events table: |
920 | */ | 838 | */ |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
new file mode 100644
index 000000000000..0d994ef213b9
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -0,0 +1,557 @@ | |||
1 | #ifdef CONFIG_CPU_SUP_INTEL | ||
2 | |||
3 | /* The maximal number of PEBS events: */ | ||
4 | #define MAX_PEBS_EVENTS 4 | ||
5 | |||
6 | /* The size of a BTS record in bytes: */ | ||
7 | #define BTS_RECORD_SIZE 24 | ||
8 | |||
9 | #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) | ||
10 | #define PEBS_BUFFER_SIZE PAGE_SIZE | ||
11 | |||
12 | /* | ||
13 | * pebs_record_32 for p4 and core not supported | ||
14 | |||
15 | struct pebs_record_32 { | ||
16 | u32 flags, ip; | ||
17 | u32 ax, bx, cx, dx; | ||
18 | u32 si, di, bp, sp; | ||
19 | }; | ||
20 | |||
21 | */ | ||
22 | |||
23 | struct pebs_record_core { | ||
24 | u64 flags, ip; | ||
25 | u64 ax, bx, cx, dx; | ||
26 | u64 si, di, bp, sp; | ||
27 | u64 r8, r9, r10, r11; | ||
28 | u64 r12, r13, r14, r15; | ||
29 | }; | ||
30 | |||
31 | struct pebs_record_nhm { | ||
32 | u64 flags, ip; | ||
33 | u64 ax, bx, cx, dx; | ||
34 | u64 si, di, bp, sp; | ||
35 | u64 r8, r9, r10, r11; | ||
36 | u64 r12, r13, r14, r15; | ||
37 | u64 status, dla, dse, lat; | ||
38 | }; | ||
39 | |||
40 | /* | ||
41 | * Bits in the debugctlmsr controlling branch tracing. | ||
42 | */ | ||
43 | #define X86_DEBUGCTL_TR (1 << 6) | ||
44 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
45 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
46 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
47 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
48 | |||
49 | /* | ||
50 | * A debug store configuration. | ||
51 | * | ||
52 | * We only support architectures that use 64bit fields. | ||
53 | */ | ||
54 | struct debug_store { | ||
55 | u64 bts_buffer_base; | ||
56 | u64 bts_index; | ||
57 | u64 bts_absolute_maximum; | ||
58 | u64 bts_interrupt_threshold; | ||
59 | u64 pebs_buffer_base; | ||
60 | u64 pebs_index; | ||
61 | u64 pebs_absolute_maximum; | ||
62 | u64 pebs_interrupt_threshold; | ||
63 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
64 | }; | ||
65 | |||
66 | static void init_debug_store_on_cpu(int cpu) | ||
67 | { | ||
68 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
69 | |||
70 | if (!ds) | ||
71 | return; | ||
72 | |||
73 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
74 | (u32)((u64)(unsigned long)ds), | ||
75 | (u32)((u64)(unsigned long)ds >> 32)); | ||
76 | } | ||
77 | |||
78 | static void fini_debug_store_on_cpu(int cpu) | ||
79 | { | ||
80 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
81 | return; | ||
82 | |||
83 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
84 | } | ||
85 | |||
86 | static void release_ds_buffers(void) | ||
87 | { | ||
88 | int cpu; | ||
89 | |||
90 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
91 | return; | ||
92 | |||
93 | get_online_cpus(); | ||
94 | |||
95 | for_each_online_cpu(cpu) | ||
96 | fini_debug_store_on_cpu(cpu); | ||
97 | |||
98 | for_each_possible_cpu(cpu) { | ||
99 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
100 | |||
101 | if (!ds) | ||
102 | continue; | ||
103 | |||
104 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
105 | |||
106 | kfree((void *)(unsigned long)ds->pebs_buffer_base); | ||
107 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
108 | kfree(ds); | ||
109 | } | ||
110 | |||
111 | put_online_cpus(); | ||
112 | } | ||
113 | |||
114 | static int reserve_ds_buffers(void) | ||
115 | { | ||
116 | int cpu, err = 0; | ||
117 | |||
118 | if (!x86_pmu.bts && !x86_pmu.pebs) | ||
119 | return 0; | ||
120 | |||
121 | get_online_cpus(); | ||
122 | |||
123 | for_each_possible_cpu(cpu) { | ||
124 | struct debug_store *ds; | ||
125 | void *buffer; | ||
126 | int max, thresh; | ||
127 | |||
128 | err = -ENOMEM; | ||
129 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
130 | if (unlikely(!ds)) | ||
131 | break; | ||
134 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
135 | |||
136 | if (x86_pmu.bts) { | ||
137 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
138 | if (unlikely(!buffer)) | ||
139 | break; | ||
140 | |||
141 | max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; | ||
142 | thresh = max / 16; | ||
143 | |||
144 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
145 | ds->bts_index = ds->bts_buffer_base; | ||
146 | ds->bts_absolute_maximum = ds->bts_buffer_base + | ||
147 | max * BTS_RECORD_SIZE; | ||
148 | ds->bts_interrupt_threshold = ds->bts_absolute_maximum - | ||
149 | thresh * BTS_RECORD_SIZE; | ||
150 | } | ||
151 | |||
152 | if (x86_pmu.pebs) { | ||
153 | buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); | ||
154 | if (unlikely(!buffer)) | ||
155 | break; | ||
156 | |||
157 | max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; | ||
158 | |||
159 | ds->pebs_buffer_base = (u64)(unsigned long)buffer; | ||
160 | ds->pebs_index = ds->pebs_buffer_base; | ||
161 | ds->pebs_absolute_maximum = ds->pebs_buffer_base + | ||
162 | max * x86_pmu.pebs_record_size; | ||
163 | /* | ||
164 | * Always use single record PEBS | ||
165 | */ | ||
166 | ds->pebs_interrupt_threshold = ds->pebs_buffer_base + | ||
167 | x86_pmu.pebs_record_size; | ||
168 | } | ||
169 | |||
170 | err = 0; | ||
171 | } | ||
172 | |||
173 | if (err) | ||
174 | release_ds_buffers(); | ||
175 | else { | ||
176 | for_each_online_cpu(cpu) | ||
177 | init_debug_store_on_cpu(cpu); | ||
178 | } | ||
179 | |||
180 | put_online_cpus(); | ||
181 | |||
182 | return err; | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * BTS | ||
187 | */ | ||
188 | |||
189 | static struct event_constraint bts_constraint = | ||
190 | EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); | ||
191 | |||
192 | static void intel_pmu_enable_bts(u64 config) | ||
193 | { | ||
194 | unsigned long debugctlmsr; | ||
195 | |||
196 | debugctlmsr = get_debugctlmsr(); | ||
197 | |||
198 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
199 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
200 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
201 | |||
202 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
203 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
204 | |||
205 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
206 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
207 | |||
208 | update_debugctlmsr(debugctlmsr); | ||
209 | } | ||
210 | |||
211 | static void intel_pmu_disable_bts(void) | ||
212 | { | ||
213 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
214 | unsigned long debugctlmsr; | ||
215 | |||
216 | if (!cpuc->ds) | ||
217 | return; | ||
218 | |||
219 | debugctlmsr = get_debugctlmsr(); | ||
220 | |||
221 | debugctlmsr &= | ||
222 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
223 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
224 | |||
225 | update_debugctlmsr(debugctlmsr); | ||
226 | } | ||
227 | |||
228 | static void intel_pmu_drain_bts_buffer(void) | ||
229 | { | ||
230 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
231 | struct debug_store *ds = cpuc->ds; | ||
232 | struct bts_record { | ||
233 | u64 from; | ||
234 | u64 to; | ||
235 | u64 flags; | ||
236 | }; | ||
237 | struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; | ||
238 | struct bts_record *at, *top; | ||
239 | struct perf_output_handle handle; | ||
240 | struct perf_event_header header; | ||
241 | struct perf_sample_data data; | ||
242 | struct pt_regs regs; | ||
243 | |||
244 | if (!event) | ||
245 | return; | ||
246 | |||
247 | if (!ds) | ||
248 | return; | ||
249 | |||
250 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
251 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
252 | |||
253 | if (top <= at) | ||
254 | return; | ||
255 | |||
256 | ds->bts_index = ds->bts_buffer_base; | ||
257 | |||
258 | perf_sample_data_init(&data, 0); | ||
259 | data.period = event->hw.last_period; | ||
260 | regs.ip = 0; | ||
261 | |||
262 | /* | ||
263 | * Prepare a generic sample, i.e. fill in the invariant fields. | ||
264 | * We will overwrite the from and to address before we output | ||
265 | * the sample. | ||
266 | */ | ||
267 | perf_prepare_sample(&header, &data, event, ®s); | ||
268 | |||
269 | if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) | ||
270 | return; | ||
271 | |||
272 | for (; at < top; at++) { | ||
273 | data.ip = at->from; | ||
274 | data.addr = at->to; | ||
275 | |||
276 | perf_output_sample(&handle, &header, &data, event); | ||
277 | } | ||
278 | |||
279 | perf_output_end(&handle); | ||
280 | |||
281 | /* There's new data available. */ | ||
282 | event->hw.interrupts++; | ||
283 | event->pending_kill = POLL_IN; | ||
284 | } | ||
285 | |||
286 | /* | ||
287 | * PEBS | ||
288 | */ | ||
289 | |||
290 | static struct event_constraint intel_core_pebs_events[] = { | ||
291 | PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ | ||
292 | PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ | ||
293 | PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ | ||
294 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */ | ||
295 | PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
296 | PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
297 | PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
298 | PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
299 | PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
300 | EVENT_CONSTRAINT_END | ||
301 | }; | ||
302 | |||
303 | static struct event_constraint intel_nehalem_pebs_events[] = { | ||
304 | PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ | ||
305 | PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ | ||
306 | PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ | ||
307 | PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */ | ||
308 | PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ | ||
309 | PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ | ||
310 | PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ | ||
311 | PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ | ||
312 | PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ | ||
313 | EVENT_CONSTRAINT_END | ||
314 | }; | ||
315 | |||
316 | static struct event_constraint * | ||
317 | intel_pebs_constraints(struct perf_event *event) | ||
318 | { | ||
319 | struct event_constraint *c; | ||
320 | |||
321 | if (!event->attr.precise) | ||
322 | return NULL; | ||
323 | |||
324 | if (x86_pmu.pebs_constraints) { | ||
325 | for_each_event_constraint(c, x86_pmu.pebs_constraints) { | ||
326 | if ((event->hw.config & c->cmask) == c->code) | ||
327 | return c; | ||
328 | } | ||
329 | } | ||
330 | |||
331 | return &emptyconstraint; | ||
332 | } | ||
333 | |||
334 | static void intel_pmu_pebs_enable(struct hw_perf_event *hwc) | ||
335 | { | ||
336 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
337 | u64 val = cpuc->pebs_enabled; | ||
338 | |||
339 | hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; | ||
340 | |||
341 | val |= 1ULL << hwc->idx; | ||
342 | wrmsrl(MSR_IA32_PEBS_ENABLE, val); | ||
343 | } | ||
344 | |||
345 | static void intel_pmu_pebs_disable(struct hw_perf_event *hwc) | ||
346 | { | ||
347 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
348 | u64 val = cpuc->pebs_enabled; | ||
349 | |||
350 | val &= ~(1ULL << hwc->idx); | ||
351 | wrmsrl(MSR_IA32_PEBS_ENABLE, val); | ||
352 | |||
353 | hwc->config |= ARCH_PERFMON_EVENTSEL_INT; | ||
354 | } | ||
355 | |||
356 | static void intel_pmu_pebs_enable_all(void) | ||
357 | { | ||
358 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
359 | |||
360 | if (cpuc->pebs_enabled) | ||
361 | wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); | ||
362 | } | ||
363 | |||
364 | static void intel_pmu_pebs_disable_all(void) | ||
365 | { | ||
366 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
367 | |||
368 | if (cpuc->pebs_enabled) | ||
369 | wrmsrl(MSR_IA32_PEBS_ENABLE, 0); | ||
370 | } | ||
371 | |||
372 | static int intel_pmu_save_and_restart(struct perf_event *event); | ||
373 | static void intel_pmu_disable_event(struct perf_event *event); | ||
374 | |||
375 | static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) | ||
376 | { | ||
377 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
378 | struct debug_store *ds = cpuc->ds; | ||
379 | struct perf_event *event = cpuc->events[0]; /* PMC0 only */ | ||
380 | struct pebs_record_core *at, *top; | ||
381 | struct perf_sample_data data; | ||
382 | struct pt_regs regs; | ||
383 | int n; | ||
384 | |||
385 | if (!event || !ds || !x86_pmu.pebs) | ||
386 | return; | ||
387 | |||
388 | intel_pmu_pebs_disable_all(); | ||
389 | |||
390 | at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; | ||
391 | top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; | ||
392 | |||
393 | if (top <= at) | ||
394 | goto out; | ||
395 | |||
396 | ds->pebs_index = ds->pebs_buffer_base; | ||
397 | |||
398 | if (!intel_pmu_save_and_restart(event)) | ||
399 | goto out; | ||
400 | |||
401 | perf_sample_data_init(&data, 0); | ||
402 | data.period = event->hw.last_period; | ||
403 | |||
404 | n = top - at; | ||
405 | |||
406 | /* | ||
407 | * Should not happen, we program the threshold at 1 and do not | ||
408 | * set a reset value. | ||
409 | */ | ||
410 | WARN_ON_ONCE(n > 1); | ||
411 | |||
412 | /* | ||
413 | * We use the interrupt regs as a base because the PEBS record | ||
414 | * does not contain a full regs set, specifically it seems to | ||
415 | * lack segment descriptors, which get used by things like | ||
416 | * user_mode(). | ||
417 | * | ||
418 | * In the simple case fix up only the IP and BP,SP regs, for | ||
419 | * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. | ||
420 | * A possible PERF_SAMPLE_REGS will have to transfer all regs. | ||
421 | */ | ||
422 | regs = *iregs; | ||
423 | regs.ip = at->ip; | ||
424 | regs.bp = at->bp; | ||
425 | regs.sp = at->sp; | ||
426 | |||
427 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
428 | intel_pmu_disable_event(event); | ||
429 | |||
430 | out: | ||
431 | intel_pmu_pebs_enable_all(); | ||
432 | } | ||
433 | |||
434 | static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) | ||
435 | { | ||
436 | struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); | ||
437 | struct debug_store *ds = cpuc->ds; | ||
438 | struct pebs_record_nhm *at, *top; | ||
439 | struct perf_sample_data data; | ||
440 | struct perf_event *event = NULL; | ||
441 | struct pt_regs regs; | ||
442 | int bit, n; | ||
443 | |||
444 | if (!ds || !x86_pmu.pebs) | ||
445 | return; | ||
446 | |||
447 | intel_pmu_pebs_disable_all(); | ||
448 | |||
449 | at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; | ||
450 | top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; | ||
451 | |||
452 | if (top <= at) | ||
453 | goto out; | ||
454 | |||
455 | ds->pebs_index = ds->pebs_buffer_base; | ||
456 | |||
457 | n = top - at; | ||
458 | |||
459 | /* | ||
460 | * Should not happen, we program the threshold at 1 and do not | ||
461 | * set a reset value. | ||
462 | */ | ||
463 | WARN_ON_ONCE(n > MAX_PEBS_EVENTS); | ||
464 | |||
465 | for ( ; at < top; at++) { | ||
466 | for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { | ||
467 | if (!cpuc->events[bit]->attr.precise) | ||
468 | continue; | ||
469 | |||
470 | event = cpuc->events[bit]; | ||
471 | } | ||
472 | |||
473 | if (!event) | ||
474 | continue; | ||
475 | |||
476 | if (!intel_pmu_save_and_restart(event)) | ||
477 | continue; | ||
478 | |||
479 | perf_sample_data_init(&data, 0); | ||
480 | data.period = event->hw.last_period; | ||
481 | |||
482 | /* | ||
483 | * See the comment in intel_pmu_drain_pebs_core() | ||
484 | */ | ||
485 | regs = *iregs; | ||
486 | regs.ip = at->ip; | ||
487 | regs.bp = at->bp; | ||
488 | regs.sp = at->sp; | ||
489 | |||
490 | if (perf_event_overflow(event, 1, &data, ®s)) | ||
491 | intel_pmu_disable_event(event); | ||
492 | } | ||
493 | out: | ||
494 | intel_pmu_pebs_enable_all(); | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * BTS, PEBS probe and setup | ||
499 | */ | ||
500 | |||
501 | static void intel_ds_init(void) | ||
502 | { | ||
503 | /* | ||
504 | * No support for 32bit formats | ||
505 | */ | ||
506 | if (!boot_cpu_has(X86_FEATURE_DTES64)) | ||
507 | return; | ||
508 | |||
509 | x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); | ||
510 | x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); | ||
511 | if (x86_pmu.pebs) { | ||
512 | int format = 0; | ||
513 | |||
514 | if (x86_pmu.version > 1) { | ||
515 | u64 capabilities; | ||
516 | /* | ||
517 | * v2+ has a PEBS format field | ||
518 | */ | ||
519 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | ||
520 | format = (capabilities >> 8) & 0xf; | ||
521 | } | ||
522 | |||
523 | switch (format) { | ||
524 | case 0: | ||
525 | printk(KERN_CONT "PEBS v0, "); | ||
526 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); | ||
527 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; | ||
528 | x86_pmu.pebs_constraints = intel_core_pebs_events; | ||
529 | break; | ||
530 | |||
531 | case 1: | ||
532 | printk(KERN_CONT "PEBS v1, "); | ||
533 | x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); | ||
534 | x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; | ||
535 | x86_pmu.pebs_constraints = intel_nehalem_pebs_events; | ||
536 | break; | ||
537 | |||
538 | default: | ||
539 | printk(KERN_CONT "PEBS unknown format: %d, ", format); | ||
540 | x86_pmu.pebs = 0; | ||
541 | break; | ||
542 | } | ||
543 | } | ||
544 | } | ||
545 | |||
546 | #else /* CONFIG_CPU_SUP_INTEL */ | ||
547 | |||
548 | static int reserve_ds_buffers(void) | ||
549 | { | ||
550 | return 0; | ||
551 | } | ||
552 | |||
553 | static void release_ds_buffers(void) | ||
554 | { | ||
555 | } | ||
556 | |||
557 | #endif /* CONFIG_CPU_SUP_INTEL */ | ||
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 80acbf3d5de1..42307b50c787 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -203,8 +203,9 @@ struct perf_event_attr { | |||
203 | enable_on_exec : 1, /* next exec enables */ | 203 | enable_on_exec : 1, /* next exec enables */ |
204 | task : 1, /* trace fork/exit */ | 204 | task : 1, /* trace fork/exit */ |
205 | watermark : 1, /* wakeup_watermark */ | 205 | watermark : 1, /* wakeup_watermark */ |
206 | precise : 1, /* OoO invariant counter */ | ||
206 | 207 | ||
207 | __reserved_1 : 49; | 208 | __reserved_1 : 48; |
208 | 209 | ||
209 | union { | 210 | union { |
210 | __u32 wakeup_events; /* wakeup every n events */ | 211 | __u32 wakeup_events; /* wakeup every n events */ |