author    Peter Zijlstra <a.p.zijlstra@chello.nl>    2010-03-02 13:52:12 -0500
committer Ingo Molnar <mingo@elte.hu>                2010-03-10 07:23:31 -0500
commit    ca037701a025334e724e5c61b3b1082940c8b981 (patch)
tree      12e3651ae6b35e9a5df4b49f9f571a01fc5a42a4 /arch/x86/kernel/cpu/perf_event.c
parent    d4944a06666054707d23e11888e480af239e5abf (diff)
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling, which is
an alternative counter mode in which the counter triggers a hardware assist to
collect information on events. The hardware assist takes a trap like snapshot
of a subset of the machine registers.

This data is written to the Intel Debug-Store, which can be programmed with a
data threshold at which to raise a PMI.

With the PEBS hardware assist being trap like, the reported IP is always one
instruction after the actual instruction that triggered the event.

This implements a simple PEBS model that always takes a single PEBS event at a
time. This is done so that the interaction with the rest of the system is as
expected (freq adjust, period randomization, lbr, callchains, etc.).

It adds an ABI element: perf_event_attr::precise, which indicates that we wish
to use this (constrained, but precise) mode.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
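As context for the new ABI bit, here is a minimal userspace sketch (not part of
this patch) of how a sampling tool might request the precise mode through
perf_event_open(). The cycles event and sample period are arbitrary choices;
the attribute bit added by this series is perf_event_attr::precise, which later
kernels expose as precise_ip (the spelling current headers compile against).

/*
 * Hypothetical usage sketch -- not part of this commit.  Opens a
 * hardware cycles counter on the calling thread and asks for the
 * precise (PEBS-backed) sampling mode added by this series.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

static long sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
				int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.size		= sizeof(attr);
	attr.type		= PERF_TYPE_HARDWARE;
	attr.config		= PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period	= 100000;		/* arbitrary period */
	attr.sample_type	= PERF_SAMPLE_IP;
	attr.precise_ip		= 1;			/* 'precise' in this patch's ABI */

	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);	/* self, any CPU */
	if (fd < 0) {
		perror("perf_event_open");		/* precise mode unavailable? */
		return 1;
	}

	/* ... mmap() the ring buffer and consume PERF_RECORD_SAMPLEs ... */

	close(fd);
	return 0;
}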
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 223
1 file changed, 78 insertions(+), 145 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d665a0b202c..0c03d5c1671f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,45 +31,6 @@
 
 static u64 perf_event_mask __read_mostly;
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		4
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
-
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
-
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
-
-
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 struct event_constraint {
 	union {
 		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -88,17 +49,29 @@ struct amd_nb {
 };
 
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -112,12 +85,24 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
 
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -128,6 +113,9 @@ struct cpu_hw_events {
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
@@ -146,10 +134,6 @@ struct x86_pmu {
 	u64		event_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
@@ -162,6 +146,19 @@ struct x86_pmu {
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64		intel_ctrl;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -293,110 +290,14 @@ static void release_pmc_hardware(void)
 #endif
 }
 
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
-}
-
-static void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_events, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
-}
-
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
-
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-		err = 0;
-	}
-
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
-
-	put_online_cpus();
-
-	return err;
-}
+static int reserve_ds_buffers(void);
+static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
-		release_bts_hardware();
+		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 			if (!reserve_pmc_hardware())
 				err = -EBUSY;
 			else
-				err = reserve_bts_hardware();
+				err = reserve_ds_buffers();
 		}
 		if (!err)
 			atomic_inc(&active_events);
@@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!bts_available())
+		if (!x86_pmu.bts)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
@@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	u64 pebs;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
@@ -1012,12 +914,14 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
@@ -1333,6 +1237,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
 static int __cpuinit
@@ -1465,6 +1370,32 @@ static const struct pmu pmu = {
 };
 
 /*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+	struct cpu_hw_events *fake_cpuc;
+	struct event_constraint *c;
+	int ret = 0;
+
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		return -ENOMEM;
+
+	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+
+	if (!c || !c->weight)
+		ret = -ENOSPC;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(fake_cpuc, event);
+
+	kfree(fake_cpuc);
+
+	return ret;
+}
+
+/*
  * validate a single event group
  *
  * validation include:
@@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 
 		if (event->group_leader != event)
 			err = validate_group(event);
+		else
+			err = validate_event(event);
 
 		event->pmu = tmp;
 	}