commit ca037701a025334e724e5c61b3b1082940c8b981
author    Peter Zijlstra <a.p.zijlstra@chello.nl>	2010-03-02 13:52:12 -0500
committer Ingo Molnar <mingo@elte.hu>	2010-03-10 07:23:31 -0500
tree      12e3651ae6b35e9a5df4b49f9f571a01fc5a42a4 /arch/x86/kernel/cpu/perf_event.c
parent    d4944a06666054707d23e11888e480af239e5abf
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling
(PEBS), an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap-like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store (DS) area, which can be
programmed with a data threshold at which to raise a PMI.
Because the PEBS hardware assist is trap-like, the reported IP is always
one instruction after the instruction that actually triggered the event.
This implements a simple PEBS model that always takes a single PEBS
event at a time. This is done so that the interaction with the rest of
the system behaves as expected (frequency adjustment, period
randomization, LBR, callchains, etc.); the buffer-threshold arithmetic
behind this single-record model is sketched below.
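As an illustration of the single-record model, here is a hedged sketch of
how the DS area's PEBS fields relate. This is not code from this patch:
the real setup lives in the companion perf_event_intel_ds.c added by this
series, and the helper name setup_pebs_buffer() is hypothetical. Field
names follow struct debug_store (visible in the diff below).

```c
/*
 * Illustrative sketch only; the real code is in perf_event_intel_ds.c.
 * With the single-record PEBS model, the interrupt threshold sits one
 * record past the buffer base, so the hardware raises a PMI for every
 * PEBS record it writes.
 */
static void setup_pebs_buffer(struct debug_store *ds, void *buffer,
			      int record_size, int max_records)
{
	ds->pebs_buffer_base	     = (u64)(unsigned long)buffer;
	ds->pebs_index		     = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum    = ds->pebs_buffer_base +
				       max_records * record_size;
	/* Threshold after a single record: PMI per PEBS event. */
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base + record_size;
}
```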
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode; a minimal usage sketch
follows.
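For completeness, a minimal userspace sketch of the new ABI bit
(illustrative, not part of this patch). Note that later kernels widened
the single precise bit introduced here into the two-bit
perf_event_attr::precise_ip field used below; the intent is the same.

```c
/* Minimal sketch: request precise (PEBS-backed) sampling. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 100000;
	attr.precise_ip = 1;	/* the "constrained, but precise" mode */

	/* Measure the current task on any CPU. */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");	/* e.g. no PEBS support */
		return 1;
	}
	close(fd);
	return 0;
}
```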
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
 arch/x86/kernel/cpu/perf_event.c | 223 ++++++----------
 1 file changed, 78 insertions(+), 145 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d665a0b202c..0c03d5c1671f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,45 +31,6 @@
 
 static u64 perf_event_mask __read_mostly;
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS	4
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
-
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
-
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
-
-
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 struct event_constraint {
 	union {
 		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -88,17 +49,29 @@ struct amd_nb {
 };
 
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -112,12 +85,24 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
 
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -128,6 +113,9 @@ struct cpu_hw_events {
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
@@ -146,10 +134,6 @@ struct x86_pmu {
 	u64		event_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
@@ -162,6 +146,19 @@ struct x86_pmu {
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64		intel_ctrl;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -293,110 +290,14 @@ static void release_pmc_hardware(void)
 #endif
 }
 
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
-}
-
-static void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_events, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
-}
-
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
-
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-		err = 0;
-	}
-
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
-
-	put_online_cpus();
-
-	return err;
-}
+static int reserve_ds_buffers(void);
+static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
-		release_bts_hardware();
+		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 		if (!reserve_pmc_hardware())
 			err = -EBUSY;
 		else
-			err = reserve_bts_hardware();
+			err = reserve_ds_buffers();
 	}
 	if (!err)
 		atomic_inc(&active_events);
@@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!bts_available())
+		if (!x86_pmu.bts)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
@@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	u64 pebs;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
@@ -1012,12 +914,14 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
@@ -1333,6 +1237,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
 static int __cpuinit
@@ -1465,6 +1370,32 @@ static const struct pmu pmu = {
 };
 
 /*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+	struct cpu_hw_events *fake_cpuc;
+	struct event_constraint *c;
+	int ret = 0;
+
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		return -ENOMEM;
+
+	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+
+	if (!c || !c->weight)
+		ret = -ENOSPC;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(fake_cpuc, event);
+
+	kfree(fake_cpuc);
+
+	return ret;
+}
+
+/*
  * validate a single event group
  *
  * validation include:
@@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 
 		if (event->group_leader != event)
 			err = validate_group(event);
+		else
+			err = validate_event(event);
 
 		event->pmu = tmp;
 	}