diff options
| author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2010-03-02 13:52:12 -0500 |
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2010-03-10 07:23:31 -0500 |
| commit | ca037701a025334e724e5c61b3b1082940c8b981 (patch) | |
| tree | 12e3651ae6b35e9a5df4b49f9f571a01fc5a42a4 /arch/x86/kernel/cpu/perf_event.c | |
| parent | d4944a06666054707d23e11888e480af239e5abf (diff) | |
perf, x86: Add PEBS infrastructure
This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap-like snapshot of a subset of the machine registers.
This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.
With the PEBS hardware assist being trap-like, the reported IP is always
one instruction after the actual instruction that triggered the event.
This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).
It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 223 |
1 file changed, 78 insertions, 145 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1d665a0b202c..0c03d5c1671f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
| @@ -31,45 +31,6 @@ | |||
| 31 | 31 | ||
| 32 | static u64 perf_event_mask __read_mostly; | 32 | static u64 perf_event_mask __read_mostly; |
| 33 | 33 | ||
| 34 | /* The maximal number of PEBS events: */ | ||
| 35 | #define MAX_PEBS_EVENTS 4 | ||
| 36 | |||
| 37 | /* The size of a BTS record in bytes: */ | ||
| 38 | #define BTS_RECORD_SIZE 24 | ||
| 39 | |||
| 40 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
| 41 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) | ||
| 42 | |||
| 43 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
| 44 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) | ||
| 45 | |||
| 46 | |||
| 47 | /* | ||
| 48 | * Bits in the debugctlmsr controlling branch tracing. | ||
| 49 | */ | ||
| 50 | #define X86_DEBUGCTL_TR (1 << 6) | ||
| 51 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
| 52 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
| 53 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
| 54 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
| 55 | |||
| 56 | /* | ||
| 57 | * A debug store configuration. | ||
| 58 | * | ||
| 59 | * We only support architectures that use 64bit fields. | ||
| 60 | */ | ||
| 61 | struct debug_store { | ||
| 62 | u64 bts_buffer_base; | ||
| 63 | u64 bts_index; | ||
| 64 | u64 bts_absolute_maximum; | ||
| 65 | u64 bts_interrupt_threshold; | ||
| 66 | u64 pebs_buffer_base; | ||
| 67 | u64 pebs_index; | ||
| 68 | u64 pebs_absolute_maximum; | ||
| 69 | u64 pebs_interrupt_threshold; | ||
| 70 | u64 pebs_event_reset[MAX_PEBS_EVENTS]; | ||
| 71 | }; | ||
| 72 | |||
| 73 | struct event_constraint { | 34 | struct event_constraint { |
| 74 | union { | 35 | union { |
| 75 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 36 | unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| @@ -88,17 +49,29 @@ struct amd_nb { | |||
| 88 | }; | 49 | }; |
| 89 | 50 | ||
| 90 | struct cpu_hw_events { | 51 | struct cpu_hw_events { |
| 52 | /* | ||
| 53 | * Generic x86 PMC bits | ||
| 54 | */ | ||
| 91 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ | 55 | struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ |
| 92 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 56 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
| 93 | unsigned long interrupts; | 57 | unsigned long interrupts; |
| 94 | int enabled; | 58 | int enabled; |
| 95 | struct debug_store *ds; | ||
| 96 | 59 | ||
| 97 | int n_events; | 60 | int n_events; |
| 98 | int n_added; | 61 | int n_added; |
| 99 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 62 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
| 100 | u64 tags[X86_PMC_IDX_MAX]; | 63 | u64 tags[X86_PMC_IDX_MAX]; |
| 101 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 64 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
| 65 | |||
| 66 | /* | ||
| 67 | * Intel DebugStore bits | ||
| 68 | */ | ||
| 69 | struct debug_store *ds; | ||
| 70 | u64 pebs_enabled; | ||
| 71 | |||
| 72 | /* | ||
| 73 | * AMD specific bits | ||
| 74 | */ | ||
| 102 | struct amd_nb *amd_nb; | 75 | struct amd_nb *amd_nb; |
| 103 | }; | 76 | }; |
| 104 | 77 | ||
| @@ -112,12 +85,24 @@ struct cpu_hw_events { | |||
| 112 | #define EVENT_CONSTRAINT(c, n, m) \ | 85 | #define EVENT_CONSTRAINT(c, n, m) \ |
| 113 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) | 86 | __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) |
| 114 | 87 | ||
| 88 | /* | ||
| 89 | * Constraint on the Event code. | ||
| 90 | */ | ||
| 115 | #define INTEL_EVENT_CONSTRAINT(c, n) \ | 91 | #define INTEL_EVENT_CONSTRAINT(c, n) \ |
| 116 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) | 92 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) |
| 117 | 93 | ||
| 94 | /* | ||
| 95 | * Constraint on the Event code + UMask + fixed-mask | ||
| 96 | */ | ||
| 118 | #define FIXED_EVENT_CONSTRAINT(c, n) \ | 97 | #define FIXED_EVENT_CONSTRAINT(c, n) \ |
| 119 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) | 98 | EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) |
| 120 | 99 | ||
| 100 | /* | ||
| 101 | * Constraint on the Event code + UMask | ||
| 102 | */ | ||
| 103 | #define PEBS_EVENT_CONSTRAINT(c, n) \ | ||
| 104 | EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) | ||
| 105 | |||
| 121 | #define EVENT_CONSTRAINT_END \ | 106 | #define EVENT_CONSTRAINT_END \ |
| 122 | EVENT_CONSTRAINT(0, 0, 0) | 107 | EVENT_CONSTRAINT(0, 0, 0) |
| 123 | 108 | ||
| @@ -128,6 +113,9 @@ struct cpu_hw_events { | |||
| 128 | * struct x86_pmu - generic x86 pmu | 113 | * struct x86_pmu - generic x86 pmu |
| 129 | */ | 114 | */ |
| 130 | struct x86_pmu { | 115 | struct x86_pmu { |
| 116 | /* | ||
| 117 | * Generic x86 PMC bits | ||
| 118 | */ | ||
| 131 | const char *name; | 119 | const char *name; |
| 132 | int version; | 120 | int version; |
| 133 | int (*handle_irq)(struct pt_regs *); | 121 | int (*handle_irq)(struct pt_regs *); |
| @@ -146,10 +134,6 @@ struct x86_pmu { | |||
| 146 | u64 event_mask; | 134 | u64 event_mask; |
| 147 | int apic; | 135 | int apic; |
| 148 | u64 max_period; | 136 | u64 max_period; |
| 149 | u64 intel_ctrl; | ||
| 150 | void (*enable_bts)(u64 config); | ||
| 151 | void (*disable_bts)(void); | ||
| 152 | |||
| 153 | struct event_constraint * | 137 | struct event_constraint * |
| 154 | (*get_event_constraints)(struct cpu_hw_events *cpuc, | 138 | (*get_event_constraints)(struct cpu_hw_events *cpuc, |
| 155 | struct perf_event *event); | 139 | struct perf_event *event); |
| @@ -162,6 +146,19 @@ struct x86_pmu { | |||
| 162 | void (*cpu_starting)(int cpu); | 146 | void (*cpu_starting)(int cpu); |
| 163 | void (*cpu_dying)(int cpu); | 147 | void (*cpu_dying)(int cpu); |
| 164 | void (*cpu_dead)(int cpu); | 148 | void (*cpu_dead)(int cpu); |
| 149 | |||
| 150 | /* | ||
| 151 | * Intel Arch Perfmon v2+ | ||
| 152 | */ | ||
| 153 | u64 intel_ctrl; | ||
| 154 | |||
| 155 | /* | ||
| 156 | * Intel DebugStore bits | ||
| 157 | */ | ||
| 158 | int bts, pebs; | ||
| 159 | int pebs_record_size; | ||
| 160 | void (*drain_pebs)(struct pt_regs *regs); | ||
| 161 | struct event_constraint *pebs_constraints; | ||
| 165 | }; | 162 | }; |
| 166 | 163 | ||
| 167 | static struct x86_pmu x86_pmu __read_mostly; | 164 | static struct x86_pmu x86_pmu __read_mostly; |
| @@ -293,110 +290,14 @@ static void release_pmc_hardware(void) | |||
| 293 | #endif | 290 | #endif |
| 294 | } | 291 | } |
| 295 | 292 | ||
| 296 | static inline bool bts_available(void) | 293 | static int reserve_ds_buffers(void); |
| 297 | { | 294 | static void release_ds_buffers(void); |
| 298 | return x86_pmu.enable_bts != NULL; | ||
| 299 | } | ||
| 300 | |||
| 301 | static void init_debug_store_on_cpu(int cpu) | ||
| 302 | { | ||
| 303 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 304 | |||
| 305 | if (!ds) | ||
| 306 | return; | ||
| 307 | |||
| 308 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
| 309 | (u32)((u64)(unsigned long)ds), | ||
| 310 | (u32)((u64)(unsigned long)ds >> 32)); | ||
| 311 | } | ||
| 312 | |||
| 313 | static void fini_debug_store_on_cpu(int cpu) | ||
| 314 | { | ||
| 315 | if (!per_cpu(cpu_hw_events, cpu).ds) | ||
| 316 | return; | ||
| 317 | |||
| 318 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
| 319 | } | ||
| 320 | |||
| 321 | static void release_bts_hardware(void) | ||
| 322 | { | ||
| 323 | int cpu; | ||
| 324 | |||
| 325 | if (!bts_available()) | ||
| 326 | return; | ||
| 327 | |||
| 328 | get_online_cpus(); | ||
| 329 | |||
| 330 | for_each_online_cpu(cpu) | ||
| 331 | fini_debug_store_on_cpu(cpu); | ||
| 332 | |||
| 333 | for_each_possible_cpu(cpu) { | ||
| 334 | struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; | ||
| 335 | |||
| 336 | if (!ds) | ||
| 337 | continue; | ||
| 338 | |||
| 339 | per_cpu(cpu_hw_events, cpu).ds = NULL; | ||
| 340 | |||
| 341 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
| 342 | kfree(ds); | ||
| 343 | } | ||
| 344 | |||
| 345 | put_online_cpus(); | ||
| 346 | } | ||
| 347 | |||
| 348 | static int reserve_bts_hardware(void) | ||
| 349 | { | ||
| 350 | int cpu, err = 0; | ||
| 351 | |||
| 352 | if (!bts_available()) | ||
| 353 | return 0; | ||
| 354 | |||
| 355 | get_online_cpus(); | ||
| 356 | |||
| 357 | for_each_possible_cpu(cpu) { | ||
| 358 | struct debug_store *ds; | ||
| 359 | void *buffer; | ||
| 360 | |||
| 361 | err = -ENOMEM; | ||
| 362 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
| 363 | if (unlikely(!buffer)) | ||
| 364 | break; | ||
| 365 | |||
| 366 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
| 367 | if (unlikely(!ds)) { | ||
| 368 | kfree(buffer); | ||
| 369 | break; | ||
| 370 | } | ||
| 371 | |||
| 372 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
| 373 | ds->bts_index = ds->bts_buffer_base; | ||
| 374 | ds->bts_absolute_maximum = | ||
| 375 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
| 376 | ds->bts_interrupt_threshold = | ||
| 377 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
| 378 | |||
| 379 | per_cpu(cpu_hw_events, cpu).ds = ds; | ||
| 380 | err = 0; | ||
| 381 | } | ||
| 382 | |||
| 383 | if (err) | ||
| 384 | release_bts_hardware(); | ||
| 385 | else { | ||
| 386 | for_each_online_cpu(cpu) | ||
| 387 | init_debug_store_on_cpu(cpu); | ||
| 388 | } | ||
| 389 | |||
| 390 | put_online_cpus(); | ||
| 391 | |||
| 392 | return err; | ||
| 393 | } | ||
| 394 | 295 | ||
| 395 | static void hw_perf_event_destroy(struct perf_event *event) | 296 | static void hw_perf_event_destroy(struct perf_event *event) |
| 396 | { | 297 | { |
| 397 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { | 298 | if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { |
| 398 | release_pmc_hardware(); | 299 | release_pmc_hardware(); |
| 399 | release_bts_hardware(); | 300 | release_ds_buffers(); |
| 400 | mutex_unlock(&pmc_reserve_mutex); | 301 | mutex_unlock(&pmc_reserve_mutex); |
| 401 | } | 302 | } |
| 402 | } | 303 | } |
| @@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 459 | if (!reserve_pmc_hardware()) | 360 | if (!reserve_pmc_hardware()) |
| 460 | err = -EBUSY; | 361 | err = -EBUSY; |
| 461 | else | 362 | else |
| 462 | err = reserve_bts_hardware(); | 363 | err = reserve_ds_buffers(); |
| 463 | } | 364 | } |
| 464 | if (!err) | 365 | if (!err) |
| 465 | atomic_inc(&active_events); | 366 | atomic_inc(&active_events); |
| @@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event) | |||
| 537 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | 438 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && |
| 538 | (hwc->sample_period == 1)) { | 439 | (hwc->sample_period == 1)) { |
| 539 | /* BTS is not supported by this architecture. */ | 440 | /* BTS is not supported by this architecture. */ |
| 540 | if (!bts_available()) | 441 | if (!x86_pmu.bts) |
| 541 | return -EOPNOTSUPP; | 442 | return -EOPNOTSUPP; |
| 542 | 443 | ||
| 543 | /* BTS is currently only allowed for user-mode. */ | 444 | /* BTS is currently only allowed for user-mode. */ |
| @@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event) | |||
| 995 | void perf_event_print_debug(void) | 896 | void perf_event_print_debug(void) |
| 996 | { | 897 | { |
| 997 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | 898 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; |
| 899 | u64 pebs; | ||
| 998 | struct cpu_hw_events *cpuc; | 900 | struct cpu_hw_events *cpuc; |
| 999 | unsigned long flags; | 901 | unsigned long flags; |
| 1000 | int cpu, idx; | 902 | int cpu, idx; |
| @@ -1012,12 +914,14 @@ void perf_event_print_debug(void) | |||
| 1012 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | 914 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); |
| 1013 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | 915 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); |
| 1014 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | 916 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); |
| 917 | rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); | ||
| 1015 | 918 | ||
| 1016 | pr_info("\n"); | 919 | pr_info("\n"); |
| 1017 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | 920 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); |
| 1018 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | 921 | pr_info("CPU#%d: status: %016llx\n", cpu, status); |
| 1019 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | 922 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); |
| 1020 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | 923 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); |
| 924 | pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); | ||
| 1021 | } | 925 | } |
| 1022 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); | 926 | pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); |
| 1023 | 927 | ||
| @@ -1333,6 +1237,7 @@ undo: | |||
| 1333 | 1237 | ||
| 1334 | #include "perf_event_amd.c" | 1238 | #include "perf_event_amd.c" |
| 1335 | #include "perf_event_p6.c" | 1239 | #include "perf_event_p6.c" |
| 1240 | #include "perf_event_intel_ds.c" | ||
| 1336 | #include "perf_event_intel.c" | 1241 | #include "perf_event_intel.c" |
| 1337 | 1242 | ||
| 1338 | static int __cpuinit | 1243 | static int __cpuinit |
| @@ -1465,6 +1370,32 @@ static const struct pmu pmu = { | |||
| 1465 | }; | 1370 | }; |
| 1466 | 1371 | ||
| 1467 | /* | 1372 | /* |
| 1373 | * validate that we can schedule this event | ||
| 1374 | */ | ||
| 1375 | static int validate_event(struct perf_event *event) | ||
| 1376 | { | ||
| 1377 | struct cpu_hw_events *fake_cpuc; | ||
| 1378 | struct event_constraint *c; | ||
| 1379 | int ret = 0; | ||
| 1380 | |||
| 1381 | fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); | ||
| 1382 | if (!fake_cpuc) | ||
| 1383 | return -ENOMEM; | ||
| 1384 | |||
| 1385 | c = x86_pmu.get_event_constraints(fake_cpuc, event); | ||
| 1386 | |||
| 1387 | if (!c || !c->weight) | ||
| 1388 | ret = -ENOSPC; | ||
| 1389 | |||
| 1390 | if (x86_pmu.put_event_constraints) | ||
| 1391 | x86_pmu.put_event_constraints(fake_cpuc, event); | ||
| 1392 | |||
| 1393 | kfree(fake_cpuc); | ||
| 1394 | |||
| 1395 | return ret; | ||
| 1396 | } | ||
| 1397 | |||
| 1398 | /* | ||
| 1468 | * validate a single event group | 1399 | * validate a single event group |
| 1469 | * | 1400 | * |
| 1470 | * validation include: | 1401 | * validation include: |
| @@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) | |||
| 1529 | 1460 | ||
| 1530 | if (event->group_leader != event) | 1461 | if (event->group_leader != event) |
| 1531 | err = validate_group(event); | 1462 | err = validate_group(event); |
| 1463 | else | ||
| 1464 | err = validate_event(event); | ||
| 1532 | 1465 | ||
| 1533 | event->pmu = tmp; | 1466 | event->pmu = tmp; |
| 1534 | } | 1467 | } |
