Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	521
1 file changed, 257 insertions, 264 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index db5bdc8addf..626154a9f53 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,46 +31,51 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 
-static u64 perf_event_mask __read_mostly;
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val)					\
+do {								\
+	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
+			(unsigned long)(val));			\
+	native_write_msr((msr), (u32)((u64)(val)),		\
+			(u32)((u64)(val) >> 32));		\
+} while (0)
+#endif
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS	4
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
+		len += size;
+		to += size;
+		addr += size;
 
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
+	} while (len < n);
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
+	return len;
+}
 
 struct event_constraint {
 	union {
@@ -89,18 +94,39 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -114,11 +140,31 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
@@ -126,32 +172,43 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->cmask; (e)++)
 
+union perf_capabilities {
+	struct {
+		u64	lbr_format    : 6;
+		u64	pebs_trap     : 1;
+		u64	pebs_arch_reg : 1;
+		u64	pebs_format   : 4;
+		u64	smm_freeze    : 1;
+	};
+	u64	capabilities;
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
-	void		(*enable_all)(void);
+	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event *event);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_events;
-	int		num_events_fixed;
-	int		event_bits;
-	u64		event_mask;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
@@ -159,11 +216,32 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	void		(*quirks)(void);
 
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64			intel_ctrl;
+	union perf_capabilities intel_cap;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs */
+	int		lbr_nr;			   /* hardware stack size */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -198,7 +276,7 @@ static u64
 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int shift = 64 - x86_pmu.event_bits;
+	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
 	s64 delta;
@@ -241,33 +319,32 @@ again:
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+
 static bool reserve_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
-#endif
 
 	return true;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_events;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -277,128 +354,36 @@ perfctr_fail:
 		enable_lapic_nmi_watchdog();
 
 	return false;
-#endif
 }
 
 static void release_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		enable_lapic_nmi_watchdog();
-#endif
-}
-
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
-}
-
-static void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_events, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
 }
 
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
+#else
 
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-		err = 0;
-	}
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
 
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
-
-	put_online_cpus();
+#endif
 
-	return err;
-}
+static int reserve_ds_buffers(void);
+static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
-		release_bts_hardware();
+		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -441,6 +426,28 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
+static int x86_pmu_hw_config(struct perf_event *event)
+{
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to
+	 */
+	if (!event->attr.exclude_user)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!event->attr.exclude_kernel)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	if (event->attr.type == PERF_TYPE_RAW)
+		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -460,8 +467,11 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if (atomic_read(&active_events) == 0) {
 		if (!reserve_pmc_hardware())
 			err = -EBUSY;
-		else
-			err = reserve_bts_hardware();
+		else {
+			err = reserve_ds_buffers();
+			if (err)
+				release_pmc_hardware();
+		}
 	}
 	if (!err)
 		atomic_inc(&active_events);
@@ -472,23 +482,14 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 	event->destroy = hw_perf_event_destroy;
 
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
 	hwc->idx = -1;
 	hwc->last_cpu = -1;
 	hwc->last_tag = ~0ULL;
 
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
+	/* Processor specifics */
+	err = x86_pmu.hw_config(event);
+	if (err)
+		return err;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
@@ -505,16 +506,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 			return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Raw hw_event type provide the config in the hw_event structure
-	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
-		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+	if (attr->type == PERF_TYPE_RAW)
 		return 0;
-	}
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
@@ -539,11 +532,11 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!bts_available())
+		if (!x86_pmu.bts)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 	}
 
@@ -557,7 +550,7 @@ static void x86_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -587,12 +580,12 @@ void hw_perf_disable(void)
 	x86_pmu.disable_all();
 }
 
-static void x86_pmu_enable_all(void)
+static void x86_pmu_enable_all(int added)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
@@ -667,14 +660,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 * assign events to counters starting with most
 	 * constrained events.
 	 */
-	wmax = x86_pmu.num_events;
+	wmax = x86_pmu.num_counters;
 
 	/*
 	 * when fixed event counters are present,
 	 * wmax is incremented by 1 to account
 	 * for one more choice
 	 */
-	if (x86_pmu.num_events_fixed)
+	if (x86_pmu.num_counters_fixed)
 		wmax++;
 
 	for (w = 1, num = n; num && w <= wmax; w++) {
@@ -724,7 +717,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	struct perf_event *event;
 	int n, max_count;
 
-	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 
 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -795,7 +788,7 @@ void hw_perf_enable(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	int i;
+	int i, added = cpuc->n_added;
 
 	if (!x86_pmu_initialized())
 		return;
@@ -847,19 +840,20 @@ void hw_perf_enable(void)
 	cpuc->enabled = 1;
 	barrier();
 
-	x86_pmu.enable_all();
+	x86_pmu.enable_all(added);
 }
 
 static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
 {
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
+	wrmsrl(hwc->config_base + hwc->idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+
+	wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -874,7 +868,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
-	int err, ret = 0, idx = hwc->idx;
+	int ret = 0, idx = hwc->idx;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
 		return 0;
@@ -912,8 +906,8 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
-	err = checking_wrmsrl(hwc->event_base + idx,
-			     (u64)(-left) & x86_pmu.event_mask);
+	wrmsrl(hwc->event_base + idx,
+			(u64)(-left) & x86_pmu.cntval_mask);
 
 	perf_event_update_userpage(event);
 
@@ -950,7 +944,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
 	/*
@@ -991,11 +985,12 @@ static void x86_pmu_unthrottle(struct perf_event *event)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	u64 pebs;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
@@ -1008,16 +1003,18 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status: %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
+		pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
 	}
 	pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr + idx, pmc_count);
 
@@ -1030,7 +1027,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: gen-PMC%d left: %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1095,7 +1092,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1103,7 +1100,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		hwc = &event->hw;
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -1146,7 +1143,6 @@ void set_perf_event_pending(void)
 
 void perf_events_lapic_init(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_pmu.apic || !x86_pmu_initialized())
 		return;
 
@@ -1154,7 +1150,6 @@ void perf_events_lapic_init(void)
 	 * Always use NMI for PMU
 	 */
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
 }
 
 static int __kprobes
@@ -1178,9 +1173,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 
 	regs = args->regs;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
 	/*
 	 * Can't rely on the handled return value to say it was our NMI, two
 	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1274,12 +1267,15 @@ int hw_perf_group_sched_in(struct perf_event *leader,
 	int assign[X86_PMC_IDX_MAX];
 	int n0, n1, ret;
 
+	if (!x86_pmu_initialized())
+		return 0;
+
 	/* n0 = total number of events */
 	n0 = collect_events(cpuc, leader, true);
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign);
 	if (ret)
 		return ret;
 
@@ -1329,6 +1325,9 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_p4.c"
+#include "perf_event_intel_lbr.c"
+#include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
 static int __cpuinit
@@ -1402,48 +1401,50 @@ void __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+	if (x86_pmu.quirks)
+		x86_pmu.quirks();
+
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
-	perf_event_mask = (1 << x86_pmu.num_events) - 1;
-	perf_max_events = x86_pmu.num_events;
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	perf_max_events = x86_pmu.num_counters;
 
-	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 	}
 
-	perf_event_mask |=
-		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
-	x86_pmu.intel_ctrl = perf_event_mask;
+	x86_pmu.intel_ctrl |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
-				   0, x86_pmu.num_events);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+				   0, x86_pmu.num_counters);
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK)
 				continue;
 
-			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
-			c->weight += x86_pmu.num_events;
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
 		}
 	}
 
 	pr_info("... version: %d\n", x86_pmu.version);
-	pr_info("... bit width: %d\n", x86_pmu.event_bits);
-	pr_info("... generic registers: %d\n", x86_pmu.num_events);
-	pr_info("... value mask: %016Lx\n", x86_pmu.event_mask);
+	pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
+	pr_info("... generic registers: %d\n", x86_pmu.num_counters);
+	pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period: %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed);
-	pr_info("... event mask: %016Lx\n", perf_event_mask);
+	pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
+	pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
 
 	perf_cpu_notifier(x86_pmu_notifier);
 }
@@ -1463,6 +1464,32 @@ static const struct pmu pmu = {
 };
 
 /*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+	struct cpu_hw_events *fake_cpuc;
+	struct event_constraint *c;
+	int ret = 0;
+
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		return -ENOMEM;
+
+	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+
+	if (!c || !c->weight)
+		ret = -ENOSPC;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(fake_cpuc, event);
+
+	kfree(fake_cpuc);
+
+	return ret;
+}
+
+/*
  * validate a single event group
 *
 * validation include:
@@ -1502,7 +1529,7 @@ static int validate_group(struct perf_event *event)
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out_free:
 	kfree(fake_cpuc);
@@ -1527,6 +1554,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 
 		if (event->group_leader != event)
 			err = validate_group(event);
+		else
+			err = validate_event(event);
 
 		event->pmu = tmp;
 	}
@@ -1574,8 +1603,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
-	if (reliable)
-		callchain_store(entry, addr);
+	callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops backtrace_ops = {
@@ -1597,41 +1625,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len += size;
-		to += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
 #ifdef CONFIG_COMPAT
 static inline int
 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)