Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--	arch/x86/kernel/cpu/perf_event.c	815
1 file changed, 409 insertions(+), 406 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index db5bdc8addf..fd4db0db370 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,46 +31,51 @@
 #include <asm/nmi.h>
 #include <asm/compat.h>
 
-static u64 perf_event_mask __read_mostly;
+#if 0
+#undef wrmsrl
+#define wrmsrl(msr, val) 					\
+do {								\
+	trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\
+			(unsigned long)(val));			\
+	native_write_msr((msr), (u32)((u64)(val)), 		\
+			(u32)((u64)(val) >> 32));		\
+} while (0)
+#endif
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS	4
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
 
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
 
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
 
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
 
+		len  += size;
+		to   += size;
+		addr += size;
 
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
+	} while (len < n);
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
+	return len;
+}
 
 struct event_constraint {
 	union {
@@ -89,18 +94,41 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	unsigned int		group_flag;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -114,44 +142,75 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
+	EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ *
+ * filter mask to validate fixed counter events.
+ * the following filters disqualify for fixed counters:
+ *  - inv
+ *  - edge
+ *  - cnt-mask
+ *  The other filters are supported by fixed counters.
+ *  The any-thread option is supported starting with v3.
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
+	EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK)
+
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
 
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
 #define for_each_event_constraint(e, c)	\
-	for ((e) = (c); (e)->cmask; (e)++)
+	for ((e) = (c); (e)->weight; (e)++)
+
+union perf_capabilities {
+	struct {
+		u64	lbr_format    : 6;
+		u64	pebs_trap     : 1;
+		u64	pebs_arch_reg : 1;
+		u64	pebs_format   : 4;
+		u64	smm_freeze    : 1;
+	};
+	u64	capabilities;
+};
 
 /*
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
 	void		(*disable_all)(void);
-	void		(*enable_all)(void);
+	void		(*enable_all)(int added);
 	void		(*enable)(struct perf_event *);
 	void		(*disable)(struct perf_event *);
+	int		(*hw_config)(struct perf_event *event);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
 	unsigned	eventsel;
 	unsigned	perfctr;
 	u64		(*event_map)(int);
-	u64		(*raw_event)(u64);
 	int		max_events;
-	int		num_events;
-	int		num_events_fixed;
-	int		event_bits;
-	u64		event_mask;
+	int		num_counters;
+	int		num_counters_fixed;
+	int		cntval_bits;
+	u64		cntval_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
@@ -159,11 +218,32 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	void		(*quirks)(void);
 
 	int		(*cpu_prepare)(int cpu);
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64		intel_ctrl;
+	union perf_capabilities intel_cap;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+	int		lbr_nr;			   /* hardware stack size */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -198,7 +278,7 @@ static u64
 x86_perf_event_update(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	int shift = 64 - x86_pmu.event_bits;
+	int shift = 64 - x86_pmu.cntval_bits;
 	u64 prev_raw_count, new_raw_count;
 	int idx = hwc->idx;
 	s64 delta;
@@ -241,33 +321,32 @@ again:
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
+#ifdef CONFIG_X86_LOCAL_APIC
+
 static bool reserve_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		disable_lapic_nmi_watchdog();
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
 			goto perfctr_fail;
 	}
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
-#endif
 
 	return true;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 
-	i = x86_pmu.num_events;
+	i = x86_pmu.num_counters;
 
 perfctr_fail:
 	for (i--; i >= 0; i--)
@@ -277,128 +356,36 @@ perfctr_fail:
 		enable_lapic_nmi_watchdog();
 
 	return false;
-#endif
 }
 
 static void release_pmc_hardware(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
-	for (i = 0; i < x86_pmu.num_events; i++) {
+	for (i = 0; i < x86_pmu.num_counters; i++) {
 		release_perfctr_nmi(x86_pmu.perfctr + i);
 		release_evntsel_nmi(x86_pmu.eventsel + i);
 	}
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		enable_lapic_nmi_watchdog();
-#endif
-}
-
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
 }
 
-static void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_events, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
-}
-
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
-
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-		err = 0;
-	}
+#else
 
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
+static bool reserve_pmc_hardware(void) { return true; }
+static void release_pmc_hardware(void) {}
 
-	put_online_cpus();
+#endif
 
-	return err;
-}
+static int reserve_ds_buffers(void);
+static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
-		release_bts_hardware();
+		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -441,54 +428,11 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	return 0;
 }
 
-/*
- * Setup the hardware configuration for a given attr_type
- */
-static int __hw_perf_event_init(struct perf_event *event)
+static int x86_setup_perfctr(struct perf_event *event)
 {
 	struct perf_event_attr *attr = &event->attr;
 	struct hw_perf_event *hwc = &event->hw;
 	u64 config;
-	int err;
-
-	if (!x86_pmu_initialized())
-		return -ENODEV;
-
-	err = 0;
-	if (!atomic_inc_not_zero(&active_events)) {
-		mutex_lock(&pmc_reserve_mutex);
-		if (atomic_read(&active_events) == 0) {
-			if (!reserve_pmc_hardware())
-				err = -EBUSY;
-			else
-				err = reserve_bts_hardware();
-		}
-		if (!err)
-			atomic_inc(&active_events);
-		mutex_unlock(&pmc_reserve_mutex);
-	}
-	if (err)
-		return err;
-
-	event->destroy = hw_perf_event_destroy;
-
-	/*
-	 * Generate PMC IRQs:
-	 * (keep 'enabled' bit clear for now)
-	 */
-	hwc->config = ARCH_PERFMON_EVENTSEL_INT;
-
-	hwc->idx = -1;
-	hwc->last_cpu = -1;
-	hwc->last_tag = ~0ULL;
-
-	/*
-	 * Count user and OS events unless requested not to.
-	 */
-	if (!attr->exclude_user)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_USR;
-	if (!attr->exclude_kernel)
-		hwc->config |= ARCH_PERFMON_EVENTSEL_OS;
 
 	if (!hwc->sample_period) {
 		hwc->sample_period = x86_pmu.max_period;
@@ -505,16 +449,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 		return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Raw hw_event type provide the config in the hw_event structure
-	 */
-	if (attr->type == PERF_TYPE_RAW) {
-		hwc->config |= x86_pmu.raw_event(attr->config);
-		if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) &&
-		    perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
-			return -EACCES;
+	if (attr->type == PERF_TYPE_RAW)
 		return 0;
-	}
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
@@ -539,11 +475,11 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!bts_available())
+		if (!x86_pmu.bts)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
-		if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
+		if (!attr->exclude_kernel)
 			return -EOPNOTSUPP;
 	}
 
@@ -552,12 +488,87 @@ static int __hw_perf_event_init(struct perf_event *event)
 	return 0;
 }
 
+static int x86_pmu_hw_config(struct perf_event *event)
+{
+	if (event->attr.precise_ip) {
+		int precise = 0;
+
+		/* Support for constant skid */
+		if (x86_pmu.pebs)
+			precise++;
+
+		/* Support for IP fixup */
+		if (x86_pmu.lbr_nr)
+			precise++;
+
+		if (event->attr.precise_ip > precise)
+			return -EOPNOTSUPP;
+	}
+
+	/*
+	 * Generate PMC IRQs:
+	 * (keep 'enabled' bit clear for now)
+	 */
+	event->hw.config = ARCH_PERFMON_EVENTSEL_INT;
+
+	/*
+	 * Count user and OS events unless requested not to
+	 */
+	if (!event->attr.exclude_user)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_USR;
+	if (!event->attr.exclude_kernel)
+		event->hw.config |= ARCH_PERFMON_EVENTSEL_OS;
+
+	if (event->attr.type == PERF_TYPE_RAW)
+		event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK;
+
+	return x86_setup_perfctr(event);
+}
+
+/*
+ * Setup the hardware configuration for a given attr_type
+ */
+static int __hw_perf_event_init(struct perf_event *event)
+{
+	int err;
+
+	if (!x86_pmu_initialized())
+		return -ENODEV;
+
+	err = 0;
+	if (!atomic_inc_not_zero(&active_events)) {
+		mutex_lock(&pmc_reserve_mutex);
+		if (atomic_read(&active_events) == 0) {
+			if (!reserve_pmc_hardware())
+				err = -EBUSY;
+			else {
+				err = reserve_ds_buffers();
+				if (err)
+					release_pmc_hardware();
+			}
+		}
+		if (!err)
+			atomic_inc(&active_events);
+		mutex_unlock(&pmc_reserve_mutex);
+	}
+	if (err)
+		return err;
+
+	event->destroy = hw_perf_event_destroy;
+
+	event->hw.idx = -1;
+	event->hw.last_cpu = -1;
+	event->hw.last_tag = ~0ULL;
+
+	return x86_pmu.hw_config(event);
+}
+
 static void x86_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
@@ -587,12 +598,12 @@ void hw_perf_disable(void)
 	x86_pmu.disable_all();
 }
 
-static void x86_pmu_enable_all(void)
+static void x86_pmu_enable_all(int added)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
 
@@ -667,14 +678,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	 * assign events to counters starting with most
 	 * constrained events.
 	 */
-	wmax = x86_pmu.num_events;
+	wmax = x86_pmu.num_counters;
 
 	/*
 	 * when fixed event counters are present,
 	 * wmax is incremented by 1 to account
 	 * for one more choice
 	 */
-	if (x86_pmu.num_events_fixed)
+	if (x86_pmu.num_counters_fixed)
 		wmax++;
 
 	for (w = 1, num = n; num && w <= wmax; w++) {
@@ -724,7 +735,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 	struct perf_event *event;
 	int n, max_count;
 
-	max_count = x86_pmu.num_events + x86_pmu.num_events_fixed;
+	max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed;
 
 	/* current number of events already accepted */
 	n = cpuc->n_events;
@@ -795,7 +806,7 @@ void hw_perf_enable(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct perf_event *event;
 	struct hw_perf_event *hwc;
-	int i;
+	int i, added = cpuc->n_added;
 
 	if (!x86_pmu_initialized())
 		return;
@@ -847,19 +858,20 @@ void hw_perf_enable(void)
 	cpuc->enabled = 1;
 	barrier();
 
-	x86_pmu.enable_all();
+	x86_pmu.enable_all(added);
 }
 
-static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
+static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
+					  u64 enable_mask)
 {
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx,
-			      hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE);
+	wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
 }
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
-	(void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config);
+
+	wrmsrl(hwc->config_base + hwc->idx, hwc->config);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -874,7 +886,7 @@ x86_perf_event_set_period(struct perf_event *event)
 	struct hw_perf_event *hwc = &event->hw;
 	s64 left = atomic64_read(&hwc->period_left);
 	s64 period = hwc->sample_period;
-	int err, ret = 0, idx = hwc->idx;
+	int ret = 0, idx = hwc->idx;
 
 	if (idx == X86_PMC_IDX_FIXED_BTS)
 		return 0;
@@ -912,8 +924,8 @@ x86_perf_event_set_period(struct perf_event *event)
 	 */
 	atomic64_set(&hwc->prev_count, (u64)-left);
 
-	err = checking_wrmsrl(hwc->event_base + idx,
-			     (u64)(-left) & x86_pmu.event_mask);
+	wrmsrl(hwc->event_base + idx,
+			(u64)(-left) & x86_pmu.cntval_mask);
 
 	perf_event_update_userpage(event);
 
@@ -924,7 +936,8 @@ static void x86_pmu_enable_event(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	if (cpuc->enabled)
-		__x86_pmu_enable_event(&event->hw);
+		__x86_pmu_enable_event(&event->hw,
+				       ARCH_PERFMON_EVENTSEL_ENABLE);
 }
 
 /*
@@ -950,7 +963,15 @@ static int x86_pmu_enable(struct perf_event *event)
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	/*
+	 * If group events scheduling transaction was started,
+	 * skip the schedulability test here, it will be peformed
+	 * at commit time(->commit_txn) as a whole
+	 */
+	if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+		goto out;
+
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
 	if (ret)
 		return ret;
 	/*
@@ -959,6 +980,7 @@ static int x86_pmu_enable(struct perf_event *event)
 	 */
 	memcpy(cpuc->assign, assign, n*sizeof(int));
 
+out:
 	cpuc->n_events = n;
 	cpuc->n_added += n - n0;
 
@@ -991,11 +1013,12 @@ static void x86_pmu_unthrottle(struct perf_event *event)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	u64 pebs;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
 
-	if (!x86_pmu.num_events)
+	if (!x86_pmu.num_counters)
 		return;
 
 	local_irq_save(flags);
@@ -1008,16 +1031,18 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
 	}
 	pr_info("CPU#%d: active:     %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
 		rdmsrl(x86_pmu.perfctr + idx, pmc_count);
 
@@ -1030,7 +1055,7 @@ void perf_event_print_debug(void)
 		pr_info("CPU#%d: gen-PMC%d left:  %016llx\n",
 			cpu, idx, prev_left);
 	}
-	for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) {
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count);
 
 		pr_info("CPU#%d: fixed-PMC%d count: %016llx\n",
@@ -1095,7 +1120,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 
 	cpuc = &__get_cpu_var(cpu_hw_events);
 
-	for (idx = 0; idx < x86_pmu.num_events; idx++) {
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
 
@@ -1103,7 +1128,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
 		hwc = &event->hw;
 
 		val = x86_perf_event_update(event);
-		if (val & (1ULL << (x86_pmu.event_bits - 1)))
+		if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
 			continue;
 
 		/*
@@ -1146,7 +1171,6 @@ void set_perf_event_pending(void)
 
 void perf_events_lapic_init(void)
 {
-#ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_pmu.apic || !x86_pmu_initialized())
 		return;
 
@@ -1154,7 +1178,6 @@ void perf_events_lapic_init(void)
 	 * Always use NMI for PMU
 	 */
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
 }
 
 static int __kprobes
@@ -1178,9 +1201,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 
 	regs = args->regs;
 
-#ifdef CONFIG_X86_LOCAL_APIC
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
-#endif
 	/*
 	 * Can't rely on the handled return value to say it was our NMI, two
 	 * events could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1217,118 +1238,11 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 	return &unconstrained;
 }
 
-static int x86_event_sched_in(struct perf_event *event,
-			  struct perf_cpu_context *cpuctx)
-{
-	int ret = 0;
-
-	event->state = PERF_EVENT_STATE_ACTIVE;
-	event->oncpu = smp_processor_id();
-	event->tstamp_running += event->ctx->time - event->tstamp_stopped;
-
-	if (!is_x86_event(event))
-		ret = event->pmu->enable(event);
-
-	if (!ret && !is_software_event(event))
-		cpuctx->active_oncpu++;
-
-	if (!ret && event->attr.exclusive)
-		cpuctx->exclusive = 1;
-
-	return ret;
-}
-
-static void x86_event_sched_out(struct perf_event *event,
-			    struct perf_cpu_context *cpuctx)
-{
-	event->state = PERF_EVENT_STATE_INACTIVE;
-	event->oncpu = -1;
-
-	if (!is_x86_event(event))
-		event->pmu->disable(event);
-
-	event->tstamp_running -= event->ctx->time - event->tstamp_stopped;
-
-	if (!is_software_event(event))
-		cpuctx->active_oncpu--;
-
-	if (event->attr.exclusive || !cpuctx->active_oncpu)
-		cpuctx->exclusive = 0;
-}
-
-/*
- * Called to enable a whole group of events.
- * Returns 1 if the group was enabled, or -EAGAIN if it could not be.
- * Assumes the caller has disabled interrupts and has
- * frozen the PMU with hw_perf_save_disable.
- *
- * called with PMU disabled. If successful and return value 1,
- * then guaranteed to call perf_enable() and hw_perf_enable()
- */
-int hw_perf_group_sched_in(struct perf_event *leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct perf_event *sub;
-	int assign[X86_PMC_IDX_MAX];
-	int n0, n1, ret;
-
-	/* n0 = total number of events */
-	n0 = collect_events(cpuc, leader, true);
-	if (n0 < 0)
-		return n0;
-
-	ret = x86_schedule_events(cpuc, n0, assign);
-	if (ret)
-		return ret;
-
-	ret = x86_event_sched_in(leader, cpuctx);
-	if (ret)
-		return ret;
-
-	n1 = 1;
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state > PERF_EVENT_STATE_OFF) {
-			ret = x86_event_sched_in(sub, cpuctx);
-			if (ret)
-				goto undo;
-			++n1;
-		}
-	}
-	/*
-	 * copy new assignment, now we know it is possible
-	 * will be used by hw_perf_enable()
-	 */
-	memcpy(cpuc->assign, assign, n0*sizeof(int));
-
-	cpuc->n_events = n0;
-	cpuc->n_added += n1;
-	ctx->nr_active += n1;
-
-	/*
-	 * 1 means successful and events are active
-	 * This is not quite true because we defer
-	 * actual activation until hw_perf_enable() but
-	 * this way we* ensure caller won't try to enable
-	 * individual events
-	 */
-	return 1;
-undo:
-	x86_event_sched_out(leader, cpuctx);
-	n0 = 1;
-	list_for_each_entry(sub, &leader->sibling_list, group_entry) {
-		if (sub->state == PERF_EVENT_STATE_ACTIVE) {
-			x86_event_sched_out(sub, cpuctx);
-			if (++n0 == n1)
-				break;
-		}
-	}
-	return ret;
-}
-
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_p4.c"
+#include "perf_event_intel_lbr.c"
+#include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
 static int __cpuinit
@@ -1402,48 +1316,50 @@ void __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) {
+	if (x86_pmu.quirks)
+		x86_pmu.quirks();
+
+	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
-		     x86_pmu.num_events, X86_PMC_MAX_GENERIC);
-		x86_pmu.num_events = X86_PMC_MAX_GENERIC;
+		     x86_pmu.num_counters, X86_PMC_MAX_GENERIC);
+		x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
 	}
-	perf_event_mask = (1 << x86_pmu.num_events) - 1;
-	perf_max_events = x86_pmu.num_events;
+	x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+	perf_max_events = x86_pmu.num_counters;
 
-	if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) {
+	if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
 		WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
-		     x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED);
-		x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED;
+		     x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED);
+		x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED;
 	}
 
-	perf_event_mask |=
-		((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED;
-	x86_pmu.intel_ctrl = perf_event_mask;
+	x86_pmu.intel_ctrl |=
+		((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED;
 
 	perf_events_lapic_init();
 	register_die_notifier(&perf_event_nmi_notifier);
 
 	unconstrained = (struct event_constraint)
-		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1,
-				   0, x86_pmu.num_events);
+		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
+				   0, x86_pmu.num_counters);
 
 	if (x86_pmu.event_constraints) {
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != INTEL_ARCH_FIXED_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK)
 				continue;
 
-			c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1;
-			c->weight += x86_pmu.num_events;
+			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
+			c->weight += x86_pmu.num_counters;
 		}
 	}
 
 	pr_info("... version:                %d\n",     x86_pmu.version);
-	pr_info("... bit width:              %d\n",     x86_pmu.event_bits);
-	pr_info("... generic registers:      %d\n",     x86_pmu.num_events);
-	pr_info("... value mask:             %016Lx\n", x86_pmu.event_mask);
+	pr_info("... bit width:              %d\n",     x86_pmu.cntval_bits);
+	pr_info("... generic registers:      %d\n",     x86_pmu.num_counters);
+	pr_info("... value mask:             %016Lx\n", x86_pmu.cntval_mask);
 	pr_info("... max period:             %016Lx\n", x86_pmu.max_period);
-	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_events_fixed);
-	pr_info("... event mask:             %016Lx\n", perf_event_mask);
+	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
+	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
 	perf_cpu_notifier(x86_pmu_notifier);
 }
@@ -1453,6 +1369,59 @@ static inline void x86_pmu_read(struct perf_event *event)
 	x86_perf_event_update(event);
 }
 
+/*
+ * Start group events scheduling transaction
+ * Set the flag to make pmu::enable() not perform the
+ * schedulability test, it will be performed at commit time
+ */
+static void x86_pmu_start_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Stop group events scheduling transaction
+ * Clear the flag and pmu::enable() will perform the
+ * schedulability test.
+ */
+static void x86_pmu_cancel_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+}
+
+/*
+ * Commit group events scheduling transaction
+ * Perform the group schedulability test as a whole
+ * Return 0 if success
+ */
+static int x86_pmu_commit_txn(const struct pmu *pmu)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	int assign[X86_PMC_IDX_MAX];
+	int n, ret;
+
+	n = cpuc->n_events;
+
+	if (!x86_pmu_initialized())
+		return -EAGAIN;
+
+	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	if (ret)
+		return ret;
+
+	/*
+	 * copy new assignment, now we know it is possible
+	 * will be used by hw_perf_enable()
+	 */
+	memcpy(cpuc->assign, assign, n*sizeof(int));
+
+	return 0;
+}
+
 static const struct pmu pmu = {
 	.enable		= x86_pmu_enable,
 	.disable	= x86_pmu_disable,
@@ -1460,9 +1429,38 @@ static const struct pmu pmu = {
 	.stop		= x86_pmu_stop,
 	.read		= x86_pmu_read,
 	.unthrottle	= x86_pmu_unthrottle,
+	.start_txn	= x86_pmu_start_txn,
+	.cancel_txn	= x86_pmu_cancel_txn,
+	.commit_txn	= x86_pmu_commit_txn,
 };
 
 /*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+	struct cpu_hw_events *fake_cpuc;
+	struct event_constraint *c;
+	int ret = 0;
+
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		return -ENOMEM;
+
+	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+
+	if (!c || !c->weight)
+		ret = -ENOSPC;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(fake_cpuc, event);
+
+	kfree(fake_cpuc);
+
+	return ret;
+}
+
+/*
  * validate a single event group
  *
  * validation include:
@@ -1502,7 +1500,7 @@ static int validate_group(struct perf_event *event)
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
 
 out_free:
 	kfree(fake_cpuc);
@@ -1527,6 +1525,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 
 		if (event->group_leader != event)
 			err = validate_group(event);
+		else
+			err = validate_event(event);
 
 		event->pmu = tmp;
 	}
@@ -1574,8 +1574,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
-	if (reliable)
-		callchain_store(entry, addr);
+	callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops backtrace_ops = {
@@ -1597,41 +1596,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
 #ifdef CONFIG_COMPAT
 static inline int
 perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1727,6 +1691,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 {
 	struct perf_callchain_entry *entry;
 
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* TODO: We don't support guest os callchain now */
+		return NULL;
+	}
+
 	if (in_nmi())
 		entry = &__get_cpu_var(pmc_nmi_entry);
 	else
@@ -1750,3 +1719,37 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	regs->cs = __KERNEL_CS;
 	local_save_flags(regs->flags);
 }
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	unsigned long ip;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		ip = perf_guest_cbs->get_guest_ip();
+	else
+		ip = instruction_pointer(regs);
+
+	return ip;
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int misc = 0;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
+	if (regs->flags & PERF_EFLAGS_EXACT)
+		misc |= PERF_RECORD_MISC_EXACT_IP;
+
+	return misc;
+}