author    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/kernel/cpu/perf_event.c
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent    6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event.c')
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c | 615
1 file changed, 357 insertions(+), 258 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 03a5b0385ad6..3a0338b4b179 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -30,6 +30,8 @@
30#include <asm/stacktrace.h> 30#include <asm/stacktrace.h>
31#include <asm/nmi.h> 31#include <asm/nmi.h>
32#include <asm/compat.h> 32#include <asm/compat.h>
33#include <asm/smp.h>
34#include <asm/alternative.h>
33 35
34#if 0 36#if 0
35#undef wrmsrl 37#undef wrmsrl
@@ -49,7 +51,6 @@ static unsigned long
49copy_from_user_nmi(void *to, const void __user *from, unsigned long n) 51copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
50{ 52{
51 unsigned long offset, addr = (unsigned long)from; 53 unsigned long offset, addr = (unsigned long)from;
52 int type = in_nmi() ? KM_NMI : KM_IRQ0;
53 unsigned long size, len = 0; 54 unsigned long size, len = 0;
54 struct page *page; 55 struct page *page;
55 void *map; 56 void *map;
@@ -63,9 +64,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
63 offset = addr & (PAGE_SIZE - 1); 64 offset = addr & (PAGE_SIZE - 1);
64 size = min(PAGE_SIZE - offset, n - len); 65 size = min(PAGE_SIZE - offset, n - len);
65 66
66 map = kmap_atomic(page, type); 67 map = kmap_atomic(page);
67 memcpy(to, map+offset, size); 68 memcpy(to, map+offset, size);
68 kunmap_atomic(map, type); 69 kunmap_atomic(map);
69 put_page(page); 70 put_page(page);
70 71
71 len += size; 72 len += size;
@@ -94,6 +95,8 @@ struct amd_nb {
94 struct event_constraint event_constraints[X86_PMC_IDX_MAX]; 95 struct event_constraint event_constraints[X86_PMC_IDX_MAX];
95}; 96};
96 97
98struct intel_percore;
99
97#define MAX_LBR_ENTRIES 16 100#define MAX_LBR_ENTRIES 16
98 101
99struct cpu_hw_events { 102struct cpu_hw_events {
@@ -129,6 +132,13 @@ struct cpu_hw_events {
129 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; 132 struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
130 133
131 /* 134 /*
135 * Intel percore register state.
136 * Coordinate shared resources between HT threads.
137 */
138 int percore_used; /* Used by this CPU? */
139 struct intel_percore *per_core;
140
141 /*
132 * AMD specific bits 142 * AMD specific bits
133 */ 143 */
134 struct amd_nb *amd_nb; 144 struct amd_nb *amd_nb;
@@ -167,7 +177,7 @@ struct cpu_hw_events {
167/* 177/*
168 * Constraint on the Event code + UMask 178 * Constraint on the Event code + UMask
169 */ 179 */
170#define PEBS_EVENT_CONSTRAINT(c, n) \ 180#define INTEL_UEVENT_CONSTRAINT(c, n) \
171 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) 181 EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
172 182
173#define EVENT_CONSTRAINT_END \ 183#define EVENT_CONSTRAINT_END \
@@ -176,6 +186,28 @@ struct cpu_hw_events {
176#define for_each_event_constraint(e, c) \ 186#define for_each_event_constraint(e, c) \
177 for ((e) = (c); (e)->weight; (e)++) 187 for ((e) = (c); (e)->weight; (e)++)
178 188
189/*
190 * Extra registers for specific events.
191 * Some events need large masks and require external MSRs.
192 * Define a mapping to these extra registers.
193 */
194struct extra_reg {
195 unsigned int event;
196 unsigned int msr;
197 u64 config_mask;
198 u64 valid_mask;
199};
200
201#define EVENT_EXTRA_REG(e, ms, m, vm) { \
202 .event = (e), \
203 .msr = (ms), \
204 .config_mask = (m), \
205 .valid_mask = (vm), \
206 }
207#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
208 EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
209#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)
210
179union perf_capabilities { 211union perf_capabilities {
180 struct { 212 struct {
181 u64 lbr_format : 6; 213 u64 lbr_format : 6;
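The extra_reg table introduced above is consumed by x86_pmu_extra_regs() later in this patch: attr.config1 is checked against valid_mask and then written to the listed MSR when the event is programmed. A minimal sketch of how a PMU model might populate such a table; the event code, MSR number and mask below are placeholders for illustration, not values taken from this patch.

static struct extra_reg example_extra_regs[] __read_mostly = {
	/* route one (placeholder) event code to a (placeholder) extra MSR;
	 * 0xffff is the set of bits user space may pass via attr.config1 */
	INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff),
	EVENT_EXTRA_END
};

A CPU-model setup routine would then point x86_pmu.extra_regs at such a table so the core code can validate and apply the extra configuration.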
@@ -220,6 +252,7 @@ struct x86_pmu {
220 void (*put_event_constraints)(struct cpu_hw_events *cpuc, 252 void (*put_event_constraints)(struct cpu_hw_events *cpuc,
221 struct perf_event *event); 253 struct perf_event *event);
222 struct event_constraint *event_constraints; 254 struct event_constraint *event_constraints;
255 struct event_constraint *percore_constraints;
223 void (*quirks)(void); 256 void (*quirks)(void);
224 int perfctr_second_write; 257 int perfctr_second_write;
225 258
@@ -238,6 +271,7 @@ struct x86_pmu {
238 * Intel DebugStore bits 271 * Intel DebugStore bits
239 */ 272 */
240 int bts, pebs; 273 int bts, pebs;
274 int bts_active, pebs_active;
241 int pebs_record_size; 275 int pebs_record_size;
242 void (*drain_pebs)(struct pt_regs *regs); 276 void (*drain_pebs)(struct pt_regs *regs);
243 struct event_constraint *pebs_constraints; 277 struct event_constraint *pebs_constraints;
@@ -247,6 +281,11 @@ struct x86_pmu {
247 */ 281 */
248 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ 282 unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
249 int lbr_nr; /* hardware stack size */ 283 int lbr_nr; /* hardware stack size */
284
285 /*
286 * Extra registers for events
287 */
288 struct extra_reg *extra_regs;
250}; 289};
251 290
252static struct x86_pmu x86_pmu __read_mostly; 291static struct x86_pmu x86_pmu __read_mostly;
@@ -271,6 +310,10 @@ static u64 __read_mostly hw_cache_event_ids
271 [PERF_COUNT_HW_CACHE_MAX] 310 [PERF_COUNT_HW_CACHE_MAX]
272 [PERF_COUNT_HW_CACHE_OP_MAX] 311 [PERF_COUNT_HW_CACHE_OP_MAX]
273 [PERF_COUNT_HW_CACHE_RESULT_MAX]; 312 [PERF_COUNT_HW_CACHE_RESULT_MAX];
313static u64 __read_mostly hw_cache_extra_regs
314 [PERF_COUNT_HW_CACHE_MAX]
315 [PERF_COUNT_HW_CACHE_OP_MAX]
316 [PERF_COUNT_HW_CACHE_RESULT_MAX];
274 317
275/* 318/*
276 * Propagate event elapsed time into the generic event. 319 * Propagate event elapsed time into the generic event.
@@ -298,7 +341,7 @@ x86_perf_event_update(struct perf_event *event)
298 */ 341 */
299again: 342again:
300 prev_raw_count = local64_read(&hwc->prev_count); 343 prev_raw_count = local64_read(&hwc->prev_count);
301 rdmsrl(hwc->event_base + idx, new_raw_count); 344 rdmsrl(hwc->event_base, new_raw_count);
302 345
303 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, 346 if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
304 new_raw_count) != prev_raw_count) 347 new_raw_count) != prev_raw_count)
@@ -321,6 +364,55 @@ again:
321 return new_raw_count; 364 return new_raw_count;
322} 365}
323 366
367static inline int x86_pmu_addr_offset(int index)
368{
369 int offset;
370
371 /* offset = X86_FEATURE_PERFCTR_CORE ? index << 1 : index */
372 alternative_io(ASM_NOP2,
373 "shll $1, %%eax",
374 X86_FEATURE_PERFCTR_CORE,
375 "=a" (offset),
376 "a" (index));
377
378 return offset;
379}
380
381static inline unsigned int x86_pmu_config_addr(int index)
382{
383 return x86_pmu.eventsel + x86_pmu_addr_offset(index);
384}
385
386static inline unsigned int x86_pmu_event_addr(int index)
387{
388 return x86_pmu.perfctr + x86_pmu_addr_offset(index);
389}
390
391/*
392 * Find and validate any extra registers to set up.
393 */
394static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
395{
396 struct extra_reg *er;
397
398 event->hw.extra_reg = 0;
399 event->hw.extra_config = 0;
400
401 if (!x86_pmu.extra_regs)
402 return 0;
403
404 for (er = x86_pmu.extra_regs; er->msr; er++) {
405 if (er->event != (config & er->config_mask))
406 continue;
407 if (event->attr.config1 & ~er->valid_mask)
408 return -EINVAL;
409 event->hw.extra_reg = er->msr;
410 event->hw.extra_config = event->attr.config1;
411 break;
412 }
413 return 0;
414}
415
324static atomic_t active_events; 416static atomic_t active_events;
325static DEFINE_MUTEX(pmc_reserve_mutex); 417static DEFINE_MUTEX(pmc_reserve_mutex);
326 418
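The alternative_io() in x86_pmu_addr_offset() above is a boot-time-patched version of the expression in its comment: on CPUs whose counter and event-select MSRs are interleaved, the per-index offset doubles. A plain-C sketch of the same computation, with has_perfctr_core standing in for the X86_FEATURE_PERFCTR_CORE test (an assumption for illustration, not kernel code):

/* offset = has_perfctr_core ? index << 1 : index */
static inline int pmu_addr_offset_sketch(int index, bool has_perfctr_core)
{
	return has_perfctr_core ? index << 1 : index;
}

The alternative_io() form avoids a feature test on every MSR access: the two-byte NOP is rewritten into shll $1, %eax once at boot when the feature bit is present.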
@@ -330,16 +422,13 @@ static bool reserve_pmc_hardware(void)
330{ 422{
331 int i; 423 int i;
332 424
333 if (nmi_watchdog == NMI_LOCAL_APIC)
334 disable_lapic_nmi_watchdog();
335
336 for (i = 0; i < x86_pmu.num_counters; i++) { 425 for (i = 0; i < x86_pmu.num_counters; i++) {
337 if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) 426 if (!reserve_perfctr_nmi(x86_pmu_event_addr(i)))
338 goto perfctr_fail; 427 goto perfctr_fail;
339 } 428 }
340 429
341 for (i = 0; i < x86_pmu.num_counters; i++) { 430 for (i = 0; i < x86_pmu.num_counters; i++) {
342 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 431 if (!reserve_evntsel_nmi(x86_pmu_config_addr(i)))
343 goto eventsel_fail; 432 goto eventsel_fail;
344 } 433 }
345 434
@@ -347,16 +436,13 @@ static bool reserve_pmc_hardware(void)
347 436
348eventsel_fail: 437eventsel_fail:
349 for (i--; i >= 0; i--) 438 for (i--; i >= 0; i--)
350 release_evntsel_nmi(x86_pmu.eventsel + i); 439 release_evntsel_nmi(x86_pmu_config_addr(i));
351 440
352 i = x86_pmu.num_counters; 441 i = x86_pmu.num_counters;
353 442
354perfctr_fail: 443perfctr_fail:
355 for (i--; i >= 0; i--) 444 for (i--; i >= 0; i--)
356 release_perfctr_nmi(x86_pmu.perfctr + i); 445 release_perfctr_nmi(x86_pmu_event_addr(i));
357
358 if (nmi_watchdog == NMI_LOCAL_APIC)
359 enable_lapic_nmi_watchdog();
360 446
361 return false; 447 return false;
362} 448}
@@ -366,12 +452,9 @@ static void release_pmc_hardware(void)
366 int i; 452 int i;
367 453
368 for (i = 0; i < x86_pmu.num_counters; i++) { 454 for (i = 0; i < x86_pmu.num_counters; i++) {
369 release_perfctr_nmi(x86_pmu.perfctr + i); 455 release_perfctr_nmi(x86_pmu_event_addr(i));
370 release_evntsel_nmi(x86_pmu.eventsel + i); 456 release_evntsel_nmi(x86_pmu_config_addr(i));
371 } 457 }
372
373 if (nmi_watchdog == NMI_LOCAL_APIC)
374 enable_lapic_nmi_watchdog();
375} 458}
376 459
377#else 460#else
@@ -381,7 +464,64 @@ static void release_pmc_hardware(void) {}
381 464
382#endif 465#endif
383 466
384static int reserve_ds_buffers(void); 467static bool check_hw_exists(void)
468{
469 u64 val, val_new = 0;
470 int i, reg, ret = 0;
471
472 /*
473 * Check to see if the BIOS enabled any of the counters, if so
474 * complain and bail.
475 */
476 for (i = 0; i < x86_pmu.num_counters; i++) {
477 reg = x86_pmu_config_addr(i);
478 ret = rdmsrl_safe(reg, &val);
479 if (ret)
480 goto msr_fail;
481 if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
482 goto bios_fail;
483 }
484
485 if (x86_pmu.num_counters_fixed) {
486 reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
487 ret = rdmsrl_safe(reg, &val);
488 if (ret)
489 goto msr_fail;
490 for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
491 if (val & (0x03 << i*4))
492 goto bios_fail;
493 }
494 }
495
496 /*
497 * Now write a value and read it back to see if it matches,
498 * this is needed to detect certain hardware emulators (qemu/kvm)
499 * that don't trap on the MSR access and always return 0s.
500 */
501 val = 0xabcdUL;
502 ret = checking_wrmsrl(x86_pmu_event_addr(0), val);
503 ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);
504 if (ret || val != val_new)
505 goto msr_fail;
506
507 return true;
508
509bios_fail:
510 /*
511 * We still allow the PMU driver to operate:
512 */
513 printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
514 printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
515
516 return true;
517
518msr_fail:
519 printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
520
521 return false;
522}
523
524static void reserve_ds_buffers(void);
385static void release_ds_buffers(void); 525static void release_ds_buffers(void);
386 526
387static void hw_perf_event_destroy(struct perf_event *event) 527static void hw_perf_event_destroy(struct perf_event *event)
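The tail of check_hw_exists() above guards against hardware emulators by writing a magic value to counter 0 and making sure it reads back unchanged. A condensed sketch of that probe, reusing the same helpers this patch already introduces:

/* Write/read-back probe: a faulting access, or a value that does not read
 * back as written, means there is no usable counter behind the MSR
 * (qemu/kvm-style emulators ignore the write and return 0). */
static bool counter0_usable(void)
{
	u64 val = 0xabcdUL, val_new = 0;
	int ret;

	ret  = checking_wrmsrl(x86_pmu_event_addr(0), val);
	ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new);

	return !ret && val == val_new;
}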
@@ -399,8 +539,9 @@ static inline int x86_pmu_initialized(void)
399} 539}
400 540
401static inline int 541static inline int
402set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) 542set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
403{ 543{
544 struct perf_event_attr *attr = &event->attr;
404 unsigned int cache_type, cache_op, cache_result; 545 unsigned int cache_type, cache_op, cache_result;
405 u64 config, val; 546 u64 config, val;
406 547
@@ -427,8 +568,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
427 return -EINVAL; 568 return -EINVAL;
428 569
429 hwc->config |= val; 570 hwc->config |= val;
430 571 attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result];
431 return 0; 572 return x86_pmu_extra_regs(val, event);
432} 573}
433 574
434static int x86_setup_perfctr(struct perf_event *event) 575static int x86_setup_perfctr(struct perf_event *event)
@@ -437,7 +578,7 @@ static int x86_setup_perfctr(struct perf_event *event)
437 struct hw_perf_event *hwc = &event->hw; 578 struct hw_perf_event *hwc = &event->hw;
438 u64 config; 579 u64 config;
439 580
440 if (!hwc->sample_period) { 581 if (!is_sampling_event(event)) {
441 hwc->sample_period = x86_pmu.max_period; 582 hwc->sample_period = x86_pmu.max_period;
442 hwc->last_period = hwc->sample_period; 583 hwc->last_period = hwc->sample_period;
443 local64_set(&hwc->period_left, hwc->sample_period); 584 local64_set(&hwc->period_left, hwc->sample_period);
@@ -452,11 +593,15 @@ static int x86_setup_perfctr(struct perf_event *event)
452 return -EOPNOTSUPP; 593 return -EOPNOTSUPP;
453 } 594 }
454 595
596 /*
597 * Do not allow config1 (extended registers) to propagate,
598 * there's no sane user-space generalization yet:
599 */
455 if (attr->type == PERF_TYPE_RAW) 600 if (attr->type == PERF_TYPE_RAW)
456 return 0; 601 return 0;
457 602
458 if (attr->type == PERF_TYPE_HW_CACHE) 603 if (attr->type == PERF_TYPE_HW_CACHE)
459 return set_ext_hw_attr(hwc, attr); 604 return set_ext_hw_attr(hwc, event);
460 605
461 if (attr->config >= x86_pmu.max_events) 606 if (attr->config >= x86_pmu.max_events)
462 return -EINVAL; 607 return -EINVAL;
@@ -475,10 +620,10 @@ static int x86_setup_perfctr(struct perf_event *event)
475 /* 620 /*
476 * Branch tracing: 621 * Branch tracing:
477 */ 622 */
478 if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && 623 if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS &&
479 (hwc->sample_period == 1)) { 624 !attr->freq && hwc->sample_period == 1) {
480 /* BTS is not supported by this architecture. */ 625 /* BTS is not supported by this architecture. */
481 if (!x86_pmu.bts) 626 if (!x86_pmu.bts_active)
482 return -EOPNOTSUPP; 627 return -EOPNOTSUPP;
483 628
484 /* BTS is currently only allowed for user-mode. */ 629 /* BTS is currently only allowed for user-mode. */
@@ -497,12 +642,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
497 int precise = 0; 642 int precise = 0;
498 643
499 /* Support for constant skid */ 644 /* Support for constant skid */
500 if (x86_pmu.pebs) 645 if (x86_pmu.pebs_active) {
501 precise++; 646 precise++;
502 647
503 /* Support for IP fixup */ 648 /* Support for IP fixup */
504 if (x86_pmu.lbr_nr) 649 if (x86_pmu.lbr_nr)
505 precise++; 650 precise++;
651 }
506 652
507 if (event->attr.precise_ip > precise) 653 if (event->attr.precise_ip > precise)
508 return -EOPNOTSUPP; 654 return -EOPNOTSUPP;
@@ -531,7 +677,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
531/* 677/*
532 * Setup the hardware configuration for a given attr_type 678 * Setup the hardware configuration for a given attr_type
533 */ 679 */
534static int __hw_perf_event_init(struct perf_event *event) 680static int __x86_pmu_event_init(struct perf_event *event)
535{ 681{
536 int err; 682 int err;
537 683
@@ -544,11 +690,8 @@ static int __hw_perf_event_init(struct perf_event *event)
544 if (atomic_read(&active_events) == 0) { 690 if (atomic_read(&active_events) == 0) {
545 if (!reserve_pmc_hardware()) 691 if (!reserve_pmc_hardware())
546 err = -EBUSY; 692 err = -EBUSY;
547 else { 693 else
548 err = reserve_ds_buffers(); 694 reserve_ds_buffers();
549 if (err)
550 release_pmc_hardware();
551 }
552 } 695 }
553 if (!err) 696 if (!err)
554 atomic_inc(&active_events); 697 atomic_inc(&active_events);
@@ -576,15 +719,15 @@ static void x86_pmu_disable_all(void)
576 719
577 if (!test_bit(idx, cpuc->active_mask)) 720 if (!test_bit(idx, cpuc->active_mask))
578 continue; 721 continue;
579 rdmsrl(x86_pmu.eventsel + idx, val); 722 rdmsrl(x86_pmu_config_addr(idx), val);
580 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) 723 if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE))
581 continue; 724 continue;
582 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; 725 val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
583 wrmsrl(x86_pmu.eventsel + idx, val); 726 wrmsrl(x86_pmu_config_addr(idx), val);
584 } 727 }
585} 728}
586 729
587void hw_perf_disable(void) 730static void x86_pmu_disable(struct pmu *pmu)
588{ 731{
589 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 732 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
590 733
@@ -601,25 +744,30 @@ void hw_perf_disable(void)
601 x86_pmu.disable_all(); 744 x86_pmu.disable_all();
602} 745}
603 746
747static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
748 u64 enable_mask)
749{
750 if (hwc->extra_reg)
751 wrmsrl(hwc->extra_reg, hwc->extra_config);
752 wrmsrl(hwc->config_base, hwc->config | enable_mask);
753}
754
604static void x86_pmu_enable_all(int added) 755static void x86_pmu_enable_all(int added)
605{ 756{
606 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 757 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
607 int idx; 758 int idx;
608 759
609 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 760 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
610 struct perf_event *event = cpuc->events[idx]; 761 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
611 u64 val;
612 762
613 if (!test_bit(idx, cpuc->active_mask)) 763 if (!test_bit(idx, cpuc->active_mask))
614 continue; 764 continue;
615 765
616 val = event->hw.config; 766 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
617 val |= ARCH_PERFMON_EVENTSEL_ENABLE;
618 wrmsrl(x86_pmu.eventsel + idx, val);
619 } 767 }
620} 768}
621 769
622static const struct pmu pmu; 770static struct pmu pmu;
623 771
624static inline int is_x86_event(struct perf_event *event) 772static inline int is_x86_event(struct perf_event *event)
625{ 773{
@@ -780,15 +928,10 @@ static inline void x86_assign_hw_event(struct perf_event *event,
780 hwc->event_base = 0; 928 hwc->event_base = 0;
781 } else if (hwc->idx >= X86_PMC_IDX_FIXED) { 929 } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
782 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; 930 hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
783 /* 931 hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
784 * We set it so that event_base + idx in wrmsr/rdmsr maps to
785 * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2:
786 */
787 hwc->event_base =
788 MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED;
789 } else { 932 } else {
790 hwc->config_base = x86_pmu.eventsel; 933 hwc->config_base = x86_pmu_config_addr(hwc->idx);
791 hwc->event_base = x86_pmu.perfctr; 934 hwc->event_base = x86_pmu_event_addr(hwc->idx);
792 } 935 }
793} 936}
794 937
@@ -801,10 +944,10 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc,
801 hwc->last_tag == cpuc->tags[i]; 944 hwc->last_tag == cpuc->tags[i];
802} 945}
803 946
804static int x86_pmu_start(struct perf_event *event); 947static void x86_pmu_start(struct perf_event *event, int flags);
805static void x86_pmu_stop(struct perf_event *event); 948static void x86_pmu_stop(struct perf_event *event, int flags);
806 949
807void hw_perf_enable(void) 950static void x86_pmu_enable(struct pmu *pmu)
808{ 951{
809 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 952 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
810 struct perf_event *event; 953 struct perf_event *event;
@@ -840,7 +983,14 @@ void hw_perf_enable(void)
840 match_prev_assignment(hwc, cpuc, i)) 983 match_prev_assignment(hwc, cpuc, i))
841 continue; 984 continue;
842 985
843 x86_pmu_stop(event); 986 /*
987 * Ensure we don't accidentally enable a stopped
988 * counter simply because we rescheduled.
989 */
990 if (hwc->state & PERF_HES_STOPPED)
991 hwc->state |= PERF_HES_ARCH;
992
993 x86_pmu_stop(event, PERF_EF_UPDATE);
844 } 994 }
845 995
846 for (i = 0; i < cpuc->n_events; i++) { 996 for (i = 0; i < cpuc->n_events; i++) {
@@ -852,7 +1002,10 @@ void hw_perf_enable(void)
852 else if (i < n_running) 1002 else if (i < n_running)
853 continue; 1003 continue;
854 1004
855 x86_pmu_start(event); 1005 if (hwc->state & PERF_HES_ARCH)
1006 continue;
1007
1008 x86_pmu_start(event, PERF_EF_RELOAD);
856 } 1009 }
857 cpuc->n_added = 0; 1010 cpuc->n_added = 0;
858 perf_events_lapic_init(); 1011 perf_events_lapic_init();
@@ -864,17 +1017,11 @@ void hw_perf_enable(void)
864 x86_pmu.enable_all(added); 1017 x86_pmu.enable_all(added);
865} 1018}
866 1019
867static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
868 u64 enable_mask)
869{
870 wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
871}
872
873static inline void x86_pmu_disable_event(struct perf_event *event) 1020static inline void x86_pmu_disable_event(struct perf_event *event)
874{ 1021{
875 struct hw_perf_event *hwc = &event->hw; 1022 struct hw_perf_event *hwc = &event->hw;
876 1023
877 wrmsrl(hwc->config_base + hwc->idx, hwc->config); 1024 wrmsrl(hwc->config_base, hwc->config);
878} 1025}
879 1026
880static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); 1027static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -927,7 +1074,7 @@ x86_perf_event_set_period(struct perf_event *event)
927 */ 1074 */
928 local64_set(&hwc->prev_count, (u64)-left); 1075 local64_set(&hwc->prev_count, (u64)-left);
929 1076
930 wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); 1077 wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
931 1078
932 /* 1079 /*
933 * Due to erratum on certan cpu we need 1080 * Due to erratum on certan cpu we need
@@ -935,7 +1082,7 @@ x86_perf_event_set_period(struct perf_event *event)
935 * is updated properly 1082 * is updated properly
936 */ 1083 */
937 if (x86_pmu.perfctr_second_write) { 1084 if (x86_pmu.perfctr_second_write) {
938 wrmsrl(hwc->event_base + idx, 1085 wrmsrl(hwc->event_base,
939 (u64)(-left) & x86_pmu.cntval_mask); 1086 (u64)(-left) & x86_pmu.cntval_mask);
940 } 1087 }
941 1088
@@ -946,22 +1093,18 @@ x86_perf_event_set_period(struct perf_event *event)
946 1093
947static void x86_pmu_enable_event(struct perf_event *event) 1094static void x86_pmu_enable_event(struct perf_event *event)
948{ 1095{
949 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1096 if (__this_cpu_read(cpu_hw_events.enabled))
950 if (cpuc->enabled)
951 __x86_pmu_enable_event(&event->hw, 1097 __x86_pmu_enable_event(&event->hw,
952 ARCH_PERFMON_EVENTSEL_ENABLE); 1098 ARCH_PERFMON_EVENTSEL_ENABLE);
953} 1099}
954 1100
955/* 1101/*
956 * activate a single event 1102 * Add a single event to the PMU.
957 * 1103 *
958 * The event is added to the group of enabled events 1104 * The event is added to the group of enabled events
959 * but only if it can be scehduled with existing events. 1105 * but only if it can be scehduled with existing events.
960 *
961 * Called with PMU disabled. If successful and return value 1,
962 * then guaranteed to call perf_enable() and hw_perf_enable()
963 */ 1106 */
964static int x86_pmu_enable(struct perf_event *event) 1107static int x86_pmu_add(struct perf_event *event, int flags)
965{ 1108{
966 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1109 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
967 struct hw_perf_event *hwc; 1110 struct hw_perf_event *hwc;
@@ -970,58 +1113,67 @@ static int x86_pmu_enable(struct perf_event *event)
970 1113
971 hwc = &event->hw; 1114 hwc = &event->hw;
972 1115
1116 perf_pmu_disable(event->pmu);
973 n0 = cpuc->n_events; 1117 n0 = cpuc->n_events;
974 n = collect_events(cpuc, event, false); 1118 ret = n = collect_events(cpuc, event, false);
975 if (n < 0) 1119 if (ret < 0)
976 return n; 1120 goto out;
1121
1122 hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
1123 if (!(flags & PERF_EF_START))
1124 hwc->state |= PERF_HES_ARCH;
977 1125
978 /* 1126 /*
979 * If group events scheduling transaction was started, 1127 * If group events scheduling transaction was started,
980 * skip the schedulability test here, it will be peformed 1128 * skip the schedulability test here, it will be performed
981 * at commit time(->commit_txn) as a whole 1129 * at commit time (->commit_txn) as a whole
982 */ 1130 */
983 if (cpuc->group_flag & PERF_EVENT_TXN) 1131 if (cpuc->group_flag & PERF_EVENT_TXN)
984 goto out; 1132 goto done_collect;
985 1133
986 ret = x86_pmu.schedule_events(cpuc, n, assign); 1134 ret = x86_pmu.schedule_events(cpuc, n, assign);
987 if (ret) 1135 if (ret)
988 return ret; 1136 goto out;
989 /* 1137 /*
990 * copy new assignment, now we know it is possible 1138 * copy new assignment, now we know it is possible
991 * will be used by hw_perf_enable() 1139 * will be used by hw_perf_enable()
992 */ 1140 */
993 memcpy(cpuc->assign, assign, n*sizeof(int)); 1141 memcpy(cpuc->assign, assign, n*sizeof(int));
994 1142
995out: 1143done_collect:
996 cpuc->n_events = n; 1144 cpuc->n_events = n;
997 cpuc->n_added += n - n0; 1145 cpuc->n_added += n - n0;
998 cpuc->n_txn += n - n0; 1146 cpuc->n_txn += n - n0;
999 1147
1000 return 0; 1148 ret = 0;
1149out:
1150 perf_pmu_enable(event->pmu);
1151 return ret;
1001} 1152}
1002 1153
1003static int x86_pmu_start(struct perf_event *event) 1154static void x86_pmu_start(struct perf_event *event, int flags)
1004{ 1155{
1005 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1156 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1006 int idx = event->hw.idx; 1157 int idx = event->hw.idx;
1007 1158
1008 if (idx == -1) 1159 if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
1009 return -EAGAIN; 1160 return;
1161
1162 if (WARN_ON_ONCE(idx == -1))
1163 return;
1164
1165 if (flags & PERF_EF_RELOAD) {
1166 WARN_ON_ONCE(!(event->hw.state & PERF_HES_UPTODATE));
1167 x86_perf_event_set_period(event);
1168 }
1169
1170 event->hw.state = 0;
1010 1171
1011 x86_perf_event_set_period(event);
1012 cpuc->events[idx] = event; 1172 cpuc->events[idx] = event;
1013 __set_bit(idx, cpuc->active_mask); 1173 __set_bit(idx, cpuc->active_mask);
1014 __set_bit(idx, cpuc->running); 1174 __set_bit(idx, cpuc->running);
1015 x86_pmu.enable(event); 1175 x86_pmu.enable(event);
1016 perf_event_update_userpage(event); 1176 perf_event_update_userpage(event);
1017
1018 return 0;
1019}
1020
1021static void x86_pmu_unthrottle(struct perf_event *event)
1022{
1023 int ret = x86_pmu_start(event);
1024 WARN_ON_ONCE(ret);
1025} 1177}
1026 1178
1027void perf_event_print_debug(void) 1179void perf_event_print_debug(void)
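x86_pmu_add() above now leaves a freshly collected event stopped and up to date, and only an explicit ->start() (or the PERF_EF_START flag) arms the counter; PERF_HES_ARCH keeps it parked across reschedules. A compressed sketch of that part of the contract; pmu_add_sketch() is an illustrative name, not a kernel function:

static void pmu_add_sketch(struct perf_event *event, int flags)
{
	/* newly added events start out stopped, with nothing left to fold back */
	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
	if (!(flags & PERF_EF_START))
		event->hw.state |= PERF_HES_ARCH;	/* stay parked until ->start() */
	/* ... collect_events() + schedule_events() as in x86_pmu_add() above ... */
}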
@@ -1057,8 +1209,8 @@ void perf_event_print_debug(void)
1057 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); 1209 pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask);
1058 1210
1059 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1211 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1060 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1212 rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
1061 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1213 rdmsrl(x86_pmu_event_addr(idx), pmc_count);
1062 1214
1063 prev_left = per_cpu(pmc_prev_left[idx], cpu); 1215 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1064 1216
@@ -1078,27 +1230,29 @@ void perf_event_print_debug(void)
1078 local_irq_restore(flags); 1230 local_irq_restore(flags);
1079} 1231}
1080 1232
1081static void x86_pmu_stop(struct perf_event *event) 1233static void x86_pmu_stop(struct perf_event *event, int flags)
1082{ 1234{
1083 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1235 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1084 struct hw_perf_event *hwc = &event->hw; 1236 struct hw_perf_event *hwc = &event->hw;
1085 int idx = hwc->idx;
1086
1087 if (!__test_and_clear_bit(idx, cpuc->active_mask))
1088 return;
1089 1237
1090 x86_pmu.disable(event); 1238 if (__test_and_clear_bit(hwc->idx, cpuc->active_mask)) {
1091 1239 x86_pmu.disable(event);
1092 /* 1240 cpuc->events[hwc->idx] = NULL;
1093 * Drain the remaining delta count out of a event 1241 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
1094 * that we are disabling: 1242 hwc->state |= PERF_HES_STOPPED;
1095 */ 1243 }
1096 x86_perf_event_update(event);
1097 1244
1098 cpuc->events[idx] = NULL; 1245 if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
1246 /*
1247 * Drain the remaining delta count out of a event
1248 * that we are disabling:
1249 */
1250 x86_perf_event_update(event);
1251 hwc->state |= PERF_HES_UPTODATE;
1252 }
1099} 1253}
1100 1254
1101static void x86_pmu_disable(struct perf_event *event) 1255static void x86_pmu_del(struct perf_event *event, int flags)
1102{ 1256{
1103 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1257 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1104 int i; 1258 int i;
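x86_pmu_stop() above is now idempotent: disabling and bookkeeping happen only on the first stop, and the remaining hardware delta is folded into the generic count only when PERF_EF_UPDATE is passed and the count is not already up to date. A skeletal sketch of that flag handling (illustrative name, bodies elided):

static void pmu_stop_sketch(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;

	if (!(hwc->state & PERF_HES_STOPPED)) {
		/* disable the counter exactly once */
		hwc->state |= PERF_HES_STOPPED;
	}
	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
		/* drain the remaining delta into the generic event count */
		hwc->state |= PERF_HES_UPTODATE;
	}
}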
@@ -1111,7 +1265,7 @@ static void x86_pmu_disable(struct perf_event *event)
1111 if (cpuc->group_flag & PERF_EVENT_TXN) 1265 if (cpuc->group_flag & PERF_EVENT_TXN)
1112 return; 1266 return;
1113 1267
1114 x86_pmu_stop(event); 1268 x86_pmu_stop(event, PERF_EF_UPDATE);
1115 1269
1116 for (i = 0; i < cpuc->n_events; i++) { 1270 for (i = 0; i < cpuc->n_events; i++) {
1117 if (event == cpuc->event_list[i]) { 1271 if (event == cpuc->event_list[i]) {
@@ -1134,7 +1288,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1134 struct perf_sample_data data; 1288 struct perf_sample_data data;
1135 struct cpu_hw_events *cpuc; 1289 struct cpu_hw_events *cpuc;
1136 struct perf_event *event; 1290 struct perf_event *event;
1137 struct hw_perf_event *hwc;
1138 int idx, handled = 0; 1291 int idx, handled = 0;
1139 u64 val; 1292 u64 val;
1140 1293
@@ -1142,6 +1295,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1142 1295
1143 cpuc = &__get_cpu_var(cpu_hw_events); 1296 cpuc = &__get_cpu_var(cpu_hw_events);
1144 1297
1298 /*
1299 * Some chipsets need to unmask the LVTPC in a particular spot
1300 * inside the nmi handler. As a result, the unmasking was pushed
1301 * into all the nmi handlers.
1302 *
1303 * This generic handler doesn't seem to have any issues where the
1304 * unmasking occurs so it was left at the top.
1305 */
1306 apic_write(APIC_LVTPC, APIC_DM_NMI);
1307
1145 for (idx = 0; idx < x86_pmu.num_counters; idx++) { 1308 for (idx = 0; idx < x86_pmu.num_counters; idx++) {
1146 if (!test_bit(idx, cpuc->active_mask)) { 1309 if (!test_bit(idx, cpuc->active_mask)) {
1147 /* 1310 /*
@@ -1155,7 +1318,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1155 } 1318 }
1156 1319
1157 event = cpuc->events[idx]; 1320 event = cpuc->events[idx];
1158 hwc = &event->hw;
1159 1321
1160 val = x86_perf_event_update(event); 1322 val = x86_perf_event_update(event);
1161 if (val & (1ULL << (x86_pmu.cntval_bits - 1))) 1323 if (val & (1ULL << (x86_pmu.cntval_bits - 1)))
@@ -1171,7 +1333,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1171 continue; 1333 continue;
1172 1334
1173 if (perf_event_overflow(event, 1, &data, regs)) 1335 if (perf_event_overflow(event, 1, &data, regs))
1174 x86_pmu_stop(event); 1336 x86_pmu_stop(event, 0);
1175 } 1337 }
1176 1338
1177 if (handled) 1339 if (handled)
@@ -1180,25 +1342,6 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
1180 return handled; 1342 return handled;
1181} 1343}
1182 1344
1183void smp_perf_pending_interrupt(struct pt_regs *regs)
1184{
1185 irq_enter();
1186 ack_APIC_irq();
1187 inc_irq_stat(apic_pending_irqs);
1188 perf_event_do_pending();
1189 irq_exit();
1190}
1191
1192void set_perf_event_pending(void)
1193{
1194#ifdef CONFIG_X86_LOCAL_APIC
1195 if (!x86_pmu.apic || !x86_pmu_initialized())
1196 return;
1197
1198 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1199#endif
1200}
1201
1202void perf_events_lapic_init(void) 1345void perf_events_lapic_init(void)
1203{ 1346{
1204 if (!x86_pmu.apic || !x86_pmu_initialized()) 1347 if (!x86_pmu.apic || !x86_pmu_initialized())
@@ -1230,11 +1373,10 @@ perf_event_nmi_handler(struct notifier_block *self,
1230 1373
1231 switch (cmd) { 1374 switch (cmd) {
1232 case DIE_NMI: 1375 case DIE_NMI:
1233 case DIE_NMI_IPI:
1234 break; 1376 break;
1235 case DIE_NMIUNKNOWN: 1377 case DIE_NMIUNKNOWN:
1236 this_nmi = percpu_read(irq_stat.__nmi_count); 1378 this_nmi = percpu_read(irq_stat.__nmi_count);
1237 if (this_nmi != __get_cpu_var(pmu_nmi).marked) 1379 if (this_nmi != __this_cpu_read(pmu_nmi.marked))
1238 /* let the kernel handle the unknown nmi */ 1380 /* let the kernel handle the unknown nmi */
1239 return NOTIFY_DONE; 1381 return NOTIFY_DONE;
1240 /* 1382 /*
@@ -1249,8 +1391,6 @@ perf_event_nmi_handler(struct notifier_block *self,
1249 return NOTIFY_DONE; 1391 return NOTIFY_DONE;
1250 } 1392 }
1251 1393
1252 apic_write(APIC_LVTPC, APIC_DM_NMI);
1253
1254 handled = x86_pmu.handle_irq(args->regs); 1394 handled = x86_pmu.handle_irq(args->regs);
1255 if (!handled) 1395 if (!handled)
1256 return NOTIFY_DONE; 1396 return NOTIFY_DONE;
@@ -1258,8 +1398,8 @@ perf_event_nmi_handler(struct notifier_block *self,
1258 this_nmi = percpu_read(irq_stat.__nmi_count); 1398 this_nmi = percpu_read(irq_stat.__nmi_count);
1259 if ((handled > 1) || 1399 if ((handled > 1) ||
1260 /* the next nmi could be a back-to-back nmi */ 1400 /* the next nmi could be a back-to-back nmi */
1261 ((__get_cpu_var(pmu_nmi).marked == this_nmi) && 1401 ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
1262 (__get_cpu_var(pmu_nmi).handled > 1))) { 1402 (__this_cpu_read(pmu_nmi.handled) > 1))) {
1263 /* 1403 /*
1264 * We could have two subsequent back-to-back nmis: The 1404 * We could have two subsequent back-to-back nmis: The
1265 * first handles more than one counter, the 2nd 1405 * first handles more than one counter, the 2nd
@@ -1270,8 +1410,8 @@ perf_event_nmi_handler(struct notifier_block *self,
1270 * handling more than one counter. We will mark the 1410 * handling more than one counter. We will mark the
1271 * next (3rd) and then drop it if unhandled. 1411 * next (3rd) and then drop it if unhandled.
1272 */ 1412 */
1273 __get_cpu_var(pmu_nmi).marked = this_nmi + 1; 1413 __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
1274 __get_cpu_var(pmu_nmi).handled = handled; 1414 __this_cpu_write(pmu_nmi.handled, handled);
1275 } 1415 }
1276 1416
1277 return NOTIFY_STOP; 1417 return NOTIFY_STOP;
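The pmu_nmi bookkeeping above implements the back-to-back NMI heuristic: when one PMI services more than one counter, the immediately following NMI may be a leftover from the same burst, so it is marked and, if it later arrives as an otherwise-unknown NMI, claimed rather than reported. A standalone sketch of that logic; the struct and function names are illustrative, the real state lives in the per-CPU pmu_nmi variable:

struct nmi_mark {
	unsigned int marked;	/* NMI count we expect to be spurious */
	int handled;		/* counters handled by the marking PMI */
};

/* after handling a PMI: expect one spurious follow-up NMI if we serviced
 * more than one counter, or if we are already inside such a sequence */
static void mark_possible_back_to_back(struct nmi_mark *s,
				       unsigned int this_nmi, int handled)
{
	if (handled > 1 || (s->marked == this_nmi && s->handled > 1)) {
		s->marked = this_nmi + 1;
		s->handled = handled;
	}
}

/* on an unknown NMI: swallow it only if the previous PMI marked it */
static bool swallow_unknown_nmi(const struct nmi_mark *s, unsigned int this_nmi)
{
	return this_nmi == s->marked;
}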
@@ -1280,7 +1420,7 @@ perf_event_nmi_handler(struct notifier_block *self,
1280static __read_mostly struct notifier_block perf_event_nmi_notifier = { 1420static __read_mostly struct notifier_block perf_event_nmi_notifier = {
1281 .notifier_call = perf_event_nmi_handler, 1421 .notifier_call = perf_event_nmi_handler,
1282 .next = NULL, 1422 .next = NULL,
1283 .priority = 1 1423 .priority = NMI_LOCAL_LOW_PRIOR,
1284}; 1424};
1285 1425
1286static struct event_constraint unconstrained; 1426static struct event_constraint unconstrained;
@@ -1353,7 +1493,7 @@ static void __init pmu_check_apic(void)
1353 pr_info("no hardware sampling interrupt available.\n"); 1493 pr_info("no hardware sampling interrupt available.\n");
1354} 1494}
1355 1495
1356void __init init_hw_perf_events(void) 1496static int __init init_hw_perf_events(void)
1357{ 1497{
1358 struct event_constraint *c; 1498 struct event_constraint *c;
1359 int err; 1499 int err;
@@ -1368,15 +1508,19 @@ void __init init_hw_perf_events(void)
1368 err = amd_pmu_init(); 1508 err = amd_pmu_init();
1369 break; 1509 break;
1370 default: 1510 default:
1371 return; 1511 return 0;
1372 } 1512 }
1373 if (err != 0) { 1513 if (err != 0) {
1374 pr_cont("no PMU driver, software events only.\n"); 1514 pr_cont("no PMU driver, software events only.\n");
1375 return; 1515 return 0;
1376 } 1516 }
1377 1517
1378 pmu_check_apic(); 1518 pmu_check_apic();
1379 1519
1520 /* sanity check that the hardware exists or is emulated */
1521 if (!check_hw_exists())
1522 return 0;
1523
1380 pr_cont("%s PMU driver.\n", x86_pmu.name); 1524 pr_cont("%s PMU driver.\n", x86_pmu.name);
1381 1525
1382 if (x86_pmu.quirks) 1526 if (x86_pmu.quirks)
@@ -1388,7 +1532,6 @@ void __init init_hw_perf_events(void)
1388 x86_pmu.num_counters = X86_PMC_MAX_GENERIC; 1532 x86_pmu.num_counters = X86_PMC_MAX_GENERIC;
1389 } 1533 }
1390 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; 1534 x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
1391 perf_max_events = x86_pmu.num_counters;
1392 1535
1393 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { 1536 if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) {
1394 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", 1537 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
@@ -1424,8 +1567,12 @@ void __init init_hw_perf_events(void)
1424 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); 1567 pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
1425 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); 1568 pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
1426 1569
1570 perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
1427 perf_cpu_notifier(x86_pmu_notifier); 1571 perf_cpu_notifier(x86_pmu_notifier);
1572
1573 return 0;
1428} 1574}
1575early_initcall(init_hw_perf_events);
1429 1576
1430static inline void x86_pmu_read(struct perf_event *event) 1577static inline void x86_pmu_read(struct perf_event *event)
1431{ 1578{
@@ -1437,12 +1584,11 @@ static inline void x86_pmu_read(struct perf_event *event)
1437 * Set the flag to make pmu::enable() not perform the 1584 * Set the flag to make pmu::enable() not perform the
1438 * schedulability test, it will be performed at commit time 1585 * schedulability test, it will be performed at commit time
1439 */ 1586 */
1440static void x86_pmu_start_txn(const struct pmu *pmu) 1587static void x86_pmu_start_txn(struct pmu *pmu)
1441{ 1588{
1442 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1589 perf_pmu_disable(pmu);
1443 1590 __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
1444 cpuc->group_flag |= PERF_EVENT_TXN; 1591 __this_cpu_write(cpu_hw_events.n_txn, 0);
1445 cpuc->n_txn = 0;
1446} 1592}
1447 1593
1448/* 1594/*
@@ -1450,16 +1596,15 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
1450 * Clear the flag and pmu::enable() will perform the 1596 * Clear the flag and pmu::enable() will perform the
1451 * schedulability test. 1597 * schedulability test.
1452 */ 1598 */
1453static void x86_pmu_cancel_txn(const struct pmu *pmu) 1599static void x86_pmu_cancel_txn(struct pmu *pmu)
1454{ 1600{
1455 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1601 __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
1456
1457 cpuc->group_flag &= ~PERF_EVENT_TXN;
1458 /* 1602 /*
1459 * Truncate the collected events. 1603 * Truncate the collected events.
1460 */ 1604 */
1461 cpuc->n_added -= cpuc->n_txn; 1605 __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
1462 cpuc->n_events -= cpuc->n_txn; 1606 __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
1607 perf_pmu_enable(pmu);
1463} 1608}
1464 1609
1465/* 1610/*
@@ -1467,7 +1612,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
1467 * Perform the group schedulability test as a whole 1612 * Perform the group schedulability test as a whole
1468 * Return 0 if success 1613 * Return 0 if success
1469 */ 1614 */
1470static int x86_pmu_commit_txn(const struct pmu *pmu) 1615static int x86_pmu_commit_txn(struct pmu *pmu)
1471{ 1616{
1472 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); 1617 struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
1473 int assign[X86_PMC_IDX_MAX]; 1618 int assign[X86_PMC_IDX_MAX];
@@ -1489,22 +1634,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
1489 memcpy(cpuc->assign, assign, n*sizeof(int)); 1634 memcpy(cpuc->assign, assign, n*sizeof(int));
1490 1635
1491 cpuc->group_flag &= ~PERF_EVENT_TXN; 1636 cpuc->group_flag &= ~PERF_EVENT_TXN;
1492 1637 perf_pmu_enable(pmu);
1493 return 0; 1638 return 0;
1494} 1639}
1495 1640
1496static const struct pmu pmu = {
1497 .enable = x86_pmu_enable,
1498 .disable = x86_pmu_disable,
1499 .start = x86_pmu_start,
1500 .stop = x86_pmu_stop,
1501 .read = x86_pmu_read,
1502 .unthrottle = x86_pmu_unthrottle,
1503 .start_txn = x86_pmu_start_txn,
1504 .cancel_txn = x86_pmu_cancel_txn,
1505 .commit_txn = x86_pmu_commit_txn,
1506};
1507
1508/* 1641/*
1509 * validate that we can schedule this event 1642 * validate that we can schedule this event
1510 */ 1643 */
@@ -1579,12 +1712,22 @@ out:
1579 return ret; 1712 return ret;
1580} 1713}
1581 1714
1582const struct pmu *hw_perf_event_init(struct perf_event *event) 1715static int x86_pmu_event_init(struct perf_event *event)
1583{ 1716{
1584 const struct pmu *tmp; 1717 struct pmu *tmp;
1585 int err; 1718 int err;
1586 1719
1587 err = __hw_perf_event_init(event); 1720 switch (event->attr.type) {
1721 case PERF_TYPE_RAW:
1722 case PERF_TYPE_HARDWARE:
1723 case PERF_TYPE_HW_CACHE:
1724 break;
1725
1726 default:
1727 return -ENOENT;
1728 }
1729
1730 err = __x86_pmu_event_init(event);
1588 if (!err) { 1731 if (!err) {
1589 /* 1732 /*
1590 * we temporarily connect event to its pmu 1733 * we temporarily connect event to its pmu
@@ -1604,37 +1747,31 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
1604 if (err) { 1747 if (err) {
1605 if (event->destroy) 1748 if (event->destroy)
1606 event->destroy(event); 1749 event->destroy(event);
1607 return ERR_PTR(err);
1608 } 1750 }
1609 1751
1610 return &pmu; 1752 return err;
1611} 1753}
1612 1754
1613/* 1755static struct pmu pmu = {
1614 * callchain support 1756 .pmu_enable = x86_pmu_enable,
1615 */ 1757 .pmu_disable = x86_pmu_disable,
1616
1617static inline
1618void callchain_store(struct perf_callchain_entry *entry, u64 ip)
1619{
1620 if (entry->nr < PERF_MAX_STACK_DEPTH)
1621 entry->ip[entry->nr++] = ip;
1622}
1623 1758
1624static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); 1759 .event_init = x86_pmu_event_init,
1625static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
1626 1760
1761 .add = x86_pmu_add,
1762 .del = x86_pmu_del,
1763 .start = x86_pmu_start,
1764 .stop = x86_pmu_stop,
1765 .read = x86_pmu_read,
1627 1766
1628static void 1767 .start_txn = x86_pmu_start_txn,
1629backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) 1768 .cancel_txn = x86_pmu_cancel_txn,
1630{ 1769 .commit_txn = x86_pmu_commit_txn,
1631 /* Ignore warnings */ 1770};
1632}
1633 1771
1634static void backtrace_warning(void *data, char *msg) 1772/*
1635{ 1773 * callchain support
1636 /* Ignore warnings */ 1774 */
1637}
1638 1775
1639static int backtrace_stack(void *data, char *name) 1776static int backtrace_stack(void *data, char *name)
1640{ 1777{
@@ -1645,24 +1782,26 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
1645{ 1782{
1646 struct perf_callchain_entry *entry = data; 1783 struct perf_callchain_entry *entry = data;
1647 1784
1648 callchain_store(entry, addr); 1785 perf_callchain_store(entry, addr);
1649} 1786}
1650 1787
1651static const struct stacktrace_ops backtrace_ops = { 1788static const struct stacktrace_ops backtrace_ops = {
1652 .warning = backtrace_warning,
1653 .warning_symbol = backtrace_warning_symbol,
1654 .stack = backtrace_stack, 1789 .stack = backtrace_stack,
1655 .address = backtrace_address, 1790 .address = backtrace_address,
1656 .walk_stack = print_context_stack_bp, 1791 .walk_stack = print_context_stack_bp,
1657}; 1792};
1658 1793
1659static void 1794void
1660perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) 1795perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
1661{ 1796{
1662 callchain_store(entry, PERF_CONTEXT_KERNEL); 1797 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1663 callchain_store(entry, regs->ip); 1798 /* TODO: We don't support guest os callchain now */
1799 return;
1800 }
1801
1802 perf_callchain_store(entry, regs->ip);
1664 1803
1665 dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); 1804 dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
1666} 1805}
1667 1806
1668#ifdef CONFIG_COMPAT 1807#ifdef CONFIG_COMPAT
@@ -1689,7 +1828,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1689 if (fp < compat_ptr(regs->sp)) 1828 if (fp < compat_ptr(regs->sp))
1690 break; 1829 break;
1691 1830
1692 callchain_store(entry, frame.return_address); 1831 perf_callchain_store(entry, frame.return_address);
1693 fp = compat_ptr(frame.next_frame); 1832 fp = compat_ptr(frame.next_frame);
1694 } 1833 }
1695 return 1; 1834 return 1;
@@ -1702,19 +1841,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
1702} 1841}
1703#endif 1842#endif
1704 1843
1705static void 1844void
1706perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) 1845perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
1707{ 1846{
1708 struct stack_frame frame; 1847 struct stack_frame frame;
1709 const void __user *fp; 1848 const void __user *fp;
1710 1849
1711 if (!user_mode(regs)) 1850 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1712 regs = task_pt_regs(current); 1851 /* TODO: We don't support guest os callchain now */
1852 return;
1853 }
1713 1854
1714 fp = (void __user *)regs->bp; 1855 fp = (void __user *)regs->bp;
1715 1856
1716 callchain_store(entry, PERF_CONTEXT_USER); 1857 perf_callchain_store(entry, regs->ip);
1717 callchain_store(entry, regs->ip);
1718 1858
1719 if (perf_callchain_user32(regs, entry)) 1859 if (perf_callchain_user32(regs, entry))
1720 return; 1860 return;
@@ -1731,52 +1871,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
1731 if ((unsigned long)fp < regs->sp) 1871 if ((unsigned long)fp < regs->sp)
1732 break; 1872 break;
1733 1873
1734 callchain_store(entry, frame.return_address); 1874 perf_callchain_store(entry, frame.return_address);
1735 fp = frame.next_frame; 1875 fp = frame.next_frame;
1736 } 1876 }
1737} 1877}
1738 1878
1739static void
1740perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
1741{
1742 int is_user;
1743
1744 if (!regs)
1745 return;
1746
1747 is_user = user_mode(regs);
1748
1749 if (is_user && current->state != TASK_RUNNING)
1750 return;
1751
1752 if (!is_user)
1753 perf_callchain_kernel(regs, entry);
1754
1755 if (current->mm)
1756 perf_callchain_user(regs, entry);
1757}
1758
1759struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
1760{
1761 struct perf_callchain_entry *entry;
1762
1763 if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1764 /* TODO: We don't support guest os callchain now */
1765 return NULL;
1766 }
1767
1768 if (in_nmi())
1769 entry = &__get_cpu_var(pmc_nmi_entry);
1770 else
1771 entry = &__get_cpu_var(pmc_irq_entry);
1772
1773 entry->nr = 0;
1774
1775 perf_do_callchain(regs, entry);
1776
1777 return entry;
1778}
1779
1780unsigned long perf_instruction_pointer(struct pt_regs *regs) 1879unsigned long perf_instruction_pointer(struct pt_regs *regs)
1781{ 1880{
1782 unsigned long ip; 1881 unsigned long ip;