Diffstat (limited to 'arch/x86/kernel/cpu')
 arch/x86/kernel/cpu/mcheck/therm_throt.c  |   2
 arch/x86/kernel/cpu/mcheck/threshold.c    |   2
 arch/x86/kernel/cpu/mtrr/generic.c        |   2
 arch/x86/kernel/cpu/perf_event.c          | 278
 arch/x86/kernel/cpu/perf_event.h          |  51
 arch/x86/kernel/cpu/perf_event_amd.c      |   2
 arch/x86/kernel/cpu/perf_event_amd_ibs.c  |  29
 arch/x86/kernel/cpu/perf_event_intel.c    |  94
 arch/x86/kernel/cpu/perf_event_intel_ds.c |   6
 arch/x86/kernel/cpu/perf_event_p4.c       |   2
 10 files changed, 356 insertions(+), 112 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 787e06c84ea6..ce215616d5b9 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -397,8 +397,8 @@ static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
 
 asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
 {
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	inc_irq_stat(irq_thermal_count);
 	smp_thermal_vector();
 	irq_exit();
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c
index d746df2909c9..aa578cadb940 100644
--- a/arch/x86/kernel/cpu/mcheck/threshold.c
+++ b/arch/x86/kernel/cpu/mcheck/threshold.c
@@ -19,8 +19,8 @@ void (*mce_threshold_vector)(void) = default_threshold_interrupt;
 
 asmlinkage void smp_threshold_interrupt(void)
 {
-	exit_idle();
 	irq_enter();
+	exit_idle();
 	inc_irq_stat(irq_threshold_count);
 	mce_threshold_vector();
 	irq_exit();
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index a71efcdbb092..97b26356e9ee 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -547,6 +547,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
 
 	if (tmp != mask_lo) {
 		printk(KERN_WARNING "mtrr: your BIOS has configured an incorrect mask, fixing it.\n");
+		add_taint(TAINT_FIRMWARE_WORKAROUND);
 		mask_lo = tmp;
 	}
 }
@@ -693,6 +694,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 
 	/* Disable MTRRs, and set the default type to uncached */
 	mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
+	wbinvd();
 }
 
 static void post_set(void) __releases(set_atomicity_lock)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 640891014b2a..5adce1040b11 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -312,12 +312,8 @@ int x86_setup_perfctr(struct perf_event *event)
 		return -EOPNOTSUPP;
 	}
 
-	/*
-	 * Do not allow config1 (extended registers) to propagate,
-	 * there's no sane user-space generalization yet:
-	 */
 	if (attr->type == PERF_TYPE_RAW)
-		return 0;
+		return x86_pmu_extra_regs(event->attr.config, event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, event);
@@ -488,18 +484,195 @@ static inline int is_x86_event(struct perf_event *event)
 	return event->pmu == &pmu;
 }
 
+/*
+ * Event scheduler state:
+ *
+ * Assign events iterating over all events and counters, beginning
+ * with events with least weights first. Keep the current iterator
+ * state in struct sched_state.
+ */
+struct sched_state {
+	int	weight;
+	int	event;		/* event index */
+	int	counter;	/* counter index */
+	int	unassigned;	/* number of events to be assigned left */
+	unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
+};
+
+/* Total max is X86_PMC_IDX_MAX, but we are O(n!) limited */
+#define	SCHED_STATES_MAX	2
+
+struct perf_sched {
+	int			max_weight;
+	int			max_events;
+	struct event_constraint	**constraints;
+	struct sched_state	state;
+	int			saved_states;
+	struct sched_state	saved[SCHED_STATES_MAX];
+};
+
+/*
+ * Initialize interator that runs through all events and counters.
+ */
+static void perf_sched_init(struct perf_sched *sched, struct event_constraint **c,
+			    int num, int wmin, int wmax)
+{
+	int idx;
+
+	memset(sched, 0, sizeof(*sched));
+	sched->max_events	= num;
+	sched->max_weight	= wmax;
+	sched->constraints	= c;
+
+	for (idx = 0; idx < num; idx++) {
+		if (c[idx]->weight == wmin)
+			break;
+	}
+
+	sched->state.event	= idx;		/* start with min weight */
+	sched->state.weight	= wmin;
+	sched->state.unassigned	= num;
+}
+
+static void perf_sched_save_state(struct perf_sched *sched)
+{
+	if (WARN_ON_ONCE(sched->saved_states >= SCHED_STATES_MAX))
+		return;
+
+	sched->saved[sched->saved_states] = sched->state;
+	sched->saved_states++;
+}
+
+static bool perf_sched_restore_state(struct perf_sched *sched)
+{
+	if (!sched->saved_states)
+		return false;
+
+	sched->saved_states--;
+	sched->state = sched->saved[sched->saved_states];
+
+	/* continue with next counter: */
+	clear_bit(sched->state.counter++, sched->state.used);
+
+	return true;
+}
+
+/*
+ * Select a counter for the current event to schedule. Return true on
+ * success.
+ */
+static bool __perf_sched_find_counter(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+	int idx;
+
+	if (!sched->state.unassigned)
+		return false;
+
+	if (sched->state.event >= sched->max_events)
+		return false;
+
+	c = sched->constraints[sched->state.event];
+
+	/* Prefer fixed purpose counters */
+	if (x86_pmu.num_counters_fixed) {
+		idx = X86_PMC_IDX_FIXED;
+		for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_MAX) {
+			if (!__test_and_set_bit(idx, sched->state.used))
+				goto done;
+		}
+	}
+	/* Grab the first unused counter starting with idx */
+	idx = sched->state.counter;
+	for_each_set_bit_cont(idx, c->idxmsk, X86_PMC_IDX_FIXED) {
+		if (!__test_and_set_bit(idx, sched->state.used))
+			goto done;
+	}
+
+	return false;
+
+done:
+	sched->state.counter = idx;
+
+	if (c->overlap)
+		perf_sched_save_state(sched);
+
+	return true;
+}
+
+static bool perf_sched_find_counter(struct perf_sched *sched)
+{
+	while (!__perf_sched_find_counter(sched)) {
+		if (!perf_sched_restore_state(sched))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Go through all unassigned events and find the next one to schedule.
+ * Take events with the least weight first. Return true on success.
+ */
+static bool perf_sched_next_event(struct perf_sched *sched)
+{
+	struct event_constraint *c;
+
+	if (!sched->state.unassigned || !--sched->state.unassigned)
+		return false;
+
+	do {
+		/* next event */
+		sched->state.event++;
+		if (sched->state.event >= sched->max_events) {
+			/* next weight */
+			sched->state.event = 0;
+			sched->state.weight++;
+			if (sched->state.weight > sched->max_weight)
+				return false;
+		}
+		c = sched->constraints[sched->state.event];
+	} while (c->weight != sched->state.weight);
+
+	sched->state.counter = 0;	/* start with first counter */
+
+	return true;
+}
+
+/*
+ * Assign a counter for each event.
+ */
+static int perf_assign_events(struct event_constraint **constraints, int n,
+			int wmin, int wmax, int *assign)
+{
+	struct perf_sched sched;
+
+	perf_sched_init(&sched, constraints, n, wmin, wmax);
+
+	do {
+		if (!perf_sched_find_counter(&sched))
+			break;	/* failed */
+		if (assign)
+			assign[sched.state.event] = sched.state.counter;
+	} while (perf_sched_next_event(&sched));
+
+	return sched.state.unassigned;
+}
+
 int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
-	int i, j, w, wmax, num = 0;
+	int i, wmin, wmax, num = 0;
 	struct hw_perf_event *hwc;
 
 	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
 
-	for (i = 0; i < n; i++) {
+	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]);
 		constraints[i] = c;
+		wmin = min(wmin, c->weight);
+		wmax = max(wmax, c->weight);
 	}
 
 	/*
@@ -525,59 +698,11 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 		if (assign)
 			assign[i] = hwc->idx;
 	}
-	if (i == n)
-		goto done;
-
-	/*
-	 * begin slow path
-	 */
-
-	bitmap_zero(used_mask, X86_PMC_IDX_MAX);
-
-	/*
-	 * weight = number of possible counters
-	 *
-	 * 1    = most constrained, only works on one counter
-	 * wmax = least constrained, works on any counter
-	 *
-	 * assign events to counters starting with most
-	 * constrained events.
-	 */
-	wmax = x86_pmu.num_counters;
-
-	/*
-	 * when fixed event counters are present,
-	 * wmax is incremented by 1 to account
-	 * for one more choice
-	 */
-	if (x86_pmu.num_counters_fixed)
-		wmax++;
-
-	for (w = 1, num = n; num && w <= wmax; w++) {
-		/* for each event */
-		for (i = 0; num && i < n; i++) {
-			c = constraints[i];
-			hwc = &cpuc->event_list[i]->hw;
-
-			if (c->weight != w)
-				continue;
 
-			for_each_set_bit(j, c->idxmsk, X86_PMC_IDX_MAX) {
-				if (!test_bit(j, used_mask))
-					break;
-			}
-
-			if (j == X86_PMC_IDX_MAX)
-				break;
+	/* slow path */
+	if (i != n)
+		num = perf_assign_events(constraints, n, wmin, wmax, assign);
 
-			__set_bit(j, used_mask);
-
-			if (assign)
-				assign[i] = j;
-			num--;
-		}
-	}
-done:
 	/*
 	 * scheduling failed or is just a simulation,
 	 * free resources if necessary
@@ -588,7 +713,7 @@ done:
 			x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]);
 		}
 	}
-	return num ? -ENOSPC : 0;
+	return num ? -EINVAL : 0;
 }
 
 /*
@@ -607,7 +732,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 
 	if (is_x86_event(leader)) {
 		if (n >= max_count)
-			return -ENOSPC;
+			return -EINVAL;
 		cpuc->event_list[n] = leader;
 		n++;
 	}
@@ -620,7 +745,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader,
 			continue;
 
 		if (n >= max_count)
-			return -ENOSPC;
+			return -EINVAL;
 
 		cpuc->event_list[n] = event;
 		n++;
@@ -1123,6 +1248,7 @@ static void __init pmu_check_apic(void)
 
 static int __init init_hw_perf_events(void)
 {
+	struct x86_pmu_quirk *quirk;
 	struct event_constraint *c;
 	int err;
 
@@ -1151,8 +1277,8 @@ static int __init init_hw_perf_events(void)
 
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
-	if (x86_pmu.quirks)
-		x86_pmu.quirks();
+	for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next)
+		quirk->func();
 
 	if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) {
 		WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
@@ -1175,12 +1301,18 @@ static int __init init_hw_perf_events(void)
 
 	unconstrained = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1,
-				   0, x86_pmu.num_counters);
+				   0, x86_pmu.num_counters, 0);
 
 	if (x86_pmu.event_constraints) {
+		/*
+		 * event on fixed counter2 (REF_CYCLES) only works on this
+		 * counter, so do not extend mask to generic counters
+		 */
 		for_each_event_constraint(c, x86_pmu.event_constraints) {
-			if (c->cmask != X86_RAW_EVENT_MASK)
+			if (c->cmask != X86_RAW_EVENT_MASK
+			    || c->idxmsk64 == X86_PMC_MSK_FIXED_REF_CYCLES) {
 				continue;
+			}
 
 			c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
 			c->weight += x86_pmu.num_counters;
@@ -1316,7 +1448,7 @@ static int validate_event(struct perf_event *event)
 	c = x86_pmu.get_event_constraints(fake_cpuc, event);
 
 	if (!c || !c->weight)
-		ret = -ENOSPC;
+		ret = -EINVAL;
 
 	if (x86_pmu.put_event_constraints)
 		x86_pmu.put_event_constraints(fake_cpuc, event);
@@ -1341,7 +1473,7 @@ static int validate_group(struct perf_event *event)
 {
 	struct perf_event *leader = event->group_leader;
 	struct cpu_hw_events *fake_cpuc;
-	int ret = -ENOSPC, n;
+	int ret = -EINVAL, n;
 
 	fake_cpuc = allocate_fake_cpuc();
 	if (IS_ERR(fake_cpuc))
@@ -1570,3 +1702,15 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 
 	return misc;
 }
+
+void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap)
+{
+	cap->version		= x86_pmu.version;
+	cap->num_counters_gp	= x86_pmu.num_counters;
+	cap->num_counters_fixed	= x86_pmu.num_counters_fixed;
+	cap->bit_width_gp	= x86_pmu.cntval_bits;
+	cap->bit_width_fixed	= x86_pmu.cntval_bits;
+	cap->events_mask	= (unsigned int)x86_pmu.events_maskl;
+	cap->events_mask_len	= x86_pmu.events_mask_len;
+}
+EXPORT_SYMBOL_GPL(perf_get_x86_pmu_capability);
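The new perf_assign_events() path above replaces the old greedy loop with a weight-ordered iterator (struct perf_sched) that can save and restore its position when it hits a constraint marked as overlapping. A minimal user-space model of the idea follows; it is not part of this commit, and the counter masks are made up for the example. It shows why backtracking matters: a pure greedy pass in weight order can get stuck even though a valid assignment exists. The kernel version only backtracks at constraints flagged with the overlap bit and caps the saved states at SCHED_STATES_MAX to bound the worst-case O(n!) cost.

/*
 * Illustrative model only -- not kernel code. Events are allowed-counter
 * bitmaps; weight = popcount of the mask.
 */
#include <stdio.h>

static int greedy(const unsigned int *msk, int n)
{
	unsigned int used = 0;
	int w, i;

	for (w = 1; w <= 8; w++) {		/* least constrained last */
		for (i = 0; i < n; i++) {
			unsigned int free;

			if (__builtin_popcount(msk[i]) != w)
				continue;
			free = msk[i] & ~used;
			if (!free)
				return 0;	/* stuck */
			used |= free & -free;	/* grab lowest free counter */
		}
	}
	return 1;
}

static int backtrack(const unsigned int *msk, int n, int i, unsigned int used)
{
	unsigned int m;

	if (i == n)
		return 1;
	for (m = msk[i] & ~used; m; m &= m - 1)	/* try each free counter */
		if (backtrack(msk, n, i + 1, used | (m & -m)))
			return 1;
	return 0;
}

int main(void)
{
	/* one event that may use counters {0,3}, three that need {0,1,2} */
	unsigned int msk[] = { 0x09, 0x07, 0x07, 0x07 };

	printf("greedy:       %s\n", greedy(msk, 4) ? "fits" : "fails");
	printf("backtracking: %s\n", backtrack(msk, 4, 0, 0) ? "fits" : "fails");
	return 0;
}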
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index b9698d40ac4b..8944062f46e2 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -45,6 +45,7 @@ struct event_constraint {
 	u64	code;
 	u64	cmask;
 	int	weight;
+	int	overlap;
 };
 
 struct amd_nb {
@@ -151,15 +152,40 @@ struct cpu_hw_events {
 	void				*kfree_on_online;
 };
 
-#define __EVENT_CONSTRAINT(c, n, m, w) {\
+#define __EVENT_CONSTRAINT(c, n, m, w, o) {\
 	{ .idxmsk64 = (n) },		\
 	.code = (c),			\
 	.cmask = (m),			\
 	.weight = (w),			\
+	.overlap = (o),			\
 }
 
 #define EVENT_CONSTRAINT(c, n, m)	\
-	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0)
+
+/*
+ * The overlap flag marks event constraints with overlapping counter
+ * masks. This is the case if the counter mask of such an event is not
+ * a subset of any other counter mask of a constraint with an equal or
+ * higher weight, e.g.:
+ *
+ *  c_overlaps = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
+ *  c_another1 = EVENT_CONSTRAINT(0, 0x07, 0);
+ *  c_another2 = EVENT_CONSTRAINT(0, 0x38, 0);
+ *
+ * The event scheduler may not select the correct counter in the first
+ * cycle because it needs to know which subsequent events will be
+ * scheduled. It may fail to schedule the events then. So we set the
+ * overlap flag for such constraints to give the scheduler a hint which
+ * events to select for counter rescheduling.
+ *
+ * Care must be taken as the rescheduling algorithm is O(n!) which
+ * will increase scheduling cycles for an over-commited system
+ * dramatically. The number of such EVENT_CONSTRAINT_OVERLAP() macros
+ * and its counter masks must be kept at a minimum.
+ */
+#define EVENT_CONSTRAINT_OVERLAP(c, n, m)	\
+	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1)
 
 /*
  * Constraint on the Event code.
@@ -235,6 +261,11 @@ union perf_capabilities {
 	u64	capabilities;
 };
 
+struct x86_pmu_quirk {
+	struct x86_pmu_quirk *next;
+	void (*func)(void);
+};
+
 /*
  * struct x86_pmu - generic x86 pmu
  */
@@ -259,6 +290,11 @@ struct x86_pmu {
 	int		num_counters_fixed;
 	int		cntval_bits;
 	u64		cntval_mask;
+	union {
+			unsigned long events_maskl;
+			unsigned long events_mask[BITS_TO_LONGS(ARCH_PERFMON_EVENTS_COUNT)];
+	};
+	int		events_mask_len;
 	int		apic;
 	u64		max_period;
 	struct event_constraint *
@@ -268,7 +304,7 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
-	void		(*quirks)(void);
+	struct x86_pmu_quirk *quirks;
 	int		perfctr_second_write;
 
 	int		(*cpu_prepare)(int cpu);
@@ -309,6 +345,15 @@ struct x86_pmu {
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+#define x86_add_quirk(func_)						\
+do {									\
+	static struct x86_pmu_quirk __quirk __initdata = {		\
+		.func = func_,						\
+	};								\
+	__quirk.next = x86_pmu.quirks;					\
+	x86_pmu.quirks = &__quirk;					\
+} while (0)
+
 #define ERF_NO_HT_SHARING	1
 #define ERF_HAS_RSP_1		2
 
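The x86_add_quirk() macro above builds a singly linked list by prepending, and init_hw_perf_events() (in the perf_event.c hunks earlier) walks that list from the head, so quirks run in the reverse of their registration order. That is why intel_pmu_init() later installs intel_arch_events_quirk first "so it runs last". A small stand-alone sketch of that ordering, with made-up quirk functions and no kernel dependencies, might look like:

#include <stdio.h>

struct quirk {
	struct quirk *next;
	void (*func)(void);
};

static struct quirk *quirks;

#define add_quirk(func_)					\
do {								\
	static struct quirk __quirk = { .func = func_ };	\
	__quirk.next = quirks;					\
	quirks = &__quirk;					\
} while (0)

static void first_registered(void)  { printf("first registered quirk\n"); }
static void second_registered(void) { printf("second registered quirk\n"); }

int main(void)
{
	struct quirk *q;

	add_quirk(first_registered);
	add_quirk(second_registered);

	/* prints second_registered, then first_registered: LIFO order */
	for (q = quirks; q; q = q->next)
		q->func();

	return 0;
}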
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index aeefd45697a2..0397b23be8e9 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -492,7 +492,7 @@ static __initconst const struct x86_pmu amd_pmu = {
 static struct event_constraint amd_f15_PMC0  = EVENT_CONSTRAINT(0, 0x01, 0);
 static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0);
 static struct event_constraint amd_f15_PMC3  = EVENT_CONSTRAINT(0, 0x08, 0);
-static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0);
+static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, 0);
 static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0);
 static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0);
 
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index ab6343d21825..3b8a2d30d14e 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -199,8 +199,7 @@ static int force_ibs_eilvt_setup(void)
 		goto out;
 	}
 
-	pr_err(FW_BUG "using offset %d for IBS interrupts\n", offset);
-	pr_err(FW_BUG "workaround enabled for IBS LVT offset\n");
+	pr_info("IBS: LVT offset %d assigned\n", offset);
 
 	return 0;
 out:
@@ -265,19 +264,23 @@ perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *h
 static __init int amd_ibs_init(void)
 {
 	u32 caps;
-	int ret;
+	int ret = -EINVAL;
 
 	caps = __get_ibs_caps();
 	if (!caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
-	if (!ibs_eilvt_valid()) {
-		ret = force_ibs_eilvt_setup();
-		if (ret) {
-			pr_err("Failed to setup IBS, %d\n", ret);
-			return ret;
-		}
-	}
+	/*
+	 * Force LVT offset assignment for family 10h: The offsets are
+	 * not assigned by the BIOS for this family, so the OS is
+	 * responsible for doing it. If the OS assignment fails, fall
+	 * back to BIOS settings and try to setup this.
+	 */
+	if (boot_cpu_data.x86 == 0x10)
+		force_ibs_eilvt_setup();
+
+	if (!ibs_eilvt_valid())
+		goto out;
 
 	get_online_cpus();
 	ibs_caps = caps;
@@ -287,7 +290,11 @@ static __init int amd_ibs_init(void)
 	smp_call_function(setup_APIC_ibs, NULL, 1);
 	put_online_cpus();
 
-	return perf_event_ibs_init();
+	ret = perf_event_ibs_init();
+out:
+	if (ret)
+		pr_err("Failed to setup IBS, %d\n", ret);
+	return ret;
 }
 
 /* Since we need the pci subsystem to init ibs we can't do this earlier: */
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 2be5ebe99872..3bd37bdf1b8e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -28,6 +28,7 @@ static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
 	[PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
 	[PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+	[PERF_COUNT_HW_REF_CPU_CYCLES]		= 0x0300, /* pseudo-encoding */
 };
 
 static struct event_constraint intel_core_event_constraints[] __read_mostly =
@@ -45,12 +46,7 @@ static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/*
-	 * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event
-	 * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed
-	 * ratio between these counters.
-	 */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 	INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
@@ -68,7 +64,7 @@ static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
@@ -90,7 +86,7 @@ static struct event_constraint intel_westmere_event_constraints[] __read_mostly
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
 	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
 	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
@@ -102,7 +98,7 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
 	INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
 	INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -125,7 +121,7 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 	FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
-	/* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+	FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 	EVENT_CONSTRAINT_END
 };
 
@@ -1169,7 +1165,7 @@ again:
 		 */
 		c = &unconstrained;
 	} else if (intel_try_alt_er(event, orig_idx)) {
-		raw_spin_unlock(&era->lock);
+		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
 	}
 	raw_spin_unlock_irqrestore(&era->lock, flags);
@@ -1519,7 +1515,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.guest_get_msrs		= intel_guest_get_msrs,
 };
 
-static void intel_clovertown_quirks(void)
+static __init void intel_clovertown_quirk(void)
 {
 	/*
 	 * PEBS is unreliable due to:
@@ -1545,12 +1541,60 @@ static void intel_clovertown_quirks(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
+static __init void intel_sandybridge_quirk(void)
+{
+	printk(KERN_WARNING "PEBS disabled due to CPU errata.\n");
+	x86_pmu.pebs = 0;
+	x86_pmu.pebs_constraints = NULL;
+}
+
+static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
+	{ PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
+	{ PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
+	{ PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
+	{ PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
+	{ PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
+	{ PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
+	{ PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
+};
+
+static __init void intel_arch_events_quirk(void)
+{
+	int bit;
+
+	/* disable event that reported as not presend by cpuid */
+	for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
+		intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
+		printk(KERN_WARNING "CPUID marked event: \'%s\' unavailable\n",
+			intel_arch_events_map[bit].name);
+	}
+}
+
+static __init void intel_nehalem_quirk(void)
+{
+	union cpuid10_ebx ebx;
+
+	ebx.full = x86_pmu.events_maskl;
+	if (ebx.split.no_branch_misses_retired) {
+		/*
+		 * Erratum AAJ80 detected, we work it around by using
+		 * the BR_MISP_EXEC.ANY event. This will over-count
+		 * branch-misses, but it's still much better than the
+		 * architectural event which is often completely bogus:
+		 */
+		intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+		ebx.split.no_branch_misses_retired = 0;
+		x86_pmu.events_maskl = ebx.full;
+		printk(KERN_INFO "CPU erratum AAJ80 worked around\n");
+	}
+}
+
 __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
 	union cpuid10_eax eax;
+	union cpuid10_ebx ebx;
 	unsigned int unused;
-	unsigned int ebx;
 	int version;
 
 	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
@@ -1567,8 +1611,8 @@ __init int intel_pmu_init(void)
 	 * Check whether the Architectural PerfMon supports
 	 * Branch Misses Retired hw_event or not.
 	 */
-	cpuid(10, &eax.full, &ebx, &unused, &edx.full);
-	if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED)
+	cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
+	if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
 		return -ENODEV;
 
 	version = eax.split.version_id;
@@ -1582,6 +1626,9 @@ __init int intel_pmu_init(void)
 	x86_pmu.cntval_bits		= eax.split.bit_width;
 	x86_pmu.cntval_mask		= (1ULL << eax.split.bit_width) - 1;
 
+	x86_pmu.events_maskl		= ebx.full;
+	x86_pmu.events_mask_len		= eax.split.mask_length;
+
 	/*
 	 * Quirk: v2 perfmon does not report fixed-purpose events, so
 	 * assume at least 3 events:
@@ -1601,6 +1648,8 @@ __init int intel_pmu_init(void)
 
 	intel_ds_init();
 
+	x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
+
 	/*
 	 * Install the hw-cache-events table:
 	 */
@@ -1610,7 +1659,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
-		x86_pmu.quirks = intel_clovertown_quirks;
+		x86_add_quirk(intel_clovertown_quirk);
 	case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
 	case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
 	case 29: /* six-core 45 nm xeon "Dunnington" */
@@ -1644,17 +1693,8 @@ __init int intel_pmu_init(void)
 		/* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
 		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x1803fb1;
 
-		if (ebx & 0x40) {
-			/*
-			 * Erratum AAJ80 detected, we work it around by using
-			 * the BR_MISP_EXEC.ANY event. This will over-count
-			 * branch-misses, but it's still much better than the
-			 * architectural event which is often completely bogus:
-			 */
-			intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
+		x86_add_quirk(intel_nehalem_quirk);
 
-			pr_cont("erratum AAJ80 worked around, ");
-		}
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1694,6 +1734,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 42: /* SandyBridge */
+		x86_add_quirk(intel_sandybridge_quirk);
 	case 45: /* SandyBridge, "Romely-EP" */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
@@ -1730,5 +1771,6 @@ __init int intel_pmu_init(void)
 		break;
 	}
 	}
+
 	return 0;
 }
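The 0x0300 value added above is a pseudo-encoding that wires the new generic PERF_COUNT_HW_REF_CPU_CYCLES event to fixed counter 2 (CPU_CLK_UNHALTED.REF); it is not a real event-select code, which is why the init_hw_perf_events() hunk earlier refuses to extend that constraint to the generic counters. Assuming headers that already define PERF_COUNT_HW_REF_CPU_CYCLES (i.e. a tree with this series applied), a user-space consumer might request it like this; the program is a hypothetical example, not part of the commit:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size   = sizeof(attr);
	attr.type   = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_REF_CPU_CYCLES;	/* serviced by fixed counter 2 */

	/* count this process on any CPU */
	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	sleep(1);	/* let the counter tick at the reference clock */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("ref cycles: %lld\n", count);
	close(fd);
	return 0;
}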
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index c0d238f49db8..73da6b64f5b7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -493,6 +493,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 	unsigned long from = cpuc->lbr_entries[0].from;
 	unsigned long old_to, to = cpuc->lbr_entries[0].to;
 	unsigned long ip = regs->ip;
+	int is_64bit = 0;
 
 	/*
 	 * We don't need to fixup if the PEBS assist is fault like
@@ -544,7 +545,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 		} else
 			kaddr = (void *)to;
 
-		kernel_insn_init(&insn, kaddr);
+#ifdef CONFIG_X86_64
+		is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32);
+#endif
+		insn_init(&insn, kaddr, is_64bit);
 		insn_get_length(&insn);
 		to += insn.length;
 	} while (to < ip);
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 492bf1358a7c..ef484d9d0a25 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -1268,7 +1268,7 @@ reserve:
 	}
 
 done:
-	return num ? -ENOSPC : 0;
+	return num ? -EINVAL : 0;
 }
 
 static __initconst const struct x86_pmu p4_pmu = {