diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_counter.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_counter.c | 289 |
1 files changed, 267 insertions, 22 deletions
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 13bd6d6cf0bd..3d4ebbd2e129 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -55,6 +55,7 @@ struct x86_pmu { | |||
55 | int num_counters_fixed; | 55 | int num_counters_fixed; |
56 | int counter_bits; | 56 | int counter_bits; |
57 | u64 counter_mask; | 57 | u64 counter_mask; |
58 | int apic; | ||
58 | u64 max_period; | 59 | u64 max_period; |
59 | u64 intel_ctrl; | 60 | u64 intel_ctrl; |
60 | }; | 61 | }; |
@@ -66,6 +67,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { | |||
66 | }; | 67 | }; |
67 | 68 | ||
68 | /* | 69 | /* |
70 | * Not sure about some of these | ||
71 | */ | ||
72 | static const u64 p6_perfmon_event_map[] = | ||
73 | { | ||
74 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, | ||
75 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
76 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, | ||
77 | [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, | ||
78 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
79 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
80 | [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, | ||
81 | }; | ||
82 | |||
83 | static u64 p6_pmu_event_map(int event) | ||
84 | { | ||
85 | return p6_perfmon_event_map[event]; | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Counter setting that is specified not to count anything. | ||
90 | * We use this to effectively disable a counter. | ||
91 | * | ||
92 | * L2_RQSTS with 0 MESI unit mask. | ||
93 | */ | ||
94 | #define P6_NOP_COUNTER 0x0000002EULL | ||
95 | |||
96 | static u64 p6_pmu_raw_event(u64 event) | ||
97 | { | ||
98 | #define P6_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
99 | #define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
100 | #define P6_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
101 | #define P6_EVNTSEL_INV_MASK 0x00800000ULL | ||
102 | #define P6_EVNTSEL_COUNTER_MASK 0xFF000000ULL | ||
103 | |||
104 | #define P6_EVNTSEL_MASK \ | ||
105 | (P6_EVNTSEL_EVENT_MASK | \ | ||
106 | P6_EVNTSEL_UNIT_MASK | \ | ||
107 | P6_EVNTSEL_EDGE_MASK | \ | ||
108 | P6_EVNTSEL_INV_MASK | \ | ||
109 | P6_EVNTSEL_COUNTER_MASK) | ||
110 | |||
111 | return event & P6_EVNTSEL_MASK; | ||
112 | } | ||
113 | |||
114 | |||
115 | /* | ||
69 | * Intel PerfMon v3. Used on Core2 and later. | 116 | * Intel PerfMon v3. Used on Core2 and later. |
70 | */ | 117 | */ |
71 | static const u64 intel_perfmon_event_map[] = | 118 | static const u64 intel_perfmon_event_map[] = |
@@ -567,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex); | |||
567 | 614 | ||
568 | static bool reserve_pmc_hardware(void) | 615 | static bool reserve_pmc_hardware(void) |
569 | { | 616 | { |
617 | #ifdef CONFIG_X86_LOCAL_APIC | ||
570 | int i; | 618 | int i; |
571 | 619 | ||
572 | if (nmi_watchdog == NMI_LOCAL_APIC) | 620 | if (nmi_watchdog == NMI_LOCAL_APIC) |
@@ -581,9 +629,11 @@ static bool reserve_pmc_hardware(void) | |||
581 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | 629 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) |
582 | goto eventsel_fail; | 630 | goto eventsel_fail; |
583 | } | 631 | } |
632 | #endif | ||
584 | 633 | ||
585 | return true; | 634 | return true; |
586 | 635 | ||
636 | #ifdef CONFIG_X86_LOCAL_APIC | ||
587 | eventsel_fail: | 637 | eventsel_fail: |
588 | for (i--; i >= 0; i--) | 638 | for (i--; i >= 0; i--) |
589 | release_evntsel_nmi(x86_pmu.eventsel + i); | 639 | release_evntsel_nmi(x86_pmu.eventsel + i); |
@@ -598,10 +648,12 @@ perfctr_fail: | |||
598 | enable_lapic_nmi_watchdog(); | 648 | enable_lapic_nmi_watchdog(); |
599 | 649 | ||
600 | return false; | 650 | return false; |
651 | #endif | ||
601 | } | 652 | } |
602 | 653 | ||
603 | static void release_pmc_hardware(void) | 654 | static void release_pmc_hardware(void) |
604 | { | 655 | { |
656 | #ifdef CONFIG_X86_LOCAL_APIC | ||
605 | int i; | 657 | int i; |
606 | 658 | ||
607 | for (i = 0; i < x86_pmu.num_counters; i++) { | 659 | for (i = 0; i < x86_pmu.num_counters; i++) { |
@@ -611,6 +663,7 @@ static void release_pmc_hardware(void) | |||
611 | 663 | ||
612 | if (nmi_watchdog == NMI_LOCAL_APIC) | 664 | if (nmi_watchdog == NMI_LOCAL_APIC) |
613 | enable_lapic_nmi_watchdog(); | 665 | enable_lapic_nmi_watchdog(); |
666 | #endif | ||
614 | } | 667 | } |
615 | 668 | ||
616 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 669 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
@@ -666,6 +719,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
666 | { | 719 | { |
667 | struct perf_counter_attr *attr = &counter->attr; | 720 | struct perf_counter_attr *attr = &counter->attr; |
668 | struct hw_perf_counter *hwc = &counter->hw; | 721 | struct hw_perf_counter *hwc = &counter->hw; |
722 | u64 config; | ||
669 | int err; | 723 | int err; |
670 | 724 | ||
671 | if (!x86_pmu_initialized()) | 725 | if (!x86_pmu_initialized()) |
@@ -701,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
701 | hwc->sample_period = x86_pmu.max_period; | 755 | hwc->sample_period = x86_pmu.max_period; |
702 | hwc->last_period = hwc->sample_period; | 756 | hwc->last_period = hwc->sample_period; |
703 | atomic64_set(&hwc->period_left, hwc->sample_period); | 757 | atomic64_set(&hwc->period_left, hwc->sample_period); |
758 | } else { | ||
759 | /* | ||
760 | * If we have a PMU initialized but no APIC | ||
761 | * interrupts, we cannot sample hardware | ||
762 | * counters (user-space has to fall back and | ||
763 | * sample via a hrtimer based software counter): | ||
764 | */ | ||
765 | if (!x86_pmu.apic) | ||
766 | return -EOPNOTSUPP; | ||
704 | } | 767 | } |
705 | 768 | ||
706 | counter->destroy = hw_perf_counter_destroy; | 769 | counter->destroy = hw_perf_counter_destroy; |
@@ -718,14 +781,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
718 | 781 | ||
719 | if (attr->config >= x86_pmu.max_events) | 782 | if (attr->config >= x86_pmu.max_events) |
720 | return -EINVAL; | 783 | return -EINVAL; |
784 | |||
721 | /* | 785 | /* |
722 | * The generic map: | 786 | * The generic map: |
723 | */ | 787 | */ |
724 | hwc->config |= x86_pmu.event_map(attr->config); | 788 | config = x86_pmu.event_map(attr->config); |
789 | |||
790 | if (config == 0) | ||
791 | return -ENOENT; | ||
792 | |||
793 | if (config == -1LL) | ||
794 | return -EINVAL; | ||
795 | |||
796 | hwc->config |= config; | ||
725 | 797 | ||
726 | return 0; | 798 | return 0; |
727 | } | 799 | } |
728 | 800 | ||
801 | static void p6_pmu_disable_all(void) | ||
802 | { | ||
803 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
804 | u64 val; | ||
805 | |||
806 | if (!cpuc->enabled) | ||
807 | return; | ||
808 | |||
809 | cpuc->enabled = 0; | ||
810 | barrier(); | ||
811 | |||
812 | /* p6 only has one enable register */ | ||
813 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
814 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
815 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
816 | } | ||
817 | |||
729 | static void intel_pmu_disable_all(void) | 818 | static void intel_pmu_disable_all(void) |
730 | { | 819 | { |
731 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 820 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
@@ -767,6 +856,23 @@ void hw_perf_disable(void) | |||
767 | return x86_pmu.disable_all(); | 856 | return x86_pmu.disable_all(); |
768 | } | 857 | } |
769 | 858 | ||
859 | static void p6_pmu_enable_all(void) | ||
860 | { | ||
861 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
862 | unsigned long val; | ||
863 | |||
864 | if (cpuc->enabled) | ||
865 | return; | ||
866 | |||
867 | cpuc->enabled = 1; | ||
868 | barrier(); | ||
869 | |||
870 | /* p6 only has one enable register */ | ||
871 | rdmsrl(MSR_P6_EVNTSEL0, val); | ||
872 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
873 | wrmsrl(MSR_P6_EVNTSEL0, val); | ||
874 | } | ||
875 | |||
770 | static void intel_pmu_enable_all(void) | 876 | static void intel_pmu_enable_all(void) |
771 | { | 877 | { |
772 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 878 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
@@ -784,13 +890,13 @@ static void amd_pmu_enable_all(void) | |||
784 | barrier(); | 890 | barrier(); |
785 | 891 | ||
786 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 892 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
893 | struct perf_counter *counter = cpuc->counters[idx]; | ||
787 | u64 val; | 894 | u64 val; |
788 | 895 | ||
789 | if (!test_bit(idx, cpuc->active_mask)) | 896 | if (!test_bit(idx, cpuc->active_mask)) |
790 | continue; | 897 | continue; |
791 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | 898 | |
792 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | 899 | val = counter->hw.config; |
793 | continue; | ||
794 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 900 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
795 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | 901 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); |
796 | } | 902 | } |
@@ -819,16 +925,13 @@ static inline void intel_pmu_ack_status(u64 ack) | |||
819 | 925 | ||
820 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 926 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
821 | { | 927 | { |
822 | int err; | 928 | (void)checking_wrmsrl(hwc->config_base + idx, |
823 | err = checking_wrmsrl(hwc->config_base + idx, | ||
824 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | 929 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); |
825 | } | 930 | } |
826 | 931 | ||
827 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 932 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) |
828 | { | 933 | { |
829 | int err; | 934 | (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); |
830 | err = checking_wrmsrl(hwc->config_base + idx, | ||
831 | hwc->config); | ||
832 | } | 935 | } |
833 | 936 | ||
834 | static inline void | 937 | static inline void |
@@ -836,13 +939,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | |||
836 | { | 939 | { |
837 | int idx = __idx - X86_PMC_IDX_FIXED; | 940 | int idx = __idx - X86_PMC_IDX_FIXED; |
838 | u64 ctrl_val, mask; | 941 | u64 ctrl_val, mask; |
839 | int err; | ||
840 | 942 | ||
841 | mask = 0xfULL << (idx * 4); | 943 | mask = 0xfULL << (idx * 4); |
842 | 944 | ||
843 | rdmsrl(hwc->config_base, ctrl_val); | 945 | rdmsrl(hwc->config_base, ctrl_val); |
844 | ctrl_val &= ~mask; | 946 | ctrl_val &= ~mask; |
845 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 947 | (void)checking_wrmsrl(hwc->config_base, ctrl_val); |
948 | } | ||
949 | |||
950 | static inline void | ||
951 | p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
952 | { | ||
953 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
954 | u64 val = P6_NOP_COUNTER; | ||
955 | |||
956 | if (cpuc->enabled) | ||
957 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
958 | |||
959 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
846 | } | 960 | } |
847 | 961 | ||
848 | static inline void | 962 | static inline void |
@@ -943,6 +1057,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | |||
943 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | 1057 | err = checking_wrmsrl(hwc->config_base, ctrl_val); |
944 | } | 1058 | } |
945 | 1059 | ||
1060 | static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
1061 | { | ||
1062 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1063 | u64 val; | ||
1064 | |||
1065 | val = hwc->config; | ||
1066 | if (cpuc->enabled) | ||
1067 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
1068 | |||
1069 | (void)checking_wrmsrl(hwc->config_base + idx, val); | ||
1070 | } | ||
1071 | |||
1072 | |||
946 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1073 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
947 | { | 1074 | { |
948 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1075 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
@@ -959,8 +1086,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
959 | 1086 | ||
960 | if (cpuc->enabled) | 1087 | if (cpuc->enabled) |
961 | x86_pmu_enable_counter(hwc, idx); | 1088 | x86_pmu_enable_counter(hwc, idx); |
962 | else | ||
963 | x86_pmu_disable_counter(hwc, idx); | ||
964 | } | 1089 | } |
965 | 1090 | ||
966 | static int | 1091 | static int |
@@ -1176,6 +1301,49 @@ static void intel_pmu_reset(void) | |||
1176 | local_irq_restore(flags); | 1301 | local_irq_restore(flags); |
1177 | } | 1302 | } |
1178 | 1303 | ||
1304 | static int p6_pmu_handle_irq(struct pt_regs *regs) | ||
1305 | { | ||
1306 | struct perf_sample_data data; | ||
1307 | struct cpu_hw_counters *cpuc; | ||
1308 | struct perf_counter *counter; | ||
1309 | struct hw_perf_counter *hwc; | ||
1310 | int idx, handled = 0; | ||
1311 | u64 val; | ||
1312 | |||
1313 | data.regs = regs; | ||
1314 | data.addr = 0; | ||
1315 | |||
1316 | cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1317 | |||
1318 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1319 | if (!test_bit(idx, cpuc->active_mask)) | ||
1320 | continue; | ||
1321 | |||
1322 | counter = cpuc->counters[idx]; | ||
1323 | hwc = &counter->hw; | ||
1324 | |||
1325 | val = x86_perf_counter_update(counter, hwc, idx); | ||
1326 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | ||
1327 | continue; | ||
1328 | |||
1329 | /* | ||
1330 | * counter overflow | ||
1331 | */ | ||
1332 | handled = 1; | ||
1333 | data.period = counter->hw.last_period; | ||
1334 | |||
1335 | if (!x86_perf_counter_set_period(counter, hwc, idx)) | ||
1336 | continue; | ||
1337 | |||
1338 | if (perf_counter_overflow(counter, 1, &data)) | ||
1339 | p6_pmu_disable_counter(hwc, idx); | ||
1340 | } | ||
1341 | |||
1342 | if (handled) | ||
1343 | inc_irq_stat(apic_perf_irqs); | ||
1344 | |||
1345 | return handled; | ||
1346 | } | ||
1179 | 1347 | ||
1180 | /* | 1348 | /* |
1181 | * This handler is triggered by the local APIC, so the APIC IRQ handling | 1349 | * This handler is triggered by the local APIC, so the APIC IRQ handling |
@@ -1185,14 +1353,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1185 | { | 1353 | { |
1186 | struct perf_sample_data data; | 1354 | struct perf_sample_data data; |
1187 | struct cpu_hw_counters *cpuc; | 1355 | struct cpu_hw_counters *cpuc; |
1188 | int bit, cpu, loops; | 1356 | int bit, loops; |
1189 | u64 ack, status; | 1357 | u64 ack, status; |
1190 | 1358 | ||
1191 | data.regs = regs; | 1359 | data.regs = regs; |
1192 | data.addr = 0; | 1360 | data.addr = 0; |
1193 | 1361 | ||
1194 | cpu = smp_processor_id(); | 1362 | cpuc = &__get_cpu_var(cpu_hw_counters); |
1195 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1196 | 1363 | ||
1197 | perf_disable(); | 1364 | perf_disable(); |
1198 | status = intel_pmu_get_status(); | 1365 | status = intel_pmu_get_status(); |
@@ -1249,14 +1416,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) | |||
1249 | struct cpu_hw_counters *cpuc; | 1416 | struct cpu_hw_counters *cpuc; |
1250 | struct perf_counter *counter; | 1417 | struct perf_counter *counter; |
1251 | struct hw_perf_counter *hwc; | 1418 | struct hw_perf_counter *hwc; |
1252 | int cpu, idx, handled = 0; | 1419 | int idx, handled = 0; |
1253 | u64 val; | 1420 | u64 val; |
1254 | 1421 | ||
1255 | data.regs = regs; | 1422 | data.regs = regs; |
1256 | data.addr = 0; | 1423 | data.addr = 0; |
1257 | 1424 | ||
1258 | cpu = smp_processor_id(); | 1425 | cpuc = &__get_cpu_var(cpu_hw_counters); |
1259 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1260 | 1426 | ||
1261 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | 1427 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { |
1262 | if (!test_bit(idx, cpuc->active_mask)) | 1428 | if (!test_bit(idx, cpuc->active_mask)) |
@@ -1299,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs) | |||
1299 | 1465 | ||
1300 | void set_perf_counter_pending(void) | 1466 | void set_perf_counter_pending(void) |
1301 | { | 1467 | { |
1468 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1302 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | 1469 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); |
1470 | #endif | ||
1303 | } | 1471 | } |
1304 | 1472 | ||
1305 | void perf_counters_lapic_init(void) | 1473 | void perf_counters_lapic_init(void) |
1306 | { | 1474 | { |
1307 | if (!x86_pmu_initialized()) | 1475 | #ifdef CONFIG_X86_LOCAL_APIC |
1476 | if (!x86_pmu.apic || !x86_pmu_initialized()) | ||
1308 | return; | 1477 | return; |
1309 | 1478 | ||
1310 | /* | 1479 | /* |
1311 | * Always use NMI for PMU | 1480 | * Always use NMI for PMU |
1312 | */ | 1481 | */ |
1313 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1482 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1483 | #endif | ||
1314 | } | 1484 | } |
1315 | 1485 | ||
1316 | static int __kprobes | 1486 | static int __kprobes |
@@ -1334,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self, | |||
1334 | 1504 | ||
1335 | regs = args->regs; | 1505 | regs = args->regs; |
1336 | 1506 | ||
1507 | #ifdef CONFIG_X86_LOCAL_APIC | ||
1337 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1508 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1509 | #endif | ||
1338 | /* | 1510 | /* |
1339 | * Can't rely on the handled return value to say it was our NMI, two | 1511 | * Can't rely on the handled return value to say it was our NMI, two |
1340 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. | 1512 | * counters could trigger 'simultaneously' raising two back-to-back NMIs. |
@@ -1353,6 +1525,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = { | |||
1353 | .priority = 1 | 1525 | .priority = 1 |
1354 | }; | 1526 | }; |
1355 | 1527 | ||
1528 | static struct x86_pmu p6_pmu = { | ||
1529 | .name = "p6", | ||
1530 | .handle_irq = p6_pmu_handle_irq, | ||
1531 | .disable_all = p6_pmu_disable_all, | ||
1532 | .enable_all = p6_pmu_enable_all, | ||
1533 | .enable = p6_pmu_enable_counter, | ||
1534 | .disable = p6_pmu_disable_counter, | ||
1535 | .eventsel = MSR_P6_EVNTSEL0, | ||
1536 | .perfctr = MSR_P6_PERFCTR0, | ||
1537 | .event_map = p6_pmu_event_map, | ||
1538 | .raw_event = p6_pmu_raw_event, | ||
1539 | .max_events = ARRAY_SIZE(p6_perfmon_event_map), | ||
1540 | .apic = 1, | ||
1541 | .max_period = (1ULL << 31) - 1, | ||
1542 | .version = 0, | ||
1543 | .num_counters = 2, | ||
1544 | /* | ||
1545 | * Counters have 40 bits implemented. However they are designed such | ||
1546 | * that bits [32-39] are sign extensions of bit 31. As such the | ||
1547 | * effective width of a counter for P6-like PMU is 32 bits only. | ||
1548 | * | ||
1549 | * See IA-32 Intel Architecture Software developer manual Vol 3B | ||
1550 | */ | ||
1551 | .counter_bits = 32, | ||
1552 | .counter_mask = (1ULL << 32) - 1, | ||
1553 | }; | ||
1554 | |||
1356 | static struct x86_pmu intel_pmu = { | 1555 | static struct x86_pmu intel_pmu = { |
1357 | .name = "Intel", | 1556 | .name = "Intel", |
1358 | .handle_irq = intel_pmu_handle_irq, | 1557 | .handle_irq = intel_pmu_handle_irq, |
@@ -1365,6 +1564,7 @@ static struct x86_pmu intel_pmu = { | |||
1365 | .event_map = intel_pmu_event_map, | 1564 | .event_map = intel_pmu_event_map, |
1366 | .raw_event = intel_pmu_raw_event, | 1565 | .raw_event = intel_pmu_raw_event, |
1367 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | 1566 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), |
1567 | .apic = 1, | ||
1368 | /* | 1568 | /* |
1369 | * Intel PMCs cannot be accessed sanely above 32 bit width, | 1569 | * Intel PMCs cannot be accessed sanely above 32 bit width, |
1370 | * so we install an artificial 1<<31 period regardless of | 1570 | * so we install an artificial 1<<31 period regardless of |
@@ -1388,10 +1588,43 @@ static struct x86_pmu amd_pmu = { | |||
1388 | .num_counters = 4, | 1588 | .num_counters = 4, |
1389 | .counter_bits = 48, | 1589 | .counter_bits = 48, |
1390 | .counter_mask = (1ULL << 48) - 1, | 1590 | .counter_mask = (1ULL << 48) - 1, |
1591 | .apic = 1, | ||
1391 | /* use highest bit to detect overflow */ | 1592 | /* use highest bit to detect overflow */ |
1392 | .max_period = (1ULL << 47) - 1, | 1593 | .max_period = (1ULL << 47) - 1, |
1393 | }; | 1594 | }; |
1394 | 1595 | ||
1596 | static int p6_pmu_init(void) | ||
1597 | { | ||
1598 | switch (boot_cpu_data.x86_model) { | ||
1599 | case 1: | ||
1600 | case 3: /* Pentium Pro */ | ||
1601 | case 5: | ||
1602 | case 6: /* Pentium II */ | ||
1603 | case 7: | ||
1604 | case 8: | ||
1605 | case 11: /* Pentium III */ | ||
1606 | break; | ||
1607 | case 9: | ||
1608 | case 13: | ||
1609 | /* Pentium M */ | ||
1610 | break; | ||
1611 | default: | ||
1612 | pr_cont("unsupported p6 CPU model %d ", | ||
1613 | boot_cpu_data.x86_model); | ||
1614 | return -ENODEV; | ||
1615 | } | ||
1616 | |||
1617 | x86_pmu = p6_pmu; | ||
1618 | |||
1619 | if (!cpu_has_apic) { | ||
1620 | pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n"); | ||
1621 | pr_info("no hardware sampling interrupt available.\n"); | ||
1622 | x86_pmu.apic = 0; | ||
1623 | } | ||
1624 | |||
1625 | return 0; | ||
1626 | } | ||
1627 | |||
1395 | static int intel_pmu_init(void) | 1628 | static int intel_pmu_init(void) |
1396 | { | 1629 | { |
1397 | union cpuid10_edx edx; | 1630 | union cpuid10_edx edx; |
@@ -1400,8 +1633,14 @@ static int intel_pmu_init(void) | |||
1400 | unsigned int ebx; | 1633 | unsigned int ebx; |
1401 | int version; | 1634 | int version; |
1402 | 1635 | ||
1403 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 1636 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
1637 | /* check for P6 processor family */ | ||
1638 | if (boot_cpu_data.x86 == 6) { | ||
1639 | return p6_pmu_init(); | ||
1640 | } else { | ||
1404 | return -ENODEV; | 1641 | return -ENODEV; |
1642 | } | ||
1643 | } | ||
1405 | 1644 | ||
1406 | /* | 1645 | /* |
1407 | * Check whether the Architectural PerfMon supports | 1646 | * Check whether the Architectural PerfMon supports |
@@ -1561,6 +1800,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
1561 | 1800 | ||
1562 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); | 1801 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
1563 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); | 1802 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
1803 | static DEFINE_PER_CPU(int, in_nmi_frame); | ||
1564 | 1804 | ||
1565 | 1805 | ||
1566 | static void | 1806 | static void |
@@ -1576,7 +1816,9 @@ static void backtrace_warning(void *data, char *msg) | |||
1576 | 1816 | ||
1577 | static int backtrace_stack(void *data, char *name) | 1817 | static int backtrace_stack(void *data, char *name) |
1578 | { | 1818 | { |
1579 | /* Process all stacks: */ | 1819 | per_cpu(in_nmi_frame, smp_processor_id()) = |
1820 | x86_is_stack_id(NMI_STACK, name); | ||
1821 | |||
1580 | return 0; | 1822 | return 0; |
1581 | } | 1823 | } |
1582 | 1824 | ||
@@ -1584,6 +1826,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) | |||
1584 | { | 1826 | { |
1585 | struct perf_callchain_entry *entry = data; | 1827 | struct perf_callchain_entry *entry = data; |
1586 | 1828 | ||
1829 | if (per_cpu(in_nmi_frame, smp_processor_id())) | ||
1830 | return; | ||
1831 | |||
1587 | if (reliable) | 1832 | if (reliable) |
1588 | callchain_store(entry, addr); | 1833 | callchain_store(entry, addr); |
1589 | } | 1834 | } |