path: root/arch/x86/kernel/cpu/perf_counter.c
Diffstat (limited to 'arch/x86/kernel/cpu/perf_counter.c')
-rw-r--r--   arch/x86/kernel/cpu/perf_counter.c   289
1 file changed, 267 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce19aac..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
 	int		num_counters_fixed;
 	int		counter_bits;
 	u64		counter_mask;
+	int		apic;
 	u64		max_period;
 	u64		intel_ctrl;
 };
@@ -66,6 +67,52 @@ static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = {
 };
 
 /*
+ * Not sure about some of these
+ */
+static const u64 p6_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]          = 0x0079,
+  [PERF_COUNT_HW_INSTRUCTIONS]        = 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]    = 0x0f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]        = 0x012e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]       = 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]          = 0x0062,
+};
+
+static u64 p6_pmu_event_map(int event)
+{
+	return p6_perfmon_event_map[event];
+}
+
+/*
+ * Counter setting that is specified not to count anything.
+ * We use this to effectively disable a counter.
+ *
+ * L2_RQSTS with 0 MESI unit mask.
+ */
+#define P6_NOP_COUNTER			0x0000002EULL
+
+static u64 p6_pmu_raw_event(u64 event)
+{
+#define P6_EVNTSEL_EVENT_MASK		0x000000FFULL
+#define P6_EVNTSEL_UNIT_MASK		0x0000FF00ULL
+#define P6_EVNTSEL_EDGE_MASK		0x00040000ULL
+#define P6_EVNTSEL_INV_MASK		0x00800000ULL
+#define P6_EVNTSEL_COUNTER_MASK		0xFF000000ULL
+
+#define P6_EVNTSEL_MASK			\
+	(P6_EVNTSEL_EVENT_MASK |	\
+	 P6_EVNTSEL_UNIT_MASK  |	\
+	 P6_EVNTSEL_EDGE_MASK  |	\
+	 P6_EVNTSEL_INV_MASK   |	\
+	 P6_EVNTSEL_COUNTER_MASK)
+
+	return event & P6_EVNTSEL_MASK;
+}
+
+
+/*
  * Intel PerfMon v3. Used on Core2 and later.
  */
 static const u64 intel_perfmon_event_map[] =
@@ -567,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
 
 static bool reserve_pmc_hardware(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -581,9 +629,11 @@ static bool reserve_pmc_hardware(void)
 		if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
 			goto eventsel_fail;
 	}
+#endif
 
 	return true;
 
+#ifdef CONFIG_X86_LOCAL_APIC
 eventsel_fail:
 	for (i--; i >= 0; i--)
 		release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -598,10 +648,12 @@ perfctr_fail:
 	enable_lapic_nmi_watchdog();
 
 	return false;
+#endif
 }
 
 static void release_pmc_hardware(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
 	int i;
 
 	for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -611,6 +663,7 @@ static void release_pmc_hardware(void)
 
 	if (nmi_watchdog == NMI_LOCAL_APIC)
 		enable_lapic_nmi_watchdog();
+#endif
 }
 
 static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -666,6 +719,7 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 {
 	struct perf_counter_attr *attr = &counter->attr;
 	struct hw_perf_counter *hwc = &counter->hw;
+	u64 config;
 	int err;
 
 	if (!x86_pmu_initialized())
@@ -701,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->sample_period = x86_pmu.max_period;
 		hwc->last_period = hwc->sample_period;
 		atomic64_set(&hwc->period_left, hwc->sample_period);
+	} else {
+		/*
+		 * If we have a PMU initialized but no APIC
+		 * interrupts, we cannot sample hardware
+		 * counters (user-space has to fall back and
+		 * sample via a hrtimer based software counter):
+		 */
+		if (!x86_pmu.apic)
+			return -EOPNOTSUPP;
 	}
 
 	counter->destroy = hw_perf_counter_destroy;
@@ -718,14 +781,40 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
+
 	/*
 	 * The generic map:
 	 */
-	hwc->config |= x86_pmu.event_map(attr->config);
+	config = x86_pmu.event_map(attr->config);
+
+	if (config == 0)
+		return -ENOENT;
+
+	if (config == -1LL)
+		return -EINVAL;
+
+	hwc->config |= config;
 
 	return 0;
 }
 
+static void p6_pmu_disable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	u64 val;
+
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->enabled = 0;
+	barrier();
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 static void intel_pmu_disable_all(void)
 {
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
@@ -767,6 +856,23 @@ void hw_perf_disable(void)
 	return x86_pmu.disable_all();
 }
 
+static void p6_pmu_enable_all(void)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	unsigned long val;
+
+	if (cpuc->enabled)
+		return;
+
+	cpuc->enabled = 1;
+	barrier();
+
+	/* p6 only has one enable register */
+	rdmsrl(MSR_P6_EVNTSEL0, val);
+	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+	wrmsrl(MSR_P6_EVNTSEL0, val);
+}
+
 static void intel_pmu_enable_all(void)
 {
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
@@ -784,13 +890,13 @@ static void amd_pmu_enable_all(void)
 	barrier();
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		struct perf_counter *counter = cpuc->counters[idx];
 		u64 val;
 
 		if (!test_bit(idx, cpuc->active_mask))
 			continue;
-		rdmsrl(MSR_K7_EVNTSEL0 + idx, val);
-		if (val & ARCH_PERFMON_EVENTSEL0_ENABLE)
-			continue;
+
+		val = counter->hw.config;
 		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
 		wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
 	}
@@ -819,16 +925,13 @@ static inline void intel_pmu_ack_status(u64 ack)
 
 static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	int err;
-	err = checking_wrmsrl(hwc->config_base + idx,
+	(void)checking_wrmsrl(hwc->config_base + idx,
 			      hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE);
 }
 
 static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
 {
-	int err;
-	err = checking_wrmsrl(hwc->config_base + idx,
-			      hwc->config);
+	(void)checking_wrmsrl(hwc->config_base + idx, hwc->config);
 }
 
 static inline void
@@ -836,13 +939,24 @@ intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx)
 {
 	int idx = __idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
-	int err;
 
 	mask = 0xfULL << (idx * 4);
 
 	rdmsrl(hwc->config_base, ctrl_val);
 	ctrl_val &= ~mask;
-	err = checking_wrmsrl(hwc->config_base, ctrl_val);
+	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
+}
+
+static inline void
+p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	u64 val = P6_NOP_COUNTER;
+
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
 }
 
 static inline void
@@ -943,6 +1057,19 @@ intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx)
 	err = checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
+static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
+{
+	struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+
+	(void)checking_wrmsrl(hwc->config_base + idx, val);
+}
+
+
 static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 {
 	if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
@@ -959,8 +1086,6 @@ static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx)
 
 	if (cpuc->enabled)
 		x86_pmu_enable_counter(hwc, idx);
-	else
-		x86_pmu_disable_counter(hwc, idx);
 }
 
 static int
@@ -1176,6 +1301,49 @@ static void intel_pmu_reset(void)
 	local_irq_restore(flags);
 }
 
+static int p6_pmu_handle_irq(struct pt_regs *regs)
+{
+	struct perf_sample_data data;
+	struct cpu_hw_counters *cpuc;
+	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
+	int idx, handled = 0;
+	u64 val;
+
+	data.regs = regs;
+	data.addr = 0;
+
+	cpuc = &__get_cpu_var(cpu_hw_counters);
+
+	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		counter = cpuc->counters[idx];
+		hwc = &counter->hw;
+
+		val = x86_perf_counter_update(counter, hwc, idx);
+		if (val & (1ULL << (x86_pmu.counter_bits - 1)))
+			continue;
+
+		/*
+		 * counter overflow
+		 */
+		handled		= 1;
+		data.period	= counter->hw.last_period;
+
+		if (!x86_perf_counter_set_period(counter, hwc, idx))
+			continue;
+
+		if (perf_counter_overflow(counter, 1, &data))
+			p6_pmu_disable_counter(hwc, idx);
+	}
+
+	if (handled)
+		inc_irq_stat(apic_perf_irqs);
+
+	return handled;
+}
 
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
@@ -1185,14 +1353,13 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
 	struct perf_sample_data data;
 	struct cpu_hw_counters *cpuc;
-	int bit, cpu, loops;
+	int bit, loops;
 	u64 ack, status;
 
 	data.regs = regs;
 	data.addr = 0;
 
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	cpuc = &__get_cpu_var(cpu_hw_counters);
 
 	perf_disable();
 	status = intel_pmu_get_status();
@@ -1249,14 +1416,13 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
 	struct cpu_hw_counters *cpuc;
 	struct perf_counter *counter;
 	struct hw_perf_counter *hwc;
-	int cpu, idx, handled = 0;
+	int idx, handled = 0;
 	u64 val;
 
 	data.regs = regs;
 	data.addr = 0;
 
-	cpu = smp_processor_id();
-	cpuc = &per_cpu(cpu_hw_counters, cpu);
+	cpuc = &__get_cpu_var(cpu_hw_counters);
 
 	for (idx = 0; idx < x86_pmu.num_counters; idx++) {
 		if (!test_bit(idx, cpuc->active_mask))
@@ -1299,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
 
 void set_perf_counter_pending(void)
 {
+#ifdef CONFIG_X86_LOCAL_APIC
 	apic->send_IPI_self(LOCAL_PENDING_VECTOR);
+#endif
 }
 
 void perf_counters_lapic_init(void)
 {
-	if (!x86_pmu_initialized())
+#ifdef CONFIG_X86_LOCAL_APIC
+	if (!x86_pmu.apic || !x86_pmu_initialized())
 		return;
 
 	/*
 	 * Always use NMI for PMU
 	 */
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
 }
 
 static int __kprobes
@@ -1334,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
 
 	regs = args->regs;
 
+#ifdef CONFIG_X86_LOCAL_APIC
 	apic_write(APIC_LVTPC, APIC_DM_NMI);
+#endif
 	/*
 	 * Can't rely on the handled return value to say it was our NMI, two
 	 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1353,6 +1525,33 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 	.priority		= 1
 };
 
+static struct x86_pmu p6_pmu = {
+	.name			= "p6",
+	.handle_irq		= p6_pmu_handle_irq,
+	.disable_all		= p6_pmu_disable_all,
+	.enable_all		= p6_pmu_enable_all,
+	.enable			= p6_pmu_enable_counter,
+	.disable		= p6_pmu_disable_counter,
+	.eventsel		= MSR_P6_EVNTSEL0,
+	.perfctr		= MSR_P6_PERFCTR0,
+	.event_map		= p6_pmu_event_map,
+	.raw_event		= p6_pmu_raw_event,
+	.max_events		= ARRAY_SIZE(p6_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_counters		= 2,
+	/*
+	 * Counters have 40 bits implemented. However they are designed such
+	 * that bits [32-39] are sign extensions of bit 31. As such the
+	 * effective width of a counter for P6-like PMU is 32 bits only.
+	 *
+	 * See IA-32 Intel Architecture Software developer manual Vol 3B
+	 */
+	.counter_bits		= 32,
+	.counter_mask		= (1ULL << 32) - 1,
+};
+
 static struct x86_pmu intel_pmu = {
 	.name			= "Intel",
 	.handle_irq		= intel_pmu_handle_irq,
@@ -1365,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
 	.event_map		= intel_pmu_event_map,
 	.raw_event		= intel_pmu_raw_event,
 	.max_events		= ARRAY_SIZE(intel_perfmon_event_map),
+	.apic			= 1,
 	/*
 	 * Intel PMCs cannot be accessed sanely above 32 bit width,
 	 * so we install an artificial 1<<31 period regardless of
@@ -1388,10 +1588,43 @@ static struct x86_pmu amd_pmu = {
 	.num_counters		= 4,
 	.counter_bits		= 48,
 	.counter_mask		= (1ULL << 48) - 1,
+	.apic			= 1,
 	/* use highest bit to detect overflow */
 	.max_period		= (1ULL << 47) - 1,
 };
 
+static int p6_pmu_init(void)
+{
+	switch (boot_cpu_data.x86_model) {
+	case 1:
+	case 3:  /* Pentium Pro */
+	case 5:
+	case 6:  /* Pentium II */
+	case 7:
+	case 8:
+	case 11: /* Pentium III */
+		break;
+	case 9:
+	case 13:
+		/* Pentium M */
+		break;
+	default:
+		pr_cont("unsupported p6 CPU model %d ",
+			boot_cpu_data.x86_model);
+		return -ENODEV;
+	}
+
+	x86_pmu = p6_pmu;
+
+	if (!cpu_has_apic) {
+		pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
+		pr_info("no hardware sampling interrupt available.\n");
+		x86_pmu.apic = 0;
+	}
+
+	return 0;
+}
+
 static int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -1400,8 +1633,14 @@ static int intel_pmu_init(void)
 	unsigned int ebx;
 	int version;
 
-	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+	if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+		/* check for P6 processor family */
+		if (boot_cpu_data.x86 == 6) {
+			return p6_pmu_init();
+		} else {
 		return -ENODEV;
+		}
+	}
 
 	/*
 	 * Check whether the Architectural PerfMon supports
@@ -1561,6 +1800,7 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
 
 static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry);
+static DEFINE_PER_CPU(int, in_nmi_frame);
 
 
 static void
@@ -1576,7 +1816,9 @@ static void backtrace_warning(void *data, char *msg)
 
 static int backtrace_stack(void *data, char *name)
 {
-	/* Process all stacks: */
+	per_cpu(in_nmi_frame, smp_processor_id()) =
+			x86_is_stack_id(NMI_STACK, name);
+
 	return 0;
 }
 
@@ -1584,6 +1826,9 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
 	struct perf_callchain_entry *entry = data;
 
+	if (per_cpu(in_nmi_frame, smp_processor_id()))
+		return;
+
 	if (reliable)
 		callchain_store(entry, addr);
 }