author      Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>   2006-09-26 04:52:27 -0400
committer   Andi Kleen <andi@basil.nowhere.org>                   2006-09-26 04:52:27 -0400
commit      248dcb2ffffe8f3e4a369556a68988788c208111 (patch)
tree        07517710662b99db976e74064bfe399681437e25 /arch
parent      1de84979dfc527c422abf63f27beabe43892989b (diff)
[PATCH] x86: i386/x86-64 Add nmi watchdog support for new Intel CPUs
AK: This redoes the changes I temporarily reverted.

Intel now has support for Architectural Performance Monitoring Counters
(refer to the IA-32 Intel Architecture Software Developer's Manual,
http://www.intel.com/design/pentium4/manuals/253669.htm). This feature is
present starting from the Intel Core Duo and Intel Core Solo processors.

What this means is that the performance monitoring counters and some
performance monitoring events are now defined in an architectural way
(using cpuid), so there is no need to check family/model etc. for these
architectural events.

Below is the patch to use these performance counters in the nmi watchdog
driver. The patch handles both i386 and x86-64 kernels.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
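The key point above is that the watchdog no longer needs family/model tables:
CPUID leaf 0xA enumerates the counters and the events. The sketch below is a
hypothetical user-space illustration, not part of this patch; it assumes GCC's
<cpuid.h> and simply mirrors the check that setup_intel_arch_watchdog() makes
in the diff that follows (EAX describes the counters, and bit 0 of EBX is
clear when the UNHALTED_CORE_CYCLES event is available).

/*
 * Hypothetical user-space sketch (not from this patch): enumerate the
 * architectural perfmon facility via CPUID leaf 0xA, the same leaf that
 * the kernel's setup_intel_arch_watchdog() consults.  Assumes GCC <cpuid.h>.
 */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0x0a, &eax, &ebx, &ecx, &edx)) {
                printf("CPUID leaf 0xA not supported\n");
                return 1;
        }

        unsigned int version     = eax & 0xff;          /* arch perfmon version ID */
        unsigned int num_ctrs    = (eax >> 8) & 0xff;   /* GP counters per logical CPU */
        unsigned int bit_width   = (eax >> 16) & 0xff;  /* counter bit width */
        unsigned int mask_length = (eax >> 24) & 0xff;  /* valid bits in ebx */

        if (version == 0) {
                printf("no architectural performance monitoring\n");
                return 1;
        }

        printf("version %u, %u counters, %u bits wide\n",
               version, num_ctrs, bit_width);

        /* Bit 0 of EBX is set when UNHALTED_CORE_CYCLES is NOT available. */
        if (mask_length >= 1 && !(ebx & 1))
                printf("UNHALTED_CORE_CYCLES event present\n");
        else
                printf("UNHALTED_CORE_CYCLES event not present\n");

        return 0;
}

The kernel code does the same thing with its cpuid() helper and the
cpuid10_eax union instead of manual shifting.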
Diffstat (limited to 'arch')
-rw-r--r--   arch/i386/kernel/nmi.c     126
-rw-r--r--   arch/x86_64/kernel/nmi.c   130
2 files changed, 246 insertions(+), 10 deletions(-)
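Both copies of check_nmi_watchdog() changed below carry the same comment: with
architectural perfmon only the low 32 bits of the counter are writable (higher
bits sign-extend from bit 31), so the programmed period, (u64)cpu_khz * 1000 /
nmi_hz, must fit in 31 bits. The stand-alone sketch below only illustrates
that arithmetic; the helper name arch_perfmon_nmi_hz is hypothetical and does
not appear in the patch.

/*
 * Hypothetical stand-alone sketch (not from this patch): reproduce the
 * nmi_hz adjustment made in check_nmi_watchdog() below.  The programmed
 * period cpu_khz * 1000 / nmi_hz has to stay at or below 0x7fffffff, so
 * CPUs faster than about 2.147 GHz get nmi_hz bumped above 1.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int arch_perfmon_nmi_hz(uint64_t cpu_khz)
{
        uint64_t cycles_per_sec = cpu_khz * 1000;
        unsigned int nmi_hz = 1;

        if (cycles_per_sec > 0x7fffffffULL)
                nmi_hz = cycles_per_sec / 0x7fffffffULL + 1;

        return nmi_hz;
}

int main(void)
{
        /* A 2.0 GHz part keeps nmi_hz = 1; a 2.4 GHz part needs nmi_hz = 2. */
        printf("2000000 kHz -> nmi_hz %u\n", arch_perfmon_nmi_hz(2000000));
        printf("2400000 kHz -> nmi_hz %u\n", arch_perfmon_nmi_hz(2400000));
        return 0;
}

On the i386 side the same computation uses do_div() because 64-bit division
needs a helper there; the x86-64 side divides directly.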
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index 6e5085d5d2f6..7b9a053effa3 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -26,6 +26,7 @@
 #include <asm/smp.h>
 #include <asm/nmi.h>
 #include <asm/kdebug.h>
+#include <asm/intel_arch_perfmon.h>
 
 #include "mach_traps.h"
 
@@ -77,6 +78,9 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
         case X86_VENDOR_AMD:
                 return (msr - MSR_K7_PERFCTR0);
         case X86_VENDOR_INTEL:
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                        return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+
                 switch (boot_cpu_data.x86) {
                 case 6:
                         return (msr - MSR_P6_PERFCTR0);
@@ -95,6 +99,9 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
         case X86_VENDOR_AMD:
                 return (msr - MSR_K7_EVNTSEL0);
         case X86_VENDOR_INTEL:
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                        return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+
                 switch (boot_cpu_data.x86) {
                 case 6:
                         return (msr - MSR_P6_EVNTSEL0);
@@ -174,7 +181,10 @@ static __cpuinit inline int nmi_known_cpu(void)
         case X86_VENDOR_AMD:
                 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
         case X86_VENDOR_INTEL:
-                return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                        return 1;
+                else
+                        return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
         }
         return 0;
 }
@@ -261,8 +271,24 @@ static int __init check_nmi_watchdog(void)
 
         /* now that we know it works we can reduce NMI frequency to
            something more reasonable; makes a difference in some configs */
-        if (nmi_watchdog == NMI_LOCAL_APIC)
+        if (nmi_watchdog == NMI_LOCAL_APIC) {
+                struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
                 nmi_hz = 1;
+                /*
+                 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+                 * are writable, with higher bits sign extending from bit 31.
+                 * So, we can only program the counter with 31 bit values and
+                 * 32nd bit should be 1, for 33.. to be 1.
+                 * Find the appropriate nmi_hz
+                 */
+                if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+                        ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+                        u64 count = (u64)cpu_khz * 1000;
+                        do_div(count, 0x7fffffffUL);
+                        nmi_hz = count + 1;
+                }
+        }
 
         kfree(prev_nmi_count);
         return 0;
@@ -637,6 +663,85 @@ static void stop_p4_watchdog(void)
         release_perfctr_nmi(wd->perfctr_msr);
 }
 
+#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(void)
+{
+        unsigned int ebx;
+        union cpuid10_eax eax;
+        unsigned int unused;
+        unsigned int perfctr_msr, evntsel_msr;
+        unsigned int evntsel;
+        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+        /*
+         * Check whether the Architectural PerfMon supports
+         * Unhalted Core Cycles Event or not.
+         * NOTE: Corresponding bit = 0 in ebx indicates event present.
+         */
+        cpuid(10, &(eax.full), &ebx, &unused, &unused);
+        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+                goto fail;
+
+        perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+        evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
+
+        if (!reserve_perfctr_nmi(perfctr_msr))
+                goto fail;
+
+        if (!reserve_evntsel_nmi(evntsel_msr))
+                goto fail1;
+
+        wrmsrl(perfctr_msr, 0UL);
+
+        evntsel = ARCH_PERFMON_EVENTSEL_INT
+                | ARCH_PERFMON_EVENTSEL_OS
+                | ARCH_PERFMON_EVENTSEL_USR
+                | ARCH_PERFMON_NMI_EVENT_SEL
+                | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+        /* setup the timer */
+        wrmsr(evntsel_msr, evntsel, 0);
+        write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
+        apic_write(APIC_LVTPC, APIC_DM_NMI);
+        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+        wrmsr(evntsel_msr, evntsel, 0);
+
+        wd->perfctr_msr = perfctr_msr;
+        wd->evntsel_msr = evntsel_msr;
+        wd->cccr_msr = 0;  //unused
+        wd->check_bit = 1ULL << (eax.split.bit_width - 1);
+        return 1;
+fail1:
+        release_perfctr_nmi(perfctr_msr);
+fail:
+        return 0;
+}
+
+static void stop_intel_arch_watchdog(void)
+{
+        unsigned int ebx;
+        union cpuid10_eax eax;
+        unsigned int unused;
+        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+        /*
+         * Check whether the Architectural PerfMon supports
+         * Unhalted Core Cycles Event or not.
+         * NOTE: Corresponding bit = 0 in ebx indicates event present.
+         */
+        cpuid(10, &(eax.full), &ebx, &unused, &unused);
+        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+                return;
+
+        wrmsr(wd->evntsel_msr, 0, 0);
+        release_evntsel_nmi(wd->evntsel_msr);
+        release_perfctr_nmi(wd->perfctr_msr);
+}
+
 void setup_apic_nmi_watchdog (void *unused)
 {
         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
@@ -663,6 +768,11 @@ void setup_apic_nmi_watchdog (void *unused)
                         return;
                 break;
         case X86_VENDOR_INTEL:
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                        if (!setup_intel_arch_watchdog())
+                                return;
+                        break;
+                }
                 switch (boot_cpu_data.x86) {
                 case 6:
                         if (boot_cpu_data.x86_model > 0xd)
@@ -708,6 +818,10 @@ void stop_apic_nmi_watchdog(void *unused)
                 stop_k7_watchdog();
                 break;
         case X86_VENDOR_INTEL:
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                        stop_intel_arch_watchdog();
+                        break;
+                }
                 switch (boot_cpu_data.x86) {
                 case 6:
                         if (boot_cpu_data.x86_model > 0xd)
@@ -831,10 +945,12 @@ int nmi_watchdog_tick (struct pt_regs * regs, unsigned reason)
                 wrmsrl(wd->cccr_msr, dummy);
                 apic_write(APIC_LVTPC, APIC_DM_NMI);
         }
-        else if (wd->perfctr_msr == MSR_P6_PERFCTR0) {
-                /* Only P6 based Pentium M need to re-unmask
+        else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
+                 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+                /* P6 based Pentium M need to re-unmask
                  * the apic vector but it doesn't hurt
-                 * other P6 variant */
+                 * other P6 variant.
+                 * ArchPerfom/Core Duo also needs this */
                 apic_write(APIC_LVTPC, APIC_DM_NMI);
         }
         /* start the cycle over again */
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c
index 1b76d1574529..4d6fb047952e 100644
--- a/arch/x86_64/kernel/nmi.c
+++ b/arch/x86_64/kernel/nmi.c
@@ -26,6 +26,7 @@
 #include <asm/proto.h>
 #include <asm/kdebug.h>
 #include <asm/mce.h>
+#include <asm/intel_arch_perfmon.h>
 
 /* perfctr_nmi_owner tracks the ownership of the perfctr registers:
  * evtsel_nmi_owner tracks the ownership of the event selection
@@ -73,7 +74,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
         case X86_VENDOR_AMD:
                 return (msr - MSR_K7_PERFCTR0);
         case X86_VENDOR_INTEL:
-                return (msr - MSR_P4_BPU_PERFCTR0);
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                        return (msr - MSR_ARCH_PERFMON_PERFCTR0);
+                else
+                        return (msr - MSR_P4_BPU_PERFCTR0);
         }
         return 0;
 }
@@ -86,7 +90,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
         case X86_VENDOR_AMD:
                 return (msr - MSR_K7_EVNTSEL0);
         case X86_VENDOR_INTEL:
-                return (msr - MSR_P4_BSU_ESCR0);
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                        return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
+                else
+                        return (msr - MSR_P4_BSU_ESCR0);
         }
         return 0;
 }
@@ -160,7 +167,10 @@ static __cpuinit inline int nmi_known_cpu(void)
         case X86_VENDOR_AMD:
                 return boot_cpu_data.x86 == 15;
         case X86_VENDOR_INTEL:
-                return boot_cpu_data.x86 == 15;
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
+                        return 1;
+                else
+                        return (boot_cpu_data.x86 == 15);
         }
         return 0;
 }
@@ -246,8 +256,22 @@ int __init check_nmi_watchdog (void)
 
         /* now that we know it works we can reduce NMI frequency to
            something more reasonable; makes a difference in some configs */
-        if (nmi_watchdog == NMI_LOCAL_APIC)
+        if (nmi_watchdog == NMI_LOCAL_APIC) {
+                struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
                 nmi_hz = 1;
+                /*
+                 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
+                 * are writable, with higher bits sign extending from bit 31.
+                 * So, we can only program the counter with 31 bit values and
+                 * 32nd bit should be 1, for 33.. to be 1.
+                 * Find the appropriate nmi_hz
+                 */
+                if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
+                        ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
+                        nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1;
+                }
+        }
 
         kfree(counts);
         return 0;
@@ -563,6 +587,87 @@ static void stop_p4_watchdog(void)
         release_perfctr_nmi(wd->perfctr_msr);
 }
 
+#define ARCH_PERFMON_NMI_EVENT_SEL      ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
+#define ARCH_PERFMON_NMI_EVENT_UMASK    ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
+
+static int setup_intel_arch_watchdog(void)
+{
+        unsigned int ebx;
+        union cpuid10_eax eax;
+        unsigned int unused;
+        unsigned int perfctr_msr, evntsel_msr;
+        unsigned int evntsel;
+        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+        /*
+         * Check whether the Architectural PerfMon supports
+         * Unhalted Core Cycles Event or not.
+         * NOTE: Corresponding bit = 0 in ebx indicates event present.
+         */
+        cpuid(10, &(eax.full), &ebx, &unused, &unused);
+        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+                goto fail;
+
+        perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
+        evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
+
+        if (!reserve_perfctr_nmi(perfctr_msr))
+                goto fail;
+
+        if (!reserve_evntsel_nmi(evntsel_msr))
+                goto fail1;
+
+        wrmsrl(perfctr_msr, 0UL);
+
+        evntsel = ARCH_PERFMON_EVENTSEL_INT
+                | ARCH_PERFMON_EVENTSEL_OS
+                | ARCH_PERFMON_EVENTSEL_USR
+                | ARCH_PERFMON_NMI_EVENT_SEL
+                | ARCH_PERFMON_NMI_EVENT_UMASK;
+
+        /* setup the timer */
+        wrmsr(evntsel_msr, evntsel, 0);
+        wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
+
+        apic_write(APIC_LVTPC, APIC_DM_NMI);
+        evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
+        wrmsr(evntsel_msr, evntsel, 0);
+
+        wd->perfctr_msr = perfctr_msr;
+        wd->evntsel_msr = evntsel_msr;
+        wd->cccr_msr = 0;  //unused
+        wd->check_bit = 1ULL << (eax.split.bit_width - 1);
+        return 1;
+fail1:
+        release_perfctr_nmi(perfctr_msr);
+fail:
+        return 0;
+}
+
+static void stop_intel_arch_watchdog(void)
+{
+        unsigned int ebx;
+        union cpuid10_eax eax;
+        unsigned int unused;
+        struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
+
+        /*
+         * Check whether the Architectural PerfMon supports
+         * Unhalted Core Cycles Event or not.
+         * NOTE: Corresponding bit = 0 in ebx indicates event present.
+         */
+        cpuid(10, &(eax.full), &ebx, &unused, &unused);
+        if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
+            (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
+                return;
+
+        wrmsr(wd->evntsel_msr, 0, 0);
+
+        release_evntsel_nmi(wd->evntsel_msr);
+        release_perfctr_nmi(wd->perfctr_msr);
+}
+
 void setup_apic_nmi_watchdog(void *unused)
 {
         struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
@@ -589,6 +694,11 @@ void setup_apic_nmi_watchdog(void *unused)
                         return;
                 break;
         case X86_VENDOR_INTEL:
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                        if (!setup_intel_arch_watchdog())
+                                return;
+                        break;
+                }
                 if (!setup_p4_watchdog())
                         return;
                 break;
@@ -620,6 +730,10 @@ void stop_apic_nmi_watchdog(void *unused)
                 stop_k7_watchdog();
                 break;
         case X86_VENDOR_INTEL:
+                if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
+                        stop_intel_arch_watchdog();
+                        break;
+                }
                 stop_p4_watchdog();
                 break;
         default:
@@ -724,7 +838,13 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
                 dummy &= ~P4_CCCR_OVF;
                 wrmsrl(wd->cccr_msr, dummy);
                 apic_write(APIC_LVTPC, APIC_DM_NMI);
-        }
+        } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
+                /*
+                 * ArchPerfom/Core Duo needs to re-unmask
+                 * the apic vector
+                 */
+                apic_write(APIC_LVTPC, APIC_DM_NMI);
+        }
         /* start the cycle over again */
         wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz));
         rc = 1;