diff options
author | Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> | 2006-09-26 04:52:27 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:27 -0400 |
commit | 248dcb2ffffe8f3e4a369556a68988788c208111 (patch) | |
tree | 07517710662b99db976e74064bfe399681437e25 /arch/x86_64/kernel/nmi.c | |
parent | 1de84979dfc527c422abf63f27beabe43892989b (diff) |
[PATCH] x86: i386/x86-64 Add nmi watchdog support for new Intel CPUs
AK: This redoes the changes I temporarily reverted.
Intel now supports Architectural Performance Monitoring Counters
(refer to the IA-32 Intel Architecture Software Developer's Manual,
http://www.intel.com/design/pentium4/manuals/253669.htm). This
feature is present starting with the Intel Core Duo and Intel Core Solo processors.
This means that the performance monitoring counters and some performance
monitoring events are now defined in an architectural way (using cpuid),
so there is no need to check family/model etc. for these architectural
events.
Below is the patch to use these performance counters in the NMI watchdog driver.
The patch handles both i386 and x86-64 kernels.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/x86_64/kernel/nmi.c')
-rw-r--r-- | arch/x86_64/kernel/nmi.c | 130 |
1 files changed, 125 insertions, 5 deletions
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 1b76d1574529..4d6fb047952e 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <asm/proto.h> | 26 | #include <asm/proto.h> |
27 | #include <asm/kdebug.h> | 27 | #include <asm/kdebug.h> |
28 | #include <asm/mce.h> | 28 | #include <asm/mce.h> |
29 | #include <asm/intel_arch_perfmon.h> | ||
29 | 30 | ||
30 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | 31 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
31 | * evtsel_nmi_owner tracks the ownership of the event selection | 32 | * evtsel_nmi_owner tracks the ownership of the event selection |
@@ -73,7 +74,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | |||
73 | case X86_VENDOR_AMD: | 74 | case X86_VENDOR_AMD: |
74 | return (msr - MSR_K7_PERFCTR0); | 75 | return (msr - MSR_K7_PERFCTR0); |
75 | case X86_VENDOR_INTEL: | 76 | case X86_VENDOR_INTEL: |
76 | return (msr - MSR_P4_BPU_PERFCTR0); | 77 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
78 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
79 | else | ||
80 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
77 | } | 81 | } |
78 | return 0; | 82 | return 0; |
79 | } | 83 | } |
@@ -86,7 +90,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | |||
86 | case X86_VENDOR_AMD: | 90 | case X86_VENDOR_AMD: |
87 | return (msr - MSR_K7_EVNTSEL0); | 91 | return (msr - MSR_K7_EVNTSEL0); |
88 | case X86_VENDOR_INTEL: | 92 | case X86_VENDOR_INTEL: |
89 | return (msr - MSR_P4_BSU_ESCR0); | 93 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
94 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
95 | else | ||
96 | return (msr - MSR_P4_BSU_ESCR0); | ||
90 | } | 97 | } |
91 | return 0; | 98 | return 0; |
92 | } | 99 | } |
@@ -160,7 +167,10 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
160 | case X86_VENDOR_AMD: | 167 | case X86_VENDOR_AMD: |
161 | return boot_cpu_data.x86 == 15; | 168 | return boot_cpu_data.x86 == 15; |
162 | case X86_VENDOR_INTEL: | 169 | case X86_VENDOR_INTEL: |
163 | return boot_cpu_data.x86 == 15; | 170 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
171 | return 1; | ||
172 | else | ||
173 | return (boot_cpu_data.x86 == 15); | ||
164 | } | 174 | } |
165 | return 0; | 175 | return 0; |
166 | } | 176 | } |
@@ -246,8 +256,22 @@ int __init check_nmi_watchdog (void) | |||
246 | 256 | ||
247 | /* now that we know it works we can reduce NMI frequency to | 257 | /* now that we know it works we can reduce NMI frequency to |
248 | something more reasonable; makes a difference in some configs */ | 258 | something more reasonable; makes a difference in some configs */ |
249 | if (nmi_watchdog == NMI_LOCAL_APIC) | 259 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
260 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
261 | |||
250 | nmi_hz = 1; | 262 | nmi_hz = 1; |
263 | /* | ||
264 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
265 | * are writable, with higher bits sign extending from bit 31. | ||
266 | * So, we can only program the counter with 31-bit values, and | ||
267 | * bit 31 must be 1 so that bits 32 and above are also 1. | ||
268 | * Find the appropriate nmi_hz | ||
269 | */ | ||
270 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
271 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
272 | nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1; | ||
273 | } | ||
274 | } | ||
251 | 275 | ||
252 | kfree(counts); | 276 | kfree(counts); |
253 | return 0; | 277 | return 0; |
@@ -563,6 +587,87 @@ static void stop_p4_watchdog(void) | |||
563 | release_perfctr_nmi(wd->perfctr_msr); | 587 | release_perfctr_nmi(wd->perfctr_msr); |
564 | } | 588 | } |
565 | 589 | ||
590 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
591 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
592 | |||
593 | static int setup_intel_arch_watchdog(void) | ||
594 | { | ||
595 | unsigned int ebx; | ||
596 | union cpuid10_eax eax; | ||
597 | unsigned int unused; | ||
598 | unsigned int perfctr_msr, evntsel_msr; | ||
599 | unsigned int evntsel; | ||
600 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
601 | |||
602 | /* | ||
603 | * Check whether the Architectural PerfMon supports | ||
604 | * Unhalted Core Cycles Event or not. | ||
605 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
606 | */ | ||
607 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
608 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
609 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
610 | goto fail; | ||
611 | |||
612 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
613 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
614 | |||
615 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
616 | goto fail; | ||
617 | |||
618 | if (!reserve_evntsel_nmi(evntsel_msr)) | ||
619 | goto fail1; | ||
620 | |||
621 | wrmsrl(perfctr_msr, 0UL); | ||
622 | |||
623 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
624 | | ARCH_PERFMON_EVENTSEL_OS | ||
625 | | ARCH_PERFMON_EVENTSEL_USR | ||
626 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
627 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
628 | |||
629 | /* setup the timer */ | ||
630 | wrmsr(evntsel_msr, evntsel, 0); | ||
631 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
632 | |||
633 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
634 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
635 | wrmsr(evntsel_msr, evntsel, 0); | ||
636 | |||
637 | wd->perfctr_msr = perfctr_msr; | ||
638 | wd->evntsel_msr = evntsel_msr; | ||
639 | wd->cccr_msr = 0; //unused | ||
640 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
641 | return 1; | ||
642 | fail1: | ||
643 | release_perfctr_nmi(perfctr_msr); | ||
644 | fail: | ||
645 | return 0; | ||
646 | } | ||
647 | |||
648 | static void stop_intel_arch_watchdog(void) | ||
649 | { | ||
650 | unsigned int ebx; | ||
651 | union cpuid10_eax eax; | ||
652 | unsigned int unused; | ||
653 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
654 | |||
655 | /* | ||
656 | * Check whether the Architectural PerfMon supports | ||
657 | * Unhalted Core Cycles Event or not. | ||
658 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
659 | */ | ||
660 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
661 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
662 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
663 | return; | ||
664 | |||
665 | wrmsr(wd->evntsel_msr, 0, 0); | ||
666 | |||
667 | release_evntsel_nmi(wd->evntsel_msr); | ||
668 | release_perfctr_nmi(wd->perfctr_msr); | ||
669 | } | ||
670 | |||
566 | void setup_apic_nmi_watchdog(void *unused) | 671 | void setup_apic_nmi_watchdog(void *unused) |
567 | { | 672 | { |
568 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 673 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
@@ -589,6 +694,11 @@ void setup_apic_nmi_watchdog(void *unused) | |||
589 | return; | 694 | return; |
590 | break; | 695 | break; |
591 | case X86_VENDOR_INTEL: | 696 | case X86_VENDOR_INTEL: |
697 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
698 | if (!setup_intel_arch_watchdog()) | ||
699 | return; | ||
700 | break; | ||
701 | } | ||
592 | if (!setup_p4_watchdog()) | 702 | if (!setup_p4_watchdog()) |
593 | return; | 703 | return; |
594 | break; | 704 | break; |
@@ -620,6 +730,10 @@ void stop_apic_nmi_watchdog(void *unused) | |||
620 | stop_k7_watchdog(); | 730 | stop_k7_watchdog(); |
621 | break; | 731 | break; |
622 | case X86_VENDOR_INTEL: | 732 | case X86_VENDOR_INTEL: |
733 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
734 | stop_intel_arch_watchdog(); | ||
735 | break; | ||
736 | } | ||
623 | stop_p4_watchdog(); | 737 | stop_p4_watchdog(); |
624 | break; | 738 | break; |
625 | default: | 739 | default: |
@@ -724,7 +838,13 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
724 | dummy &= ~P4_CCCR_OVF; | 838 | dummy &= ~P4_CCCR_OVF; |
725 | wrmsrl(wd->cccr_msr, dummy); | 839 | wrmsrl(wd->cccr_msr, dummy); |
726 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 840 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
727 | } | 841 | } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
842 | /* | ||
843 | * ArchPerfmon/Core Duo needs to re-unmask | ||
844 | * the apic vector | ||
845 | */ | ||
846 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
847 | } | ||
728 | /* start the cycle over again */ | 848 | /* start the cycle over again */ |
729 | wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | 849 | wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); |
730 | rc = 1; | 850 | rc = 1; |