diff options
Diffstat (limited to 'arch/i386/kernel/nmi.c')
| -rw-r--r-- | arch/i386/kernel/nmi.c | 940 |
1 files changed, 651 insertions, 289 deletions
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acb351478e42..dbda706fdd14 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
| @@ -21,83 +21,174 @@ | |||
| 21 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
| 22 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
| 23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
| 24 | #include <linux/dmi.h> | ||
| 25 | #include <linux/kprobes.h> | ||
| 24 | 26 | ||
| 25 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
| 26 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
| 29 | #include <asm/kdebug.h> | ||
| 27 | #include <asm/intel_arch_perfmon.h> | 30 | #include <asm/intel_arch_perfmon.h> |
| 28 | 31 | ||
| 29 | #include "mach_traps.h" | 32 | #include "mach_traps.h" |
| 30 | 33 | ||
| 31 | unsigned int nmi_watchdog = NMI_NONE; | 34 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
| 32 | extern int unknown_nmi_panic; | 35 | * evntsel_nmi_owner tracks the ownership of the event selection |
| 33 | static unsigned int nmi_hz = HZ; | 36 | * - different performance counters/event selections may be reserved for |
| 34 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | 37 | * different subsystems; this reservation system just tries to coordinate |
| 35 | static unsigned int nmi_p4_cccr_val; | 38 | * things a little |
| 36 | extern void show_registers(struct pt_regs *regs); | 39 | */ |
| 40 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); | ||
| 41 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); | ||
| 37 | 42 | ||
| 38 | /* | 43 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
| 39 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 44 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
| 40 | * - it may be reserved by some other driver, or not | ||
| 41 | * - when not reserved by some other driver, it may be used for | ||
| 42 | * the NMI watchdog, or not | ||
| 43 | * | ||
| 44 | * This is maintained separately from nmi_active because the NMI | ||
| 45 | * watchdog may also be driven from the I/O APIC timer. | ||
| 46 | */ | 45 | */ |
| 47 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | 46 | #define NMI_MAX_COUNTER_BITS 66 |
| 48 | static unsigned int lapic_nmi_owner; | ||
| 49 | #define LAPIC_NMI_WATCHDOG (1<<0) | ||
| 50 | #define LAPIC_NMI_RESERVED (1<<1) | ||
| 51 | 47 | ||
| 52 | /* nmi_active: | 48 | /* nmi_active: |
| 53 | * +1: the lapic NMI watchdog is active, but can be disabled | 49 | * >0: the lapic NMI watchdog is active, but can be disabled |
| 54 | * 0: the lapic NMI watchdog has not been set up, and cannot | 50 | * <0: the lapic NMI watchdog has not been set up, and cannot |
| 55 | * be enabled | 51 | * be enabled |
| 56 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 52 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
| 57 | */ | 53 | */ |
| 58 | int nmi_active; | 54 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
| 59 | 55 | ||
| 60 | #define K7_EVNTSEL_ENABLE (1 << 22) | 56 | unsigned int nmi_watchdog = NMI_DEFAULT; |
| 61 | #define K7_EVNTSEL_INT (1 << 20) | 57 | static unsigned int nmi_hz = HZ; |
| 62 | #define K7_EVNTSEL_OS (1 << 17) | ||
| 63 | #define K7_EVNTSEL_USR (1 << 16) | ||
| 64 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
| 65 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
| 66 | 58 | ||
| 67 | #define P6_EVNTSEL0_ENABLE (1 << 22) | 59 | struct nmi_watchdog_ctlblk { |
| 68 | #define P6_EVNTSEL_INT (1 << 20) | 60 | int enabled; |
| 69 | #define P6_EVNTSEL_OS (1 << 17) | 61 | u64 check_bit; |
| 70 | #define P6_EVNTSEL_USR (1 << 16) | 62 | unsigned int cccr_msr; |
| 71 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | 63 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ |
| 72 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | 64 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ |
| 65 | }; | ||
| 66 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
| 73 | 67 | ||
| 74 | #define MSR_P4_MISC_ENABLE 0x1A0 | 68 | /* local prototypes */ |
| 75 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 69 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
| 76 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | ||
| 77 | #define MSR_P4_PERFCTR0 0x300 | ||
| 78 | #define MSR_P4_CCCR0 0x360 | ||
| 79 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
| 80 | #define P4_ESCR_OS (1<<3) | ||
| 81 | #define P4_ESCR_USR (1<<2) | ||
| 82 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
| 83 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
| 84 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
| 85 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
| 86 | #define P4_CCCR_COMPARE (1<<18) | ||
| 87 | #define P4_CCCR_REQUIRED (3<<16) | ||
| 88 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
| 89 | #define P4_CCCR_ENABLE (1<<12) | ||
| 90 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
| 91 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
| 92 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
| 93 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
| 94 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
| 95 | #define P4_NMI_IQ_CCCR0 \ | ||
| 96 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
| 97 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
| 98 | 70 | ||
| 99 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 71 | extern void show_registers(struct pt_regs *regs); |
| 100 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | 72 | extern int unknown_nmi_panic; |
| 73 | |||
| 74 | /* converts an msr to an appropriate reservation bit */ | ||
| 75 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
| 76 | { | ||
| 77 | /* returns the bit offset of the performance counter register */ | ||
| 78 | switch (boot_cpu_data.x86_vendor) { | ||
| 79 | case X86_VENDOR_AMD: | ||
| 80 | return (msr - MSR_K7_PERFCTR0); | ||
| 81 | case X86_VENDOR_INTEL: | ||
| 82 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
| 83 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
| 84 | |||
| 85 | switch (boot_cpu_data.x86) { | ||
| 86 | case 6: | ||
| 87 | return (msr - MSR_P6_PERFCTR0); | ||
| 88 | case 15: | ||
| 89 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
| 90 | } | ||
| 91 | } | ||
| 92 | return 0; | ||
| 93 | } | ||
| 94 | |||
| 95 | /* converts an msr to an appropriate reservation bit */ | ||
| 96 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
| 97 | { | ||
| 98 | /* returns the bit offset of the event selection register */ | ||
| 99 | switch (boot_cpu_data.x86_vendor) { | ||
| 100 | case X86_VENDOR_AMD: | ||
| 101 | return (msr - MSR_K7_EVNTSEL0); | ||
| 102 | case X86_VENDOR_INTEL: | ||
| 103 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
| 104 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
| 105 | |||
| 106 | switch (boot_cpu_data.x86) { | ||
| 107 | case 6: | ||
| 108 | return (msr - MSR_P6_EVNTSEL0); | ||
| 109 | case 15: | ||
| 110 | return (msr - MSR_P4_BSU_ESCR0); | ||
| 111 | } | ||
| 112 | } | ||
| 113 | return 0; | ||
| 114 | } | ||
| 115 | |||
| 116 | /* checks for a bit availability (hack for oprofile) */ | ||
| 117 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
| 118 | { | ||
| 119 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
| 120 | |||
| 121 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
| 122 | } | ||
| 123 | |||
| 124 | /* checks the an msr for availability */ | ||
| 125 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
| 126 | { | ||
| 127 | unsigned int counter; | ||
| 128 | |||
| 129 | counter = nmi_perfctr_msr_to_bit(msr); | ||
| 130 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
| 131 | |||
| 132 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
| 133 | } | ||
| 134 | |||
| 135 | int reserve_perfctr_nmi(unsigned int msr) | ||
| 136 | { | ||
| 137 | unsigned int counter; | ||
| 138 | |||
| 139 | counter = nmi_perfctr_msr_to_bit(msr); | ||
| 140 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
| 141 | |||
| 142 | if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) | ||
| 143 | return 1; | ||
| 144 | return 0; | ||
| 145 | } | ||
| 146 | |||
| 147 | void release_perfctr_nmi(unsigned int msr) | ||
| 148 | { | ||
| 149 | unsigned int counter; | ||
| 150 | |||
| 151 | counter = nmi_perfctr_msr_to_bit(msr); | ||
| 152 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
| 153 | |||
| 154 | clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); | ||
| 155 | } | ||
| 156 | |||
| 157 | int reserve_evntsel_nmi(unsigned int msr) | ||
| 158 | { | ||
| 159 | unsigned int counter; | ||
| 160 | |||
| 161 | counter = nmi_evntsel_msr_to_bit(msr); | ||
| 162 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
| 163 | |||
| 164 | if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0])) | ||
| 165 | return 1; | ||
| 166 | return 0; | ||
| 167 | } | ||
| 168 | |||
| 169 | void release_evntsel_nmi(unsigned int msr) | ||
| 170 | { | ||
| 171 | unsigned int counter; | ||
| 172 | |||
| 173 | counter = nmi_evntsel_msr_to_bit(msr); | ||
| 174 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
| 175 | |||
| 176 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); | ||
| 177 | } | ||
| 178 | |||
| 179 | static __cpuinit inline int nmi_known_cpu(void) | ||
| 180 | { | ||
| 181 | switch (boot_cpu_data.x86_vendor) { | ||
| 182 | case X86_VENDOR_AMD: | ||
| 183 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
| 184 | case X86_VENDOR_INTEL: | ||
| 185 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
| 186 | return 1; | ||
| 187 | else | ||
| 188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
| 189 | } | ||
| 190 | return 0; | ||
| 191 | } | ||
| 101 | 192 | ||
| 102 | #ifdef CONFIG_SMP | 193 | #ifdef CONFIG_SMP |
| 103 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 194 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
| @@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void) | |||
| 125 | unsigned int *prev_nmi_count; | 216 | unsigned int *prev_nmi_count; |
| 126 | int cpu; | 217 | int cpu; |
| 127 | 218 | ||
| 128 | if (nmi_watchdog == NMI_NONE) | 219 | /* Enable NMI watchdog for newer systems. |
| 220 | Actually it should be safe for most systems before 2004 too except | ||
| 221 | for some IBM systems that corrupt registers when NMI happens | ||
| 222 | during SMM. Unfortunately we don't have more exact information | ||
| 223 | on these and use this coarse check. */ | ||
| 224 | if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004) | ||
| 225 | nmi_watchdog = NMI_LOCAL_APIC; | ||
| 226 | |||
| 227 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) | ||
| 228 | return 0; | ||
| 229 | |||
| 230 | if (!atomic_read(&nmi_active)) | ||
| 129 | return 0; | 231 | return 0; |
| 130 | 232 | ||
| 131 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 233 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
| @@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void) | |||
| 149 | if (!cpu_isset(cpu, cpu_callin_map)) | 251 | if (!cpu_isset(cpu, cpu_callin_map)) |
| 150 | continue; | 252 | continue; |
| 151 | #endif | 253 | #endif |
| 254 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | ||
| 255 | continue; | ||
| 152 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 256 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { |
| 153 | endflag = 1; | ||
| 154 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 257 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
| 155 | cpu, | 258 | cpu, |
| 156 | prev_nmi_count[cpu], | 259 | prev_nmi_count[cpu], |
| 157 | nmi_count(cpu)); | 260 | nmi_count(cpu)); |
| 158 | nmi_active = 0; | 261 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; |
| 159 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 262 | atomic_dec(&nmi_active); |
| 160 | kfree(prev_nmi_count); | ||
| 161 | return -1; | ||
| 162 | } | 263 | } |
| 163 | } | 264 | } |
| 265 | if (!atomic_read(&nmi_active)) { | ||
| 266 | kfree(prev_nmi_count); | ||
| 267 | atomic_set(&nmi_active, -1); | ||
| 268 | return -1; | ||
| 269 | } | ||
| 164 | endflag = 1; | 270 | endflag = 1; |
| 165 | printk("OK.\n"); | 271 | printk("OK.\n"); |
| 166 | 272 | ||
| 167 | /* now that we know it works we can reduce NMI frequency to | 273 | /* now that we know it works we can reduce NMI frequency to |
| 168 | something more reasonable; makes a difference in some configs */ | 274 | something more reasonable; makes a difference in some configs */ |
| 169 | if (nmi_watchdog == NMI_LOCAL_APIC) | 275 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
| 276 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 277 | |||
| 170 | nmi_hz = 1; | 278 | nmi_hz = 1; |
| 279 | /* | ||
| 280 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
| 281 | * are writable, with higher bits sign extending from bit 31. | ||
| | | 282 | * So, we can only program the counter with 31-bit values, and the | |
| | | 283 | * 32nd bit must be 1 so that bits 33 and above are also 1. | |
| 284 | * Find the appropriate nmi_hz | ||
| 285 | */ | ||
| 286 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
| 287 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
| 288 | u64 count = (u64)cpu_khz * 1000; | ||
| 289 | do_div(count, 0x7fffffffUL); | ||
| 290 | nmi_hz = count + 1; | ||
| 291 | } | ||
| 292 | } | ||
| 171 | 293 | ||
| 172 | kfree(prev_nmi_count); | 294 | kfree(prev_nmi_count); |
| 173 | return 0; | 295 | return 0; |
| @@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str) | |||
| 181 | 303 | ||
| 182 | get_option(&str, &nmi); | 304 | get_option(&str, &nmi); |
| 183 | 305 | ||
| 184 | if (nmi >= NMI_INVALID) | 306 | if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) |
| 185 | return 0; | 307 | return 0; |
| 186 | if (nmi == NMI_NONE) | ||
| 187 | nmi_watchdog = nmi; | ||
| 188 | /* | 308 | /* |
| 189 | * If any other x86 CPU has a local APIC, then | 309 | * If any other x86 CPU has a local APIC, then |
| 190 | * please test the NMI stuff there and send me the | 310 | * please test the NMI stuff there and send me the |
| 191 | * missing bits. Right now Intel P6/P4 and AMD K7 only. | 311 | * missing bits. Right now Intel P6/P4 and AMD K7 only. |
| 192 | */ | 312 | */ |
| 193 | if ((nmi == NMI_LOCAL_APIC) && | 313 | if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) |
| 194 | (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 314 | return 0; /* no lapic support */ |
| 195 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | 315 | nmi_watchdog = nmi; |
| 196 | nmi_watchdog = nmi; | ||
| 197 | if ((nmi == NMI_LOCAL_APIC) && | ||
| 198 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | ||
| 199 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | ||
| 200 | nmi_watchdog = nmi; | ||
| 201 | /* | ||
| 202 | * We can enable the IO-APIC watchdog | ||
| 203 | * unconditionally. | ||
| 204 | */ | ||
| 205 | if (nmi == NMI_IO_APIC) { | ||
| 206 | nmi_active = 1; | ||
| 207 | nmi_watchdog = nmi; | ||
| 208 | } | ||
| 209 | return 1; | 316 | return 1; |
| 210 | } | 317 | } |
| 211 | 318 | ||
| 212 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 319 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
| 213 | 320 | ||
| 214 | static void disable_intel_arch_watchdog(void); | ||
| 215 | |||
| 216 | static void disable_lapic_nmi_watchdog(void) | 321 | static void disable_lapic_nmi_watchdog(void) |
| 217 | { | 322 | { |
| 218 | if (nmi_active <= 0) | 323 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
| 324 | |||
| 325 | if (atomic_read(&nmi_active) <= 0) | ||
| 219 | return; | 326 | return; |
| 220 | switch (boot_cpu_data.x86_vendor) { | ||
| 221 | case X86_VENDOR_AMD: | ||
| 222 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
| 223 | break; | ||
| 224 | case X86_VENDOR_INTEL: | ||
| 225 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
| 226 | disable_intel_arch_watchdog(); | ||
| 227 | break; | ||
| 228 | } | ||
| 229 | switch (boot_cpu_data.x86) { | ||
| 230 | case 6: | ||
| 231 | if (boot_cpu_data.x86_model > 0xd) | ||
| 232 | break; | ||
| 233 | 327 | ||
| 234 | wrmsr(MSR_P6_EVNTSEL0, 0, 0); | 328 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
| 235 | break; | ||
| 236 | case 15: | ||
| 237 | if (boot_cpu_data.x86_model > 0x4) | ||
| 238 | break; | ||
| 239 | 329 | ||
| 240 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | 330 | BUG_ON(atomic_read(&nmi_active) != 0); |
| 241 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
| 242 | break; | ||
| 243 | } | ||
| 244 | break; | ||
| 245 | } | ||
| 246 | nmi_active = -1; | ||
| 247 | /* tell do_nmi() and others that we're not active any more */ | ||
| 248 | nmi_watchdog = 0; | ||
| 249 | } | 331 | } |
| 250 | 332 | ||
| 251 | static void enable_lapic_nmi_watchdog(void) | 333 | static void enable_lapic_nmi_watchdog(void) |
| 252 | { | 334 | { |
| 253 | if (nmi_active < 0) { | 335 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
| 254 | nmi_watchdog = NMI_LOCAL_APIC; | ||
| 255 | setup_apic_nmi_watchdog(); | ||
| 256 | } | ||
| 257 | } | ||
| 258 | 336 | ||
| 259 | int reserve_lapic_nmi(void) | 337 | /* are we already enabled */ |
| 260 | { | 338 | if (atomic_read(&nmi_active) != 0) |
| 261 | unsigned int old_owner; | 339 | return; |
| 262 | |||
| 263 | spin_lock(&lapic_nmi_owner_lock); | ||
| 264 | old_owner = lapic_nmi_owner; | ||
| 265 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | ||
| 266 | spin_unlock(&lapic_nmi_owner_lock); | ||
| 267 | if (old_owner & LAPIC_NMI_RESERVED) | ||
| 268 | return -EBUSY; | ||
| 269 | if (old_owner & LAPIC_NMI_WATCHDOG) | ||
| 270 | disable_lapic_nmi_watchdog(); | ||
| 271 | return 0; | ||
| 272 | } | ||
| 273 | 340 | ||
| 274 | void release_lapic_nmi(void) | 341 | /* are we lapic aware */ |
| 275 | { | 342 | if (nmi_known_cpu() <= 0) |
| 276 | unsigned int new_owner; | 343 | return; |
| 277 | 344 | ||
| 278 | spin_lock(&lapic_nmi_owner_lock); | 345 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
| 279 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | 346 | touch_nmi_watchdog(); |
| 280 | lapic_nmi_owner = new_owner; | ||
| 281 | spin_unlock(&lapic_nmi_owner_lock); | ||
| 282 | if (new_owner & LAPIC_NMI_WATCHDOG) | ||
| 283 | enable_lapic_nmi_watchdog(); | ||
| 284 | } | 347 | } |
| 285 | 348 | ||
| 286 | void disable_timer_nmi_watchdog(void) | 349 | void disable_timer_nmi_watchdog(void) |
| 287 | { | 350 | { |
| 288 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 351 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
| 352 | |||
| 353 | if (atomic_read(&nmi_active) <= 0) | ||
| 289 | return; | 354 | return; |
| 290 | 355 | ||
| 291 | unset_nmi_callback(); | 356 | disable_irq(0); |
| 292 | nmi_active = -1; | 357 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
| 293 | nmi_watchdog = NMI_NONE; | 358 | |
| 359 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
| 294 | } | 360 | } |
| 295 | 361 | ||
| 296 | void enable_timer_nmi_watchdog(void) | 362 | void enable_timer_nmi_watchdog(void) |
| 297 | { | 363 | { |
| 298 | if (nmi_active < 0) { | 364 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
| 299 | nmi_watchdog = NMI_IO_APIC; | 365 | |
| 366 | if (atomic_read(&nmi_active) == 0) { | ||
| 300 | touch_nmi_watchdog(); | 367 | touch_nmi_watchdog(); |
| 301 | nmi_active = 1; | 368 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
| 369 | enable_irq(0); | ||
| 302 | } | 370 | } |
| 303 | } | 371 | } |
| 304 | 372 | ||
| @@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */ | |||
| 308 | 376 | ||
| 309 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 377 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
| 310 | { | 378 | { |
| 311 | nmi_pm_active = nmi_active; | 379 | /* only CPU0 goes here, other CPUs should be offline */ |
| 312 | disable_lapic_nmi_watchdog(); | 380 | nmi_pm_active = atomic_read(&nmi_active); |
| 381 | stop_apic_nmi_watchdog(NULL); | ||
| 382 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
| 313 | return 0; | 383 | return 0; |
| 314 | } | 384 | } |
| 315 | 385 | ||
| 316 | static int lapic_nmi_resume(struct sys_device *dev) | 386 | static int lapic_nmi_resume(struct sys_device *dev) |
| 317 | { | 387 | { |
| 318 | if (nmi_pm_active > 0) | 388 | /* only CPU0 goes here, other CPUs should be offline */ |
| 319 | enable_lapic_nmi_watchdog(); | 389 | if (nmi_pm_active > 0) { |
| 390 | setup_apic_nmi_watchdog(NULL); | ||
| 391 | touch_nmi_watchdog(); | ||
| 392 | } | ||
| 320 | return 0; | 393 | return 0; |
| 321 | } | 394 | } |
| 322 | 395 | ||
| @@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void) | |||
| 336 | { | 409 | { |
| 337 | int error; | 410 | int error; |
| 338 | 411 | ||
| 339 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 412 | /* should really be a BUG_ON but b/c this is an |
| 413 | * init call, it just doesn't work. -dcz | ||
| 414 | */ | ||
| 415 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
| 416 | return 0; | ||
| 417 | |||
| 418 | if ( atomic_read(&nmi_active) < 0 ) | ||
| 340 | return 0; | 419 | return 0; |
| 341 | 420 | ||
| 342 | error = sysdev_class_register(&nmi_sysclass); | 421 | error = sysdev_class_register(&nmi_sysclass); |
| @@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs); | |||
| 354 | * Original code written by Keith Owens. | 433 | * Original code written by Keith Owens. |
| 355 | */ | 434 | */ |
| 356 | 435 | ||
| 357 | static void clear_msr_range(unsigned int base, unsigned int n) | 436 | static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) |
| 358 | { | ||
| 359 | unsigned int i; | ||
| 360 | |||
| 361 | for(i = 0; i < n; ++i) | ||
| 362 | wrmsr(base+i, 0, 0); | ||
| 363 | } | ||
| 364 | |||
| 365 | static void write_watchdog_counter(const char *descr) | ||
| 366 | { | 437 | { |
| 367 | u64 count = (u64)cpu_khz * 1000; | 438 | u64 count = (u64)cpu_khz * 1000; |
| 368 | 439 | ||
| 369 | do_div(count, nmi_hz); | 440 | do_div(count, nmi_hz); |
| 370 | if(descr) | 441 | if(descr) |
| 371 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 442 | Dprintk("setting %s to -0x%08Lx\n", descr, count); |
| 372 | wrmsrl(nmi_perfctr_msr, 0 - count); | 443 | wrmsrl(perfctr_msr, 0 - count); |
| 373 | } | 444 | } |
| 374 | 445 | ||
| 375 | static void setup_k7_watchdog(void) | 446 | /* Note that these events don't tick when the CPU idles. This means |
| 447 | the frequency varies with CPU load. */ | ||
| 448 | |||
| 449 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
| 450 | #define K7_EVNTSEL_INT (1 << 20) | ||
| 451 | #define K7_EVNTSEL_OS (1 << 17) | ||
| 452 | #define K7_EVNTSEL_USR (1 << 16) | ||
| 453 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
| 454 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
| 455 | |||
| 456 | static int setup_k7_watchdog(void) | ||
| 376 | { | 457 | { |
| 458 | unsigned int perfctr_msr, evntsel_msr; | ||
| 377 | unsigned int evntsel; | 459 | unsigned int evntsel; |
| 460 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 461 | |||
| 462 | perfctr_msr = MSR_K7_PERFCTR0; | ||
| 463 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
| 464 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
| 465 | goto fail; | ||
| 378 | 466 | ||
| 379 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 467 | if (!reserve_evntsel_nmi(evntsel_msr)) |
| 468 | goto fail1; | ||
| 380 | 469 | ||
| 381 | clear_msr_range(MSR_K7_EVNTSEL0, 4); | 470 | wrmsrl(perfctr_msr, 0UL); |
| 382 | clear_msr_range(MSR_K7_PERFCTR0, 4); | ||
| 383 | 471 | ||
| 384 | evntsel = K7_EVNTSEL_INT | 472 | evntsel = K7_EVNTSEL_INT |
| 385 | | K7_EVNTSEL_OS | 473 | | K7_EVNTSEL_OS |
| 386 | | K7_EVNTSEL_USR | 474 | | K7_EVNTSEL_USR |
| 387 | | K7_NMI_EVENT; | 475 | | K7_NMI_EVENT; |
| 388 | 476 | ||
| 389 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 477 | /* setup the timer */ |
| 390 | write_watchdog_counter("K7_PERFCTR0"); | 478 | wrmsr(evntsel_msr, evntsel, 0); |
| 479 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); | ||
| 391 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 480 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 392 | evntsel |= K7_EVNTSEL_ENABLE; | 481 | evntsel |= K7_EVNTSEL_ENABLE; |
| 393 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 482 | wrmsr(evntsel_msr, evntsel, 0); |
| 483 | |||
| 484 | wd->perfctr_msr = perfctr_msr; | ||
| 485 | wd->evntsel_msr = evntsel_msr; | ||
| 486 | wd->cccr_msr = 0; //unused | ||
| 487 | wd->check_bit = 1ULL<<63; | ||
| 488 | return 1; | ||
| 489 | fail1: | ||
| 490 | release_perfctr_nmi(perfctr_msr); | ||
| 491 | fail: | ||
| 492 | return 0; | ||
| 493 | } | ||
| 494 | |||
| 495 | static void stop_k7_watchdog(void) | ||
| 496 | { | ||
| 497 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 498 | |||
| 499 | wrmsr(wd->evntsel_msr, 0, 0); | ||
| 500 | |||
| 501 | release_evntsel_nmi(wd->evntsel_msr); | ||
| 502 | release_perfctr_nmi(wd->perfctr_msr); | ||
| 394 | } | 503 | } |
| 395 | 504 | ||
| 396 | static void setup_p6_watchdog(void) | 505 | #define P6_EVNTSEL0_ENABLE (1 << 22) |
| 506 | #define P6_EVNTSEL_INT (1 << 20) | ||
| 507 | #define P6_EVNTSEL_OS (1 << 17) | ||
| 508 | #define P6_EVNTSEL_USR (1 << 16) | ||
| 509 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
| 510 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
| 511 | |||
| 512 | static int setup_p6_watchdog(void) | ||
| 397 | { | 513 | { |
| 514 | unsigned int perfctr_msr, evntsel_msr; | ||
| 398 | unsigned int evntsel; | 515 | unsigned int evntsel; |
| 516 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 517 | |||
| 518 | perfctr_msr = MSR_P6_PERFCTR0; | ||
| 519 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
| 520 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
| 521 | goto fail; | ||
| 399 | 522 | ||
| 400 | nmi_perfctr_msr = MSR_P6_PERFCTR0; | 523 | if (!reserve_evntsel_nmi(evntsel_msr)) |
| 524 | goto fail1; | ||
| 401 | 525 | ||
| 402 | clear_msr_range(MSR_P6_EVNTSEL0, 2); | 526 | wrmsrl(perfctr_msr, 0UL); |
| 403 | clear_msr_range(MSR_P6_PERFCTR0, 2); | ||
| 404 | 527 | ||
| 405 | evntsel = P6_EVNTSEL_INT | 528 | evntsel = P6_EVNTSEL_INT |
| 406 | | P6_EVNTSEL_OS | 529 | | P6_EVNTSEL_OS |
| 407 | | P6_EVNTSEL_USR | 530 | | P6_EVNTSEL_USR |
| 408 | | P6_NMI_EVENT; | 531 | | P6_NMI_EVENT; |
| 409 | 532 | ||
| 410 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 533 | /* setup the timer */ |
| 411 | write_watchdog_counter("P6_PERFCTR0"); | 534 | wrmsr(evntsel_msr, evntsel, 0); |
| 535 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | ||
| 412 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 536 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 413 | evntsel |= P6_EVNTSEL0_ENABLE; | 537 | evntsel |= P6_EVNTSEL0_ENABLE; |
| 414 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 538 | wrmsr(evntsel_msr, evntsel, 0); |
| 539 | |||
| 540 | wd->perfctr_msr = perfctr_msr; | ||
| 541 | wd->evntsel_msr = evntsel_msr; | ||
| 542 | wd->cccr_msr = 0; //unused | ||
| 543 | wd->check_bit = 1ULL<<39; | ||
| 544 | return 1; | ||
| 545 | fail1: | ||
| 546 | release_perfctr_nmi(perfctr_msr); | ||
| 547 | fail: | ||
| 548 | return 0; | ||
| 549 | } | ||
| 550 | |||
| 551 | static void stop_p6_watchdog(void) | ||
| 552 | { | ||
| 553 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 554 | |||
| 555 | wrmsr(wd->evntsel_msr, 0, 0); | ||
| 556 | |||
| 557 | release_evntsel_nmi(wd->evntsel_msr); | ||
| 558 | release_perfctr_nmi(wd->perfctr_msr); | ||
| 415 | } | 559 | } |
| 416 | 560 | ||
| 561 | /* Note that these events don't tick when the CPU idles. This means | ||
| 562 | the frequency varies with CPU load. */ | ||
| 563 | |||
| 564 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
| 565 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
| 566 | #define P4_ESCR_OS (1<<3) | ||
| 567 | #define P4_ESCR_USR (1<<2) | ||
| 568 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
| 569 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
| 570 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
| 571 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
| 572 | #define P4_CCCR_COMPARE (1<<18) | ||
| 573 | #define P4_CCCR_REQUIRED (3<<16) | ||
| 574 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
| 575 | #define P4_CCCR_ENABLE (1<<12) | ||
| 576 | #define P4_CCCR_OVF (1<<31) | ||
| 577 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
| 578 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
| 579 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
| 580 | |||
| 417 | static int setup_p4_watchdog(void) | 581 | static int setup_p4_watchdog(void) |
| 418 | { | 582 | { |
| 583 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
| 584 | unsigned int evntsel, cccr_val; | ||
| 419 | unsigned int misc_enable, dummy; | 585 | unsigned int misc_enable, dummy; |
| 586 | unsigned int ht_num; | ||
| 587 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 420 | 588 | ||
| 421 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 589 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); |
| 422 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 590 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) |
| 423 | return 0; | 591 | return 0; |
| 424 | 592 | ||
| 425 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
| 426 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
| 427 | #ifdef CONFIG_SMP | 593 | #ifdef CONFIG_SMP |
| 428 | if (smp_num_siblings == 2) | 594 | /* detect which hyperthread we are on */ |
| 429 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | 595 | if (smp_num_siblings == 2) { |
| 596 | unsigned int ebx, apicid; | ||
| 597 | |||
| 598 | ebx = cpuid_ebx(1); | ||
| 599 | apicid = (ebx >> 24) & 0xff; | ||
| 600 | ht_num = apicid & 1; | ||
| 601 | } else | ||
| 430 | #endif | 602 | #endif |
| 603 | ht_num = 0; | ||
| 431 | 604 | ||
| 432 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | 605 | /* performance counters are shared resources |
| 433 | clear_msr_range(0x3F1, 2); | 606 | * assign each hyperthread its own set |
| 434 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | 607 | * (re-use the ESCR0 register, seems safe |
| 435 | docs doesn't fully define it, so leave it alone for now. */ | 608 | * and keeps the cccr_val the same) |
| 436 | if (boot_cpu_data.x86_model >= 0x3) { | 609 | */ |
| 437 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | 610 | if (!ht_num) { |
| 438 | clear_msr_range(0x3A0, 26); | 611 | /* logical cpu 0 */ |
| 439 | clear_msr_range(0x3BC, 3); | 612 | perfctr_msr = MSR_P4_IQ_PERFCTR0; |
| 613 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
| 614 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
| 615 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
| 440 | } else { | 616 | } else { |
| 441 | clear_msr_range(0x3A0, 31); | 617 | /* logical cpu 1 */ |
| 618 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
| 619 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
| 620 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
| 621 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
| 442 | } | 622 | } |
| 443 | clear_msr_range(0x3C0, 6); | 623 | |
| 444 | clear_msr_range(0x3C8, 6); | 624 | if (!reserve_perfctr_nmi(perfctr_msr)) |
| 445 | clear_msr_range(0x3E0, 2); | 625 | goto fail; |
| 446 | clear_msr_range(MSR_P4_CCCR0, 18); | 626 | |
| 447 | clear_msr_range(MSR_P4_PERFCTR0, 18); | 627 | if (!reserve_evntsel_nmi(evntsel_msr)) |
| 448 | 628 | goto fail1; | |
| 449 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | 629 | |
| 450 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | 630 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
| 451 | write_watchdog_counter("P4_IQ_COUNTER0"); | 631 | | P4_ESCR_OS |
| 632 | | P4_ESCR_USR; | ||
| 633 | |||
| 634 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
| 635 | | P4_CCCR_COMPLEMENT | ||
| 636 | | P4_CCCR_COMPARE | ||
| 637 | | P4_CCCR_REQUIRED; | ||
| 638 | |||
| 639 | wrmsr(evntsel_msr, evntsel, 0); | ||
| 640 | wrmsr(cccr_msr, cccr_val, 0); | ||
| 641 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); | ||
| 452 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 642 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 453 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 643 | cccr_val |= P4_CCCR_ENABLE; |
| 644 | wrmsr(cccr_msr, cccr_val, 0); | ||
| 645 | wd->perfctr_msr = perfctr_msr; | ||
| 646 | wd->evntsel_msr = evntsel_msr; | ||
| 647 | wd->cccr_msr = cccr_msr; | ||
| 648 | wd->check_bit = 1ULL<<39; | ||
| 454 | return 1; | 649 | return 1; |
| 650 | fail1: | ||
| 651 | release_perfctr_nmi(perfctr_msr); | ||
| 652 | fail: | ||
| 653 | return 0; | ||
| 455 | } | 654 | } |
| 456 | 655 | ||
| 457 | static void disable_intel_arch_watchdog(void) | 656 | static void stop_p4_watchdog(void) |
| 458 | { | 657 | { |
| 459 | unsigned ebx; | 658 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
| 460 | 659 | ||
| 461 | /* | 660 | wrmsr(wd->cccr_msr, 0, 0); |
| 462 | * Check whether the Architectural PerfMon supports | 661 | wrmsr(wd->evntsel_msr, 0, 0); |
| 463 | * Unhalted Core Cycles Event or not. | 662 | |
| 464 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 663 | release_evntsel_nmi(wd->evntsel_msr); |
| 465 | */ | 664 | release_perfctr_nmi(wd->perfctr_msr); |
| 466 | ebx = cpuid_ebx(10); | ||
| 467 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
| 468 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | ||
| 469 | } | 665 | } |
| 470 | 666 | ||
| 667 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
| 668 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
| 669 | |||
| 471 | static int setup_intel_arch_watchdog(void) | 670 | static int setup_intel_arch_watchdog(void) |
| 472 | { | 671 | { |
| 672 | unsigned int ebx; | ||
| 673 | union cpuid10_eax eax; | ||
| 674 | unsigned int unused; | ||
| 675 | unsigned int perfctr_msr, evntsel_msr; | ||
| 473 | unsigned int evntsel; | 676 | unsigned int evntsel; |
| 474 | unsigned ebx; | 677 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
| 475 | 678 | ||
| 476 | /* | 679 | /* |
| 477 | * Check whether the Architectural PerfMon supports | 680 | * Check whether the Architectural PerfMon supports |
| 478 | * Unhalted Core Cycles Event or not. | 681 | * Unhalted Core Cycles Event or not. |
| 479 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 682 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
| 480 | */ | 683 | */ |
| 481 | ebx = cpuid_ebx(10); | 684 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
| 482 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 685 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || |
| 483 | return 0; | 686 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
| 687 | goto fail; | ||
| 688 | |||
| 689 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
| 690 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
| 484 | 691 | ||
| 485 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | 692 | if (!reserve_perfctr_nmi(perfctr_msr)) |
| 693 | goto fail; | ||
| 486 | 694 | ||
| 487 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | 695 | if (!reserve_evntsel_nmi(evntsel_msr)) |
| 488 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | 696 | goto fail1; |
| 697 | |||
| 698 | wrmsrl(perfctr_msr, 0UL); | ||
| 489 | 699 | ||
| 490 | evntsel = ARCH_PERFMON_EVENTSEL_INT | 700 | evntsel = ARCH_PERFMON_EVENTSEL_INT |
| 491 | | ARCH_PERFMON_EVENTSEL_OS | 701 | | ARCH_PERFMON_EVENTSEL_OS |
| @@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void) | |||
| 493 | | ARCH_PERFMON_NMI_EVENT_SEL | 703 | | ARCH_PERFMON_NMI_EVENT_SEL |
| 494 | | ARCH_PERFMON_NMI_EVENT_UMASK; | 704 | | ARCH_PERFMON_NMI_EVENT_UMASK; |
| 495 | 705 | ||
| 496 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 706 | /* setup the timer */ |
| 497 | write_watchdog_counter("INTEL_ARCH_PERFCTR0"); | 707 | wrmsr(evntsel_msr, evntsel, 0); |
| 708 | write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
| 498 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 709 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
| 499 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 710 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
| 500 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 711 | wrmsr(evntsel_msr, evntsel, 0); |
| 712 | |||
| 713 | wd->perfctr_msr = perfctr_msr; | ||
| 714 | wd->evntsel_msr = evntsel_msr; | ||
| 715 | wd->cccr_msr = 0; //unused | ||
| 716 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
| 501 | return 1; | 717 | return 1; |
| 718 | fail1: | ||
| 719 | release_perfctr_nmi(perfctr_msr); | ||
| 720 | fail: | ||
| 721 | return 0; | ||
| 502 | } | 722 | } |
| 503 | 723 | ||
| 504 | void setup_apic_nmi_watchdog (void) | 724 | static void stop_intel_arch_watchdog(void) |
| 505 | { | 725 | { |
| 506 | switch (boot_cpu_data.x86_vendor) { | 726 | unsigned int ebx; |
| 507 | case X86_VENDOR_AMD: | 727 | union cpuid10_eax eax; |
| 508 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 728 | unsigned int unused; |
| 509 | return; | 729 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
| 510 | setup_k7_watchdog(); | 730 | |
| 511 | break; | 731 | /* |
| 512 | case X86_VENDOR_INTEL: | 732 | * Check whether the Architectural PerfMon supports |
| 513 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 733 | * Unhalted Core Cycles Event or not. |
| 514 | if (!setup_intel_arch_watchdog()) | 734 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
| 735 | */ | ||
| 736 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
| 737 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
| 738 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
| 739 | return; | ||
| 740 | |||
| 741 | wrmsr(wd->evntsel_msr, 0, 0); | ||
| 742 | release_evntsel_nmi(wd->evntsel_msr); | ||
| 743 | release_perfctr_nmi(wd->perfctr_msr); | ||
| 744 | } | ||
| 745 | |||
| 746 | void setup_apic_nmi_watchdog (void *unused) | ||
| 747 | { | ||
| 748 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 749 | |||
| 750 | /* only support LOCAL and IO APICs for now */ | ||
| 751 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
| 752 | (nmi_watchdog != NMI_IO_APIC)) | ||
| 753 | return; | ||
| 754 | |||
| 755 | if (wd->enabled == 1) | ||
| 756 | return; | ||
| 757 | |||
| 758 | /* cheap hack to support suspend/resume */ | ||
| 759 | /* if cpu0 is not active neither should the other cpus */ | ||
| 760 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | ||
| 761 | return; | ||
| 762 | |||
| 763 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
| 764 | switch (boot_cpu_data.x86_vendor) { | ||
| 765 | case X86_VENDOR_AMD: | ||
| 766 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | ||
| 515 | return; | 767 | return; |
| 516 | break; | 768 | if (!setup_k7_watchdog()) |
| 517 | } | ||
| 518 | switch (boot_cpu_data.x86) { | ||
| 519 | case 6: | ||
| 520 | if (boot_cpu_data.x86_model > 0xd) | ||
| 521 | return; | 769 | return; |
| 522 | |||
| 523 | setup_p6_watchdog(); | ||
| 524 | break; | 770 | break; |
| 525 | case 15: | 771 | case X86_VENDOR_INTEL: |
| 526 | if (boot_cpu_data.x86_model > 0x4) | 772 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
| 527 | return; | 773 | if (!setup_intel_arch_watchdog()) |
| 774 | return; | ||
| 775 | break; | ||
| 776 | } | ||
| 777 | switch (boot_cpu_data.x86) { | ||
| 778 | case 6: | ||
| 779 | if (boot_cpu_data.x86_model > 0xd) | ||
| 780 | return; | ||
| 781 | |||
| 782 | if (!setup_p6_watchdog()) | ||
| 783 | return; | ||
| 784 | break; | ||
| 785 | case 15: | ||
| 786 | if (boot_cpu_data.x86_model > 0x4) | ||
| 787 | return; | ||
| 528 | 788 | ||
| 529 | if (!setup_p4_watchdog()) | 789 | if (!setup_p4_watchdog()) |
| 790 | return; | ||
| 791 | break; | ||
| 792 | default: | ||
| 530 | return; | 793 | return; |
| 794 | } | ||
| 531 | break; | 795 | break; |
| 532 | default: | 796 | default: |
| 533 | return; | 797 | return; |
| 534 | } | 798 | } |
| 535 | break; | 799 | } |
| 536 | default: | 800 | wd->enabled = 1; |
| 801 | atomic_inc(&nmi_active); | ||
| 802 | } | ||
| 803 | |||
| 804 | void stop_apic_nmi_watchdog(void *unused) | ||
| 805 | { | ||
| 806 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 807 | |||
| 808 | /* only support LOCAL and IO APICs for now */ | ||
| 809 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
| 810 | (nmi_watchdog != NMI_IO_APIC)) | ||
| 811 | return; | ||
| 812 | |||
| 813 | if (wd->enabled == 0) | ||
| 537 | return; | 814 | return; |
| 815 | |||
| 816 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
| 817 | switch (boot_cpu_data.x86_vendor) { | ||
| 818 | case X86_VENDOR_AMD: | ||
| 819 | stop_k7_watchdog(); | ||
| 820 | break; | ||
| 821 | case X86_VENDOR_INTEL: | ||
| 822 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
| 823 | stop_intel_arch_watchdog(); | ||
| 824 | break; | ||
| 825 | } | ||
| 826 | switch (boot_cpu_data.x86) { | ||
| 827 | case 6: | ||
| 828 | if (boot_cpu_data.x86_model > 0xd) | ||
| 829 | break; | ||
| 830 | stop_p6_watchdog(); | ||
| 831 | break; | ||
| 832 | case 15: | ||
| 833 | if (boot_cpu_data.x86_model > 0x4) | ||
| 834 | break; | ||
| 835 | stop_p4_watchdog(); | ||
| 836 | break; | ||
| 837 | } | ||
| 838 | break; | ||
| 839 | default: | ||
| 840 | return; | ||
| 841 | } | ||
| 538 | } | 842 | } |
| 539 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 843 | wd->enabled = 0; |
| 540 | nmi_active = 1; | 844 | atomic_dec(&nmi_active); |
| 541 | } | 845 | } |
| 542 | 846 | ||
| 543 | /* | 847 | /* |
| @@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); | |||
| 579 | 883 | ||
| 580 | extern void die_nmi(struct pt_regs *, const char *msg); | 884 | extern void die_nmi(struct pt_regs *, const char *msg); |
| 581 | 885 | ||
| 582 | void nmi_watchdog_tick (struct pt_regs * regs) | 886 | __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) |
| 583 | { | 887 | { |
| 584 | 888 | ||
| 585 | /* | 889 | /* |
| @@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
| 588 | * smp_processor_id(). | 892 | * smp_processor_id(). |
| 589 | */ | 893 | */ |
| 590 | unsigned int sum; | 894 | unsigned int sum; |
| 895 | int touched = 0; | ||
| 591 | int cpu = smp_processor_id(); | 896 | int cpu = smp_processor_id(); |
| 897 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
| 898 | u64 dummy; | ||
| 899 | int rc=0; | ||
| 900 | |||
| 901 | /* check for other users first */ | ||
| 902 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
| 903 | == NOTIFY_STOP) { | ||
| 904 | rc = 1; | ||
| 905 | touched = 1; | ||
| 906 | } | ||
| 592 | 907 | ||
| 593 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 908 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; |
| 594 | 909 | ||
| 595 | if (last_irq_sums[cpu] == sum) { | 910 | /* if the apic timer isn't firing, this cpu isn't doing much */ |
| 911 | if (!touched && last_irq_sums[cpu] == sum) { | ||
| 596 | /* | 912 | /* |
| 597 | * Ayiee, looks like this CPU is stuck ... | 913 | * Ayiee, looks like this CPU is stuck ... |
| 598 | * wait a few IRQs (5 seconds) before doing the oops ... | 914 | * wait a few IRQs (5 seconds) before doing the oops ... |
| @@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
| 607 | last_irq_sums[cpu] = sum; | 923 | last_irq_sums[cpu] = sum; |
| 608 | alert_counter[cpu] = 0; | 924 | alert_counter[cpu] = 0; |
| 609 | } | 925 | } |
| 610 | if (nmi_perfctr_msr) { | 926 | /* see if the nmi watchdog went off */ |
| 611 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 927 | if (wd->enabled) { |
| 612 | /* | 928 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
| 613 | * P4 quirks: | 929 | rdmsrl(wd->perfctr_msr, dummy); |
| 614 | * - An overflown perfctr will assert its interrupt | 930 | if (dummy & wd->check_bit){ |
| 615 | * until the OVF flag in its CCCR is cleared. | 931 | /* this wasn't a watchdog timer interrupt */ |
| 616 | * - LVTPC is masked on interrupt and must be | 932 | goto done; |
| 617 | * unmasked by the LVTPC handler. | 933 | } |
| 934 | |||
| 935 | /* only Intel P4 uses the cccr msr */ | ||
| 936 | if (wd->cccr_msr != 0) { | ||
| 937 | /* | ||
| 938 | * P4 quirks: | ||
| 939 | * - An overflown perfctr will assert its interrupt | ||
| 940 | * until the OVF flag in its CCCR is cleared. | ||
| 941 | * - LVTPC is masked on interrupt and must be | ||
| 942 | * unmasked by the LVTPC handler. | ||
| 943 | */ | ||
| 944 | rdmsrl(wd->cccr_msr, dummy); | ||
| 945 | dummy &= ~P4_CCCR_OVF; | ||
| 946 | wrmsrl(wd->cccr_msr, dummy); | ||
| 947 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 948 | } | ||
| 949 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
| 950 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
| 951 | /* P6-based Pentium M needs to re-unmask | ||
| 952 | * the apic vector, but it doesn't hurt | ||
| 953 | * other P6 variants. | ||
| 954 | * ArchPerfmon/Core Duo also needs this */ | ||
| 955 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 956 | } | ||
| 957 | /* start the cycle over again */ | ||
| 958 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
| 959 | rc = 1; | ||
| 960 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
| 961 | /* don't know how to accurately check for this. | ||
| 962 | * just assume it was a watchdog timer interrupt | ||
| 963 | * This matches the old behaviour. | ||
| 618 | */ | 964 | */ |
| 619 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 965 | rc = 1; |
| 620 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 621 | } | 966 | } |
| 622 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || | ||
| 623 | nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
| 624 | /* Only P6 based Pentium M need to re-unmask | ||
| 625 | * the apic vector but it doesn't hurt | ||
| 626 | * other P6 variant */ | ||
| 627 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
| 628 | } | ||
| 629 | write_watchdog_counter(NULL); | ||
| 630 | } | 967 | } |
| 968 | done: | ||
| 969 | return rc; | ||
| 970 | } | ||
| 971 | |||
| 972 | int do_nmi_callback(struct pt_regs * regs, int cpu) | ||
| 973 | { | ||
| 974 | #ifdef CONFIG_SYSCTL | ||
| 975 | if (unknown_nmi_panic) | ||
| 976 | return unknown_nmi_panic_callback(regs, cpu); | ||
| 977 | #endif | ||
| 978 | return 0; | ||
| 631 | } | 979 | } |
| 632 | 980 | ||
| 633 | #ifdef CONFIG_SYSCTL | 981 | #ifdef CONFIG_SYSCTL |
| @@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | |||
| 637 | unsigned char reason = get_nmi_reason(); | 985 | unsigned char reason = get_nmi_reason(); |
| 638 | char buf[64]; | 986 | char buf[64]; |
| 639 | 987 | ||
| 640 | if (!(reason & 0xc0)) { | 988 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); |
| 641 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | 989 | die_nmi(regs, buf); |
| 642 | die_nmi(regs, buf); | ||
| 643 | } | ||
| 644 | return 0; | 990 | return 0; |
| 645 | } | 991 | } |
| 646 | 992 | ||
| 647 | /* | 993 | /* |
| 648 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | 994 | * proc handler for /proc/sys/kernel/nmi |
| 649 | */ | 995 | */ |
| 650 | int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | 996 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, |
| 651 | void __user *buffer, size_t *length, loff_t *ppos) | 997 | void __user *buffer, size_t *length, loff_t *ppos) |
| 652 | { | 998 | { |
| 653 | int old_state; | 999 | int old_state; |
| 654 | 1000 | ||
| 655 | old_state = unknown_nmi_panic; | 1001 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
| 1002 | old_state = nmi_watchdog_enabled; | ||
| 656 | proc_dointvec(table, write, file, buffer, length, ppos); | 1003 | proc_dointvec(table, write, file, buffer, length, ppos); |
| 657 | if (!!old_state == !!unknown_nmi_panic) | 1004 | if (!!old_state == !!nmi_watchdog_enabled) |
| 658 | return 0; | 1005 | return 0; |
| 659 | 1006 | ||
| 660 | if (unknown_nmi_panic) { | 1007 | if (atomic_read(&nmi_active) < 0) { |
| 661 | if (reserve_lapic_nmi() < 0) { | 1008 | printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); |
| 662 | unknown_nmi_panic = 0; | 1009 | return -EIO; |
| 663 | return -EBUSY; | 1010 | } |
| 664 | } else { | 1011 | |
| 665 | set_nmi_callback(unknown_nmi_panic_callback); | 1012 | if (nmi_watchdog == NMI_DEFAULT) { |
| 666 | } | 1013 | if (nmi_known_cpu() > 0) |
| 1014 | nmi_watchdog = NMI_LOCAL_APIC; | ||
| 1015 | else | ||
| 1016 | nmi_watchdog = NMI_IO_APIC; | ||
| 1017 | } | ||
| 1018 | |||
| 1019 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
| 1020 | if (nmi_watchdog_enabled) | ||
| 1021 | enable_lapic_nmi_watchdog(); | ||
| 1022 | else | ||
| 1023 | disable_lapic_nmi_watchdog(); | ||
| 667 | } else { | 1024 | } else { |
| 668 | release_lapic_nmi(); | 1025 | printk( KERN_WARNING |
| 669 | unset_nmi_callback(); | 1026 | "NMI watchdog doesn't know what hardware to touch\n"); |
| 1027 | return -EIO; | ||
| 670 | } | 1028 | } |
| 671 | return 0; | 1029 | return 0; |
| 672 | } | 1030 | } |
| @@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | |||
| 675 | 1033 | ||
| 676 | EXPORT_SYMBOL(nmi_active); | 1034 | EXPORT_SYMBOL(nmi_active); |
| 677 | EXPORT_SYMBOL(nmi_watchdog); | 1035 | EXPORT_SYMBOL(nmi_watchdog); |
| 678 | EXPORT_SYMBOL(reserve_lapic_nmi); | 1036 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
| 679 | EXPORT_SYMBOL(release_lapic_nmi); | 1037 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
| 1038 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
| 1039 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
| 1040 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
| 1041 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
| 680 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | 1042 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); |
| 681 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | 1043 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); |
