aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c102
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c90
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.h14
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c91
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c253
-rw-r--r--arch/x86/kernel/cpu/mcheck/p5.c53
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c119
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c186
-rw-r--r--arch/x86/kernel/cpu/mcheck/winchip.c36
10 files changed, 946 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
new file mode 100644
index 000000000000..f1ebe1c1c17a
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -0,0 +1,2 @@
# Machine check objects that are always linked in.
obj-y = mce.o \
	k7.o p4.o p5.o p6.o winchip.o \
	therm_throt.o
# Periodic poller for non-fatal machine check events (optional).
obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
new file mode 100644
index 000000000000..eef63e3630c2
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -0,0 +1,102 @@
1/*
2 * Athlon/Hammer specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/interrupt.h>
10#include <linux/smp.h>
11
12#include <asm/processor.h>
13#include <asm/system.h>
14#include <asm/msr.h>
15
16#include "mce.h"
17
18/* Machine Check Handler For AMD Athlon/Duron */
19static fastcall void k7_machine_check(struct pt_regs * regs, long error_code)
20{
21 int recover=1;
22 u32 alow, ahigh, high, low;
23 u32 mcgstl, mcgsth;
24 int i;
25
26 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
27 if (mcgstl & (1<<0)) /* Recoverable ? */
28 recover=0;
29
30 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
31 smp_processor_id(), mcgsth, mcgstl);
32
33 for (i=1; i<nr_mce_banks; i++) {
34 rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
35 if (high&(1<<31)) {
36 if (high & (1<<29))
37 recover |= 1;
38 if (high & (1<<25))
39 recover |= 2;
40 printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
41 high &= ~(1<<31);
42 if (high & (1<<27)) {
43 rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
44 printk ("[%08x%08x]", ahigh, alow);
45 }
46 if (high & (1<<26)) {
47 rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
48 printk (" at %08x%08x", ahigh, alow);
49 }
50 printk ("\n");
51 /* Clear it */
52 wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
53 /* Serialize */
54 wmb();
55 add_taint(TAINT_MACHINE_CHECK);
56 }
57 }
58
59 if (recover&2)
60 panic ("CPU context corrupt");
61 if (recover&1)
62 panic ("Unable to continue");
63 printk (KERN_EMERG "Attempting to continue.\n");
64 mcgstl &= ~(1<<2);
65 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
66}
67
68
69/* AMD K7 machine check is Intel like */
70void amd_mcheck_init(struct cpuinfo_x86 *c)
71{
72 u32 l, h;
73 int i;
74
75 if (!cpu_has(c, X86_FEATURE_MCE))
76 return;
77
78 machine_check_vector = k7_machine_check;
79 wmb();
80
81 printk (KERN_INFO "Intel machine check architecture supported.\n");
82 rdmsr (MSR_IA32_MCG_CAP, l, h);
83 if (l & (1<<8)) /* Control register present ? */
84 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
85 nr_mce_banks = l & 0xff;
86
87 /* Clear status for MC index 0 separately, we don't touch CTL,
88 * as some K7 Athlons cause spurious MCEs when its enabled. */
89 if (boot_cpu_data.x86 == 6) {
90 wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
91 i = 1;
92 } else
93 i = 0;
94 for (; i<nr_mce_banks; i++) {
95 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
96 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
97 }
98
99 set_in_cr4 (X86_CR4_MCE);
100 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
101 smp_processor_id());
102}
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
new file mode 100644
index 000000000000..34c781eddee4
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,90 @@
1/*
2 * mce.c - x86 Machine Check Exception Reporting
3 * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/module.h>
10#include <linux/smp.h>
11#include <linux/thread_info.h>
12
13#include <asm/processor.h>
14#include <asm/system.h>
15#include <asm/mce.h>
16
17#include "mce.h"
18
/*
 * Boot-time switch: 0 by default, 1 after "nomce" (mcheck_init() then does
 * nothing), -1 after "mce" (which the P5 setup requires before enabling).
 */
int mce_disabled;

/* Number of machine check banks, filled in by the vendor init code. */
int nr_mce_banks;

EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
23
24/* Handle unconfigured int18 (should never happen) */
25static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code)
26{
27 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id());
28}
29
30/* Call the installed machine check handler for this CPU setup. */
31void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check;
32
33/* This has to be run for each processor */
34void mcheck_init(struct cpuinfo_x86 *c)
35{
36 if (mce_disabled==1)
37 return;
38
39 switch (c->x86_vendor) {
40 case X86_VENDOR_AMD:
41 amd_mcheck_init(c);
42 break;
43
44 case X86_VENDOR_INTEL:
45 if (c->x86==5)
46 intel_p5_mcheck_init(c);
47 if (c->x86==6)
48 intel_p6_mcheck_init(c);
49 if (c->x86==15)
50 intel_p4_mcheck_init(c);
51 break;
52
53 case X86_VENDOR_CENTAUR:
54 if (c->x86==5)
55 winchip_mcheck_init(c);
56 break;
57
58 default:
59 break;
60 }
61}
62
63static unsigned long old_cr4 __initdata;
64
65void __init stop_mce(void)
66{
67 old_cr4 = read_cr4();
68 clear_in_cr4(X86_CR4_MCE);
69}
70
71void __init restart_mce(void)
72{
73 if (old_cr4 & X86_CR4_MCE)
74 set_in_cr4(X86_CR4_MCE);
75}
76
77static int __init mcheck_disable(char *str)
78{
79 mce_disabled = 1;
80 return 1;
81}
82
83static int __init mcheck_enable(char *str)
84{
85 mce_disabled = -1;
86 return 1;
87}
88
89__setup("nomce", mcheck_disable);
90__setup("mce", mcheck_enable);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h
new file mode 100644
index 000000000000..81fb6e2d35f3
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.h
@@ -0,0 +1,14 @@
1#include <linux/init.h>
2#include <asm/mce.h>
3
4void amd_mcheck_init(struct cpuinfo_x86 *c);
5void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
6void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
7void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
8void winchip_mcheck_init(struct cpuinfo_x86 *c);
9
10/* Call the installed machine check handler for this CPU setup. */
11extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code);
12
13extern int nr_mce_banks;
14
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
new file mode 100644
index 000000000000..bf39409b3838
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -0,0 +1,91 @@
1/*
2 * Non Fatal Machine Check Exception Reporting
3 *
4 * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk>
5 *
6 * This file contains routines to check for non-fatal MCEs every 15s
7 *
8 */
9
10#include <linux/init.h>
11#include <linux/types.h>
12#include <linux/kernel.h>
13#include <linux/jiffies.h>
14#include <linux/workqueue.h>
15#include <linux/interrupt.h>
16#include <linux/smp.h>
17#include <linux/module.h>
18
19#include <asm/processor.h>
20#include <asm/system.h>
21#include <asm/msr.h>
22
23#include "mce.h"
24
/* First bank polled by mce_checkregs(); chosen in init_nonfatal_mce_checker(). */
static int firstbank;

/*
 * Polling period in jiffies (15 seconds).  The expansion is parenthesized
 * so the macro behaves as a single operand inside any larger expression.
 */
#define MCE_RATE (15*HZ) /* timer rate is 15s */
28
29static void mce_checkregs (void *info)
30{
31 u32 low, high;
32 int i;
33
34 for (i=firstbank; i<nr_mce_banks; i++) {
35 rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
36
37 if (high & (1<<31)) {
38 printk(KERN_INFO "MCE: The hardware reports a non "
39 "fatal, correctable incident occurred on "
40 "CPU %d.\n",
41 smp_processor_id());
42 printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
43
44 /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
45 wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
46
47 /* Serialize */
48 wmb();
49 add_taint(TAINT_MACHINE_CHECK);
50 }
51 }
52}
53
54static void mce_work_fn(struct work_struct *work);
55static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
56
57static void mce_work_fn(struct work_struct *work)
58{
59 on_each_cpu(mce_checkregs, NULL, 1, 1);
60 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
61}
62
63static int __init init_nonfatal_mce_checker(void)
64{
65 struct cpuinfo_x86 *c = &boot_cpu_data;
66
67 /* Check for MCE support */
68 if (!cpu_has(c, X86_FEATURE_MCE))
69 return -ENODEV;
70
71 /* Check for PPro style MCA */
72 if (!cpu_has(c, X86_FEATURE_MCA))
73 return -ENODEV;
74
75 /* Some Athlons misbehave when we frob bank 0 */
76 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
77 boot_cpu_data.x86 == 6)
78 firstbank = 1;
79 else
80 firstbank = 0;
81
82 /*
83 * Check for non-fatal errors every MCE_RATE s
84 */
85 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
86 printk(KERN_INFO "Machine check exception polling timer started.\n");
87 return 0;
88}
89module_init(init_nonfatal_mce_checker);
90
91MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
new file mode 100644
index 000000000000..1509edfb2313
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -0,0 +1,253 @@
1/*
2 * P4 specific Machine Check Exception Reporting
3 */
4
5#include <linux/init.h>
6#include <linux/types.h>
7#include <linux/kernel.h>
8#include <linux/interrupt.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/msr.h>
14#include <asm/apic.h>
15
16#include <asm/therm_throt.h>
17
18#include "mce.h"
19
20/* as supported by the P4/Xeon family */
21struct intel_mce_extended_msrs {
22 u32 eax;
23 u32 ebx;
24 u32 ecx;
25 u32 edx;
26 u32 esi;
27 u32 edi;
28 u32 ebp;
29 u32 esp;
30 u32 eflags;
31 u32 eip;
32 /* u32 *reserved[]; */
33};
34
35static int mce_num_extended_msrs = 0;
36
37
38#ifdef CONFIG_X86_MCE_P4THERMAL
39static void unexpected_thermal_interrupt(struct pt_regs *regs)
40{
41 printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
42 smp_processor_id());
43 add_taint(TAINT_MACHINE_CHECK);
44}
45
46/* P4/Xeon Thermal transition interrupt handler */
47static void intel_thermal_interrupt(struct pt_regs *regs)
48{
49 __u64 msr_val;
50
51 ack_APIC_irq();
52
53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 therm_throt_process(msr_val & 0x1);
55}
56
57/* Thermal interrupt handler for this CPU setup */
58static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt;
59
60fastcall void smp_thermal_interrupt(struct pt_regs *regs)
61{
62 irq_enter();
63 vendor_thermal_interrupt(regs);
64 irq_exit();
65}
66
67/* P4/Xeon Thermal regulation detect and init */
68static void intel_init_thermal(struct cpuinfo_x86 *c)
69{
70 u32 l, h;
71 unsigned int cpu = smp_processor_id();
72
73 /* Thermal monitoring */
74 if (!cpu_has(c, X86_FEATURE_ACPI))
75 return; /* -ENODEV */
76
77 /* Clock modulation */
78 if (!cpu_has(c, X86_FEATURE_ACC))
79 return; /* -ENODEV */
80
81 /* first check if its enabled already, in which case there might
82 * be some SMM goo which handles it, so we can't even put a handler
83 * since it might be delivered via SMI already -zwanem.
84 */
85 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
86 h = apic_read(APIC_LVTTHMR);
87 if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
88 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
89 cpu);
90 return; /* -EBUSY */
91 }
92
93 /* check whether a vector already exists, temporarily masked? */
94 if (h & APIC_VECTOR_MASK) {
95 printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already "
96 "installed\n",
97 cpu, (h & APIC_VECTOR_MASK));
98 return; /* -EBUSY */
99 }
100
101 /* The temperature transition interrupt handler setup */
102 h = THERMAL_APIC_VECTOR; /* our delivery vector */
103 h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
104 apic_write_around(APIC_LVTTHMR, h);
105
106 rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
107 wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
108
109 /* ok we're good to go... */
110 vendor_thermal_interrupt = intel_thermal_interrupt;
111
112 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
113 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
114
115 l = apic_read (APIC_LVTTHMR);
116 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
117 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
118
119 /* enable thermal throttle processing */
120 atomic_set(&therm_throt_en, 1);
121 return;
122}
123#endif /* CONFIG_X86_MCE_P4THERMAL */
124
125
126/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
127static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
128{
129 u32 h;
130
131 rdmsr (MSR_IA32_MCG_EAX, r->eax, h);
132 rdmsr (MSR_IA32_MCG_EBX, r->ebx, h);
133 rdmsr (MSR_IA32_MCG_ECX, r->ecx, h);
134 rdmsr (MSR_IA32_MCG_EDX, r->edx, h);
135 rdmsr (MSR_IA32_MCG_ESI, r->esi, h);
136 rdmsr (MSR_IA32_MCG_EDI, r->edi, h);
137 rdmsr (MSR_IA32_MCG_EBP, r->ebp, h);
138 rdmsr (MSR_IA32_MCG_ESP, r->esp, h);
139 rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h);
140 rdmsr (MSR_IA32_MCG_EIP, r->eip, h);
141}
142
143static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
144{
145 int recover=1;
146 u32 alow, ahigh, high, low;
147 u32 mcgstl, mcgsth;
148 int i;
149
150 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
151 if (mcgstl & (1<<0)) /* Recoverable ? */
152 recover=0;
153
154 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
155 smp_processor_id(), mcgsth, mcgstl);
156
157 if (mce_num_extended_msrs > 0) {
158 struct intel_mce_extended_msrs dbg;
159 intel_get_extended_msrs(&dbg);
160 printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n",
161 smp_processor_id(), dbg.eip, dbg.eflags);
162 printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n",
163 dbg.eax, dbg.ebx, dbg.ecx, dbg.edx);
164 printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
165 dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
166 }
167
168 for (i=0; i<nr_mce_banks; i++) {
169 rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
170 if (high & (1<<31)) {
171 if (high & (1<<29))
172 recover |= 1;
173 if (high & (1<<25))
174 recover |= 2;
175 printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
176 high &= ~(1<<31);
177 if (high & (1<<27)) {
178 rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
179 printk ("[%08x%08x]", ahigh, alow);
180 }
181 if (high & (1<<26)) {
182 rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
183 printk (" at %08x%08x", ahigh, alow);
184 }
185 printk ("\n");
186 }
187 }
188
189 if (recover & 2)
190 panic ("CPU context corrupt");
191 if (recover & 1)
192 panic ("Unable to continue");
193
194 printk(KERN_EMERG "Attempting to continue.\n");
195 /*
196 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
197 * recoverable/continuable.This will allow BIOS to look at the MSRs
198 * for errors if the OS could not log the error.
199 */
200 for (i=0; i<nr_mce_banks; i++) {
201 u32 msr;
202 msr = MSR_IA32_MC0_STATUS+i*4;
203 rdmsr (msr, low, high);
204 if (high&(1<<31)) {
205 /* Clear it */
206 wrmsr(msr, 0UL, 0UL);
207 /* Serialize */
208 wmb();
209 add_taint(TAINT_MACHINE_CHECK);
210 }
211 }
212 mcgstl &= ~(1<<2);
213 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
214}
215
216
217void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
218{
219 u32 l, h;
220 int i;
221
222 machine_check_vector = intel_machine_check;
223 wmb();
224
225 printk (KERN_INFO "Intel machine check architecture supported.\n");
226 rdmsr (MSR_IA32_MCG_CAP, l, h);
227 if (l & (1<<8)) /* Control register present ? */
228 wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
229 nr_mce_banks = l & 0xff;
230
231 for (i=0; i<nr_mce_banks; i++) {
232 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
233 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
234 }
235
236 set_in_cr4 (X86_CR4_MCE);
237 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
238 smp_processor_id());
239
240 /* Check for P4/Xeon extended MCE MSRs */
241 rdmsr (MSR_IA32_MCG_CAP, l, h);
242 if (l & (1<<9)) {/* MCG_EXT_P */
243 mce_num_extended_msrs = (l >> 16) & 0xff;
244 printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
245 " available\n",
246 smp_processor_id(), mce_num_extended_msrs);
247
248#ifdef CONFIG_X86_MCE_P4THERMAL
249 /* Check for P4/Xeon Thermal monitor */
250 intel_init_thermal(c);
251#endif
252 }
253}
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c
new file mode 100644
index 000000000000..94bc43d950cf
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p5.c
@@ -0,0 +1,53 @@
1/*
2 * P5 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/interrupt.h>
10#include <linux/smp.h>
11
12#include <asm/processor.h>
13#include <asm/system.h>
14#include <asm/msr.h>
15
16#include "mce.h"
17
18/* Machine check handler for Pentium class Intel */
19static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code)
20{
21 u32 loaddr, hi, lotype;
22 rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
23 rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
24 printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype);
25 if(lotype&(1<<5))
26 printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id());
27 add_taint(TAINT_MACHINE_CHECK);
28}
29
30/* Set up machine check reporting for processors with Intel style MCE */
31void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
32{
33 u32 l, h;
34
35 /*Check for MCE support */
36 if( !cpu_has(c, X86_FEATURE_MCE) )
37 return;
38
39 /* Default P5 to off as its often misconnected */
40 if(mce_disabled != -1)
41 return;
42 machine_check_vector = pentium_machine_check;
43 wmb();
44
45 /* Read registers before enabling */
46 rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
47 rdmsr(MSR_IA32_P5_MC_TYPE, l, h);
48 printk(KERN_INFO "Intel old style machine check architecture supported.\n");
49
50 /* Enable MCE */
51 set_in_cr4(X86_CR4_MCE);
52 printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
53}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
new file mode 100644
index 000000000000..deeae42ce199
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/p6.c
@@ -0,0 +1,119 @@
1/*
2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/interrupt.h>
10#include <linux/smp.h>
11
12#include <asm/processor.h>
13#include <asm/system.h>
14#include <asm/msr.h>
15
16#include "mce.h"
17
18/* Machine Check Handler For PII/PIII */
19static fastcall void intel_machine_check(struct pt_regs * regs, long error_code)
20{
21 int recover=1;
22 u32 alow, ahigh, high, low;
23 u32 mcgstl, mcgsth;
24 int i;
25
26 rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
27 if (mcgstl & (1<<0)) /* Recoverable ? */
28 recover=0;
29
30 printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
31 smp_processor_id(), mcgsth, mcgstl);
32
33 for (i=0; i<nr_mce_banks; i++) {
34 rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high);
35 if (high & (1<<31)) {
36 if (high & (1<<29))
37 recover |= 1;
38 if (high & (1<<25))
39 recover |= 2;
40 printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low);
41 high &= ~(1<<31);
42 if (high & (1<<27)) {
43 rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh);
44 printk ("[%08x%08x]", ahigh, alow);
45 }
46 if (high & (1<<26)) {
47 rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
48 printk (" at %08x%08x", ahigh, alow);
49 }
50 printk ("\n");
51 }
52 }
53
54 if (recover & 2)
55 panic ("CPU context corrupt");
56 if (recover & 1)
57 panic ("Unable to continue");
58
59 printk (KERN_EMERG "Attempting to continue.\n");
60 /*
61 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
62 * recoverable/continuable.This will allow BIOS to look at the MSRs
63 * for errors if the OS could not log the error.
64 */
65 for (i=0; i<nr_mce_banks; i++) {
66 unsigned int msr;
67 msr = MSR_IA32_MC0_STATUS+i*4;
68 rdmsr (msr,low, high);
69 if (high & (1<<31)) {
70 /* Clear it */
71 wrmsr (msr, 0UL, 0UL);
72 /* Serialize */
73 wmb();
74 add_taint(TAINT_MACHINE_CHECK);
75 }
76 }
77 mcgstl &= ~(1<<2);
78 wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
79}
80
81/* Set up machine check reporting for processors with Intel style MCE */
82void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
83{
84 u32 l, h;
85 int i;
86
87 /* Check for MCE support */
88 if (!cpu_has(c, X86_FEATURE_MCE))
89 return;
90
91 /* Check for PPro style MCA */
92 if (!cpu_has(c, X86_FEATURE_MCA))
93 return;
94
95 /* Ok machine check is available */
96 machine_check_vector = intel_machine_check;
97 wmb();
98
99 printk (KERN_INFO "Intel machine check architecture supported.\n");
100 rdmsr (MSR_IA32_MCG_CAP, l, h);
101 if (l & (1<<8)) /* Control register present ? */
102 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
103 nr_mce_banks = l & 0xff;
104
105 /*
106 * Following the example in IA-32 SDM Vol 3:
107 * - MC0_CTL should not be written
108 * - Status registers on all banks should be cleared on reset
109 */
110 for (i=1; i<nr_mce_banks; i++)
111 wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
112
113 for (i=0; i<nr_mce_banks; i++)
114 wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
115
116 set_in_cr4 (X86_CR4_MCE);
117 printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
118 smp_processor_id());
119}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
new file mode 100644
index 000000000000..1203dc5ab87a
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -0,0 +1,186 @@
1/*
2 * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c
3 *
4 * Thermal throttle event support code (such as syslog messaging and rate
5 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
6 * This allows consistent reporting of CPU thermal throttle events.
7 *
8 * Maintains a counter in /sys that keeps track of the number of thermal
9 * events, such that the user knows how bad the thermal problem might be
10 * (since the logging to syslog and mcelog is rate limited).
11 *
12 * Author: Dmitriy Zavin (dmitriyz@google.com)
13 *
14 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
15 * Inspired by Ross Biro's and Al Borchers' counter code.
16 */
17
18#include <linux/percpu.h>
19#include <linux/sysdev.h>
20#include <linux/cpu.h>
21#include <asm/cpu.h>
22#include <linux/notifier.h>
23#include <linux/jiffies.h>
24#include <asm/therm_throt.h>
25
26/* How long to wait between reporting thermal events */
27#define CHECK_INTERVAL (300 * HZ)
28
29static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
30static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
31atomic_t therm_throt_en = ATOMIC_INIT(0);
32
#ifdef CONFIG_SYSFS
/* Declare a read-only (0444) sysdev attribute using the show function of the same name. */
#define define_therm_throt_sysdev_one_ro(_name)				\
	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)

/*
 * Generate the show function for the per-CPU counter
 * thermal_throttle_<name>: prints the value for dev->id when that CPU is
 * online, and yields 0 bytes otherwise.
 */
#define define_therm_throt_sysdev_show_func(name)			\
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
					      char *buf)		\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_throttle_##name, cpu));	\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_sysdev_show_func(count);
define_therm_throt_sysdev_one_ro(count);

/* Attributes published in the "thermal_throttle" group of a CPU device. */
static struct attribute *thermal_throttle_attrs[] = {
	&attr_count.attr,
	NULL
};

static struct attribute_group thermal_throttle_attr_group = {
	.name	= "thermal_throttle",
	.attrs	= thermal_throttle_attrs,
};
#endif /* CONFIG_SYSFS */
68
69/***
70 * therm_throt_process - Process thermal throttling event from interrupt
71 * @curr: Whether the condition is current or not (boolean), since the
72 * thermal interrupt normally gets called both when the thermal
73 * event begins and once the event has ended.
74 *
75 * This function is called by the thermal interrupt after the
76 * IRQ has been acknowledged.
77 *
78 * It will take care of rate limiting and printing messages to the syslog.
79 *
80 * Returns: 0 : Event should NOT be further logged, i.e. still in
81 * "timeout" from previous log message.
82 * 1 : Event should be logged further, and a message has been
83 * printed to the syslog.
84 */
85int therm_throt_process(int curr)
86{
87 unsigned int cpu = smp_processor_id();
88 __u64 tmp_jiffs = get_jiffies_64();
89
90 if (curr)
91 __get_cpu_var(thermal_throttle_count)++;
92
93 if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
94 return 0;
95
96 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
97
98 /* if we just entered the thermal event */
99 if (curr) {
100 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
101 "cpu clock throttled (total events = %lu)\n", cpu,
102 __get_cpu_var(thermal_throttle_count));
103
104 add_taint(TAINT_MACHINE_CHECK);
105 } else {
106 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
107 }
108
109 return 1;
110}
111
112#ifdef CONFIG_SYSFS
113/* Add/Remove thermal_throttle interface for CPU device */
114static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev)
115{
116 return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
117}
118
119static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev)
120{
121 return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
122}
123
124/* Mutex protecting device creation against CPU hotplug */
125static DEFINE_MUTEX(therm_cpu_lock);
126
127/* Get notified when a cpu comes on/off. Be hotplug friendly. */
128static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
129 unsigned long action,
130 void *hcpu)
131{
132 unsigned int cpu = (unsigned long)hcpu;
133 struct sys_device *sys_dev;
134 int err;
135
136 sys_dev = get_cpu_sysdev(cpu);
137 switch (action) {
138 case CPU_ONLINE:
139 case CPU_ONLINE_FROZEN:
140 mutex_lock(&therm_cpu_lock);
141 err = thermal_throttle_add_dev(sys_dev);
142 mutex_unlock(&therm_cpu_lock);
143 WARN_ON(err);
144 break;
145 case CPU_DEAD:
146 case CPU_DEAD_FROZEN:
147 mutex_lock(&therm_cpu_lock);
148 thermal_throttle_remove_dev(sys_dev);
149 mutex_unlock(&therm_cpu_lock);
150 break;
151 }
152 return NOTIFY_OK;
153}
154
155static struct notifier_block thermal_throttle_cpu_notifier =
156{
157 .notifier_call = thermal_throttle_cpu_callback,
158};
159
160static __init int thermal_throttle_init_device(void)
161{
162 unsigned int cpu = 0;
163 int err;
164
165 if (!atomic_read(&therm_throt_en))
166 return 0;
167
168 register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
169
170#ifdef CONFIG_HOTPLUG_CPU
171 mutex_lock(&therm_cpu_lock);
172#endif
173 /* connect live CPUs to sysfs */
174 for_each_online_cpu(cpu) {
175 err = thermal_throttle_add_dev(get_cpu_sysdev(cpu));
176 WARN_ON(err);
177 }
178#ifdef CONFIG_HOTPLUG_CPU
179 mutex_unlock(&therm_cpu_lock);
180#endif
181
182 return 0;
183}
184
185device_initcall(thermal_throttle_init_device);
186#endif /* CONFIG_SYSFS */
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c
new file mode 100644
index 000000000000..9e424b6c293d
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/winchip.c
@@ -0,0 +1,36 @@
1/*
2 * IDT Winchip specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@redhat.com>
4 */
5
6#include <linux/init.h>
7#include <linux/types.h>
8#include <linux/kernel.h>
9#include <linux/interrupt.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/msr.h>
14
15#include "mce.h"
16
17/* Machine check handler for WinChip C6 */
18static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code)
19{
20 printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
21 add_taint(TAINT_MACHINE_CHECK);
22}
23
24/* Set up machine check reporting on the Winchip C6 series */
25void winchip_mcheck_init(struct cpuinfo_x86 *c)
26{
27 u32 lo, hi;
28 machine_check_vector = winchip_machine_check;
29 wmb();
30 rdmsr(MSR_IDT_FCR1, lo, hi);
31 lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */
32 lo&= ~(1<<4); /* Enable MCE */
33 wrmsr(MSR_IDT_FCR1, lo, hi);
34 set_in_cr4(X86_CR4_MCE);
35 printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
36}