diff options
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/k7.c | 102 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 90 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.h | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/non-fatal.c | 91 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p4.c | 253 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p5.c | 53 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p6.c | 119 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 186 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/winchip.c | 36 |
10 files changed, 946 insertions, 0 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile new file mode 100644 index 00000000000..f1ebe1c1c17 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o | ||
2 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c new file mode 100644 index 00000000000..eef63e3630c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -0,0 +1,102 @@ | |||
1 | /* | ||
2 | * Athlon/Hammer specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Dave Jones <davej@codemonkey.org.uk> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/smp.h> | ||
11 | |||
12 | #include <asm/processor.h> | ||
13 | #include <asm/system.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | #include "mce.h" | ||
17 | |||
18 | /* Machine Check Handler For AMD Athlon/Duron */ | ||
19 | static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) | ||
20 | { | ||
21 | int recover=1; | ||
22 | u32 alow, ahigh, high, low; | ||
23 | u32 mcgstl, mcgsth; | ||
24 | int i; | ||
25 | |||
26 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
27 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
28 | recover=0; | ||
29 | |||
30 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
31 | smp_processor_id(), mcgsth, mcgstl); | ||
32 | |||
33 | for (i=1; i<nr_mce_banks; i++) { | ||
34 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | ||
35 | if (high&(1<<31)) { | ||
36 | if (high & (1<<29)) | ||
37 | recover |= 1; | ||
38 | if (high & (1<<25)) | ||
39 | recover |= 2; | ||
40 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
41 | high &= ~(1<<31); | ||
42 | if (high & (1<<27)) { | ||
43 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
44 | printk ("[%08x%08x]", ahigh, alow); | ||
45 | } | ||
46 | if (high & (1<<26)) { | ||
47 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
48 | printk (" at %08x%08x", ahigh, alow); | ||
49 | } | ||
50 | printk ("\n"); | ||
51 | /* Clear it */ | ||
52 | wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
53 | /* Serialize */ | ||
54 | wmb(); | ||
55 | add_taint(TAINT_MACHINE_CHECK); | ||
56 | } | ||
57 | } | ||
58 | |||
59 | if (recover&2) | ||
60 | panic ("CPU context corrupt"); | ||
61 | if (recover&1) | ||
62 | panic ("Unable to continue"); | ||
63 | printk (KERN_EMERG "Attempting to continue.\n"); | ||
64 | mcgstl &= ~(1<<2); | ||
65 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | ||
66 | } | ||
67 | |||
68 | |||
69 | /* AMD K7 machine check is Intel like */ | ||
70 | void amd_mcheck_init(struct cpuinfo_x86 *c) | ||
71 | { | ||
72 | u32 l, h; | ||
73 | int i; | ||
74 | |||
75 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
76 | return; | ||
77 | |||
78 | machine_check_vector = k7_machine_check; | ||
79 | wmb(); | ||
80 | |||
81 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | ||
82 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
83 | if (l & (1<<8)) /* Control register present ? */ | ||
84 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
85 | nr_mce_banks = l & 0xff; | ||
86 | |||
87 | /* Clear status for MC index 0 separately, we don't touch CTL, | ||
88 | * as some K7 Athlons cause spurious MCEs when its enabled. */ | ||
89 | if (boot_cpu_data.x86 == 6) { | ||
90 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); | ||
91 | i = 1; | ||
92 | } else | ||
93 | i = 0; | ||
94 | for (; i<nr_mce_banks; i++) { | ||
95 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
96 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
97 | } | ||
98 | |||
99 | set_in_cr4 (X86_CR4_MCE); | ||
100 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
101 | smp_processor_id()); | ||
102 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c new file mode 100644 index 00000000000..34c781eddee --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * mce.c - x86 Machine Check Exception Reporting | ||
3 | * (c) 2002 Alan Cox <alan@redhat.com>, Dave Jones <davej@codemonkey.org.uk> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/smp.h> | ||
11 | #include <linux/thread_info.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/mce.h> | ||
16 | |||
17 | #include "mce.h" | ||
18 | |||
/*
 * Machine-check policy: 0 by default, 1 once "nomce" was given (mcheck_init()
 * then does nothing), -1 once "mce" was given.
 */
int mce_disabled = 0;

/* Number of machine-check banks; filled in by the vendor init routines. */
int nr_mce_banks;

EXPORT_SYMBOL_GPL(nr_mce_banks);	/* non-fatal.o */
23 | |||
24 | /* Handle unconfigured int18 (should never happen) */ | ||
25 | static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_code) | ||
26 | { | ||
27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); | ||
28 | } | ||
29 | |||
30 | /* Call the installed machine check handler for this CPU setup. */ | ||
31 | void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; | ||
32 | |||
33 | /* This has to be run for each processor */ | ||
34 | void mcheck_init(struct cpuinfo_x86 *c) | ||
35 | { | ||
36 | if (mce_disabled==1) | ||
37 | return; | ||
38 | |||
39 | switch (c->x86_vendor) { | ||
40 | case X86_VENDOR_AMD: | ||
41 | amd_mcheck_init(c); | ||
42 | break; | ||
43 | |||
44 | case X86_VENDOR_INTEL: | ||
45 | if (c->x86==5) | ||
46 | intel_p5_mcheck_init(c); | ||
47 | if (c->x86==6) | ||
48 | intel_p6_mcheck_init(c); | ||
49 | if (c->x86==15) | ||
50 | intel_p4_mcheck_init(c); | ||
51 | break; | ||
52 | |||
53 | case X86_VENDOR_CENTAUR: | ||
54 | if (c->x86==5) | ||
55 | winchip_mcheck_init(c); | ||
56 | break; | ||
57 | |||
58 | default: | ||
59 | break; | ||
60 | } | ||
61 | } | ||
62 | |||
63 | static unsigned long old_cr4 __initdata; | ||
64 | |||
65 | void __init stop_mce(void) | ||
66 | { | ||
67 | old_cr4 = read_cr4(); | ||
68 | clear_in_cr4(X86_CR4_MCE); | ||
69 | } | ||
70 | |||
71 | void __init restart_mce(void) | ||
72 | { | ||
73 | if (old_cr4 & X86_CR4_MCE) | ||
74 | set_in_cr4(X86_CR4_MCE); | ||
75 | } | ||
76 | |||
77 | static int __init mcheck_disable(char *str) | ||
78 | { | ||
79 | mce_disabled = 1; | ||
80 | return 1; | ||
81 | } | ||
82 | |||
83 | static int __init mcheck_enable(char *str) | ||
84 | { | ||
85 | mce_disabled = -1; | ||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | __setup("nomce", mcheck_disable); | ||
90 | __setup("mce", mcheck_enable); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h new file mode 100644 index 00000000000..81fb6e2d35f --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/mce.h> | ||
3 | |||
4 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
9 | |||
10 | /* Call the installed machine check handler for this CPU setup. */ | ||
11 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
12 | |||
13 | extern int nr_mce_banks; | ||
14 | |||
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c new file mode 100644 index 00000000000..bf39409b383 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -0,0 +1,91 @@ | |||
1 | /* | ||
2 | * Non Fatal Machine Check Exception Reporting | ||
3 | * | ||
4 | * (C) Copyright 2002 Dave Jones. <davej@codemonkey.org.uk> | ||
5 | * | ||
6 | * This file contains routines to check for non-fatal MCEs every 15s | ||
7 | * | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/jiffies.h> | ||
14 | #include <linux/workqueue.h> | ||
15 | #include <linux/interrupt.h> | ||
16 | #include <linux/smp.h> | ||
17 | #include <linux/module.h> | ||
18 | |||
19 | #include <asm/processor.h> | ||
20 | #include <asm/system.h> | ||
21 | #include <asm/msr.h> | ||
22 | |||
23 | #include "mce.h" | ||
24 | |||
/* First bank polled by mce_checkregs(); chosen at init time. */
static int firstbank;

/*
 * Polling period in jiffies (15 seconds).  Parenthesized so the macro
 * expands as a single value in any expression, e.g. "x / MCE_RATE".
 */
#define MCE_RATE	(15*HZ)
28 | |||
29 | static void mce_checkregs (void *info) | ||
30 | { | ||
31 | u32 low, high; | ||
32 | int i; | ||
33 | |||
34 | for (i=firstbank; i<nr_mce_banks; i++) { | ||
35 | rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high); | ||
36 | |||
37 | if (high & (1<<31)) { | ||
38 | printk(KERN_INFO "MCE: The hardware reports a non " | ||
39 | "fatal, correctable incident occurred on " | ||
40 | "CPU %d.\n", | ||
41 | smp_processor_id()); | ||
42 | printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | ||
43 | |||
44 | /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */ | ||
45 | wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
46 | |||
47 | /* Serialize */ | ||
48 | wmb(); | ||
49 | add_taint(TAINT_MACHINE_CHECK); | ||
50 | } | ||
51 | } | ||
52 | } | ||
53 | |||
54 | static void mce_work_fn(struct work_struct *work); | ||
55 | static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); | ||
56 | |||
57 | static void mce_work_fn(struct work_struct *work) | ||
58 | { | ||
59 | on_each_cpu(mce_checkregs, NULL, 1, 1); | ||
60 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
61 | } | ||
62 | |||
63 | static int __init init_nonfatal_mce_checker(void) | ||
64 | { | ||
65 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
66 | |||
67 | /* Check for MCE support */ | ||
68 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
69 | return -ENODEV; | ||
70 | |||
71 | /* Check for PPro style MCA */ | ||
72 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
73 | return -ENODEV; | ||
74 | |||
75 | /* Some Athlons misbehave when we frob bank 0 */ | ||
76 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
77 | boot_cpu_data.x86 == 6) | ||
78 | firstbank = 1; | ||
79 | else | ||
80 | firstbank = 0; | ||
81 | |||
82 | /* | ||
83 | * Check for non-fatal errors every MCE_RATE s | ||
84 | */ | ||
85 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
86 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | ||
87 | return 0; | ||
88 | } | ||
89 | module_init(init_nonfatal_mce_checker); | ||
90 | |||
91 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c new file mode 100644 index 00000000000..1509edfb231 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -0,0 +1,253 @@ | |||
1 | /* | ||
2 | * P4 specific Machine Check Exception Reporting | ||
3 | */ | ||
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/interrupt.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/msr.h> | ||
14 | #include <asm/apic.h> | ||
15 | |||
16 | #include <asm/therm_throt.h> | ||
17 | |||
18 | #include "mce.h" | ||
19 | |||
20 | /* as supported by the P4/Xeon family */ | ||
21 | struct intel_mce_extended_msrs { | ||
22 | u32 eax; | ||
23 | u32 ebx; | ||
24 | u32 ecx; | ||
25 | u32 edx; | ||
26 | u32 esi; | ||
27 | u32 edi; | ||
28 | u32 ebp; | ||
29 | u32 esp; | ||
30 | u32 eflags; | ||
31 | u32 eip; | ||
32 | /* u32 *reserved[]; */ | ||
33 | }; | ||
34 | |||
35 | static int mce_num_extended_msrs = 0; | ||
36 | |||
37 | |||
38 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | __u64 msr_val; | ||
50 | |||
51 | ack_APIC_irq(); | ||
52 | |||
53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
54 | therm_throt_process(msr_val & 0x1); | ||
55 | } | ||
56 | |||
57 | /* Thermal interrupt handler for this CPU setup */ | ||
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | ||
59 | |||
60 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | ||
61 | { | ||
62 | irq_enter(); | ||
63 | vendor_thermal_interrupt(regs); | ||
64 | irq_exit(); | ||
65 | } | ||
66 | |||
67 | /* P4/Xeon Thermal regulation detect and init */ | ||
68 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
69 | { | ||
70 | u32 l, h; | ||
71 | unsigned int cpu = smp_processor_id(); | ||
72 | |||
73 | /* Thermal monitoring */ | ||
74 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
75 | return; /* -ENODEV */ | ||
76 | |||
77 | /* Clock modulation */ | ||
78 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
79 | return; /* -ENODEV */ | ||
80 | |||
81 | /* first check if its enabled already, in which case there might | ||
82 | * be some SMM goo which handles it, so we can't even put a handler | ||
83 | * since it might be delivered via SMI already -zwanem. | ||
84 | */ | ||
85 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | ||
86 | h = apic_read(APIC_LVTTHMR); | ||
87 | if ((l & (1<<3)) && (h & APIC_DM_SMI)) { | ||
88 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | ||
89 | cpu); | ||
90 | return; /* -EBUSY */ | ||
91 | } | ||
92 | |||
93 | /* check whether a vector already exists, temporarily masked? */ | ||
94 | if (h & APIC_VECTOR_MASK) { | ||
95 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | ||
96 | "installed\n", | ||
97 | cpu, (h & APIC_VECTOR_MASK)); | ||
98 | return; /* -EBUSY */ | ||
99 | } | ||
100 | |||
101 | /* The temperature transition interrupt handler setup */ | ||
102 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | ||
103 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | ||
104 | apic_write_around(APIC_LVTTHMR, h); | ||
105 | |||
106 | rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); | ||
107 | wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | ||
108 | |||
109 | /* ok we're good to go... */ | ||
110 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
111 | |||
112 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | ||
113 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); | ||
114 | |||
115 | l = apic_read (APIC_LVTTHMR); | ||
116 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
117 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | ||
118 | |||
119 | /* enable thermal throttle processing */ | ||
120 | atomic_set(&therm_throt_en, 1); | ||
121 | return; | ||
122 | } | ||
123 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
124 | |||
125 | |||
126 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | ||
127 | static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | ||
128 | { | ||
129 | u32 h; | ||
130 | |||
131 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); | ||
132 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); | ||
133 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); | ||
134 | rdmsr (MSR_IA32_MCG_EDX, r->edx, h); | ||
135 | rdmsr (MSR_IA32_MCG_ESI, r->esi, h); | ||
136 | rdmsr (MSR_IA32_MCG_EDI, r->edi, h); | ||
137 | rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); | ||
138 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); | ||
139 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); | ||
140 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | ||
141 | } | ||
142 | |||
143 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | ||
144 | { | ||
145 | int recover=1; | ||
146 | u32 alow, ahigh, high, low; | ||
147 | u32 mcgstl, mcgsth; | ||
148 | int i; | ||
149 | |||
150 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
151 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
152 | recover=0; | ||
153 | |||
154 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
155 | smp_processor_id(), mcgsth, mcgstl); | ||
156 | |||
157 | if (mce_num_extended_msrs > 0) { | ||
158 | struct intel_mce_extended_msrs dbg; | ||
159 | intel_get_extended_msrs(&dbg); | ||
160 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", | ||
161 | smp_processor_id(), dbg.eip, dbg.eflags); | ||
162 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", | ||
163 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); | ||
164 | printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | ||
165 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | ||
166 | } | ||
167 | |||
168 | for (i=0; i<nr_mce_banks; i++) { | ||
169 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | ||
170 | if (high & (1<<31)) { | ||
171 | if (high & (1<<29)) | ||
172 | recover |= 1; | ||
173 | if (high & (1<<25)) | ||
174 | recover |= 2; | ||
175 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
176 | high &= ~(1<<31); | ||
177 | if (high & (1<<27)) { | ||
178 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
179 | printk ("[%08x%08x]", ahigh, alow); | ||
180 | } | ||
181 | if (high & (1<<26)) { | ||
182 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
183 | printk (" at %08x%08x", ahigh, alow); | ||
184 | } | ||
185 | printk ("\n"); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | if (recover & 2) | ||
190 | panic ("CPU context corrupt"); | ||
191 | if (recover & 1) | ||
192 | panic ("Unable to continue"); | ||
193 | |||
194 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
195 | /* | ||
196 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
197 | * recoverable/continuable.This will allow BIOS to look at the MSRs | ||
198 | * for errors if the OS could not log the error. | ||
199 | */ | ||
200 | for (i=0; i<nr_mce_banks; i++) { | ||
201 | u32 msr; | ||
202 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
203 | rdmsr (msr, low, high); | ||
204 | if (high&(1<<31)) { | ||
205 | /* Clear it */ | ||
206 | wrmsr(msr, 0UL, 0UL); | ||
207 | /* Serialize */ | ||
208 | wmb(); | ||
209 | add_taint(TAINT_MACHINE_CHECK); | ||
210 | } | ||
211 | } | ||
212 | mcgstl &= ~(1<<2); | ||
213 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | ||
214 | } | ||
215 | |||
216 | |||
217 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | ||
218 | { | ||
219 | u32 l, h; | ||
220 | int i; | ||
221 | |||
222 | machine_check_vector = intel_machine_check; | ||
223 | wmb(); | ||
224 | |||
225 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | ||
226 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
227 | if (l & (1<<8)) /* Control register present ? */ | ||
228 | wrmsr (MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
229 | nr_mce_banks = l & 0xff; | ||
230 | |||
231 | for (i=0; i<nr_mce_banks; i++) { | ||
232 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
233 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
234 | } | ||
235 | |||
236 | set_in_cr4 (X86_CR4_MCE); | ||
237 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
238 | smp_processor_id()); | ||
239 | |||
240 | /* Check for P4/Xeon extended MCE MSRs */ | ||
241 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
242 | if (l & (1<<9)) {/* MCG_EXT_P */ | ||
243 | mce_num_extended_msrs = (l >> 16) & 0xff; | ||
244 | printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | ||
245 | " available\n", | ||
246 | smp_processor_id(), mce_num_extended_msrs); | ||
247 | |||
248 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
249 | /* Check for P4/Xeon Thermal monitor */ | ||
250 | intel_init_thermal(c); | ||
251 | #endif | ||
252 | } | ||
253 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c new file mode 100644 index 00000000000..94bc43d950c --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -0,0 +1,53 @@ | |||
1 | /* | ||
2 | * P5 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/smp.h> | ||
11 | |||
12 | #include <asm/processor.h> | ||
13 | #include <asm/system.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | #include "mce.h" | ||
17 | |||
18 | /* Machine check handler for Pentium class Intel */ | ||
19 | static fastcall void pentium_machine_check(struct pt_regs * regs, long error_code) | ||
20 | { | ||
21 | u32 loaddr, hi, lotype; | ||
22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | ||
23 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | ||
24 | printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); | ||
25 | if(lotype&(1<<5)) | ||
26 | printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); | ||
27 | add_taint(TAINT_MACHINE_CHECK); | ||
28 | } | ||
29 | |||
30 | /* Set up machine check reporting for processors with Intel style MCE */ | ||
31 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | ||
32 | { | ||
33 | u32 l, h; | ||
34 | |||
35 | /*Check for MCE support */ | ||
36 | if( !cpu_has(c, X86_FEATURE_MCE) ) | ||
37 | return; | ||
38 | |||
39 | /* Default P5 to off as its often misconnected */ | ||
40 | if(mce_disabled != -1) | ||
41 | return; | ||
42 | machine_check_vector = pentium_machine_check; | ||
43 | wmb(); | ||
44 | |||
45 | /* Read registers before enabling */ | ||
46 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); | ||
47 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); | ||
48 | printk(KERN_INFO "Intel old style machine check architecture supported.\n"); | ||
49 | |||
50 | /* Enable MCE */ | ||
51 | set_in_cr4(X86_CR4_MCE); | ||
52 | printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); | ||
53 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c new file mode 100644 index 00000000000..deeae42ce19 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * P6 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/smp.h> | ||
11 | |||
12 | #include <asm/processor.h> | ||
13 | #include <asm/system.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | #include "mce.h" | ||
17 | |||
18 | /* Machine Check Handler For PII/PIII */ | ||
19 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | ||
20 | { | ||
21 | int recover=1; | ||
22 | u32 alow, ahigh, high, low; | ||
23 | u32 mcgstl, mcgsth; | ||
24 | int i; | ||
25 | |||
26 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
27 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
28 | recover=0; | ||
29 | |||
30 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
31 | smp_processor_id(), mcgsth, mcgstl); | ||
32 | |||
33 | for (i=0; i<nr_mce_banks; i++) { | ||
34 | rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); | ||
35 | if (high & (1<<31)) { | ||
36 | if (high & (1<<29)) | ||
37 | recover |= 1; | ||
38 | if (high & (1<<25)) | ||
39 | recover |= 2; | ||
40 | printk (KERN_EMERG "Bank %d: %08x%08x", i, high, low); | ||
41 | high &= ~(1<<31); | ||
42 | if (high & (1<<27)) { | ||
43 | rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
44 | printk ("[%08x%08x]", ahigh, alow); | ||
45 | } | ||
46 | if (high & (1<<26)) { | ||
47 | rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
48 | printk (" at %08x%08x", ahigh, alow); | ||
49 | } | ||
50 | printk ("\n"); | ||
51 | } | ||
52 | } | ||
53 | |||
54 | if (recover & 2) | ||
55 | panic ("CPU context corrupt"); | ||
56 | if (recover & 1) | ||
57 | panic ("Unable to continue"); | ||
58 | |||
59 | printk (KERN_EMERG "Attempting to continue.\n"); | ||
60 | /* | ||
61 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
62 | * recoverable/continuable.This will allow BIOS to look at the MSRs | ||
63 | * for errors if the OS could not log the error. | ||
64 | */ | ||
65 | for (i=0; i<nr_mce_banks; i++) { | ||
66 | unsigned int msr; | ||
67 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
68 | rdmsr (msr,low, high); | ||
69 | if (high & (1<<31)) { | ||
70 | /* Clear it */ | ||
71 | wrmsr (msr, 0UL, 0UL); | ||
72 | /* Serialize */ | ||
73 | wmb(); | ||
74 | add_taint(TAINT_MACHINE_CHECK); | ||
75 | } | ||
76 | } | ||
77 | mcgstl &= ~(1<<2); | ||
78 | wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth); | ||
79 | } | ||
80 | |||
81 | /* Set up machine check reporting for processors with Intel style MCE */ | ||
82 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | ||
83 | { | ||
84 | u32 l, h; | ||
85 | int i; | ||
86 | |||
87 | /* Check for MCE support */ | ||
88 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
89 | return; | ||
90 | |||
91 | /* Check for PPro style MCA */ | ||
92 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
93 | return; | ||
94 | |||
95 | /* Ok machine check is available */ | ||
96 | machine_check_vector = intel_machine_check; | ||
97 | wmb(); | ||
98 | |||
99 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | ||
100 | rdmsr (MSR_IA32_MCG_CAP, l, h); | ||
101 | if (l & (1<<8)) /* Control register present ? */ | ||
102 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
103 | nr_mce_banks = l & 0xff; | ||
104 | |||
105 | /* | ||
106 | * Following the example in IA-32 SDM Vol 3: | ||
107 | * - MC0_CTL should not be written | ||
108 | * - Status registers on all banks should be cleared on reset | ||
109 | */ | ||
110 | for (i=1; i<nr_mce_banks; i++) | ||
111 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
112 | |||
113 | for (i=0; i<nr_mce_banks; i++) | ||
114 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
115 | |||
116 | set_in_cr4 (X86_CR4_MCE); | ||
117 | printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
118 | smp_processor_id()); | ||
119 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c new file mode 100644 index 00000000000..1203dc5ab87 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -0,0 +1,186 @@ | |||
1 | /* | ||
2 | * linux/arch/x86/kernel/cpu/mcheck/therm_throt.c | ||
3 | * | ||
4 | * Thermal throttle event support code (such as syslog messaging and rate | ||
5 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | ||
6 | * This allows consistent reporting of CPU thermal throttle events. | ||
7 | * | ||
8 | * Maintains a counter in /sys that keeps track of the number of thermal | ||
9 | * events, such that the user knows how bad the thermal problem might be | ||
10 | * (since the logging to syslog and mcelog is rate limited). | ||
11 | * | ||
12 | * Author: Dmitriy Zavin (dmitriyz@google.com) | ||
13 | * | ||
14 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | ||
15 | * Inspired by Ross Biro's and Al Borchers' counter code. | ||
16 | */ | ||
17 | |||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/sysdev.h> | ||
20 | #include <linux/cpu.h> | ||
21 | #include <asm/cpu.h> | ||
22 | #include <linux/notifier.h> | ||
23 | #include <linux/jiffies.h> | ||
24 | #include <asm/therm_throt.h> | ||
25 | |||
26 | /* How long to wait between reporting thermal events */ | ||
27 | #define CHECK_INTERVAL (300 * HZ) | ||
28 | |||
29 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | ||
30 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | ||
31 | atomic_t therm_throt_en = ATOMIC_INIT(0); | ||
32 | |||
#ifdef CONFIG_SYSFS
/* Declare a read-only (0444) sysdev attribute backed by the show function. */
#define define_therm_throt_sysdev_one_ro(_name)				\
	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)

/*
 * Generate the show function for the per-CPU counter
 * thermal_throttle_<name>: prints the value for the device's CPU, or
 * nothing (returns 0) when that CPU is not online.
 */
#define define_therm_throt_sysdev_show_func(name)			\
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
					      char *buf)		\
{									\
	unsigned int cpu = dev->id;					\
	ssize_t ret = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu))						\
		ret = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_throttle_##name, cpu));	\
	preempt_enable();						\
									\
	return ret;							\
}

define_therm_throt_sysdev_show_func(count);
define_therm_throt_sysdev_one_ro(count);

/* Attributes that make up the "thermal_throttle" sysfs group. */
static struct attribute *thermal_throttle_attrs[] = {
	&attr_count.attr,
	NULL
};

static struct attribute_group thermal_throttle_attr_group = {
	.attrs	= thermal_throttle_attrs,
	.name	= "thermal_throttle"
};
#endif /* CONFIG_SYSFS */
68 | |||
69 | /*** | ||
70 | * therm_throt_process - Process thermal throttling event from interrupt | ||
71 | * @curr: Whether the condition is current or not (boolean), since the | ||
72 | * thermal interrupt normally gets called both when the thermal | ||
73 | * event begins and once the event has ended. | ||
74 | * | ||
75 | * This function is called by the thermal interrupt after the | ||
76 | * IRQ has been acknowledged. | ||
77 | * | ||
78 | * It will take care of rate limiting and printing messages to the syslog. | ||
79 | * | ||
80 | * Returns: 0 : Event should NOT be further logged, i.e. still in | ||
81 | * "timeout" from previous log message. | ||
82 | * 1 : Event should be logged further, and a message has been | ||
83 | * printed to the syslog. | ||
84 | */ | ||
85 | int therm_throt_process(int curr) | ||
86 | { | ||
87 | unsigned int cpu = smp_processor_id(); | ||
88 | __u64 tmp_jiffs = get_jiffies_64(); | ||
89 | |||
90 | if (curr) | ||
91 | __get_cpu_var(thermal_throttle_count)++; | ||
92 | |||
93 | if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) | ||
94 | return 0; | ||
95 | |||
96 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; | ||
97 | |||
98 | /* if we just entered the thermal event */ | ||
99 | if (curr) { | ||
100 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " | ||
101 | "cpu clock throttled (total events = %lu)\n", cpu, | ||
102 | __get_cpu_var(thermal_throttle_count)); | ||
103 | |||
104 | add_taint(TAINT_MACHINE_CHECK); | ||
105 | } else { | ||
106 | printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); | ||
107 | } | ||
108 | |||
109 | return 1; | ||
110 | } | ||
111 | |||
112 | #ifdef CONFIG_SYSFS | ||
113 | /* Add/Remove thermal_throttle interface for CPU device */ | ||
114 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | ||
115 | { | ||
116 | return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | ||
117 | } | ||
118 | |||
119 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | ||
120 | { | ||
121 | return sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | ||
122 | } | ||
123 | |||
124 | /* Mutex protecting device creation against CPU hotplug */ | ||
125 | static DEFINE_MUTEX(therm_cpu_lock); | ||
126 | |||
127 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
128 | static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | ||
129 | unsigned long action, | ||
130 | void *hcpu) | ||
131 | { | ||
132 | unsigned int cpu = (unsigned long)hcpu; | ||
133 | struct sys_device *sys_dev; | ||
134 | int err; | ||
135 | |||
136 | sys_dev = get_cpu_sysdev(cpu); | ||
137 | switch (action) { | ||
138 | case CPU_ONLINE: | ||
139 | case CPU_ONLINE_FROZEN: | ||
140 | mutex_lock(&therm_cpu_lock); | ||
141 | err = thermal_throttle_add_dev(sys_dev); | ||
142 | mutex_unlock(&therm_cpu_lock); | ||
143 | WARN_ON(err); | ||
144 | break; | ||
145 | case CPU_DEAD: | ||
146 | case CPU_DEAD_FROZEN: | ||
147 | mutex_lock(&therm_cpu_lock); | ||
148 | thermal_throttle_remove_dev(sys_dev); | ||
149 | mutex_unlock(&therm_cpu_lock); | ||
150 | break; | ||
151 | } | ||
152 | return NOTIFY_OK; | ||
153 | } | ||
154 | |||
155 | static struct notifier_block thermal_throttle_cpu_notifier = | ||
156 | { | ||
157 | .notifier_call = thermal_throttle_cpu_callback, | ||
158 | }; | ||
159 | |||
160 | static __init int thermal_throttle_init_device(void) | ||
161 | { | ||
162 | unsigned int cpu = 0; | ||
163 | int err; | ||
164 | |||
165 | if (!atomic_read(&therm_throt_en)) | ||
166 | return 0; | ||
167 | |||
168 | register_hotcpu_notifier(&thermal_throttle_cpu_notifier); | ||
169 | |||
170 | #ifdef CONFIG_HOTPLUG_CPU | ||
171 | mutex_lock(&therm_cpu_lock); | ||
172 | #endif | ||
173 | /* connect live CPUs to sysfs */ | ||
174 | for_each_online_cpu(cpu) { | ||
175 | err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | ||
176 | WARN_ON(err); | ||
177 | } | ||
178 | #ifdef CONFIG_HOTPLUG_CPU | ||
179 | mutex_unlock(&therm_cpu_lock); | ||
180 | #endif | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | device_initcall(thermal_throttle_init_device); | ||
186 | #endif /* CONFIG_SYSFS */ | ||
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c new file mode 100644 index 00000000000..9e424b6c293 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -0,0 +1,36 @@ | |||
1 | /* | ||
2 | * IDT Winchip specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/msr.h> | ||
14 | |||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine check handler for WinChip C6 */ | ||
18 | static fastcall void winchip_machine_check(struct pt_regs * regs, long error_code) | ||
19 | { | ||
20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | ||
21 | add_taint(TAINT_MACHINE_CHECK); | ||
22 | } | ||
23 | |||
24 | /* Set up machine check reporting on the Winchip C6 series */ | ||
25 | void winchip_mcheck_init(struct cpuinfo_x86 *c) | ||
26 | { | ||
27 | u32 lo, hi; | ||
28 | machine_check_vector = winchip_machine_check; | ||
29 | wmb(); | ||
30 | rdmsr(MSR_IDT_FCR1, lo, hi); | ||
31 | lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ | ||
32 | lo&= ~(1<<4); /* Enable MCE */ | ||
33 | wrmsr(MSR_IDT_FCR1, lo, hi); | ||
34 | set_in_cr4(X86_CR4_MCE); | ||
35 | printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); | ||
36 | } | ||