Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile                                                           |   13
-rw-r--r--  arch/x86/kernel/cpu/mcheck/k7.c                                                               |   45
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c                                                       |  127
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h                                                     |   15
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-severity.c                                                     |  218
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c                                                              | 2049
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.h                                                              |   14
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_32.c                                                           |   76
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_64.c                                                           | 1187
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd_64.c)   |  203
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c (renamed from arch/x86/kernel/cpu/mcheck/mce_intel_64.c) |  84
-rw-r--r--  arch/x86/kernel/cpu/mcheck/non-fatal.c                                                        |   60
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p4.c                                                               |  112
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p5.c                                                               |   51
-rw-r--r--  arch/x86/kernel/cpu/mcheck/p6.c                                                               |   29
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c                                                      |  177
-rw-r--r--  arch/x86/kernel/cpu/mcheck/threshold.c                                                        |    2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/winchip.c                                                          |   20
18 files changed, 2798 insertions, 1684 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index b2f89829bbe8..188a1ca5ad2b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,7 +1,12 @@
-obj-y = mce_$(BITS).o therm_throt.o
+obj-y = mce.o
 
-obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o
-obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o
-obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o
+obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
+obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
+obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
+obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
 obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
 obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
+obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
+
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
index dd3af6e7b39a..b945d5dbc609 100644
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ b/arch/x86/kernel/cpu/mcheck/k7.c
@@ -2,25 +2,23 @@
  * Athlon specific Machine Check Exception Reporting
  * (C) Copyright 2002 Dave Jones <davej@redhat.com>
  */
-
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
 #include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/init.h>
 #include <linux/smp.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/mce.h>
 #include <asm/msr.h>
 
-#include "mce.h"
-
-/* Machine Check Handler For AMD Athlon/Duron */
+/* Machine Check Handler For AMD Athlon/Duron: */
 static void k7_machine_check(struct pt_regs *regs, long error_code)
 {
-	int recover = 1;
 	u32 alow, ahigh, high, low;
 	u32 mcgstl, mcgsth;
+	int recover = 1;
 	int i;
 
 	rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
@@ -32,15 +30,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 
 	for (i = 1; i < nr_mce_banks; i++) {
 		rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
-		if (high&(1<<31)) {
+		if (high & (1<<31)) {
 			char misc[20];
 			char addr[24];
-			misc[0] = addr[0] = '\0';
+
+			misc[0] = '\0';
+			addr[0] = '\0';
+
 			if (high & (1<<29))
 				recover |= 1;
 			if (high & (1<<25))
 				recover |= 2;
 			high &= ~(1<<31);
+
 			if (high & (1<<27)) {
 				rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
 				snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
@@ -49,27 +51,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code)
 				rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
 				snprintf(addr, 24, " at %08x%08x", ahigh, alow);
 			}
+
 			printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
 				smp_processor_id(), i, high, low, misc, addr);
-			/* Clear it */
+
+			/* Clear it: */
 			wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-			/* Serialize */
+			/* Serialize: */
 			wmb();
 			add_taint(TAINT_MACHINE_CHECK);
 		}
 	}
 
-	if (recover&2)
+	if (recover & 2)
 		panic("CPU context corrupt");
-	if (recover&1)
+	if (recover & 1)
 		panic("Unable to continue");
+
 	printk(KERN_EMERG "Attempting to continue.\n");
+
 	mcgstl &= ~(1<<2);
 	wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
 }
 
 
-/* AMD K7 machine check is Intel like */
+/* AMD K7 machine check is Intel like: */
 void amd_mcheck_init(struct cpuinfo_x86 *c)
 {
 	u32 l, h;
@@ -79,21 +85,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c)
 		return;
 
 	machine_check_vector = k7_machine_check;
+	/* Make sure the vector pointer is visible before we enable MCEs: */
 	wmb();
 
 	printk(KERN_INFO "Intel machine check architecture supported.\n");
+
 	rdmsr(MSR_IA32_MCG_CAP, l, h);
 	if (l & (1<<8)) /* Control register present ? */
 		wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 	nr_mce_banks = l & 0xff;
 
-	/* Clear status for MC index 0 separately, we don't touch CTL,
-	 * as some K7 Athlons cause spurious MCEs when its enabled. */
+	/*
+	 * Clear status for MC index 0 separately, we don't touch CTL,
+	 * as some K7 Athlons cause spurious MCEs when its enabled:
+	 */
 	if (boot_cpu_data.x86 == 6) {
 		wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
 		i = 1;
 	} else
 		i = 0;
+
 	for (; i < nr_mce_banks; i++) {
 		wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
 		wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
new file mode 100644
index 000000000000..a3a235a53f09
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -0,0 +1,127 @@
1 | /* | ||
2 | * Machine check injection support. | ||
3 | * Copyright 2008 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | * | ||
10 | * Authors: | ||
11 | * Andi Kleen | ||
12 | * Ying Huang | ||
13 | */ | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/timer.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/smp.h> | ||
21 | #include <asm/mce.h> | ||
22 | |||
23 | /* Update fake mce registers on current CPU. */ | ||
24 | static void inject_mce(struct mce *m) | ||
25 | { | ||
26 | struct mce *i = &per_cpu(injectm, m->extcpu); | ||
27 | |||
28 | /* Make sure no one reads a partially written injectm */ | ||
29 | i->finished = 0; | ||
30 | mb(); | ||
31 | m->finished = 0; | ||
32 | /* First set the fields after finished */ | ||
33 | i->extcpu = m->extcpu; | ||
34 | mb(); | ||
35 | /* Now write record in order, finished last (except above) */ | ||
36 | memcpy(i, m, sizeof(struct mce)); | ||
37 | /* Finally activate it */ | ||
38 | mb(); | ||
39 | i->finished = 1; | ||
40 | } | ||
41 | |||
42 | struct delayed_mce { | ||
43 | struct timer_list timer; | ||
44 | struct mce m; | ||
45 | }; | ||
46 | |||
47 | /* Inject mce on current CPU */ | ||
48 | static void raise_mce(unsigned long data) | ||
49 | { | ||
50 | struct delayed_mce *dm = (struct delayed_mce *)data; | ||
51 | struct mce *m = &dm->m; | ||
52 | int cpu = m->extcpu; | ||
53 | |||
54 | inject_mce(m); | ||
55 | if (m->status & MCI_STATUS_UC) { | ||
56 | struct pt_regs regs; | ||
57 | memset(®s, 0, sizeof(struct pt_regs)); | ||
58 | regs.ip = m->ip; | ||
59 | regs.cs = m->cs; | ||
60 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); | ||
61 | do_machine_check(®s, 0); | ||
62 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); | ||
63 | } else { | ||
64 | mce_banks_t b; | ||
65 | memset(&b, 0xff, sizeof(mce_banks_t)); | ||
66 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); | ||
67 | machine_check_poll(0, &b); | ||
68 | mce_notify_irq(); | ||
69 | printk(KERN_INFO "Finished machine check poll on CPU %d\n", | ||
70 | cpu); | ||
71 | } | ||
72 | kfree(dm); | ||
73 | } | ||
74 | |||
75 | /* Error injection interface */ | ||
76 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, | ||
77 | size_t usize, loff_t *off) | ||
78 | { | ||
79 | struct delayed_mce *dm; | ||
80 | struct mce m; | ||
81 | |||
82 | if (!capable(CAP_SYS_ADMIN)) | ||
83 | return -EPERM; | ||
84 | /* | ||
85 | * There are some cases where real MSR reads could slip | ||
86 | * through. | ||
87 | */ | ||
88 | if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) | ||
89 | return -EIO; | ||
90 | |||
91 | if ((unsigned long)usize > sizeof(struct mce)) | ||
92 | usize = sizeof(struct mce); | ||
93 | if (copy_from_user(&m, ubuf, usize)) | ||
94 | return -EFAULT; | ||
95 | |||
96 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) | ||
97 | return -EINVAL; | ||
98 | |||
99 | dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); | ||
100 | if (!dm) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* | ||
104 | * Need to give user space some time to set everything up, | ||
105 | * so do it a jiffy or two later everywhere. | ||
106 | * Should we use a hrtimer here for better synchronization? | ||
107 | */ | ||
108 | memcpy(&dm->m, &m, sizeof(struct mce)); | ||
109 | setup_timer(&dm->timer, raise_mce, (unsigned long)dm); | ||
110 | dm->timer.expires = jiffies + 2; | ||
111 | add_timer_on(&dm->timer, m.extcpu); | ||
112 | return usize; | ||
113 | } | ||
114 | |||
115 | static int inject_init(void) | ||
116 | { | ||
117 | printk(KERN_INFO "Machine check injector initialized\n"); | ||
118 | mce_chrdev_ops.write = mce_write; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | module_init(inject_init); | ||
123 | /* | ||
124 | * Cannot tolerate unloading currently because we cannot | ||
125 | * guarantee all openers of mce_chrdev will get a reference to us. | ||
126 | */ | ||
127 | MODULE_LICENSE("GPL"); | ||
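The injector above is driven entirely through the /dev/mcelog character device: mce_write() copies a struct mce record from user space and arms a timer on the target CPU. A minimal user-space sketch of that flow follows; the local struct layout is an assumption for illustration only and must match the running kernel's struct mce from asm/mce.h, so treat the field list as hypothetical rather than a stable ABI.

/*
 * Hedged sketch: inject one record through mce-inject's write hook.
 * Assumptions: the mce-inject code above is built in or loaded,
 * /dev/mcelog exists, and this local mirror of struct mce matches the
 * kernel's layout (check asm/mce.h of the running kernel first).
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

struct mce_record {			/* assumed mirror of struct mce */
	uint64_t status, misc, addr, mcgstatus, ip, tsc, time;
	uint8_t  cpuvendor, pad1;
	uint16_t pad2;
	uint32_t cpuid;
	uint8_t  cs, bank, cpu, finished;
	uint32_t extcpu, socketid, apicid;
	uint64_t mcgcap;
};

int main(void)
{
	struct mce_record m;
	int fd = open("/dev/mcelog", O_RDWR);

	if (fd < 0) {
		perror("open /dev/mcelog");
		return 1;
	}
	memset(&m, 0, sizeof(m));
	m.status = (1ULL << 63) | (1ULL << 60);	/* VAL + EN, no UC: a corrected error */
	m.bank   = 1;
	m.extcpu = 0;				/* CPU the delayed timer fires on */
	if (write(fd, &m, sizeof(m)) < 0)	/* lands in mce_write() above */
		perror("write");
	close(fd);
	return 0;
}

Because the UC bit is left clear, raise_mce() would take the machine_check_poll() path here rather than calling do_machine_check().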
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
new file mode 100644
index 000000000000..54dcb8ff12e5
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -0,0 +1,15 @@
1 | #include <asm/mce.h> | ||
2 | |||
3 | enum severity_level { | ||
4 | MCE_NO_SEVERITY, | ||
5 | MCE_KEEP_SEVERITY, | ||
6 | MCE_SOME_SEVERITY, | ||
7 | MCE_AO_SEVERITY, | ||
8 | MCE_UC_SEVERITY, | ||
9 | MCE_AR_SEVERITY, | ||
10 | MCE_PANIC_SEVERITY, | ||
11 | }; | ||
12 | |||
13 | int mce_severity(struct mce *a, int tolerant, char **msg); | ||
14 | |||
15 | extern int mce_ser; | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
new file mode 100644
index 000000000000..ff0807f97056
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -0,0 +1,218 @@
1 | /* | ||
2 | * MCE grading rules. | ||
3 | * Copyright 2008, 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | * | ||
10 | * Author: Andi Kleen | ||
11 | */ | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/seq_file.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/debugfs.h> | ||
16 | #include <asm/mce.h> | ||
17 | |||
18 | #include "mce-internal.h" | ||
19 | |||
20 | /* | ||
21 | * Grade an mce by severity. In general the most severe ones are processed | ||
22 | * first. Since there are quite a lot of combinations test the bits in a | ||
23 | * table-driven way. The rules are simply processed in order, first | ||
24 | * match wins. | ||
25 | * | ||
26 | * Note this is only used for machine check exceptions, the corrected | ||
27 | * errors use much simpler rules. The exceptions still check for the corrected | ||
28 | * errors, but only to leave them alone for the CMCI handler (except for | ||
29 | * panic situations) | ||
30 | */ | ||
31 | |||
32 | enum context { IN_KERNEL = 1, IN_USER = 2 }; | ||
33 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; | ||
34 | |||
35 | static struct severity { | ||
36 | u64 mask; | ||
37 | u64 result; | ||
38 | unsigned char sev; | ||
39 | unsigned char mcgmask; | ||
40 | unsigned char mcgres; | ||
41 | unsigned char ser; | ||
42 | unsigned char context; | ||
43 | unsigned char covered; | ||
44 | char *msg; | ||
45 | } severities[] = { | ||
46 | #define KERNEL .context = IN_KERNEL | ||
47 | #define USER .context = IN_USER | ||
48 | #define SER .ser = SER_REQUIRED | ||
49 | #define NOSER .ser = NO_SER | ||
50 | #define SEV(s) .sev = MCE_ ## s ## _SEVERITY | ||
51 | #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } | ||
52 | #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } | ||
53 | #define MCGMASK(x, res, s, m, r...) \ | ||
54 | { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } | ||
55 | #define MASK(x, y, s, m, r...) \ | ||
56 | { .mask = x, .result = y, SEV(s), .msg = m, ## r } | ||
57 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) | ||
58 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) | ||
59 | #define MCACOD 0xffff | ||
60 | |||
61 | BITCLR(MCI_STATUS_VAL, NO, "Invalid"), | ||
62 | BITCLR(MCI_STATUS_EN, NO, "Not enabled"), | ||
63 | BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), | ||
64 | /* When MCIP is not set something is very confused */ | ||
65 | MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), | ||
66 | /* Neither return nor error IP -- no chance to recover -> PANIC */ | ||
67 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, | ||
68 | "Neither restart nor error IP"), | ||
69 | MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", | ||
70 | KERNEL), | ||
71 | BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), | ||
72 | MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, | ||
73 | "Spurious not enabled", SER), | ||
74 | |||
75 | /* ignore OVER for UCNA */ | ||
76 | MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, | ||
77 | "Uncorrected no action required", SER), | ||
78 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, | ||
79 | "Illegal combination (UCNA with AR=1)", SER), | ||
80 | MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), | ||
81 | |||
82 | /* AR add known MCACODs here */ | ||
83 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, | ||
84 | "Action required with lost events", SER), | ||
85 | MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, | ||
86 | "Action required; unknown MCACOD", SER), | ||
87 | |||
88 | /* known AO MCACODs: */ | ||
89 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, | ||
90 | "Action optional: memory scrubbing error", SER), | ||
91 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, | ||
92 | "Action optional: last level cache writeback error", SER), | ||
93 | |||
94 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, | ||
95 | "Action optional unknown MCACOD", SER), | ||
96 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, | ||
97 | "Action optional with lost events", SER), | ||
98 | BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), | ||
99 | BITSET(MCI_STATUS_UC, UC, "Uncorrected"), | ||
100 | BITSET(0, SOME, "No match") /* always matches. keep at end */ | ||
101 | }; | ||
102 | |||
103 | /* | ||
104 | * If the EIPV bit is set, it means the saved IP is the | ||
105 | * instruction which caused the MCE. | ||
106 | */ | ||
107 | static int error_context(struct mce *m) | ||
108 | { | ||
109 | if (m->mcgstatus & MCG_STATUS_EIPV) | ||
110 | return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | ||
111 | /* Unknown, assume kernel */ | ||
112 | return IN_KERNEL; | ||
113 | } | ||
114 | |||
115 | int mce_severity(struct mce *a, int tolerant, char **msg) | ||
116 | { | ||
117 | enum context ctx = error_context(a); | ||
118 | struct severity *s; | ||
119 | |||
120 | for (s = severities;; s++) { | ||
121 | if ((a->status & s->mask) != s->result) | ||
122 | continue; | ||
123 | if ((a->mcgstatus & s->mcgmask) != s->mcgres) | ||
124 | continue; | ||
125 | if (s->ser == SER_REQUIRED && !mce_ser) | ||
126 | continue; | ||
127 | if (s->ser == NO_SER && mce_ser) | ||
128 | continue; | ||
129 | if (s->context && ctx != s->context) | ||
130 | continue; | ||
131 | if (msg) | ||
132 | *msg = s->msg; | ||
133 | s->covered = 1; | ||
134 | if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { | ||
135 | if (panic_on_oops || tolerant < 1) | ||
136 | return MCE_PANIC_SEVERITY; | ||
137 | } | ||
138 | return s->sev; | ||
139 | } | ||
140 | } | ||
141 | |||
142 | static void *s_start(struct seq_file *f, loff_t *pos) | ||
143 | { | ||
144 | if (*pos >= ARRAY_SIZE(severities)) | ||
145 | return NULL; | ||
146 | return &severities[*pos]; | ||
147 | } | ||
148 | |||
149 | static void *s_next(struct seq_file *f, void *data, loff_t *pos) | ||
150 | { | ||
151 | if (++(*pos) >= ARRAY_SIZE(severities)) | ||
152 | return NULL; | ||
153 | return &severities[*pos]; | ||
154 | } | ||
155 | |||
156 | static void s_stop(struct seq_file *f, void *data) | ||
157 | { | ||
158 | } | ||
159 | |||
160 | static int s_show(struct seq_file *f, void *data) | ||
161 | { | ||
162 | struct severity *ser = data; | ||
163 | seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | static const struct seq_operations severities_seq_ops = { | ||
168 | .start = s_start, | ||
169 | .next = s_next, | ||
170 | .stop = s_stop, | ||
171 | .show = s_show, | ||
172 | }; | ||
173 | |||
174 | static int severities_coverage_open(struct inode *inode, struct file *file) | ||
175 | { | ||
176 | return seq_open(file, &severities_seq_ops); | ||
177 | } | ||
178 | |||
179 | static ssize_t severities_coverage_write(struct file *file, | ||
180 | const char __user *ubuf, | ||
181 | size_t count, loff_t *ppos) | ||
182 | { | ||
183 | int i; | ||
184 | for (i = 0; i < ARRAY_SIZE(severities); i++) | ||
185 | severities[i].covered = 0; | ||
186 | return count; | ||
187 | } | ||
188 | |||
189 | static const struct file_operations severities_coverage_fops = { | ||
190 | .open = severities_coverage_open, | ||
191 | .release = seq_release, | ||
192 | .read = seq_read, | ||
193 | .write = severities_coverage_write, | ||
194 | }; | ||
195 | |||
196 | static int __init severities_debugfs_init(void) | ||
197 | { | ||
198 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; | ||
199 | |||
200 | dmce = debugfs_create_dir("mce", NULL); | ||
201 | if (dmce == NULL) | ||
202 | goto err_out; | ||
203 | fseverities_coverage = debugfs_create_file("severities-coverage", | ||
204 | 0444, dmce, NULL, | ||
205 | &severities_coverage_fops); | ||
206 | if (fseverities_coverage == NULL) | ||
207 | goto err_out; | ||
208 | |||
209 | return 0; | ||
210 | |||
211 | err_out: | ||
212 | if (fseverities_coverage) | ||
213 | debugfs_remove(fseverities_coverage); | ||
214 | if (dmce) | ||
215 | debugfs_remove(dmce); | ||
216 | return -ENOMEM; | ||
217 | } | ||
218 | late_initcall(severities_debugfs_init); | ||
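The severities[] table above encodes first-match-wins grading: each rule masks a set of MCi_STATUS bits, compares them against an expected value, and the catch-all rule at the end guarantees the loop terminates. The standalone sketch below reproduces just that matching scheme; the bit values and the rule/grade names are made up for illustration and are not the real MCI_STATUS_* encodings.

/* Sketch of table-driven, first-match-wins grading (illustrative bits only). */
#include <stdint.h>
#include <stdio.h>

#define ST_VAL  (1u << 0)	/* stand-ins for MCI_STATUS_* bits */
#define ST_UC   (1u << 1)
#define ST_PCC  (1u << 2)

enum sev { SEV_NO, SEV_SOME, SEV_UC, SEV_PANIC };

struct rule {
	uint32_t mask;		/* which status bits the rule looks at */
	uint32_t result;	/* value those bits must have to match */
	enum sev sev;
	const char *msg;
};

static const struct rule rules[] = {
	{ ST_VAL, 0,      SEV_NO,    "Invalid (VAL clear)"        },
	{ ST_PCC, ST_PCC, SEV_PANIC, "Processor context corrupt"  },
	{ ST_UC,  ST_UC,  SEV_UC,    "Uncorrected"                },
	{ 0,      0,      SEV_SOME,  "No match (catch-all, last)" },
};

static enum sev grade(uint32_t status, const char **msg)
{
	const struct rule *r;

	/* First match wins; the last rule always matches. */
	for (r = rules; ; r++) {
		if ((status & r->mask) != r->result)
			continue;
		*msg = r->msg;
		return r->sev;
	}
}

int main(void)
{
	const char *msg;
	enum sev s = grade(ST_VAL | ST_UC, &msg);

	printf("severity %d: %s\n", s, msg);
	return 0;
}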
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
new file mode 100644
index 000000000000..284d1de968bc
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -0,0 +1,2049 @@
1 | /* | ||
2 | * Machine check handler. | ||
3 | * | ||
4 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
5 | * Rest from unknown author(s). | ||
6 | * 2004 Andi Kleen. Rewrote most of it. | ||
7 | * Copyright 2008 Intel Corporation | ||
8 | * Author: Andi Kleen | ||
9 | */ | ||
10 | #include <linux/thread_info.h> | ||
11 | #include <linux/capability.h> | ||
12 | #include <linux/miscdevice.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/ratelimit.h> | ||
15 | #include <linux/kallsyms.h> | ||
16 | #include <linux/rcupdate.h> | ||
17 | #include <linux/kobject.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/percpu.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/sysdev.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/ctype.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/sysfs.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/kmod.h> | ||
31 | #include <linux/poll.h> | ||
32 | #include <linux/nmi.h> | ||
33 | #include <linux/cpu.h> | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/fs.h> | ||
36 | #include <linux/mm.h> | ||
37 | |||
38 | #include <asm/processor.h> | ||
39 | #include <asm/hw_irq.h> | ||
40 | #include <asm/apic.h> | ||
41 | #include <asm/idle.h> | ||
42 | #include <asm/ipi.h> | ||
43 | #include <asm/mce.h> | ||
44 | #include <asm/msr.h> | ||
45 | |||
46 | #include "mce-internal.h" | ||
47 | |||
48 | /* Handle unconfigured int18 (should never happen) */ | ||
49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
50 | { | ||
51 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
52 | smp_processor_id()); | ||
53 | } | ||
54 | |||
55 | /* Call the installed machine check handler for this CPU setup. */ | ||
56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
57 | unexpected_machine_check; | ||
58 | |||
59 | int mce_disabled __read_mostly; | ||
60 | |||
61 | #ifdef CONFIG_X86_NEW_MCE | ||
62 | |||
63 | #define MISC_MCELOG_MINOR 227 | ||
64 | |||
65 | #define SPINUNIT 100 /* 100ns */ | ||
66 | |||
67 | atomic_t mce_entry; | ||
68 | |||
69 | DEFINE_PER_CPU(unsigned, mce_exception_count); | ||
70 | |||
71 | /* | ||
72 | * Tolerant levels: | ||
73 | * 0: always panic on uncorrected errors, log corrected errors | ||
74 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | ||
75 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | ||
76 | * 3: never panic or SIGBUS, log all errors (for testing only) | ||
77 | */ | ||
78 | static int tolerant __read_mostly = 1; | ||
79 | static int banks __read_mostly; | ||
80 | static u64 *bank __read_mostly; | ||
81 | static int rip_msr __read_mostly; | ||
82 | static int mce_bootlog __read_mostly = -1; | ||
83 | static int monarch_timeout __read_mostly = -1; | ||
84 | static int mce_panic_timeout __read_mostly; | ||
85 | static int mce_dont_log_ce __read_mostly; | ||
86 | int mce_cmci_disabled __read_mostly; | ||
87 | int mce_ignore_ce __read_mostly; | ||
88 | int mce_ser __read_mostly; | ||
89 | |||
90 | /* User mode helper program triggered by machine check event */ | ||
91 | static unsigned long mce_need_notify; | ||
92 | static char mce_helper[128]; | ||
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | ||
94 | |||
95 | static unsigned long dont_init_banks; | ||
96 | |||
97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
98 | static DEFINE_PER_CPU(struct mce, mces_seen); | ||
99 | static int cpu_missing; | ||
100 | |||
101 | |||
102 | /* MCA banks polled by the period polling timer for corrected events */ | ||
103 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
104 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
105 | }; | ||
106 | |||
107 | static inline int skip_bank_init(int i) | ||
108 | { | ||
109 | return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); | ||
110 | } | ||
111 | |||
112 | static DEFINE_PER_CPU(struct work_struct, mce_work); | ||
113 | |||
114 | /* Do initial initialization of a struct mce */ | ||
115 | void mce_setup(struct mce *m) | ||
116 | { | ||
117 | memset(m, 0, sizeof(struct mce)); | ||
118 | m->cpu = m->extcpu = smp_processor_id(); | ||
119 | rdtscll(m->tsc); | ||
120 | /* We hope get_seconds stays lockless */ | ||
121 | m->time = get_seconds(); | ||
122 | m->cpuvendor = boot_cpu_data.x86_vendor; | ||
123 | m->cpuid = cpuid_eax(1); | ||
124 | #ifdef CONFIG_SMP | ||
125 | m->socketid = cpu_data(m->extcpu).phys_proc_id; | ||
126 | #endif | ||
127 | m->apicid = cpu_data(m->extcpu).initial_apicid; | ||
128 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); | ||
129 | } | ||
130 | |||
131 | DEFINE_PER_CPU(struct mce, injectm); | ||
132 | EXPORT_PER_CPU_SYMBOL_GPL(injectm); | ||
133 | |||
134 | /* | ||
135 | * Lockless MCE logging infrastructure. | ||
136 | * This avoids deadlocks on printk locks without having to break locks. Also | ||
137 | * separate MCEs from kernel messages to avoid bogus bug reports. | ||
138 | */ | ||
139 | |||
140 | static struct mce_log mcelog = { | ||
141 | .signature = MCE_LOG_SIGNATURE, | ||
142 | .len = MCE_LOG_LEN, | ||
143 | .recordlen = sizeof(struct mce), | ||
144 | }; | ||
145 | |||
146 | void mce_log(struct mce *mce) | ||
147 | { | ||
148 | unsigned next, entry; | ||
149 | |||
150 | mce->finished = 0; | ||
151 | wmb(); | ||
152 | for (;;) { | ||
153 | entry = rcu_dereference(mcelog.next); | ||
154 | for (;;) { | ||
155 | /* | ||
156 | * When the buffer fills up discard new entries. | ||
157 | * Assume that the earlier errors are the more | ||
158 | * interesting ones: | ||
159 | */ | ||
160 | if (entry >= MCE_LOG_LEN) { | ||
161 | set_bit(MCE_OVERFLOW, | ||
162 | (unsigned long *)&mcelog.flags); | ||
163 | return; | ||
164 | } | ||
165 | /* Old left over entry. Skip: */ | ||
166 | if (mcelog.entry[entry].finished) { | ||
167 | entry++; | ||
168 | continue; | ||
169 | } | ||
170 | break; | ||
171 | } | ||
172 | smp_rmb(); | ||
173 | next = entry + 1; | ||
174 | if (cmpxchg(&mcelog.next, entry, next) == entry) | ||
175 | break; | ||
176 | } | ||
177 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | ||
178 | wmb(); | ||
179 | mcelog.entry[entry].finished = 1; | ||
180 | wmb(); | ||
181 | |||
182 | mce->finished = 1; | ||
183 | set_bit(0, &mce_need_notify); | ||
184 | } | ||
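mce_log() above is the "lockless MCE logging infrastructure" the earlier comment describes: a writer claims a slot by advancing mcelog.next with cmpxchg() and only marks the entry finished once the record is fully copied. A simplified user-space sketch of the same claim-then-publish idea, using C11 atomics instead of the kernel primitives and dropping the overflow/skip handling, looks roughly like this:

#include <stdatomic.h>
#include <stdio.h>

#define LOG_LEN 32

struct record {
	unsigned long data;
	atomic_int finished;		/* 0 = being written, 1 = safe to read */
};

static struct record slot[LOG_LEN];
static atomic_uint next_free;

static int log_record(unsigned long data)
{
	unsigned int entry;

	do {
		entry = atomic_load(&next_free);
		if (entry >= LOG_LEN)
			return -1;	/* buffer full: drop, like MCE_OVERFLOW */
	} while (!atomic_compare_exchange_weak(&next_free, &entry, entry + 1));

	slot[entry].data = data;			/* fill the claimed slot ... */
	atomic_store(&slot[entry].finished, 1);		/* ... then publish it */
	return 0;
}

int main(void)
{
	unsigned int i;

	log_record(0xdeadbeef);
	for (i = 0; i < atomic_load(&next_free); i++)
		if (atomic_load(&slot[i].finished))	/* readers skip unfinished slots */
			printf("entry %u: %lx\n", i, slot[i].data);
	return 0;
}

Readers (the /dev/mcelog read path in the kernel) skip entries whose finished flag is still clear, so a half-written record is never exposed.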
185 | |||
186 | static void print_mce(struct mce *m) | ||
187 | { | ||
188 | printk(KERN_EMERG | ||
189 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
190 | m->extcpu, m->mcgstatus, m->bank, m->status); | ||
191 | if (m->ip) { | ||
192 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | ||
193 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | ||
194 | m->cs, m->ip); | ||
195 | if (m->cs == __KERNEL_CS) | ||
196 | print_symbol("{%s}", m->ip); | ||
197 | printk("\n"); | ||
198 | } | ||
199 | printk(KERN_EMERG "TSC %llx ", m->tsc); | ||
200 | if (m->addr) | ||
201 | printk("ADDR %llx ", m->addr); | ||
202 | if (m->misc) | ||
203 | printk("MISC %llx ", m->misc); | ||
204 | printk("\n"); | ||
205 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | ||
206 | m->cpuvendor, m->cpuid, m->time, m->socketid, | ||
207 | m->apicid); | ||
208 | } | ||
209 | |||
210 | static void print_mce_head(void) | ||
211 | { | ||
212 | printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); | ||
213 | } | ||
214 | |||
215 | static void print_mce_tail(void) | ||
216 | { | ||
217 | printk(KERN_EMERG "This is not a software problem!\n" | ||
218 | KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
219 | } | ||
220 | |||
221 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | ||
222 | |||
223 | static atomic_t mce_paniced; | ||
224 | |||
225 | /* Panic in progress. Enable interrupts and wait for final IPI */ | ||
226 | static void wait_for_panic(void) | ||
227 | { | ||
228 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; | ||
229 | preempt_disable(); | ||
230 | local_irq_enable(); | ||
231 | while (timeout-- > 0) | ||
232 | udelay(1); | ||
233 | if (panic_timeout == 0) | ||
234 | panic_timeout = mce_panic_timeout; | ||
235 | panic("Panicing machine check CPU died"); | ||
236 | } | ||
237 | |||
238 | static void mce_panic(char *msg, struct mce *final, char *exp) | ||
239 | { | ||
240 | int i; | ||
241 | |||
242 | /* | ||
243 | * Make sure only one CPU runs in machine check panic | ||
244 | */ | ||
245 | if (atomic_add_return(1, &mce_paniced) > 1) | ||
246 | wait_for_panic(); | ||
247 | barrier(); | ||
248 | |||
249 | bust_spinlocks(1); | ||
250 | console_verbose(); | ||
251 | print_mce_head(); | ||
252 | /* First print corrected ones that are still unlogged */ | ||
253 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
254 | struct mce *m = &mcelog.entry[i]; | ||
255 | if (!(m->status & MCI_STATUS_VAL)) | ||
256 | continue; | ||
257 | if (!(m->status & MCI_STATUS_UC)) | ||
258 | print_mce(m); | ||
259 | } | ||
260 | /* Now print uncorrected but with the final one last */ | ||
261 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
262 | struct mce *m = &mcelog.entry[i]; | ||
263 | if (!(m->status & MCI_STATUS_VAL)) | ||
264 | continue; | ||
265 | if (!(m->status & MCI_STATUS_UC)) | ||
266 | continue; | ||
267 | if (!final || memcmp(m, final, sizeof(struct mce))) | ||
268 | print_mce(m); | ||
269 | } | ||
270 | if (final) | ||
271 | print_mce(final); | ||
272 | if (cpu_missing) | ||
273 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | ||
274 | print_mce_tail(); | ||
275 | if (exp) | ||
276 | printk(KERN_EMERG "Machine check: %s\n", exp); | ||
277 | if (panic_timeout == 0) | ||
278 | panic_timeout = mce_panic_timeout; | ||
279 | panic(msg); | ||
280 | } | ||
281 | |||
282 | /* Support code for software error injection */ | ||
283 | |||
284 | static int msr_to_offset(u32 msr) | ||
285 | { | ||
286 | unsigned bank = __get_cpu_var(injectm.bank); | ||
287 | if (msr == rip_msr) | ||
288 | return offsetof(struct mce, ip); | ||
289 | if (msr == MSR_IA32_MC0_STATUS + bank*4) | ||
290 | return offsetof(struct mce, status); | ||
291 | if (msr == MSR_IA32_MC0_ADDR + bank*4) | ||
292 | return offsetof(struct mce, addr); | ||
293 | if (msr == MSR_IA32_MC0_MISC + bank*4) | ||
294 | return offsetof(struct mce, misc); | ||
295 | if (msr == MSR_IA32_MCG_STATUS) | ||
296 | return offsetof(struct mce, mcgstatus); | ||
297 | return -1; | ||
298 | } | ||
299 | |||
300 | /* MSR access wrappers used for error injection */ | ||
301 | static u64 mce_rdmsrl(u32 msr) | ||
302 | { | ||
303 | u64 v; | ||
304 | if (__get_cpu_var(injectm).finished) { | ||
305 | int offset = msr_to_offset(msr); | ||
306 | if (offset < 0) | ||
307 | return 0; | ||
308 | return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); | ||
309 | } | ||
310 | rdmsrl(msr, v); | ||
311 | return v; | ||
312 | } | ||
313 | |||
314 | static void mce_wrmsrl(u32 msr, u64 v) | ||
315 | { | ||
316 | if (__get_cpu_var(injectm).finished) { | ||
317 | int offset = msr_to_offset(msr); | ||
318 | if (offset >= 0) | ||
319 | *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; | ||
320 | return; | ||
321 | } | ||
322 | wrmsrl(msr, v); | ||
323 | } | ||
324 | |||
325 | /* | ||
326 | * Simple lockless ring to communicate PFNs from the exception handler with the | ||
327 | * process context work function. This is vastly simplified because there's | ||
328 | * only a single reader and a single writer. | ||
329 | */ | ||
330 | #define MCE_RING_SIZE 16 /* we use one entry less */ | ||
331 | |||
332 | struct mce_ring { | ||
333 | unsigned short start; | ||
334 | unsigned short end; | ||
335 | unsigned long ring[MCE_RING_SIZE]; | ||
336 | }; | ||
337 | static DEFINE_PER_CPU(struct mce_ring, mce_ring); | ||
338 | |||
339 | /* Runs with CPU affinity in workqueue */ | ||
340 | static int mce_ring_empty(void) | ||
341 | { | ||
342 | struct mce_ring *r = &__get_cpu_var(mce_ring); | ||
343 | |||
344 | return r->start == r->end; | ||
345 | } | ||
346 | |||
347 | static int mce_ring_get(unsigned long *pfn) | ||
348 | { | ||
349 | struct mce_ring *r; | ||
350 | int ret = 0; | ||
351 | |||
352 | *pfn = 0; | ||
353 | get_cpu(); | ||
354 | r = &__get_cpu_var(mce_ring); | ||
355 | if (r->start == r->end) | ||
356 | goto out; | ||
357 | *pfn = r->ring[r->start]; | ||
358 | r->start = (r->start + 1) % MCE_RING_SIZE; | ||
359 | ret = 1; | ||
360 | out: | ||
361 | put_cpu(); | ||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | /* Always runs in MCE context with preempt off */ | ||
366 | static int mce_ring_add(unsigned long pfn) | ||
367 | { | ||
368 | struct mce_ring *r = &__get_cpu_var(mce_ring); | ||
369 | unsigned next; | ||
370 | |||
371 | next = (r->end + 1) % MCE_RING_SIZE; | ||
372 | if (next == r->start) | ||
373 | return -1; | ||
374 | r->ring[r->end] = pfn; | ||
375 | wmb(); | ||
376 | r->end = next; | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | int mce_available(struct cpuinfo_x86 *c) | ||
381 | { | ||
382 | if (mce_disabled) | ||
383 | return 0; | ||
384 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | ||
385 | } | ||
386 | |||
387 | static void mce_schedule_work(void) | ||
388 | { | ||
389 | if (!mce_ring_empty()) { | ||
390 | struct work_struct *work = &__get_cpu_var(mce_work); | ||
391 | if (!work_pending(work)) | ||
392 | schedule_work(work); | ||
393 | } | ||
394 | } | ||
395 | |||
396 | /* | ||
397 | * Get the address of the instruction at the time of the machine check | ||
398 | * error. | ||
399 | */ | ||
400 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
401 | { | ||
402 | |||
403 | if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { | ||
404 | m->ip = regs->ip; | ||
405 | m->cs = regs->cs; | ||
406 | } else { | ||
407 | m->ip = 0; | ||
408 | m->cs = 0; | ||
409 | } | ||
410 | if (rip_msr) | ||
411 | m->ip = mce_rdmsrl(rip_msr); | ||
412 | } | ||
413 | |||
414 | #ifdef CONFIG_X86_LOCAL_APIC | ||
415 | /* | ||
416 | * Called after interrupts have been reenabled again | ||
417 | * when an MCE happened during an interrupts-off region | ||
418 | * in the kernel. | ||
419 | */ | ||
420 | asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) | ||
421 | { | ||
422 | ack_APIC_irq(); | ||
423 | exit_idle(); | ||
424 | irq_enter(); | ||
425 | mce_notify_irq(); | ||
426 | mce_schedule_work(); | ||
427 | irq_exit(); | ||
428 | } | ||
429 | #endif | ||
430 | |||
431 | static void mce_report_event(struct pt_regs *regs) | ||
432 | { | ||
433 | if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { | ||
434 | mce_notify_irq(); | ||
435 | /* | ||
436 | * Triggering the work queue here is just an insurance | ||
437 | * policy in case the syscall exit notify handler | ||
438 | * doesn't run soon enough or ends up running on the | ||
439 | * wrong CPU (can happen when audit sleeps) | ||
440 | */ | ||
441 | mce_schedule_work(); | ||
442 | return; | ||
443 | } | ||
444 | |||
445 | #ifdef CONFIG_X86_LOCAL_APIC | ||
446 | /* | ||
447 | * Without APIC do not notify. The event will be picked | ||
448 | * up eventually. | ||
449 | */ | ||
450 | if (!cpu_has_apic) | ||
451 | return; | ||
452 | |||
453 | /* | ||
454 | * When interrupts are disabled we cannot use | ||
455 | * kernel services safely. Trigger a self-interrupt | ||
456 | * through the APIC to instead do the notification | ||
457 | * after interrupts are reenabled again. | ||
458 | */ | ||
459 | apic->send_IPI_self(MCE_SELF_VECTOR); | ||
460 | |||
461 | /* | ||
462 | * Wait for idle afterwards again so that we don't leave the | ||
463 | * APIC in a non idle state because the normal APIC writes | ||
464 | * cannot exclude us. | ||
465 | */ | ||
466 | apic_wait_icr_idle(); | ||
467 | #endif | ||
468 | } | ||
469 | |||
470 | DEFINE_PER_CPU(unsigned, mce_poll_count); | ||
471 | |||
472 | /* | ||
473 | * Poll for corrected events or events that happened before reset. | ||
474 | * Those are just logged through /dev/mcelog. | ||
475 | * | ||
476 | * This is executed in standard interrupt context. | ||
477 | * | ||
478 | * Note: spec recommends to panic for fatal unsignalled | ||
479 | * errors here. However this would be quite problematic -- | ||
480 | * we would need to reimplement the Monarch handling and | ||
481 | * it would mess up the exclusion between exception handler | ||
482 | * and poll handler -- so we skip this for now. | ||
483 | * These cases should not happen anyway, or only when the CPU | ||
484 | * is already totally confused. In this case it's likely it will | ||
485 | * not fully execute the machine check handler either. | ||
486 | */ | ||
487 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
488 | { | ||
489 | struct mce m; | ||
490 | int i; | ||
491 | |||
492 | __get_cpu_var(mce_poll_count)++; | ||
493 | |||
494 | mce_setup(&m); | ||
495 | |||
496 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
497 | for (i = 0; i < banks; i++) { | ||
498 | if (!bank[i] || !test_bit(i, *b)) | ||
499 | continue; | ||
500 | |||
501 | m.misc = 0; | ||
502 | m.addr = 0; | ||
503 | m.bank = i; | ||
504 | m.tsc = 0; | ||
505 | |||
506 | barrier(); | ||
507 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
508 | if (!(m.status & MCI_STATUS_VAL)) | ||
509 | continue; | ||
510 | |||
511 | /* | ||
512 | * Uncorrected or signalled events are handled by the exception | ||
513 | * handler when it is enabled, so don't process those here. | ||
514 | * | ||
515 | * TBD do the same check for MCI_STATUS_EN here? | ||
516 | */ | ||
517 | if (!(flags & MCP_UC) && | ||
518 | (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) | ||
519 | continue; | ||
520 | |||
521 | if (m.status & MCI_STATUS_MISCV) | ||
522 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | ||
523 | if (m.status & MCI_STATUS_ADDRV) | ||
524 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | ||
525 | |||
526 | if (!(flags & MCP_TIMESTAMP)) | ||
527 | m.tsc = 0; | ||
528 | /* | ||
529 | * Don't get the IP here because it's unlikely to | ||
530 | * have anything to do with the actual error location. | ||
531 | */ | ||
532 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | ||
533 | mce_log(&m); | ||
534 | add_taint(TAINT_MACHINE_CHECK); | ||
535 | } | ||
536 | |||
537 | /* | ||
538 | * Clear state for this bank. | ||
539 | */ | ||
540 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Don't clear MCG_STATUS here because it's only defined for | ||
545 | * exceptions. | ||
546 | */ | ||
547 | |||
548 | sync_core(); | ||
549 | } | ||
550 | EXPORT_SYMBOL_GPL(machine_check_poll); | ||
551 | |||
552 | /* | ||
553 | * Do a quick check if any of the events requires a panic. | ||
554 | * This decides if we keep the events around or clear them. | ||
555 | */ | ||
556 | static int mce_no_way_out(struct mce *m, char **msg) | ||
557 | { | ||
558 | int i; | ||
559 | |||
560 | for (i = 0; i < banks; i++) { | ||
561 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
562 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | ||
563 | return 1; | ||
564 | } | ||
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Variable to establish order between CPUs while scanning. | ||
570 | * Each CPU spins initially until executing is equal its number. | ||
571 | */ | ||
572 | static atomic_t mce_executing; | ||
573 | |||
574 | /* | ||
575 | * Defines order of CPUs on entry. First CPU becomes Monarch. | ||
576 | */ | ||
577 | static atomic_t mce_callin; | ||
578 | |||
579 | /* | ||
580 | * Check if a timeout waiting for other CPUs happened. | ||
581 | */ | ||
582 | static int mce_timed_out(u64 *t) | ||
583 | { | ||
584 | /* | ||
585 | * The others already did panic for some reason. | ||
586 | * Bail out like in a timeout. | ||
587 | * rmb() to tell the compiler that system_state | ||
588 | * might have been modified by someone else. | ||
589 | */ | ||
590 | rmb(); | ||
591 | if (atomic_read(&mce_paniced)) | ||
592 | wait_for_panic(); | ||
593 | if (!monarch_timeout) | ||
594 | goto out; | ||
595 | if ((s64)*t < SPINUNIT) { | ||
596 | /* CHECKME: Make panic default for 1 too? */ | ||
597 | if (tolerant < 1) | ||
598 | mce_panic("Timeout synchronizing machine check over CPUs", | ||
599 | NULL, NULL); | ||
600 | cpu_missing = 1; | ||
601 | return 1; | ||
602 | } | ||
603 | *t -= SPINUNIT; | ||
604 | out: | ||
605 | touch_nmi_watchdog(); | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * The Monarch's reign. The Monarch is the CPU who entered | ||
611 | * the machine check handler first. It waits for the others to | ||
612 | * raise the exception too and then grades them. When any | ||
613 | * error is fatal, panic. Only then let the others continue. | ||
614 | * | ||
615 | * The other CPUs entering the MCE handler will be controlled by the | ||
616 | * Monarch. They are called Subjects. | ||
617 | * | ||
618 | * This way we prevent any potential data corruption in an unrecoverable case | ||
619 | * and also makes sure that all CPUs' errors are always examined. | ||
620 | * | ||
621 | * Also this detects the case of a machine check event coming from outer | ||
622 | * space (not detected by any CPUs). In this case some external agent wants | ||
623 | * us to shut down, so panic too. | ||
624 | * | ||
625 | * The other CPUs might still decide to panic if the handler happens | ||
626 | * in an unrecoverable place, but in this case the system is in a semi-stable | ||
627 | * state and won't corrupt anything by itself. It's ok to let the others | ||
628 | * continue for a bit first. | ||
629 | * | ||
630 | * All the spin loops have timeouts; when a timeout happens a CPU | ||
631 | * typically elects itself to be Monarch. | ||
632 | */ | ||
633 | static void mce_reign(void) | ||
634 | { | ||
635 | int cpu; | ||
636 | struct mce *m = NULL; | ||
637 | int global_worst = 0; | ||
638 | char *msg = NULL; | ||
639 | char *nmsg = NULL; | ||
640 | |||
641 | /* | ||
642 | * This CPU is the Monarch and the other CPUs have run | ||
643 | * through their handlers. | ||
644 | * Grade the severity of the errors of all the CPUs. | ||
645 | */ | ||
646 | for_each_possible_cpu(cpu) { | ||
647 | int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, | ||
648 | &nmsg); | ||
649 | if (severity > global_worst) { | ||
650 | msg = nmsg; | ||
651 | global_worst = severity; | ||
652 | m = &per_cpu(mces_seen, cpu); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | /* | ||
657 | * Cannot recover? Panic here then. | ||
658 | * This dumps all the mces in the log buffer and stops the | ||
659 | * other CPUs. | ||
660 | */ | ||
661 | if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) | ||
662 | mce_panic("Fatal Machine check", m, msg); | ||
663 | |||
664 | /* | ||
665 | * For UC somewhere we let the CPU who detects it handle it. | ||
666 | * Also must let continue the others, otherwise the handling | ||
667 | * CPU could deadlock on a lock. | ||
668 | */ | ||
669 | |||
670 | /* | ||
671 | * No machine check event found. Must be some external | ||
672 | * source or one CPU is hung. Panic. | ||
673 | */ | ||
674 | if (!m && tolerant < 3) | ||
675 | mce_panic("Machine check from unknown source", NULL, NULL); | ||
676 | |||
677 | /* | ||
678 | * Now clear all the mces_seen so that they don't reappear on | ||
679 | * the next mce. | ||
680 | */ | ||
681 | for_each_possible_cpu(cpu) | ||
682 | memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); | ||
683 | } | ||
684 | |||
685 | static atomic_t global_nwo; | ||
686 | |||
687 | /* | ||
688 | * Start of Monarch synchronization. This waits until all CPUs have | ||
689 | * entered the exception handler and then determines if any of them | ||
690 | * saw a fatal event that requires panic. Then it executes them | ||
691 | * in the entry order. | ||
692 | * TBD double check parallel CPU hotunplug | ||
693 | */ | ||
694 | static int mce_start(int *no_way_out) | ||
695 | { | ||
696 | int order; | ||
697 | int cpus = num_online_cpus(); | ||
698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | ||
699 | |||
700 | if (!timeout) | ||
701 | return -1; | ||
702 | |||
703 | atomic_add(*no_way_out, &global_nwo); | ||
704 | /* | ||
705 | * global_nwo should be updated before mce_callin | ||
706 | */ | ||
707 | smp_wmb(); | ||
708 | order = atomic_add_return(1, &mce_callin); | ||
709 | |||
710 | /* | ||
711 | * Wait for everyone. | ||
712 | */ | ||
713 | while (atomic_read(&mce_callin) != cpus) { | ||
714 | if (mce_timed_out(&timeout)) { | ||
715 | atomic_set(&global_nwo, 0); | ||
716 | return -1; | ||
717 | } | ||
718 | ndelay(SPINUNIT); | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * mce_callin should be read before global_nwo | ||
723 | */ | ||
724 | smp_rmb(); | ||
725 | |||
726 | if (order == 1) { | ||
727 | /* | ||
728 | * Monarch: Starts executing now, the others wait. | ||
729 | */ | ||
730 | atomic_set(&mce_executing, 1); | ||
731 | } else { | ||
732 | /* | ||
733 | * Subject: Now start the scanning loop one by one in | ||
734 | * the original callin order. | ||
735 | * This way when there are any shared banks it will be | ||
736 | * only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | ||
738 | while (atomic_read(&mce_executing) < order) { | ||
739 | if (mce_timed_out(&timeout)) { | ||
740 | atomic_set(&global_nwo, 0); | ||
741 | return -1; | ||
742 | } | ||
743 | ndelay(SPINUNIT); | ||
744 | } | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * Cache the global no_way_out state. | ||
749 | */ | ||
750 | *no_way_out = atomic_read(&global_nwo); | ||
751 | |||
752 | return order; | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * Synchronize between CPUs after main scanning loop. | ||
757 | * This invokes the bulk of the Monarch processing. | ||
758 | */ | ||
759 | static int mce_end(int order) | ||
760 | { | ||
761 | int ret = -1; | ||
762 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | ||
763 | |||
764 | if (!timeout) | ||
765 | goto reset; | ||
766 | if (order < 0) | ||
767 | goto reset; | ||
768 | |||
769 | /* | ||
770 | * Allow others to run. | ||
771 | */ | ||
772 | atomic_inc(&mce_executing); | ||
773 | |||
774 | if (order == 1) { | ||
775 | /* CHECKME: Can this race with a parallel hotplug? */ | ||
776 | int cpus = num_online_cpus(); | ||
777 | |||
778 | /* | ||
779 | * Monarch: Wait for everyone to go through their scanning | ||
780 | * loops. | ||
781 | */ | ||
782 | while (atomic_read(&mce_executing) <= cpus) { | ||
783 | if (mce_timed_out(&timeout)) | ||
784 | goto reset; | ||
785 | ndelay(SPINUNIT); | ||
786 | } | ||
787 | |||
788 | mce_reign(); | ||
789 | barrier(); | ||
790 | ret = 0; | ||
791 | } else { | ||
792 | /* | ||
793 | * Subject: Wait for Monarch to finish. | ||
794 | */ | ||
795 | while (atomic_read(&mce_executing) != 0) { | ||
796 | if (mce_timed_out(&timeout)) | ||
797 | goto reset; | ||
798 | ndelay(SPINUNIT); | ||
799 | } | ||
800 | |||
801 | /* | ||
802 | * Don't reset anything. That's done by the Monarch. | ||
803 | */ | ||
804 | return 0; | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * Reset all global state. | ||
809 | */ | ||
810 | reset: | ||
811 | atomic_set(&global_nwo, 0); | ||
812 | atomic_set(&mce_callin, 0); | ||
813 | barrier(); | ||
814 | |||
815 | /* | ||
816 | * Let others run again. | ||
817 | */ | ||
818 | atomic_set(&mce_executing, 0); | ||
819 | return ret; | ||
820 | } | ||
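mce_start()/mce_end() above implement the Monarch rendezvous: every CPU increments mce_callin, the first caller becomes the Monarch, and mce_executing releases the Subjects one at a time in call-in order. The sketch below models that ordering with threads and C11 atomics; it deliberately leaves out the timeout handling (mce_timed_out) and the panic paths, so treat it as a simplified illustration, not the kernel algorithm verbatim.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int callin;	/* how many entered the handler (mce_callin) */
static atomic_int executing;	/* whose turn it is to scan banks (mce_executing) */

static void *machine_check(void *arg)
{
	int order = atomic_fetch_add(&callin, 1) + 1;	/* 1-based, like mce_start() */

	(void)arg;
	while (atomic_load(&callin) != NCPUS)
		;					/* wait for everyone */

	if (order == 1)
		atomic_store(&executing, 1);		/* Monarch scans first */
	else
		while (atomic_load(&executing) < order)
			;				/* Subjects wait their turn */

	printf("cpu with order %d scanning banks\n", order);

	atomic_fetch_add(&executing, 1);		/* let the next CPU run (mce_end) */

	if (order == 1) {
		while (atomic_load(&executing) <= NCPUS)
			;				/* Monarch waits for all scans */
		atomic_store(&executing, 0);		/* reign done: release Subjects */
	} else {
		while (atomic_load(&executing) != 0)
			;				/* wait for the Monarch to finish */
	}
	return NULL;
}

int main(void)
{
	pthread_t t[NCPUS];
	int i;

	for (i = 0; i < NCPUS; i++)
		pthread_create(&t[i], NULL, machine_check, NULL);
	for (i = 0; i < NCPUS; i++)
		pthread_join(t[i], NULL);
	return 0;
}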
821 | |||
822 | /* | ||
823 | * Check if the address reported by the CPU is in a format we can parse. | ||
824 | * It would be possible to add code for most other cases, but all would | ||
825 | * be somewhat complicated (e.g. segment offset would require an instruction | ||
826 | * parser). So only support physical addresses upto page granuality for now. | ||
827 | */ | ||
828 | static int mce_usable_address(struct mce *m) | ||
829 | { | ||
830 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) | ||
831 | return 0; | ||
832 | if ((m->misc & 0x3f) > PAGE_SHIFT) | ||
833 | return 0; | ||
834 | if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) | ||
835 | return 0; | ||
836 | return 1; | ||
837 | } | ||
838 | |||
839 | static void mce_clear_state(unsigned long *toclear) | ||
840 | { | ||
841 | int i; | ||
842 | |||
843 | for (i = 0; i < banks; i++) { | ||
844 | if (test_bit(i, toclear)) | ||
845 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
846 | } | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * The actual machine check handler. This only handles real | ||
851 | * exceptions when something got corrupted coming in through int 18. | ||
852 | * | ||
853 | * This is executed in NMI context not subject to normal locking rules. This | ||
854 | * implies that most kernel services cannot be safely used. Don't even | ||
855 | * think about putting a printk in there! | ||
856 | * | ||
857 | * On Intel systems this is entered on all CPUs in parallel through | ||
858 | * MCE broadcast. However some CPUs might be broken beyond repair, | ||
859 | * so be always careful when synchronizing with others. | ||
860 | */ | ||
861 | void do_machine_check(struct pt_regs *regs, long error_code) | ||
862 | { | ||
863 | struct mce m, *final; | ||
864 | int i; | ||
865 | int worst = 0; | ||
866 | int severity; | ||
867 | /* | ||
868 | * Establish sequential order between the CPUs entering the machine | ||
869 | * check handler. | ||
870 | */ | ||
871 | int order; | ||
872 | /* | ||
873 | * If no_way_out gets set, there is no safe way to recover from this | ||
874 | * MCE. If tolerant is cranked up, we'll try anyway. | ||
875 | */ | ||
876 | int no_way_out = 0; | ||
877 | /* | ||
878 | * If kill_it gets set, there might be a way to recover from this | ||
879 | * error. | ||
880 | */ | ||
881 | int kill_it = 0; | ||
882 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
883 | char *msg = "Unknown"; | ||
884 | |||
885 | atomic_inc(&mce_entry); | ||
886 | |||
887 | __get_cpu_var(mce_exception_count)++; | ||
888 | |||
889 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
890 | 18, SIGKILL) == NOTIFY_STOP) | ||
891 | goto out; | ||
892 | if (!banks) | ||
893 | goto out; | ||
894 | |||
895 | mce_setup(&m); | ||
896 | |||
897 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
898 | no_way_out = mce_no_way_out(&m, &msg); | ||
899 | |||
900 | final = &__get_cpu_var(mces_seen); | ||
901 | *final = m; | ||
902 | |||
903 | barrier(); | ||
904 | |||
905 | /* | ||
906 | * When no restart IP must always kill or panic. | ||
907 | */ | ||
908 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
909 | kill_it = 1; | ||
910 | |||
911 | /* | ||
912 | * Go through all the banks in exclusion of the other CPUs. | ||
913 | * This way we don't report duplicated events on shared banks | ||
914 | * because the first one to see it will clear it. | ||
915 | */ | ||
916 | order = mce_start(&no_way_out); | ||
917 | for (i = 0; i < banks; i++) { | ||
918 | __clear_bit(i, toclear); | ||
919 | if (!bank[i]) | ||
920 | continue; | ||
921 | |||
922 | m.misc = 0; | ||
923 | m.addr = 0; | ||
924 | m.bank = i; | ||
925 | |||
926 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
927 | if ((m.status & MCI_STATUS_VAL) == 0) | ||
928 | continue; | ||
929 | |||
930 | /* | ||
931 | * Non uncorrected or non signaled errors are handled by | ||
932 | * machine_check_poll. Leave them alone, unless this panics. | ||
933 | */ | ||
934 | if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && | ||
935 | !no_way_out) | ||
936 | continue; | ||
937 | |||
938 | /* | ||
939 | * Set taint even when machine check was not enabled. | ||
940 | */ | ||
941 | add_taint(TAINT_MACHINE_CHECK); | ||
942 | |||
943 | severity = mce_severity(&m, tolerant, NULL); | ||
944 | |||
945 | /* | ||
946 | * When machine check was for corrected handler don't touch, | ||
947 | * unless we're panicking. | ||
948 | */ | ||
949 | if (severity == MCE_KEEP_SEVERITY && !no_way_out) | ||
950 | continue; | ||
951 | __set_bit(i, toclear); | ||
952 | if (severity == MCE_NO_SEVERITY) { | ||
953 | /* | ||
954 | * Machine check event was not enabled. Clear, but | ||
955 | * ignore. | ||
956 | */ | ||
957 | continue; | ||
958 | } | ||
959 | |||
960 | /* | ||
961 | * Kill on action required. | ||
962 | */ | ||
963 | if (severity == MCE_AR_SEVERITY) | ||
964 | kill_it = 1; | ||
965 | |||
966 | if (m.status & MCI_STATUS_MISCV) | ||
967 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | ||
968 | if (m.status & MCI_STATUS_ADDRV) | ||
969 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | ||
970 | |||
971 | /* | ||
972 | * Action optional error. Queue address for later processing. | ||
973 | * When the ring overflows we just ignore the AO error. | ||
974 | * RED-PEN add some logging mechanism when | ||
975 | * usable_address or mce_add_ring fails. | ||
976 | * RED-PEN don't ignore overflow for tolerant == 0 | ||
977 | */ | ||
978 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) | ||
979 | mce_ring_add(m.addr >> PAGE_SHIFT); | ||
980 | |||
981 | mce_get_rip(&m, regs); | ||
982 | mce_log(&m); | ||
983 | |||
984 | if (severity > worst) { | ||
985 | *final = m; | ||
986 | worst = severity; | ||
987 | } | ||
988 | } | ||
989 | |||
990 | if (!no_way_out) | ||
991 | mce_clear_state(toclear); | ||
992 | |||
993 | /* | ||
994 | * Do most of the synchronization with other CPUs. | ||
995 | * When there's any problem use only local no_way_out state. | ||
996 | */ | ||
997 | if (mce_end(order) < 0) | ||
998 | no_way_out = worst >= MCE_PANIC_SEVERITY; | ||
999 | |||
1000 | /* | ||
1001 | * If we have decided that we just CAN'T continue, and the user | ||
1002 | * has not set tolerant to an insane level, give up and die. | ||
1003 | * | ||
1004 | * This is mainly used in the case when the system doesn't | ||
1005 | * support MCE broadcasting or it has been disabled. | ||
1006 | */ | ||
1007 | if (no_way_out && tolerant < 3) | ||
1008 | mce_panic("Fatal machine check on current CPU", final, msg); | ||
1009 | |||
1010 | /* | ||
1011 | * If the error seems to be unrecoverable, something should be | ||
1012 | * done. Try to kill as little as possible. If we can kill just | ||
1013 | * one task, do that. If the user has set the tolerance very | ||
1014 | * high, don't try to do anything at all. | ||
1015 | */ | ||
1016 | |||
1017 | if (kill_it && tolerant < 3) | ||
1018 | force_sig(SIGBUS, current); | ||
1019 | |||
1020 | /* notify userspace ASAP */ | ||
1021 | set_thread_flag(TIF_MCE_NOTIFY); | ||
1022 | |||
1023 | if (worst > 0) | ||
1024 | mce_report_event(regs); | ||
1025 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | ||
1026 | out: | ||
1027 | atomic_dec(&mce_entry); | ||
1028 | sync_core(); | ||
1029 | } | ||
1030 | EXPORT_SYMBOL_GPL(do_machine_check); | ||
1031 | |||
1032 | /* Dummy to break the dependency; the actual code is in mm/memory-failure.c. */ | ||
1033 | void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) | ||
1034 | { | ||
1035 | printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); | ||
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * Called after mce notification in process context. This code | ||
1040 | * is allowed to sleep. Call the high level VM handler to process | ||
1041 | * any corrupted pages. | ||
1042 | * Assume that the work queue code only calls this one at a time | ||
1043 | * per CPU. | ||
1044 | * Note we don't disable preemption, so this code might run on the wrong | ||
1045 | * CPU. In this case the event is picked up by the scheduled work queue. | ||
1046 | * This is merely a fast path to expedite processing in some common | ||
1047 | * cases. | ||
1048 | */ | ||
1049 | void mce_notify_process(void) | ||
1050 | { | ||
1051 | unsigned long pfn; | ||
1052 | mce_notify_irq(); | ||
1053 | while (mce_ring_get(&pfn)) | ||
1054 | memory_failure(pfn, MCE_VECTOR); | ||
1055 | } | ||
1056 | |||
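Note: mce_ring_add()/mce_ring_get(), queued to by the AO path in do_machine_check() and drained here by mce_notify_process(), are defined earlier in mce.c and not shown in this hunk. Below is a minimal sketch of what such a per-CPU PFN ring could look like; the size, field names and the single-producer (MCE context) / single-consumer (process context) assumption are illustrative, not taken from this patch, and the snippet assumes the usual mce.c includes (percpu, barriers).

    #define MCE_RING_SIZE 16	/* assumed size; one slot is kept free */

    struct mce_ring {
    	unsigned short start;			/* consumer index */
    	unsigned short end;			/* producer index */
    	unsigned long ring[MCE_RING_SIZE];	/* page frame numbers */
    };
    static DEFINE_PER_CPU(struct mce_ring, mce_ring);

    /* Producer side, runs in machine check (NMI-like) context: lockless. */
    static int mce_ring_add(unsigned long pfn)
    {
    	struct mce_ring *r = &__get_cpu_var(mce_ring);
    	unsigned next = (r->end + 1) % MCE_RING_SIZE;

    	if (next == r->start)
    		return -1;	/* ring full: the AO event is silently dropped */
    	r->ring[r->end] = pfn;
    	wmb();			/* publish the entry before moving the index */
    	r->end = next;
    	return 0;
    }

    /* Consumer side, runs in process context from mce_notify_process(). */
    static int mce_ring_get(unsigned long *pfn)
    {
    	struct mce_ring *r = &__get_cpu_var(mce_ring);

    	if (r->start == r->end)
    		return 0;
    	*pfn = r->ring[r->start];
    	r->start = (r->start + 1) % MCE_RING_SIZE;
    	return 1;
    }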
1057 | static void mce_process_work(struct work_struct *dummy) | ||
1058 | { | ||
1059 | mce_notify_process(); | ||
1060 | } | ||
1061 | |||
1062 | #ifdef CONFIG_X86_MCE_INTEL | ||
1063 | /** | ||
1064 | * mce_log_therm_throt_event - Log a thermal throttling event to mcelog | ||
1065 | * @status: Event status information (the CPU is recorded by mce_setup(), | ||
1066 | *	so there is no @cpu argument) | ||
1067 | * | ||
1068 | * This function should be called by the thermal interrupt after the | ||
1069 | * event has been processed and the decision was made to log the event | ||
1070 | * further. | ||
1071 | * | ||
1072 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
1073 | * and historically has been the register value of the | ||
1074 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | ||
1075 | */ | ||
1076 | void mce_log_therm_throt_event(__u64 status) | ||
1077 | { | ||
1078 | struct mce m; | ||
1079 | |||
1080 | mce_setup(&m); | ||
1081 | m.bank = MCE_THERMAL_BANK; | ||
1082 | m.status = status; | ||
1083 | mce_log(&m); | ||
1084 | } | ||
1085 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
1086 | |||
1087 | /* | ||
1088 | * Periodic polling timer for "silent" machine check errors. If the | ||
1089 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
1090 | * errors, poll 2x slower (up to check_interval seconds). | ||
1091 | */ | ||
1092 | static int check_interval = 5 * 60; /* 5 minutes */ | ||
1093 | |||
1094 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | ||
1095 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | ||
1096 | |||
1097 | static void mcheck_timer(unsigned long data) | ||
1098 | { | ||
1099 | struct timer_list *t = &per_cpu(mce_timer, data); | ||
1100 | int *n; | ||
1101 | |||
1102 | WARN_ON(smp_processor_id() != data); | ||
1103 | |||
1104 | if (mce_available(¤t_cpu_data)) { | ||
1105 | machine_check_poll(MCP_TIMESTAMP, | ||
1106 | &__get_cpu_var(mce_poll_banks)); | ||
1107 | } | ||
1108 | |||
1109 | /* | ||
1110 | * Alert userspace if needed. If we logged an MCE, reduce the | ||
1111 | * polling interval, otherwise increase the polling interval. | ||
1112 | */ | ||
1113 | n = &__get_cpu_var(next_interval); | ||
1114 | if (mce_notify_irq()) | ||
1115 | *n = max(*n/2, HZ/100); | ||
1116 | else | ||
1117 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | ||
1118 | |||
1119 | t->expires = jiffies + *n; | ||
1120 | add_timer(t); | ||
1121 | } | ||
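As an aside, the interval adaptation above can be illustrated with a small standalone sketch (assuming HZ=1000 and ignoring round_jiffies_relative(); the clamps mirror the max()/min() expressions in mcheck_timer(), so the values printed are not part of the patch itself).

    #include <stdio.h>

    #define HZ 1000				/* assumed tick rate for this example */
    static const int check_interval = 5 * 60;	/* seconds, as in the driver */

    /* Mirror of: *n = max(*n/2, HZ/100) on an event, else min(*n*2, ceiling). */
    static int next_interval(int n, int logged_event)
    {
    	if (logged_event)
    		return n / 2 > HZ / 100 ? n / 2 : HZ / 100;
    	return n * 2 < check_interval * HZ ? n * 2 : check_interval * HZ;
    }

    int main(void)
    {
    	int n = check_interval * HZ;	/* initial value from mce_init_timer() */
    	int i;

    	for (i = 0; i < 16; i++)	/* a burst of events halves it ... */
    		n = next_interval(n, 1);
    	printf("after a burst: %d jiffies\n", n);	/* bottoms out at HZ/100 = 10 */

    	for (i = 0; i < 20; i++)	/* ... quiet periods double it again */
    		n = next_interval(n, 0);
    	printf("after quiet:   %d jiffies\n", n);	/* back up to 300000 */
    	return 0;
    }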
1122 | |||
1123 | static void mce_do_trigger(struct work_struct *work) | ||
1124 | { | ||
1125 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); | ||
1126 | } | ||
1127 | |||
1128 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
1129 | |||
1130 | /* | ||
1131 | * Notify the user(s) about new machine check events. | ||
1132 | * Can be called from interrupt context, but not from machine check/NMI | ||
1133 | * context. | ||
1134 | */ | ||
1135 | int mce_notify_irq(void) | ||
1136 | { | ||
1137 | /* Not more than two messages every minute */ | ||
1138 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
1139 | |||
1140 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
1141 | |||
1142 | if (test_and_clear_bit(0, &mce_need_notify)) { | ||
1143 | wake_up_interruptible(&mce_wait); | ||
1144 | |||
1145 | /* | ||
1146 | * There is no risk of missing notifications because | ||
1147 | * work_pending is always cleared before the function is | ||
1148 | * executed. | ||
1149 | */ | ||
1150 | if (mce_helper[0] && !work_pending(&mce_trigger_work)) | ||
1151 | schedule_work(&mce_trigger_work); | ||
1152 | |||
1153 | if (__ratelimit(&ratelimit)) | ||
1154 | printk(KERN_INFO "Machine check events logged\n"); | ||
1155 | |||
1156 | return 1; | ||
1157 | } | ||
1158 | return 0; | ||
1159 | } | ||
1160 | EXPORT_SYMBOL_GPL(mce_notify_irq); | ||
1161 | |||
1162 | /* | ||
1163 | * Initialize Machine Checks for a CPU. | ||
1164 | */ | ||
1165 | static int mce_cap_init(void) | ||
1166 | { | ||
1167 | unsigned b; | ||
1168 | u64 cap; | ||
1169 | |||
1170 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
1171 | |||
1172 | b = cap & MCG_BANKCNT_MASK; | ||
1173 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | ||
1174 | |||
1175 | if (b > MAX_NR_BANKS) { | ||
1176 | printk(KERN_WARNING | ||
1177 | "MCE: Using only %u machine check banks out of %u\n", | ||
1178 | MAX_NR_BANKS, b); | ||
1179 | b = MAX_NR_BANKS; | ||
1180 | } | ||
1181 | |||
1182 | /* Don't support asymmetric configurations today */ | ||
1183 | WARN_ON(banks != 0 && b != banks); | ||
1184 | banks = b; | ||
1185 | if (!bank) { | ||
1186 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
1187 | if (!bank) | ||
1188 | return -ENOMEM; | ||
1189 | memset(bank, 0xff, banks * sizeof(u64)); | ||
1190 | } | ||
1191 | |||
1192 | /* Use accurate RIP reporting if available. */ | ||
1193 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) | ||
1194 | rip_msr = MSR_IA32_MCG_EIP; | ||
1195 | |||
1196 | if (cap & MCG_SER_P) | ||
1197 | mce_ser = 1; | ||
1198 | |||
1199 | return 0; | ||
1200 | } | ||
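For orientation, here is a small userspace-style sketch that decodes an IA32_MCG_CAP value the same way mce_cap_init() does. The bit positions reflect the usual architectural layout behind the MCG_* macros (bank count in bits 7:0, MCG_CTL_P at bit 8, MCG_EXT_P at bit 9, extended register count in bits 23:16, MCG_SER_P at bit 24); see asm/mce.h for the authoritative definitions. The sample value is made up.

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
    	/* Made-up example: 6 banks, MCG_CTL_P, MCG_EXT_P, 9 ext regs, SER_P. */
    	uint64_t cap = 0x0000000001090306ULL;

    	unsigned banks   = cap & 0xff;			/* MCG_BANKCNT_MASK */
    	int ctl_p        = !!(cap & (1ULL << 8));	/* MCG_CTL_P        */
    	int ext_p        = !!(cap & (1ULL << 9));	/* MCG_EXT_P        */
    	unsigned ext_cnt = (cap >> 16) & 0xff;		/* MCG_EXT_CNT(cap) */
    	int ser_p        = !!(cap & (1ULL << 24));	/* MCG_SER_P        */

    	printf("banks=%u ctl_p=%d ext_p=%d ext_cnt=%u ser_p=%d\n",
    	       banks, ctl_p, ext_p, ext_cnt, ser_p);

    	/* mce_cap_init() uses the accurate RIP MSR only when ext_p && ext_cnt >= 9,
    	 * and sets mce_ser (software error recovery) when ser_p is set. */
    	return 0;
    }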
1201 | |||
1202 | static void mce_init(void) | ||
1203 | { | ||
1204 | mce_banks_t all_banks; | ||
1205 | u64 cap; | ||
1206 | int i; | ||
1207 | |||
1208 | /* | ||
1209 | * Log the machine checks left over from the previous reset. | ||
1210 | */ | ||
1211 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
1212 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | ||
1213 | |||
1214 | set_in_cr4(X86_CR4_MCE); | ||
1215 | |||
1216 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
1217 | if (cap & MCG_CTL_P) | ||
1218 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
1219 | |||
1220 | for (i = 0; i < banks; i++) { | ||
1221 | if (skip_bank_init(i)) | ||
1222 | continue; | ||
1223 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
1224 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
1225 | } | ||
1226 | } | ||
1227 | |||
1228 | /* Add per CPU specific workarounds here */ | ||
1229 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | ||
1230 | { | ||
1231 | /* This should be disabled by the BIOS, but isn't always */ | ||
1232 | if (c->x86_vendor == X86_VENDOR_AMD) { | ||
1233 | if (c->x86 == 15 && banks > 4) { | ||
1234 | /* | ||
1235 | * disable GART TBL walk error reporting, which | ||
1236 | * trips off incorrectly with the IOMMU & 3ware | ||
1237 | * & Cerberus: | ||
1238 | */ | ||
1239 | clear_bit(10, (unsigned long *)&bank[4]); | ||
1240 | } | ||
1241 | if (c->x86 <= 17 && mce_bootlog < 0) { | ||
1242 | /* | ||
1243 | * Lots of broken BIOSes around that don't clear the banks | ||
1244 | * by default and leave stale junk in there. Don't log: | ||
1245 | */ | ||
1246 | mce_bootlog = 0; | ||
1247 | } | ||
1248 | /* | ||
1249 | * Various K7s with broken bank 0 around. Always disable | ||
1250 | * by default. | ||
1251 | */ | ||
1252 | if (c->x86 == 6 && banks > 0) | ||
1253 | bank[0] = 0; | ||
1254 | } | ||
1255 | |||
1256 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
1257 | /* | ||
1258 | * The SDM documents that on family 6 bank 0 should not be written | ||
1259 | * because it aliases to another special BIOS-controlled | ||
1260 | * register. | ||
1261 | * The aliasing is gone on model 0x1a and later, though. | ||
1262 | * Don't ignore bank 0 completely because there could still be a | ||
1263 | * valid event later; merely don't write CTL0. | ||
1264 | */ | ||
1265 | |||
1266 | if (c->x86 == 6 && c->x86_model < 0x1A) | ||
1267 | __set_bit(0, &dont_init_banks); | ||
1268 | |||
1269 | /* | ||
1270 | * All newer Intel systems support MCE broadcasting. Enable | ||
1271 | * synchronization with a one second timeout. | ||
1272 | */ | ||
1273 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && | ||
1274 | monarch_timeout < 0) | ||
1275 | monarch_timeout = USEC_PER_SEC; | ||
1276 | } | ||
1277 | if (monarch_timeout < 0) | ||
1278 | monarch_timeout = 0; | ||
1279 | if (mce_bootlog != 0) | ||
1280 | mce_panic_timeout = 30; | ||
1281 | } | ||
1282 | |||
1283 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | ||
1284 | { | ||
1285 | if (c->x86 != 5) | ||
1286 | return; | ||
1287 | switch (c->x86_vendor) { | ||
1288 | case X86_VENDOR_INTEL: | ||
1289 | intel_p5_mcheck_init(c); | ||
1290 | break; | ||
1291 | case X86_VENDOR_CENTAUR: | ||
1292 | winchip_mcheck_init(c); | ||
1293 | break; | ||
1294 | } | ||
1295 | } | ||
1296 | |||
1297 | static void mce_cpu_features(struct cpuinfo_x86 *c) | ||
1298 | { | ||
1299 | switch (c->x86_vendor) { | ||
1300 | case X86_VENDOR_INTEL: | ||
1301 | mce_intel_feature_init(c); | ||
1302 | break; | ||
1303 | case X86_VENDOR_AMD: | ||
1304 | mce_amd_feature_init(c); | ||
1305 | break; | ||
1306 | default: | ||
1307 | break; | ||
1308 | } | ||
1309 | } | ||
1310 | |||
1311 | static void mce_init_timer(void) | ||
1312 | { | ||
1313 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
1314 | int *n = &__get_cpu_var(next_interval); | ||
1315 | |||
1316 | if (mce_ignore_ce) | ||
1317 | return; | ||
1318 | |||
1319 | *n = check_interval * HZ; | ||
1320 | if (!*n) | ||
1321 | return; | ||
1322 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
1323 | t->expires = round_jiffies(jiffies + *n); | ||
1324 | add_timer(t); | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Called for each booted CPU to set up machine checks. | ||
1329 | * Must be called with preempt off: | ||
1330 | */ | ||
1331 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | ||
1332 | { | ||
1333 | if (mce_disabled) | ||
1334 | return; | ||
1335 | |||
1336 | mce_ancient_init(c); | ||
1337 | |||
1338 | if (!mce_available(c)) | ||
1339 | return; | ||
1340 | |||
1341 | if (mce_cap_init() < 0) { | ||
1342 | mce_disabled = 1; | ||
1343 | return; | ||
1344 | } | ||
1345 | mce_cpu_quirks(c); | ||
1346 | |||
1347 | machine_check_vector = do_machine_check; | ||
1348 | |||
1349 | mce_init(); | ||
1350 | mce_cpu_features(c); | ||
1351 | mce_init_timer(); | ||
1352 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | ||
1353 | } | ||
1354 | |||
1355 | /* | ||
1356 | * Character device to read and clear the MCE log. | ||
1357 | */ | ||
1358 | |||
1359 | static DEFINE_SPINLOCK(mce_state_lock); | ||
1360 | static int open_count; /* #times opened */ | ||
1361 | static int open_exclu; /* already open exclusive? */ | ||
1362 | |||
1363 | static int mce_open(struct inode *inode, struct file *file) | ||
1364 | { | ||
1365 | spin_lock(&mce_state_lock); | ||
1366 | |||
1367 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | ||
1368 | spin_unlock(&mce_state_lock); | ||
1369 | |||
1370 | return -EBUSY; | ||
1371 | } | ||
1372 | |||
1373 | if (file->f_flags & O_EXCL) | ||
1374 | open_exclu = 1; | ||
1375 | open_count++; | ||
1376 | |||
1377 | spin_unlock(&mce_state_lock); | ||
1378 | |||
1379 | return nonseekable_open(inode, file); | ||
1380 | } | ||
1381 | |||
1382 | static int mce_release(struct inode *inode, struct file *file) | ||
1383 | { | ||
1384 | spin_lock(&mce_state_lock); | ||
1385 | |||
1386 | open_count--; | ||
1387 | open_exclu = 0; | ||
1388 | |||
1389 | spin_unlock(&mce_state_lock); | ||
1390 | |||
1391 | return 0; | ||
1392 | } | ||
1393 | |||
1394 | static void collect_tscs(void *data) | ||
1395 | { | ||
1396 | unsigned long *cpu_tsc = (unsigned long *)data; | ||
1397 | |||
1398 | rdtscll(cpu_tsc[smp_processor_id()]); | ||
1399 | } | ||
1400 | |||
1401 | static DEFINE_MUTEX(mce_read_mutex); | ||
1402 | |||
1403 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | ||
1404 | loff_t *off) | ||
1405 | { | ||
1406 | char __user *buf = ubuf; | ||
1407 | unsigned long *cpu_tsc; | ||
1408 | unsigned prev, next; | ||
1409 | int i, err; | ||
1410 | |||
1411 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | ||
1412 | if (!cpu_tsc) | ||
1413 | return -ENOMEM; | ||
1414 | |||
1415 | mutex_lock(&mce_read_mutex); | ||
1416 | next = rcu_dereference(mcelog.next); | ||
1417 | |||
1418 | /* Only supports full reads right now */ | ||
1419 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
1420 | mutex_unlock(&mce_read_mutex); | ||
1421 | kfree(cpu_tsc); | ||
1422 | |||
1423 | return -EINVAL; | ||
1424 | } | ||
1425 | |||
1426 | err = 0; | ||
1427 | prev = 0; | ||
1428 | do { | ||
1429 | for (i = prev; i < next; i++) { | ||
1430 | unsigned long start = jiffies; | ||
1431 | |||
1432 | while (!mcelog.entry[i].finished) { | ||
1433 | if (time_after_eq(jiffies, start + 2)) { | ||
1434 | memset(mcelog.entry + i, 0, | ||
1435 | sizeof(struct mce)); | ||
1436 | goto timeout; | ||
1437 | } | ||
1438 | cpu_relax(); | ||
1439 | } | ||
1440 | smp_rmb(); | ||
1441 | err |= copy_to_user(buf, mcelog.entry + i, | ||
1442 | sizeof(struct mce)); | ||
1443 | buf += sizeof(struct mce); | ||
1444 | timeout: | ||
1445 | ; | ||
1446 | } | ||
1447 | |||
1448 | memset(mcelog.entry + prev, 0, | ||
1449 | (next - prev) * sizeof(struct mce)); | ||
1450 | prev = next; | ||
1451 | next = cmpxchg(&mcelog.next, prev, 0); | ||
1452 | } while (next != prev); | ||
1453 | |||
1454 | synchronize_sched(); | ||
1455 | |||
1456 | /* | ||
1457 | * Collect entries that were still getting written before the | ||
1458 | * synchronize. | ||
1459 | */ | ||
1460 | on_each_cpu(collect_tscs, cpu_tsc, 1); | ||
1461 | |||
1462 | for (i = next; i < MCE_LOG_LEN; i++) { | ||
1463 | if (mcelog.entry[i].finished && | ||
1464 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | ||
1465 | err |= copy_to_user(buf, mcelog.entry+i, | ||
1466 | sizeof(struct mce)); | ||
1467 | smp_rmb(); | ||
1468 | buf += sizeof(struct mce); | ||
1469 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | ||
1470 | } | ||
1471 | } | ||
1472 | mutex_unlock(&mce_read_mutex); | ||
1473 | kfree(cpu_tsc); | ||
1474 | |||
1475 | return err ? -EFAULT : buf - ubuf; | ||
1476 | } | ||
1477 | |||
1478 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
1479 | { | ||
1480 | poll_wait(file, &mce_wait, wait); | ||
1481 | if (rcu_dereference(mcelog.next)) | ||
1482 | return POLLIN | POLLRDNORM; | ||
1483 | return 0; | ||
1484 | } | ||
1485 | |||
1486 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | ||
1487 | { | ||
1488 | int __user *p = (int __user *)arg; | ||
1489 | |||
1490 | if (!capable(CAP_SYS_ADMIN)) | ||
1491 | return -EPERM; | ||
1492 | |||
1493 | switch (cmd) { | ||
1494 | case MCE_GET_RECORD_LEN: | ||
1495 | return put_user(sizeof(struct mce), p); | ||
1496 | case MCE_GET_LOG_LEN: | ||
1497 | return put_user(MCE_LOG_LEN, p); | ||
1498 | case MCE_GETCLEAR_FLAGS: { | ||
1499 | unsigned flags; | ||
1500 | |||
1501 | do { | ||
1502 | flags = mcelog.flags; | ||
1503 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | ||
1504 | |||
1505 | return put_user(flags, p); | ||
1506 | } | ||
1507 | default: | ||
1508 | return -ENOTTY; | ||
1509 | } | ||
1510 | } | ||
1511 | |||
1512 | /* Modified in mce-inject.c, so not static or const */ | ||
1513 | struct file_operations mce_chrdev_ops = { | ||
1514 | .open = mce_open, | ||
1515 | .release = mce_release, | ||
1516 | .read = mce_read, | ||
1517 | .poll = mce_poll, | ||
1518 | .unlocked_ioctl = mce_ioctl, | ||
1519 | }; | ||
1520 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); | ||
1521 | |||
1522 | static struct miscdevice mce_log_device = { | ||
1523 | MISC_MCELOG_MINOR, | ||
1524 | "mcelog", | ||
1525 | &mce_chrdev_ops, | ||
1526 | }; | ||
1527 | |||
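A rough illustration of how userspace talks to this device (run as root; it assumes the exported <asm/mce.h> provides MCE_GET_RECORD_LEN and MCE_GET_LOG_LEN, which mcelog(8) otherwise carries its own copies of). Note that mce_read() only accepts a full-log read at offset 0, which is why the buffer is sized recordlen * loglen.

    #include <stdio.h>
    #include <stdlib.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <asm/mce.h>

    int main(void)
    {
    	int fd = open("/dev/mcelog", O_RDONLY);
    	int recordlen, loglen;
    	char *buf;
    	ssize_t n;

    	if (fd < 0) {
    		perror("open /dev/mcelog");
    		return 1;
    	}
    	/* Both ioctls require CAP_SYS_ADMIN, see mce_ioctl() above. */
    	if (ioctl(fd, MCE_GET_RECORD_LEN, &recordlen) < 0 ||
    	    ioctl(fd, MCE_GET_LOG_LEN, &loglen) < 0) {
    		perror("ioctl");
    		return 1;
    	}

    	buf = malloc((size_t)recordlen * loglen);
    	if (!buf)
    		return 1;
    	n = read(fd, buf, (size_t)recordlen * loglen);
    	if (n < 0)
    		perror("read");
    	else
    		printf("got %zd bytes (%zd records of %d bytes)\n",
    		       n, n / recordlen, recordlen);
    	free(buf);
    	close(fd);
    	return 0;
    }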
1528 | /* | ||
1529 | * mce=off Disables machine check | ||
1530 | * mce=no_cmci Disables CMCI | ||
1531 | * mce=dont_log_ce Clears corrected events silently, no log created for CEs. | ||
1532 | * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. | ||
1533 | * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) | ||
1534 | * monarchtimeout is how long to wait for other CPUs on machine | ||
1535 | * check, or 0 to not wait | ||
1536 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | ||
1537 | * mce=nobootlog Don't log MCEs from before booting. | ||
1538 | */ | ||
1539 | static int __init mcheck_enable(char *str) | ||
1540 | { | ||
1541 | if (*str == 0) | ||
1542 | enable_p5_mce(); | ||
1543 | if (*str == '=') | ||
1544 | str++; | ||
1545 | if (!strcmp(str, "off")) | ||
1546 | mce_disabled = 1; | ||
1547 | else if (!strcmp(str, "no_cmci")) | ||
1548 | mce_cmci_disabled = 1; | ||
1549 | else if (!strcmp(str, "dont_log_ce")) | ||
1550 | mce_dont_log_ce = 1; | ||
1551 | else if (!strcmp(str, "ignore_ce")) | ||
1552 | mce_ignore_ce = 1; | ||
1553 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | ||
1554 | mce_bootlog = (str[0] == 'b'); | ||
1555 | else if (isdigit(str[0])) { | ||
1556 | get_option(&str, &tolerant); | ||
1557 | if (*str == ',') { | ||
1558 | ++str; | ||
1559 | get_option(&str, &monarch_timeout); | ||
1560 | } | ||
1561 | } else { | ||
1562 | printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", | ||
1563 | str); | ||
1564 | return 0; | ||
1565 | } | ||
1566 | return 1; | ||
1567 | } | ||
1568 | __setup("mce", mcheck_enable); | ||
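Example command lines matching the parser above (the numeric values are illustrative only):

    mce=off                  # disable machine checks entirely
    mce=no_cmci              # keep MCE, but turn off CMCI
    mce=dont_log_ce          # clear corrected errors silently
    mce=2,500000             # tolerant=2, monarch_timeout=500000 us
    mce=nobootlog            # don't log boot-time leftovers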
1569 | |||
1570 | /* | ||
1571 | * Sysfs support | ||
1572 | */ | ||
1573 | |||
1574 | /* | ||
1575 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
1576 | * them later. | ||
1577 | */ | ||
1578 | static int mce_disable(void) | ||
1579 | { | ||
1580 | int i; | ||
1581 | |||
1582 | for (i = 0; i < banks; i++) { | ||
1583 | if (!skip_bank_init(i)) | ||
1584 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
1585 | } | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1589 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
1590 | { | ||
1591 | return mce_disable(); | ||
1592 | } | ||
1593 | |||
1594 | static int mce_shutdown(struct sys_device *dev) | ||
1595 | { | ||
1596 | return mce_disable(); | ||
1597 | } | ||
1598 | |||
1599 | /* | ||
1600 | * On resume clear all MCE state. Don't want to see leftovers from the BIOS. | ||
1601 | * Only one CPU is active at this time, the others get re-added later using | ||
1602 | * CPU hotplug: | ||
1603 | */ | ||
1604 | static int mce_resume(struct sys_device *dev) | ||
1605 | { | ||
1606 | mce_init(); | ||
1607 | mce_cpu_features(¤t_cpu_data); | ||
1608 | |||
1609 | return 0; | ||
1610 | } | ||
1611 | |||
1612 | static void mce_cpu_restart(void *data) | ||
1613 | { | ||
1614 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1615 | if (!mce_available(¤t_cpu_data)) | ||
1616 | return; | ||
1617 | mce_init(); | ||
1618 | mce_init_timer(); | ||
1619 | } | ||
1620 | |||
1621 | /* Reinit MCEs after user configuration changes */ | ||
1622 | static void mce_restart(void) | ||
1623 | { | ||
1624 | on_each_cpu(mce_cpu_restart, NULL, 1); | ||
1625 | } | ||
1626 | |||
1627 | /* Toggle features for corrected errors */ | ||
1628 | static void mce_disable_ce(void *all) | ||
1629 | { | ||
1630 | if (!mce_available(¤t_cpu_data)) | ||
1631 | return; | ||
1632 | if (all) | ||
1633 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1634 | cmci_clear(); | ||
1635 | } | ||
1636 | |||
1637 | static void mce_enable_ce(void *all) | ||
1638 | { | ||
1639 | if (!mce_available(¤t_cpu_data)) | ||
1640 | return; | ||
1641 | cmci_reenable(); | ||
1642 | cmci_recheck(); | ||
1643 | if (all) | ||
1644 | mce_init_timer(); | ||
1645 | } | ||
1646 | |||
1647 | static struct sysdev_class mce_sysclass = { | ||
1648 | .suspend = mce_suspend, | ||
1649 | .shutdown = mce_shutdown, | ||
1650 | .resume = mce_resume, | ||
1651 | .name = "machinecheck", | ||
1652 | }; | ||
1653 | |||
1654 | DEFINE_PER_CPU(struct sys_device, mce_dev); | ||
1655 | |||
1656 | __cpuinitdata | ||
1657 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
1658 | |||
1659 | static struct sysdev_attribute *bank_attrs; | ||
1660 | |||
1661 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
1662 | char *buf) | ||
1663 | { | ||
1664 | u64 b = bank[attr - bank_attrs]; | ||
1665 | |||
1666 | return sprintf(buf, "%llx\n", b); | ||
1667 | } | ||
1668 | |||
1669 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
1670 | const char *buf, size_t size) | ||
1671 | { | ||
1672 | u64 new; | ||
1673 | |||
1674 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1675 | return -EINVAL; | ||
1676 | |||
1677 | bank[attr - bank_attrs] = new; | ||
1678 | mce_restart(); | ||
1679 | |||
1680 | return size; | ||
1681 | } | ||
1682 | |||
1683 | static ssize_t | ||
1684 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | ||
1685 | { | ||
1686 | strcpy(buf, mce_helper); | ||
1687 | strcat(buf, "\n"); | ||
1688 | return strlen(mce_helper) + 1; | ||
1689 | } | ||
1690 | |||
1691 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
1692 | const char *buf, size_t siz) | ||
1693 | { | ||
1694 | char *p; | ||
1695 | int len; | ||
1696 | |||
1697 | strncpy(mce_helper, buf, sizeof(mce_helper)); | ||
1698 | mce_helper[sizeof(mce_helper)-1] = 0; | ||
1699 | len = strlen(mce_helper); | ||
1700 | p = strchr(mce_helper, '\n'); | ||
1701 | |||
1702 | if (p) | ||
1703 | *p = 0; | ||
1704 | |||
1705 | return len; | ||
1706 | } | ||
1707 | |||
1708 | static ssize_t set_ignore_ce(struct sys_device *s, | ||
1709 | struct sysdev_attribute *attr, | ||
1710 | const char *buf, size_t size) | ||
1711 | { | ||
1712 | u64 new; | ||
1713 | |||
1714 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1715 | return -EINVAL; | ||
1716 | |||
1717 | if (mce_ignore_ce ^ !!new) { | ||
1718 | if (new) { | ||
1719 | /* disable ce features */ | ||
1720 | on_each_cpu(mce_disable_ce, (void *)1, 1); | ||
1721 | mce_ignore_ce = 1; | ||
1722 | } else { | ||
1723 | /* enable ce features */ | ||
1724 | mce_ignore_ce = 0; | ||
1725 | on_each_cpu(mce_enable_ce, (void *)1, 1); | ||
1726 | } | ||
1727 | } | ||
1728 | return size; | ||
1729 | } | ||
1730 | |||
1731 | static ssize_t set_cmci_disabled(struct sys_device *s, | ||
1732 | struct sysdev_attribute *attr, | ||
1733 | const char *buf, size_t size) | ||
1734 | { | ||
1735 | u64 new; | ||
1736 | |||
1737 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1738 | return -EINVAL; | ||
1739 | |||
1740 | if (mce_cmci_disabled ^ !!new) { | ||
1741 | if (new) { | ||
1742 | /* disable cmci */ | ||
1743 | on_each_cpu(mce_disable_ce, NULL, 1); | ||
1744 | mce_cmci_disabled = 1; | ||
1745 | } else { | ||
1746 | /* enable cmci */ | ||
1747 | mce_cmci_disabled = 0; | ||
1748 | on_each_cpu(mce_enable_ce, NULL, 1); | ||
1749 | } | ||
1750 | } | ||
1751 | return size; | ||
1752 | } | ||
1753 | |||
1754 | static ssize_t store_int_with_restart(struct sys_device *s, | ||
1755 | struct sysdev_attribute *attr, | ||
1756 | const char *buf, size_t size) | ||
1757 | { | ||
1758 | ssize_t ret = sysdev_store_int(s, attr, buf, size); | ||
1759 | mce_restart(); | ||
1760 | return ret; | ||
1761 | } | ||
1762 | |||
1763 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
1764 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | ||
1765 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | ||
1766 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | ||
1767 | |||
1768 | static struct sysdev_ext_attribute attr_check_interval = { | ||
1769 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | ||
1770 | store_int_with_restart), | ||
1771 | &check_interval | ||
1772 | }; | ||
1773 | |||
1774 | static struct sysdev_ext_attribute attr_ignore_ce = { | ||
1775 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | ||
1776 | &mce_ignore_ce | ||
1777 | }; | ||
1778 | |||
1779 | static struct sysdev_ext_attribute attr_cmci_disabled = { | ||
1780 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | ||
1781 | &mce_cmci_disabled | ||
1782 | }; | ||
1783 | |||
1784 | static struct sysdev_attribute *mce_attrs[] = { | ||
1785 | &attr_tolerant.attr, | ||
1786 | &attr_check_interval.attr, | ||
1787 | &attr_trigger, | ||
1788 | &attr_monarch_timeout.attr, | ||
1789 | &attr_dont_log_ce.attr, | ||
1790 | &attr_ignore_ce.attr, | ||
1791 | &attr_cmci_disabled.attr, | ||
1792 | NULL | ||
1793 | }; | ||
1794 | |||
1795 | static cpumask_var_t mce_dev_initialized; | ||
1796 | |||
1797 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | ||
1798 | static __cpuinit int mce_create_device(unsigned int cpu) | ||
1799 | { | ||
1800 | int err; | ||
1801 | int i, j; | ||
1802 | |||
1803 | if (!mce_available(&boot_cpu_data)) | ||
1804 | return -EIO; | ||
1805 | |||
1806 | memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); | ||
1807 | per_cpu(mce_dev, cpu).id = cpu; | ||
1808 | per_cpu(mce_dev, cpu).cls = &mce_sysclass; | ||
1809 | |||
1810 | err = sysdev_register(&per_cpu(mce_dev, cpu)); | ||
1811 | if (err) | ||
1812 | return err; | ||
1813 | |||
1814 | for (i = 0; mce_attrs[i]; i++) { | ||
1815 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
1816 | if (err) | ||
1817 | goto error; | ||
1818 | } | ||
1819 | for (j = 0; j < banks; j++) { | ||
1820 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | ||
1821 | &bank_attrs[j]); | ||
1822 | if (err) | ||
1823 | goto error2; | ||
1824 | } | ||
1825 | cpumask_set_cpu(cpu, mce_dev_initialized); | ||
1826 | |||
1827 | return 0; | ||
1828 | error2: | ||
1829 | while (--j >= 0) | ||
1830 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); | ||
1831 | error: | ||
1832 | while (--i >= 0) | ||
1833 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
1834 | |||
1835 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | ||
1836 | |||
1837 | return err; | ||
1838 | } | ||
1839 | |||
1840 | static __cpuinit void mce_remove_device(unsigned int cpu) | ||
1841 | { | ||
1842 | int i; | ||
1843 | |||
1844 | if (!cpumask_test_cpu(cpu, mce_dev_initialized)) | ||
1845 | return; | ||
1846 | |||
1847 | for (i = 0; mce_attrs[i]; i++) | ||
1848 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
1849 | |||
1850 | for (i = 0; i < banks; i++) | ||
1851 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | ||
1852 | |||
1853 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | ||
1854 | cpumask_clear_cpu(cpu, mce_dev_initialized); | ||
1855 | } | ||
1856 | |||
1857 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
1858 | static void mce_disable_cpu(void *h) | ||
1859 | { | ||
1860 | unsigned long action = *(unsigned long *)h; | ||
1861 | int i; | ||
1862 | |||
1863 | if (!mce_available(¤t_cpu_data)) | ||
1864 | return; | ||
1865 | if (!(action & CPU_TASKS_FROZEN)) | ||
1866 | cmci_clear(); | ||
1867 | for (i = 0; i < banks; i++) { | ||
1868 | if (!skip_bank_init(i)) | ||
1869 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
1870 | } | ||
1871 | } | ||
1872 | |||
1873 | static void mce_reenable_cpu(void *h) | ||
1874 | { | ||
1875 | unsigned long action = *(unsigned long *)h; | ||
1876 | int i; | ||
1877 | |||
1878 | if (!mce_available(¤t_cpu_data)) | ||
1879 | return; | ||
1880 | |||
1881 | if (!(action & CPU_TASKS_FROZEN)) | ||
1882 | cmci_reenable(); | ||
1883 | for (i = 0; i < banks; i++) { | ||
1884 | if (!skip_bank_init(i)) | ||
1885 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
1886 | } | ||
1887 | } | ||
1888 | |||
1889 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
1890 | static int __cpuinit | ||
1891 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
1892 | { | ||
1893 | unsigned int cpu = (unsigned long)hcpu; | ||
1894 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
1895 | |||
1896 | switch (action) { | ||
1897 | case CPU_ONLINE: | ||
1898 | case CPU_ONLINE_FROZEN: | ||
1899 | mce_create_device(cpu); | ||
1900 | if (threshold_cpu_callback) | ||
1901 | threshold_cpu_callback(action, cpu); | ||
1902 | break; | ||
1903 | case CPU_DEAD: | ||
1904 | case CPU_DEAD_FROZEN: | ||
1905 | if (threshold_cpu_callback) | ||
1906 | threshold_cpu_callback(action, cpu); | ||
1907 | mce_remove_device(cpu); | ||
1908 | break; | ||
1909 | case CPU_DOWN_PREPARE: | ||
1910 | case CPU_DOWN_PREPARE_FROZEN: | ||
1911 | del_timer_sync(t); | ||
1912 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
1913 | break; | ||
1914 | case CPU_DOWN_FAILED: | ||
1915 | case CPU_DOWN_FAILED_FROZEN: | ||
1916 | t->expires = round_jiffies(jiffies + | ||
1917 | __get_cpu_var(next_interval)); | ||
1918 | add_timer_on(t, cpu); | ||
1919 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
1920 | break; | ||
1921 | case CPU_POST_DEAD: | ||
1922 | /* intentionally ignoring frozen here */ | ||
1923 | cmci_rediscover(cpu); | ||
1924 | break; | ||
1925 | } | ||
1926 | return NOTIFY_OK; | ||
1927 | } | ||
1928 | |||
1929 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | ||
1930 | .notifier_call = mce_cpu_callback, | ||
1931 | }; | ||
1932 | |||
1933 | static __init int mce_init_banks(void) | ||
1934 | { | ||
1935 | int i; | ||
1936 | |||
1937 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1938 | GFP_KERNEL); | ||
1939 | if (!bank_attrs) | ||
1940 | return -ENOMEM; | ||
1941 | |||
1942 | for (i = 0; i < banks; i++) { | ||
1943 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
1944 | |||
1945 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
1946 | if (!a->attr.name) | ||
1947 | goto nomem; | ||
1948 | |||
1949 | a->attr.mode = 0644; | ||
1950 | a->show = show_bank; | ||
1951 | a->store = set_bank; | ||
1952 | } | ||
1953 | return 0; | ||
1954 | |||
1955 | nomem: | ||
1956 | while (--i >= 0) | ||
1957 | kfree(bank_attrs[i].attr.name); | ||
1958 | kfree(bank_attrs); | ||
1959 | bank_attrs = NULL; | ||
1960 | |||
1961 | return -ENOMEM; | ||
1962 | } | ||
1963 | |||
1964 | static __init int mce_init_device(void) | ||
1965 | { | ||
1966 | int err; | ||
1967 | int i = 0; | ||
1968 | |||
1969 | if (!mce_available(&boot_cpu_data)) | ||
1970 | return -EIO; | ||
1971 | |||
1972 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | ||
1973 | |||
1974 | err = mce_init_banks(); | ||
1975 | if (err) | ||
1976 | return err; | ||
1977 | |||
1978 | err = sysdev_class_register(&mce_sysclass); | ||
1979 | if (err) | ||
1980 | return err; | ||
1981 | |||
1982 | for_each_online_cpu(i) { | ||
1983 | err = mce_create_device(i); | ||
1984 | if (err) | ||
1985 | return err; | ||
1986 | } | ||
1987 | |||
1988 | register_hotcpu_notifier(&mce_cpu_notifier); | ||
1989 | misc_register(&mce_log_device); | ||
1990 | |||
1991 | return err; | ||
1992 | } | ||
1993 | |||
1994 | device_initcall(mce_init_device); | ||
1995 | |||
1996 | #else /* CONFIG_X86_OLD_MCE: */ | ||
1997 | |||
1998 | int nr_mce_banks; | ||
1999 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
2000 | |||
2001 | /* This has to be run for each processor */ | ||
2002 | void mcheck_init(struct cpuinfo_x86 *c) | ||
2003 | { | ||
2004 | if (mce_disabled) | ||
2005 | return; | ||
2006 | |||
2007 | switch (c->x86_vendor) { | ||
2008 | case X86_VENDOR_AMD: | ||
2009 | amd_mcheck_init(c); | ||
2010 | break; | ||
2011 | |||
2012 | case X86_VENDOR_INTEL: | ||
2013 | if (c->x86 == 5) | ||
2014 | intel_p5_mcheck_init(c); | ||
2015 | if (c->x86 == 6) | ||
2016 | intel_p6_mcheck_init(c); | ||
2017 | if (c->x86 == 15) | ||
2018 | intel_p4_mcheck_init(c); | ||
2019 | break; | ||
2020 | |||
2021 | case X86_VENDOR_CENTAUR: | ||
2022 | if (c->x86 == 5) | ||
2023 | winchip_mcheck_init(c); | ||
2024 | break; | ||
2025 | |||
2026 | default: | ||
2027 | break; | ||
2028 | } | ||
2029 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); | ||
2030 | } | ||
2031 | |||
2032 | static int __init mcheck_enable(char *str) | ||
2033 | { | ||
2034 | mce_p5_enabled = 1; | ||
2035 | return 1; | ||
2036 | } | ||
2037 | __setup("mce", mcheck_enable); | ||
2038 | |||
2039 | #endif /* CONFIG_X86_OLD_MCE */ | ||
2040 | |||
2041 | /* | ||
2042 | * Old style boot options parsing. Only for compatibility. | ||
2043 | */ | ||
2044 | static int __init mcheck_disable(char *str) | ||
2045 | { | ||
2046 | mce_disabled = 1; | ||
2047 | return 1; | ||
2048 | } | ||
2049 | __setup("nomce", mcheck_disable); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index ae9f628838f1..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/mce.h> | ||
3 | |||
4 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
9 | |||
10 | /* Call the installed machine check handler for this CPU setup. */ | ||
11 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
12 | |||
13 | extern int nr_mce_banks; | ||
14 | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c deleted file mode 100644 index 3552119b091d..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ /dev/null | |||
@@ -1,76 +0,0 @@ | |||
1 | /* | ||
2 | * mce.c - x86 Machine Check Exception Reporting | ||
3 | * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/smp.h> | ||
11 | #include <linux/thread_info.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/mce.h> | ||
16 | |||
17 | #include "mce.h" | ||
18 | |||
19 | int mce_disabled; | ||
20 | int nr_mce_banks; | ||
21 | |||
22 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
23 | |||
24 | /* Handle unconfigured int18 (should never happen) */ | ||
25 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
26 | { | ||
27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); | ||
28 | } | ||
29 | |||
30 | /* Call the installed machine check handler for this CPU setup. */ | ||
31 | void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; | ||
32 | |||
33 | /* This has to be run for each processor */ | ||
34 | void mcheck_init(struct cpuinfo_x86 *c) | ||
35 | { | ||
36 | if (mce_disabled == 1) | ||
37 | return; | ||
38 | |||
39 | switch (c->x86_vendor) { | ||
40 | case X86_VENDOR_AMD: | ||
41 | amd_mcheck_init(c); | ||
42 | break; | ||
43 | |||
44 | case X86_VENDOR_INTEL: | ||
45 | if (c->x86 == 5) | ||
46 | intel_p5_mcheck_init(c); | ||
47 | if (c->x86 == 6) | ||
48 | intel_p6_mcheck_init(c); | ||
49 | if (c->x86 == 15) | ||
50 | intel_p4_mcheck_init(c); | ||
51 | break; | ||
52 | |||
53 | case X86_VENDOR_CENTAUR: | ||
54 | if (c->x86 == 5) | ||
55 | winchip_mcheck_init(c); | ||
56 | break; | ||
57 | |||
58 | default: | ||
59 | break; | ||
60 | } | ||
61 | } | ||
62 | |||
63 | static int __init mcheck_disable(char *str) | ||
64 | { | ||
65 | mce_disabled = 1; | ||
66 | return 1; | ||
67 | } | ||
68 | |||
69 | static int __init mcheck_enable(char *str) | ||
70 | { | ||
71 | mce_disabled = -1; | ||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | __setup("nomce", mcheck_disable); | ||
76 | __setup("mce", mcheck_enable); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c deleted file mode 100644 index 09dd1d414fc3..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ /dev/null | |||
@@ -1,1187 +0,0 @@ | |||
1 | /* | ||
2 | * Machine check handler. | ||
3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
4 | * Rest from unknown author(s). | ||
5 | * 2004 Andi Kleen. Rewrote most of it. | ||
6 | * Copyright 2008 Intel Corporation | ||
7 | * Author: Andi Kleen | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/rcupdate.h> | ||
17 | #include <linux/kallsyms.h> | ||
18 | #include <linux/sysdev.h> | ||
19 | #include <linux/miscdevice.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/capability.h> | ||
22 | #include <linux/cpu.h> | ||
23 | #include <linux/percpu.h> | ||
24 | #include <linux/poll.h> | ||
25 | #include <linux/thread_info.h> | ||
26 | #include <linux/ctype.h> | ||
27 | #include <linux/kmod.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/kobject.h> | ||
30 | #include <linux/sysfs.h> | ||
31 | #include <linux/ratelimit.h> | ||
32 | #include <asm/processor.h> | ||
33 | #include <asm/msr.h> | ||
34 | #include <asm/mce.h> | ||
35 | #include <asm/uaccess.h> | ||
36 | #include <asm/smp.h> | ||
37 | #include <asm/idle.h> | ||
38 | |||
39 | #define MISC_MCELOG_MINOR 227 | ||
40 | |||
41 | atomic_t mce_entry; | ||
42 | |||
43 | static int mce_dont_init; | ||
44 | |||
45 | /* | ||
46 | * Tolerant levels: | ||
47 | * 0: always panic on uncorrected errors, log corrected errors | ||
48 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | ||
49 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | ||
50 | * 3: never panic or SIGBUS, log all errors (for testing only) | ||
51 | */ | ||
52 | static int tolerant = 1; | ||
53 | static int banks; | ||
54 | static u64 *bank; | ||
55 | static unsigned long notify_user; | ||
56 | static int rip_msr; | ||
57 | static int mce_bootlog = -1; | ||
58 | static atomic_t mce_events; | ||
59 | |||
60 | static char trigger[128]; | ||
61 | static char *trigger_argv[2] = { trigger, NULL }; | ||
62 | |||
63 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
64 | |||
65 | /* MCA banks polled by the period polling timer for corrected events */ | ||
66 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
67 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
68 | }; | ||
69 | |||
70 | /* Do initial initialization of a struct mce */ | ||
71 | void mce_setup(struct mce *m) | ||
72 | { | ||
73 | memset(m, 0, sizeof(struct mce)); | ||
74 | m->cpu = smp_processor_id(); | ||
75 | rdtscll(m->tsc); | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * Lockless MCE logging infrastructure. | ||
80 | * This avoids deadlocks on printk locks without having to break locks. Also | ||
81 | * separate MCEs from kernel messages to avoid bogus bug reports. | ||
82 | */ | ||
83 | |||
84 | static struct mce_log mcelog = { | ||
85 | MCE_LOG_SIGNATURE, | ||
86 | MCE_LOG_LEN, | ||
87 | }; | ||
88 | |||
89 | void mce_log(struct mce *mce) | ||
90 | { | ||
91 | unsigned next, entry; | ||
92 | atomic_inc(&mce_events); | ||
93 | mce->finished = 0; | ||
94 | wmb(); | ||
95 | for (;;) { | ||
96 | entry = rcu_dereference(mcelog.next); | ||
97 | for (;;) { | ||
98 | /* When the buffer fills up discard new entries. Assume | ||
99 | that the earlier errors are the more interesting. */ | ||
100 | if (entry >= MCE_LOG_LEN) { | ||
101 | set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); | ||
102 | return; | ||
103 | } | ||
104 | /* Old left over entry. Skip. */ | ||
105 | if (mcelog.entry[entry].finished) { | ||
106 | entry++; | ||
107 | continue; | ||
108 | } | ||
109 | break; | ||
110 | } | ||
111 | smp_rmb(); | ||
112 | next = entry + 1; | ||
113 | if (cmpxchg(&mcelog.next, entry, next) == entry) | ||
114 | break; | ||
115 | } | ||
116 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | ||
117 | wmb(); | ||
118 | mcelog.entry[entry].finished = 1; | ||
119 | wmb(); | ||
120 | |||
121 | set_bit(0, ¬ify_user); | ||
122 | } | ||
123 | |||
124 | static void print_mce(struct mce *m) | ||
125 | { | ||
126 | printk(KERN_EMERG "\n" | ||
127 | KERN_EMERG "HARDWARE ERROR\n" | ||
128 | KERN_EMERG | ||
129 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
130 | m->cpu, m->mcgstatus, m->bank, m->status); | ||
131 | if (m->ip) { | ||
132 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | ||
133 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | ||
134 | m->cs, m->ip); | ||
135 | if (m->cs == __KERNEL_CS) | ||
136 | print_symbol("{%s}", m->ip); | ||
137 | printk("\n"); | ||
138 | } | ||
139 | printk(KERN_EMERG "TSC %llx ", m->tsc); | ||
140 | if (m->addr) | ||
141 | printk("ADDR %llx ", m->addr); | ||
142 | if (m->misc) | ||
143 | printk("MISC %llx ", m->misc); | ||
144 | printk("\n"); | ||
145 | printk(KERN_EMERG "This is not a software problem!\n"); | ||
146 | printk(KERN_EMERG "Run through mcelog --ascii to decode " | ||
147 | "and contact your hardware vendor\n"); | ||
148 | } | ||
149 | |||
150 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | ||
151 | { | ||
152 | int i; | ||
153 | |||
154 | oops_begin(); | ||
155 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
156 | unsigned long tsc = mcelog.entry[i].tsc; | ||
157 | |||
158 | if (time_before(tsc, start)) | ||
159 | continue; | ||
160 | print_mce(&mcelog.entry[i]); | ||
161 | if (backup && mcelog.entry[i].tsc == backup->tsc) | ||
162 | backup = NULL; | ||
163 | } | ||
164 | if (backup) | ||
165 | print_mce(backup); | ||
166 | panic(msg); | ||
167 | } | ||
168 | |||
169 | int mce_available(struct cpuinfo_x86 *c) | ||
170 | { | ||
171 | if (mce_dont_init) | ||
172 | return 0; | ||
173 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | ||
174 | } | ||
175 | |||
176 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
177 | { | ||
178 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { | ||
179 | m->ip = regs->ip; | ||
180 | m->cs = regs->cs; | ||
181 | } else { | ||
182 | m->ip = 0; | ||
183 | m->cs = 0; | ||
184 | } | ||
185 | if (rip_msr) { | ||
186 | /* Assume the RIP in the MSR is exact. Is this true? */ | ||
187 | m->mcgstatus |= MCG_STATUS_EIPV; | ||
188 | rdmsrl(rip_msr, m->ip); | ||
189 | m->cs = 0; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Poll for corrected events or events that happened before reset. | ||
195 | * Those are just logged through /dev/mcelog. | ||
196 | * | ||
197 | * This is executed in standard interrupt context. | ||
198 | */ | ||
199 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
200 | { | ||
201 | struct mce m; | ||
202 | int i; | ||
203 | |||
204 | mce_setup(&m); | ||
205 | |||
206 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
207 | for (i = 0; i < banks; i++) { | ||
208 | if (!bank[i] || !test_bit(i, *b)) | ||
209 | continue; | ||
210 | |||
211 | m.misc = 0; | ||
212 | m.addr = 0; | ||
213 | m.bank = i; | ||
214 | m.tsc = 0; | ||
215 | |||
216 | barrier(); | ||
217 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
218 | if (!(m.status & MCI_STATUS_VAL)) | ||
219 | continue; | ||
220 | |||
221 | /* | ||
222 | * Uncorrected events are handled by the exception handler | ||
223 | * when it is enabled. But when the exception is disabled log | ||
224 | * everything. | ||
225 | * | ||
226 | * TBD do the same check for MCI_STATUS_EN here? | ||
227 | */ | ||
228 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) | ||
229 | continue; | ||
230 | |||
231 | if (m.status & MCI_STATUS_MISCV) | ||
232 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
233 | if (m.status & MCI_STATUS_ADDRV) | ||
234 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
235 | |||
236 | if (!(flags & MCP_TIMESTAMP)) | ||
237 | m.tsc = 0; | ||
238 | /* | ||
239 | * Don't get the IP here because it's unlikely to | ||
240 | * have anything to do with the actual error location. | ||
241 | */ | ||
242 | if (!(flags & MCP_DONTLOG)) { | ||
243 | mce_log(&m); | ||
244 | add_taint(TAINT_MACHINE_CHECK); | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * Clear state for this bank. | ||
249 | */ | ||
250 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * Don't clear MCG_STATUS here because it's only defined for | ||
255 | * exceptions. | ||
256 | */ | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * The actual machine check handler. This only handles real | ||
261 | * exceptions when something got corrupted coming in through int 18. | ||
262 | * | ||
263 | * This is executed in NMI context not subject to normal locking rules. This | ||
264 | * implies that most kernel services cannot be safely used. Don't even | ||
265 | * think about putting a printk in there! | ||
266 | */ | ||
267 | void do_machine_check(struct pt_regs * regs, long error_code) | ||
268 | { | ||
269 | struct mce m, panicm; | ||
270 | u64 mcestart = 0; | ||
271 | int i; | ||
272 | int panicm_found = 0; | ||
273 | /* | ||
274 | * If no_way_out gets set, there is no safe way to recover from this | ||
275 | * MCE. If tolerant is cranked up, we'll try anyway. | ||
276 | */ | ||
277 | int no_way_out = 0; | ||
278 | /* | ||
279 | * If kill_it gets set, there might be a way to recover from this | ||
280 | * error. | ||
281 | */ | ||
282 | int kill_it = 0; | ||
283 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
284 | |||
285 | atomic_inc(&mce_entry); | ||
286 | |||
287 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
288 | 18, SIGKILL) == NOTIFY_STOP) | ||
289 | goto out2; | ||
290 | if (!banks) | ||
291 | goto out2; | ||
292 | |||
293 | mce_setup(&m); | ||
294 | |||
295 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
296 | /* if the restart IP is not valid, we're done for */ | ||
297 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
298 | no_way_out = 1; | ||
299 | |||
300 | rdtscll(mcestart); | ||
301 | barrier(); | ||
302 | |||
303 | for (i = 0; i < banks; i++) { | ||
304 | __clear_bit(i, toclear); | ||
305 | if (!bank[i]) | ||
306 | continue; | ||
307 | |||
308 | m.misc = 0; | ||
309 | m.addr = 0; | ||
310 | m.bank = i; | ||
311 | |||
312 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
313 | if ((m.status & MCI_STATUS_VAL) == 0) | ||
314 | continue; | ||
315 | |||
316 | /* | ||
317 | * Non uncorrected errors are handled by machine_check_poll | ||
318 | * Leave them alone. | ||
319 | */ | ||
320 | if ((m.status & MCI_STATUS_UC) == 0) | ||
321 | continue; | ||
322 | |||
323 | /* | ||
324 | * Set taint even when machine check was not enabled. | ||
325 | */ | ||
326 | add_taint(TAINT_MACHINE_CHECK); | ||
327 | |||
328 | __set_bit(i, toclear); | ||
329 | |||
330 | if (m.status & MCI_STATUS_EN) { | ||
331 | /* if PCC was set, there's no way out */ | ||
332 | no_way_out |= !!(m.status & MCI_STATUS_PCC); | ||
333 | /* | ||
334 | * If this error was uncorrectable and there was | ||
335 | * an overflow, we're in trouble. If no overflow, | ||
336 | * we might get away with just killing a task. | ||
337 | */ | ||
338 | if (m.status & MCI_STATUS_UC) { | ||
339 | if (tolerant < 1 || m.status & MCI_STATUS_OVER) | ||
340 | no_way_out = 1; | ||
341 | kill_it = 1; | ||
342 | } | ||
343 | } else { | ||
344 | /* | ||
345 | * Machine check event was not enabled. Clear, but | ||
346 | * ignore. | ||
347 | */ | ||
348 | continue; | ||
349 | } | ||
350 | |||
351 | if (m.status & MCI_STATUS_MISCV) | ||
352 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
353 | if (m.status & MCI_STATUS_ADDRV) | ||
354 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
355 | |||
356 | mce_get_rip(&m, regs); | ||
357 | mce_log(&m); | ||
358 | |||
359 | /* Did this bank cause the exception? */ | ||
360 | /* Assume that the bank with uncorrectable errors did it, | ||
361 | and that there is only a single one. */ | ||
362 | if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { | ||
363 | panicm = m; | ||
364 | panicm_found = 1; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | /* If we didn't find an uncorrectable error, pick | ||
369 | the last one (shouldn't happen, just being safe). */ | ||
370 | if (!panicm_found) | ||
371 | panicm = m; | ||
372 | |||
373 | /* | ||
374 | * If we have decided that we just CAN'T continue, and the user | ||
375 | * has not set tolerant to an insane level, give up and die. | ||
376 | */ | ||
377 | if (no_way_out && tolerant < 3) | ||
378 | mce_panic("Machine check", &panicm, mcestart); | ||
379 | |||
380 | /* | ||
381 | * If the error seems to be unrecoverable, something should be | ||
382 | * done. Try to kill as little as possible. If we can kill just | ||
383 | * one task, do that. If the user has set the tolerance very | ||
384 | * high, don't try to do anything at all. | ||
385 | */ | ||
386 | if (kill_it && tolerant < 3) { | ||
387 | int user_space = 0; | ||
388 | |||
389 | /* | ||
390 | * If the EIPV bit is set, it means the saved IP is the | ||
391 | * instruction which caused the MCE. | ||
392 | */ | ||
393 | if (m.mcgstatus & MCG_STATUS_EIPV) | ||
394 | user_space = panicm.ip && (panicm.cs & 3); | ||
395 | |||
396 | /* | ||
397 | * If we know that the error was in user space, send a | ||
398 | * SIGBUS. Otherwise, panic if tolerance is low. | ||
399 | * | ||
400 | * force_sig() takes an awful lot of locks and has a slight | ||
401 | * risk of deadlocking. | ||
402 | */ | ||
403 | if (user_space) { | ||
404 | force_sig(SIGBUS, current); | ||
405 | } else if (panic_on_oops || tolerant < 2) { | ||
406 | mce_panic("Uncorrected machine check", | ||
407 | &panicm, mcestart); | ||
408 | } | ||
409 | } | ||
410 | |||
411 | /* notify userspace ASAP */ | ||
412 | set_thread_flag(TIF_MCE_NOTIFY); | ||
413 | |||
414 | /* the last thing we do is clear state */ | ||
415 | for (i = 0; i < banks; i++) { | ||
416 | if (test_bit(i, toclear)) | ||
417 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
418 | } | ||
419 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | ||
420 | out2: | ||
421 | atomic_dec(&mce_entry); | ||
422 | } | ||
423 | |||
424 | #ifdef CONFIG_X86_MCE_INTEL | ||
425 | /*** | ||
426 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | ||
427 | * @cpu: The CPU on which the event occurred. | ||
428 | * @status: Event status information | ||
429 | * | ||
430 | * This function should be called by the thermal interrupt after the | ||
431 | * event has been processed and the decision was made to log the event | ||
432 | * further. | ||
433 | * | ||
434 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
435 | * and historically has been the register value of the | ||
436 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | ||
437 | */ | ||
438 | void mce_log_therm_throt_event(__u64 status) | ||
439 | { | ||
440 | struct mce m; | ||
441 | |||
442 | mce_setup(&m); | ||
443 | m.bank = MCE_THERMAL_BANK; | ||
444 | m.status = status; | ||
445 | mce_log(&m); | ||
446 | } | ||
447 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
448 | |||
449 | /* | ||
450 | * Periodic polling timer for "silent" machine check errors. If the | ||
451 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
452 | * errors, poll 2x slower (up to check_interval seconds). | ||
453 | */ | ||
454 | |||
455 | static int check_interval = 5 * 60; /* 5 minutes */ | ||
456 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | ||
457 | static void mcheck_timer(unsigned long); | ||
458 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | ||
459 | |||
460 | static void mcheck_timer(unsigned long data) | ||
461 | { | ||
462 | struct timer_list *t = &per_cpu(mce_timer, data); | ||
463 | int *n; | ||
464 | |||
465 | WARN_ON(smp_processor_id() != data); | ||
466 | |||
467 | if (mce_available(¤t_cpu_data)) | ||
468 | machine_check_poll(MCP_TIMESTAMP, | ||
469 | &__get_cpu_var(mce_poll_banks)); | ||
470 | |||
471 | /* | ||
472 | * Alert userspace if needed. If we logged an MCE, reduce the | ||
473 | * polling interval, otherwise increase the polling interval. | ||
474 | */ | ||
475 | n = &__get_cpu_var(next_interval); | ||
476 | if (mce_notify_user()) { | ||
477 | *n = max(*n/2, HZ/100); | ||
478 | } else { | ||
479 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | ||
480 | } | ||
481 | |||
482 | t->expires = jiffies + *n; | ||
483 | add_timer(t); | ||
484 | } | ||
485 | |||
486 | static void mce_do_trigger(struct work_struct *work) | ||
487 | { | ||
488 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | ||
489 | } | ||
490 | |||
491 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
492 | |||
493 | /* | ||
494 | * Notify the user(s) about new machine check events. | ||
495 | * Can be called from interrupt context, but not from machine check/NMI | ||
496 | * context. | ||
497 | */ | ||
498 | int mce_notify_user(void) | ||
499 | { | ||
500 | /* Not more than two messages every minute */ | ||
501 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
502 | |||
503 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
504 | if (test_and_clear_bit(0, ¬ify_user)) { | ||
505 | wake_up_interruptible(&mce_wait); | ||
506 | |||
507 | /* | ||
508 | * There is no risk of missing notifications because | ||
509 | * work_pending is always cleared before the function is | ||
510 | * executed. | ||
511 | */ | ||
512 | if (trigger[0] && !work_pending(&mce_trigger_work)) | ||
513 | schedule_work(&mce_trigger_work); | ||
514 | |||
515 | if (__ratelimit(&ratelimit)) | ||
516 | printk(KERN_INFO "Machine check events logged\n"); | ||
517 | |||
518 | return 1; | ||
519 | } | ||
520 | return 0; | ||
521 | } | ||
522 | |||
523 | /* see if the idle task needs to notify userspace */ | ||
524 | static int | ||
525 | mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) | ||
526 | { | ||
527 | /* IDLE_END should be safe - interrupts are back on */ | ||
528 | if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) | ||
529 | mce_notify_user(); | ||
530 | |||
531 | return NOTIFY_OK; | ||
532 | } | ||
533 | |||
534 | static struct notifier_block mce_idle_notifier = { | ||
535 | .notifier_call = mce_idle_callback, | ||
536 | }; | ||
537 | |||
538 | static __init int periodic_mcheck_init(void) | ||
539 | { | ||
540 | idle_notifier_register(&mce_idle_notifier); | ||
541 | return 0; | ||
542 | } | ||
543 | __initcall(periodic_mcheck_init); | ||
544 | |||
545 | /* | ||
546 | * Initialize Machine Checks for a CPU. | ||
547 | */ | ||
548 | static int mce_cap_init(void) | ||
549 | { | ||
550 | u64 cap; | ||
551 | unsigned b; | ||
552 | |||
553 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
554 | b = cap & 0xff; | ||
555 | if (b > MAX_NR_BANKS) { | ||
556 | printk(KERN_WARNING | ||
557 | "MCE: Using only %u machine check banks out of %u\n", | ||
558 | MAX_NR_BANKS, b); | ||
559 | b = MAX_NR_BANKS; | ||
560 | } | ||
561 | |||
562 | /* Don't support asymmetric configurations today */ | ||
563 | WARN_ON(banks != 0 && b != banks); | ||
564 | banks = b; | ||
565 | if (!bank) { | ||
566 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
567 | if (!bank) | ||
568 | return -ENOMEM; | ||
569 | memset(bank, 0xff, banks * sizeof(u64)); | ||
570 | } | ||
571 | |||
572 | /* Use accurate RIP reporting if available. */ | ||
573 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | ||
574 | rip_msr = MSR_IA32_MCG_EIP; | ||
575 | |||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | static void mce_init(void *dummy) | ||
580 | { | ||
581 | u64 cap; | ||
582 | int i; | ||
583 | mce_banks_t all_banks; | ||
584 | |||
585 | /* | ||
586 | * Log the machine checks left over from the previous reset. | ||
587 | */ | ||
588 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
589 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | ||
590 | |||
591 | set_in_cr4(X86_CR4_MCE); | ||
592 | |||
593 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
594 | if (cap & MCG_CTL_P) | ||
595 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
596 | |||
597 | for (i = 0; i < banks; i++) { | ||
598 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
599 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
600 | } | ||
601 | } | ||
602 | |||
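The MSR_IA32_MC0_* + 4*i arithmetic above (and in several loops below) relies on each bank occupying four consecutive MSRs. A sketch of that layout, with invented helper names:

static inline u32 example_mci_ctl(int bank)    { return MSR_IA32_MC0_CTL    + 4*bank; }
static inline u32 example_mci_status(int bank) { return MSR_IA32_MC0_STATUS + 4*bank; }
static inline u32 example_mci_addr(int bank)   { return MSR_IA32_MC0_ADDR   + 4*bank; }
static inline u32 example_mci_misc(int bank)   { return MSR_IA32_MC0_MISC   + 4*bank; }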
603 | /* Add per CPU specific workarounds here */ | ||
604 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | ||
605 | { | ||
606 | /* This should be disabled by the BIOS, but isn't always */ | ||
607 | if (c->x86_vendor == X86_VENDOR_AMD) { | ||
608 | if (c->x86 == 15 && banks > 4) | ||
609 | /* disable GART TBL walk error reporting, which trips off | ||
610 | incorrectly with the IOMMU & 3ware & Cerberus. */ | ||
611 | clear_bit(10, (unsigned long *)&bank[4]); | ||
612 | if(c->x86 <= 17 && mce_bootlog < 0) | ||
613 | /* Lots of broken BIOS around that don't clear them | ||
614 | by default and leave crap in there. Don't log. */ | ||
615 | mce_bootlog = 0; | ||
616 | } | ||
617 | |||
618 | } | ||
619 | |||
620 | static void mce_cpu_features(struct cpuinfo_x86 *c) | ||
621 | { | ||
622 | switch (c->x86_vendor) { | ||
623 | case X86_VENDOR_INTEL: | ||
624 | mce_intel_feature_init(c); | ||
625 | break; | ||
626 | case X86_VENDOR_AMD: | ||
627 | mce_amd_feature_init(c); | ||
628 | break; | ||
629 | default: | ||
630 | break; | ||
631 | } | ||
632 | } | ||
633 | |||
634 | static void mce_init_timer(void) | ||
635 | { | ||
636 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
637 | int *n = &__get_cpu_var(next_interval); | ||
638 | |||
639 | *n = check_interval * HZ; | ||
640 | if (!*n) | ||
641 | return; | ||
642 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
643 | t->expires = round_jiffies(jiffies + *n); | ||
644 | add_timer(t); | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * Called for each booted CPU to set up machine checks. | ||
649 | * Must be called with preempt off. | ||
650 | */ | ||
651 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | ||
652 | { | ||
653 | if (!mce_available(c)) | ||
654 | return; | ||
655 | |||
656 | if (mce_cap_init() < 0) { | ||
657 | mce_dont_init = 1; | ||
658 | return; | ||
659 | } | ||
660 | mce_cpu_quirks(c); | ||
661 | |||
662 | mce_init(NULL); | ||
663 | mce_cpu_features(c); | ||
664 | mce_init_timer(); | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Character device to read and clear the MCE log. | ||
669 | */ | ||
670 | |||
671 | static DEFINE_SPINLOCK(mce_state_lock); | ||
672 | static int open_count; /* #times opened */ | ||
673 | static int open_exclu; /* already open exclusive? */ | ||
674 | |||
675 | static int mce_open(struct inode *inode, struct file *file) | ||
676 | { | ||
677 | lock_kernel(); | ||
678 | spin_lock(&mce_state_lock); | ||
679 | |||
680 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | ||
681 | spin_unlock(&mce_state_lock); | ||
682 | unlock_kernel(); | ||
683 | return -EBUSY; | ||
684 | } | ||
685 | |||
686 | if (file->f_flags & O_EXCL) | ||
687 | open_exclu = 1; | ||
688 | open_count++; | ||
689 | |||
690 | spin_unlock(&mce_state_lock); | ||
691 | unlock_kernel(); | ||
692 | |||
693 | return nonseekable_open(inode, file); | ||
694 | } | ||
695 | |||
696 | static int mce_release(struct inode *inode, struct file *file) | ||
697 | { | ||
698 | spin_lock(&mce_state_lock); | ||
699 | |||
700 | open_count--; | ||
701 | open_exclu = 0; | ||
702 | |||
703 | spin_unlock(&mce_state_lock); | ||
704 | |||
705 | return 0; | ||
706 | } | ||
707 | |||
708 | static void collect_tscs(void *data) | ||
709 | { | ||
710 | unsigned long *cpu_tsc = (unsigned long *)data; | ||
711 | |||
712 | rdtscll(cpu_tsc[smp_processor_id()]); | ||
713 | } | ||
714 | |||
715 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | ||
716 | loff_t *off) | ||
717 | { | ||
718 | unsigned long *cpu_tsc; | ||
719 | static DEFINE_MUTEX(mce_read_mutex); | ||
720 | unsigned prev, next; | ||
721 | char __user *buf = ubuf; | ||
722 | int i, err; | ||
723 | |||
724 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | ||
725 | if (!cpu_tsc) | ||
726 | return -ENOMEM; | ||
727 | |||
728 | mutex_lock(&mce_read_mutex); | ||
729 | next = rcu_dereference(mcelog.next); | ||
730 | |||
731 | /* Only supports full reads right now */ | ||
732 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
733 | mutex_unlock(&mce_read_mutex); | ||
734 | kfree(cpu_tsc); | ||
735 | return -EINVAL; | ||
736 | } | ||
737 | |||
738 | err = 0; | ||
739 | prev = 0; | ||
740 | do { | ||
741 | for (i = prev; i < next; i++) { | ||
742 | unsigned long start = jiffies; | ||
743 | |||
744 | while (!mcelog.entry[i].finished) { | ||
745 | if (time_after_eq(jiffies, start + 2)) { | ||
746 | memset(mcelog.entry + i, 0, | ||
747 | sizeof(struct mce)); | ||
748 | goto timeout; | ||
749 | } | ||
750 | cpu_relax(); | ||
751 | } | ||
752 | smp_rmb(); | ||
753 | err |= copy_to_user(buf, mcelog.entry + i, | ||
754 | sizeof(struct mce)); | ||
755 | buf += sizeof(struct mce); | ||
756 | timeout: | ||
757 | ; | ||
758 | } | ||
759 | |||
760 | memset(mcelog.entry + prev, 0, | ||
761 | (next - prev) * sizeof(struct mce)); | ||
762 | prev = next; | ||
763 | next = cmpxchg(&mcelog.next, prev, 0); | ||
764 | } while (next != prev); | ||
765 | |||
766 | synchronize_sched(); | ||
767 | |||
768 | /* | ||
769 | * Collect entries that were still getting written before the | ||
770 | * synchronize. | ||
771 | */ | ||
772 | on_each_cpu(collect_tscs, cpu_tsc, 1); | ||
773 | for (i = next; i < MCE_LOG_LEN; i++) { | ||
774 | if (mcelog.entry[i].finished && | ||
775 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | ||
776 | err |= copy_to_user(buf, mcelog.entry+i, | ||
777 | sizeof(struct mce)); | ||
778 | smp_rmb(); | ||
779 | buf += sizeof(struct mce); | ||
780 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | ||
781 | } | ||
782 | } | ||
783 | mutex_unlock(&mce_read_mutex); | ||
784 | kfree(cpu_tsc); | ||
785 | return err ? -EFAULT : buf - ubuf; | ||
786 | } | ||
787 | |||
788 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
789 | { | ||
790 | poll_wait(file, &mce_wait, wait); | ||
791 | if (rcu_dereference(mcelog.next)) | ||
792 | return POLLIN | POLLRDNORM; | ||
793 | return 0; | ||
794 | } | ||
795 | |||
796 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | ||
797 | { | ||
798 | int __user *p = (int __user *)arg; | ||
799 | |||
800 | if (!capable(CAP_SYS_ADMIN)) | ||
801 | return -EPERM; | ||
802 | switch (cmd) { | ||
803 | case MCE_GET_RECORD_LEN: | ||
804 | return put_user(sizeof(struct mce), p); | ||
805 | case MCE_GET_LOG_LEN: | ||
806 | return put_user(MCE_LOG_LEN, p); | ||
807 | case MCE_GETCLEAR_FLAGS: { | ||
808 | unsigned flags; | ||
809 | |||
810 | do { | ||
811 | flags = mcelog.flags; | ||
812 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | ||
813 | return put_user(flags, p); | ||
814 | } | ||
815 | default: | ||
816 | return -ENOTTY; | ||
817 | } | ||
818 | } | ||
819 | |||
820 | static const struct file_operations mce_chrdev_ops = { | ||
821 | .open = mce_open, | ||
822 | .release = mce_release, | ||
823 | .read = mce_read, | ||
824 | .poll = mce_poll, | ||
825 | .unlocked_ioctl = mce_ioctl, | ||
826 | }; | ||
827 | |||
828 | static struct miscdevice mce_log_device = { | ||
829 | MISC_MCELOG_MINOR, | ||
830 | "mcelog", | ||
831 | &mce_chrdev_ops, | ||
832 | }; | ||
833 | |||
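A hedged userspace sketch of how a consumer such as mcelog(8) might drain this device. Record size and log length come from the ioctls above, and mce_read() only accepts reads covering the whole log; MCE_GET_RECORD_LEN and MCE_GET_LOG_LEN are assumed to come from the exported <asm/mce.h>:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <asm/mce.h>		/* MCE_GET_RECORD_LEN, MCE_GET_LOG_LEN */

int main(void)
{
	int fd = open("/dev/mcelog", O_RDONLY);
	int rec_len, log_len;
	char *buf;
	ssize_t n;

	if (fd < 0)
		return 1;
	if (ioctl(fd, MCE_GET_RECORD_LEN, &rec_len) < 0 ||
	    ioctl(fd, MCE_GET_LOG_LEN, &log_len) < 0)
		return 1;

	buf = malloc((size_t)rec_len * log_len);
	if (!buf)
		return 1;

	/* Only finished records are copied out; the return value is the byte count. */
	n = read(fd, buf, (size_t)rec_len * log_len);
	printf("read %zd bytes (%zd records)\n", n, n > 0 ? n / rec_len : 0);

	free(buf);
	close(fd);
	return 0;
}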
834 | /* | ||
835 | * Old style boot options parsing. Only for compatibility. | ||
836 | */ | ||
837 | static int __init mcheck_disable(char *str) | ||
838 | { | ||
839 | mce_dont_init = 1; | ||
840 | return 1; | ||
841 | } | ||
842 | |||
843 | /* mce=off disables machine check. | ||
844 | mce=TOLERANCELEVEL (number, see above) | ||
845 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | ||
846 | mce=nobootlog Don't log MCEs from before booting. */ | ||
847 | static int __init mcheck_enable(char *str) | ||
848 | { | ||
849 | if (!strcmp(str, "off")) | ||
850 | mce_dont_init = 1; | ||
851 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) | ||
852 | mce_bootlog = str[0] == 'b'; | ||
853 | else if (isdigit(str[0])) | ||
854 | get_option(&str, &tolerant); | ||
855 | else | ||
856 | printk("mce= argument %s ignored. Please use /sys", str); | ||
857 | return 1; | ||
858 | } | ||
859 | |||
860 | __setup("nomce", mcheck_disable); | ||
861 | __setup("mce=", mcheck_enable); | ||
862 | |||
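For reference, typical command-line forms these handlers accept (values are illustrative):

	mce=off		disable machine check handling
	mce=2		set the tolerance level to 2
	mce=nobootlog	don't log MCEs left over from before boot
	nomce		same effect as mce=off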
863 | /* | ||
864 | * Sysfs support | ||
865 | */ | ||
866 | |||
867 | /* | ||
868 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
869 | * them later. | ||
870 | */ | ||
871 | static int mce_disable(void) | ||
872 | { | ||
873 | int i; | ||
874 | |||
875 | for (i = 0; i < banks; i++) | ||
876 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
881 | { | ||
882 | return mce_disable(); | ||
883 | } | ||
884 | |||
885 | static int mce_shutdown(struct sys_device *dev) | ||
886 | { | ||
887 | return mce_disable(); | ||
888 | } | ||
889 | |||
890 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. | ||
891 | Only one CPU is active at this time, the others get readded later using | ||
892 | CPU hotplug. */ | ||
893 | static int mce_resume(struct sys_device *dev) | ||
894 | { | ||
895 | mce_init(NULL); | ||
896 | mce_cpu_features(&current_cpu_data); | ||
897 | return 0; | ||
898 | } | ||
899 | |||
900 | static void mce_cpu_restart(void *data) | ||
901 | { | ||
902 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
903 | if (mce_available(&current_cpu_data)) | ||
904 | mce_init(NULL); | ||
905 | mce_init_timer(); | ||
906 | } | ||
907 | |||
908 | /* Reinit MCEs after user configuration changes */ | ||
909 | static void mce_restart(void) | ||
910 | { | ||
911 | on_each_cpu(mce_cpu_restart, NULL, 1); | ||
912 | } | ||
913 | |||
914 | static struct sysdev_class mce_sysclass = { | ||
915 | .suspend = mce_suspend, | ||
916 | .shutdown = mce_shutdown, | ||
917 | .resume = mce_resume, | ||
918 | .name = "machinecheck", | ||
919 | }; | ||
920 | |||
921 | DEFINE_PER_CPU(struct sys_device, device_mce); | ||
922 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | ||
923 | |||
924 | /* Why are there no generic functions for this? */ | ||
925 | #define ACCESSOR(name, var, start) \ | ||
926 | static ssize_t show_ ## name(struct sys_device *s, \ | ||
927 | struct sysdev_attribute *attr, \ | ||
928 | char *buf) { \ | ||
929 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | ||
930 | } \ | ||
931 | static ssize_t set_ ## name(struct sys_device *s, \ | ||
932 | struct sysdev_attribute *attr, \ | ||
933 | const char *buf, size_t siz) { \ | ||
934 | char *end; \ | ||
935 | unsigned long new = simple_strtoul(buf, &end, 0); \ | ||
936 | if (end == buf) return -EINVAL; \ | ||
937 | var = new; \ | ||
938 | start; \ | ||
939 | return end-buf; \ | ||
940 | } \ | ||
941 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | ||
942 | |||
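As a reading aid (not part of the patch), ACCESSOR(check_interval, check_interval, mce_restart()) used further below expands to roughly:

static ssize_t show_check_interval(struct sys_device *s,
				   struct sysdev_attribute *attr, char *buf)
{
	return sprintf(buf, "%lx\n", (unsigned long)check_interval);
}

static ssize_t set_check_interval(struct sys_device *s,
				  struct sysdev_attribute *attr,
				  const char *buf, size_t siz)
{
	char *end;
	unsigned long new = simple_strtoul(buf, &end, 0);

	if (end == buf)
		return -EINVAL;
	check_interval = new;
	mce_restart();			/* the "start" argument */
	return end - buf;
}
static SYSDEV_ATTR(check_interval, 0644, show_check_interval, set_check_interval);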
943 | static struct sysdev_attribute *bank_attrs; | ||
944 | |||
945 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
946 | char *buf) | ||
947 | { | ||
948 | u64 b = bank[attr - bank_attrs]; | ||
949 | return sprintf(buf, "%llx\n", b); | ||
950 | } | ||
951 | |||
952 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
953 | const char *buf, size_t siz) | ||
954 | { | ||
955 | char *end; | ||
956 | u64 new = simple_strtoull(buf, &end, 0); | ||
957 | if (end == buf) | ||
958 | return -EINVAL; | ||
959 | bank[attr - bank_attrs] = new; | ||
960 | mce_restart(); | ||
961 | return end-buf; | ||
962 | } | ||
963 | |||
964 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
965 | char *buf) | ||
966 | { | ||
967 | strcpy(buf, trigger); | ||
968 | strcat(buf, "\n"); | ||
969 | return strlen(trigger) + 1; | ||
970 | } | ||
971 | |||
972 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
973 | const char *buf,size_t siz) | ||
974 | { | ||
975 | char *p; | ||
976 | int len; | ||
977 | strncpy(trigger, buf, sizeof(trigger)); | ||
978 | trigger[sizeof(trigger)-1] = 0; | ||
979 | len = strlen(trigger); | ||
980 | p = strchr(trigger, '\n'); | ||
981 | if (p) *p = 0; | ||
982 | return len; | ||
983 | } | ||
984 | |||
985 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
986 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | ||
987 | ACCESSOR(check_interval,check_interval,mce_restart()) | ||
988 | static struct sysdev_attribute *mce_attributes[] = { | ||
989 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | ||
990 | NULL | ||
991 | }; | ||
992 | |||
993 | static cpumask_var_t mce_device_initialized; | ||
994 | |||
995 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ | ||
996 | static __cpuinit int mce_create_device(unsigned int cpu) | ||
997 | { | ||
998 | int err; | ||
999 | int i; | ||
1000 | |||
1001 | if (!mce_available(&boot_cpu_data)) | ||
1002 | return -EIO; | ||
1003 | |||
1004 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); | ||
1005 | per_cpu(device_mce,cpu).id = cpu; | ||
1006 | per_cpu(device_mce,cpu).cls = &mce_sysclass; | ||
1007 | |||
1008 | err = sysdev_register(&per_cpu(device_mce,cpu)); | ||
1009 | if (err) | ||
1010 | return err; | ||
1011 | |||
1012 | for (i = 0; mce_attributes[i]; i++) { | ||
1013 | err = sysdev_create_file(&per_cpu(device_mce,cpu), | ||
1014 | mce_attributes[i]); | ||
1015 | if (err) | ||
1016 | goto error; | ||
1017 | } | ||
1018 | for (i = 0; i < banks; i++) { | ||
1019 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | ||
1020 | &bank_attrs[i]); | ||
1021 | if (err) | ||
1022 | goto error2; | ||
1023 | } | ||
1024 | cpumask_set_cpu(cpu, mce_device_initialized); | ||
1025 | |||
1026 | return 0; | ||
1027 | error2: | ||
1028 | while (--i >= 0) { | ||
1029 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
1030 | &bank_attrs[i]); | ||
1031 | } | ||
1032 | error: | ||
1033 | while (--i >= 0) { | ||
1034 | sysdev_remove_file(&per_cpu(device_mce,cpu), | ||
1035 | mce_attributes[i]); | ||
1036 | } | ||
1037 | sysdev_unregister(&per_cpu(device_mce,cpu)); | ||
1038 | |||
1039 | return err; | ||
1040 | } | ||
1041 | |||
1042 | static __cpuinit void mce_remove_device(unsigned int cpu) | ||
1043 | { | ||
1044 | int i; | ||
1045 | |||
1046 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) | ||
1047 | return; | ||
1048 | |||
1049 | for (i = 0; mce_attributes[i]; i++) | ||
1050 | sysdev_remove_file(&per_cpu(device_mce,cpu), | ||
1051 | mce_attributes[i]); | ||
1052 | for (i = 0; i < banks; i++) | ||
1053 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
1054 | &bank_attrs[i]); | ||
1055 | sysdev_unregister(&per_cpu(device_mce,cpu)); | ||
1056 | cpumask_clear_cpu(cpu, mce_device_initialized); | ||
1057 | } | ||
1058 | |||
1059 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
1060 | static void mce_disable_cpu(void *h) | ||
1061 | { | ||
1062 | int i; | ||
1063 | unsigned long action = *(unsigned long *)h; | ||
1064 | |||
1065 | if (!mce_available(&current_cpu_data)) | ||
1066 | return; | ||
1067 | if (!(action & CPU_TASKS_FROZEN)) | ||
1068 | cmci_clear(); | ||
1069 | for (i = 0; i < banks; i++) | ||
1070 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
1071 | } | ||
1072 | |||
1073 | static void mce_reenable_cpu(void *h) | ||
1074 | { | ||
1075 | int i; | ||
1076 | unsigned long action = *(unsigned long *)h; | ||
1077 | |||
1078 | if (!mce_available(&current_cpu_data)) | ||
1079 | return; | ||
1080 | if (!(action & CPU_TASKS_FROZEN)) | ||
1081 | cmci_reenable(); | ||
1082 | for (i = 0; i < banks; i++) | ||
1083 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
1084 | } | ||
1085 | |||
1086 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
1087 | static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | ||
1088 | unsigned long action, void *hcpu) | ||
1089 | { | ||
1090 | unsigned int cpu = (unsigned long)hcpu; | ||
1091 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
1092 | |||
1093 | switch (action) { | ||
1094 | case CPU_ONLINE: | ||
1095 | case CPU_ONLINE_FROZEN: | ||
1096 | mce_create_device(cpu); | ||
1097 | if (threshold_cpu_callback) | ||
1098 | threshold_cpu_callback(action, cpu); | ||
1099 | break; | ||
1100 | case CPU_DEAD: | ||
1101 | case CPU_DEAD_FROZEN: | ||
1102 | if (threshold_cpu_callback) | ||
1103 | threshold_cpu_callback(action, cpu); | ||
1104 | mce_remove_device(cpu); | ||
1105 | break; | ||
1106 | case CPU_DOWN_PREPARE: | ||
1107 | case CPU_DOWN_PREPARE_FROZEN: | ||
1108 | del_timer_sync(t); | ||
1109 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
1110 | break; | ||
1111 | case CPU_DOWN_FAILED: | ||
1112 | case CPU_DOWN_FAILED_FROZEN: | ||
1113 | t->expires = round_jiffies(jiffies + | ||
1114 | __get_cpu_var(next_interval)); | ||
1115 | add_timer_on(t, cpu); | ||
1116 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
1117 | break; | ||
1118 | case CPU_POST_DEAD: | ||
1119 | /* intentionally ignoring frozen here */ | ||
1120 | cmci_rediscover(cpu); | ||
1121 | break; | ||
1122 | } | ||
1123 | return NOTIFY_OK; | ||
1124 | } | ||
1125 | |||
1126 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | ||
1127 | .notifier_call = mce_cpu_callback, | ||
1128 | }; | ||
1129 | |||
1130 | static __init int mce_init_banks(void) | ||
1131 | { | ||
1132 | int i; | ||
1133 | |||
1134 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1135 | GFP_KERNEL); | ||
1136 | if (!bank_attrs) | ||
1137 | return -ENOMEM; | ||
1138 | |||
1139 | for (i = 0; i < banks; i++) { | ||
1140 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
1141 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
1142 | if (!a->attr.name) | ||
1143 | goto nomem; | ||
1144 | a->attr.mode = 0644; | ||
1145 | a->show = show_bank; | ||
1146 | a->store = set_bank; | ||
1147 | } | ||
1148 | return 0; | ||
1149 | |||
1150 | nomem: | ||
1151 | while (--i >= 0) | ||
1152 | kfree(bank_attrs[i].attr.name); | ||
1153 | kfree(bank_attrs); | ||
1154 | bank_attrs = NULL; | ||
1155 | return -ENOMEM; | ||
1156 | } | ||
1157 | |||
1158 | static __init int mce_init_device(void) | ||
1159 | { | ||
1160 | int err; | ||
1161 | int i = 0; | ||
1162 | |||
1163 | if (!mce_available(&boot_cpu_data)) | ||
1164 | return -EIO; | ||
1165 | |||
1166 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); | ||
1167 | |||
1168 | err = mce_init_banks(); | ||
1169 | if (err) | ||
1170 | return err; | ||
1171 | |||
1172 | err = sysdev_class_register(&mce_sysclass); | ||
1173 | if (err) | ||
1174 | return err; | ||
1175 | |||
1176 | for_each_online_cpu(i) { | ||
1177 | err = mce_create_device(i); | ||
1178 | if (err) | ||
1179 | return err; | ||
1180 | } | ||
1181 | |||
1182 | register_hotcpu_notifier(&mce_cpu_notifier); | ||
1183 | misc_register(&mce_log_device); | ||
1184 | return err; | ||
1185 | } | ||
1186 | |||
1187 | device_initcall(mce_init_device); | ||
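Putting the sysfs pieces together, the per-CPU layout this code creates should look roughly like the following (the bank count varies by CPU):

/sys/devices/system/machinecheck/machinecheck0/
	tolerant
	check_interval
	trigger
	bank0 ... bank<N-1>

Writing to check_interval or to a bankN file ends up in mce_restart(), which reprograms every online CPU.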
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 56dde9c4bc96..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -13,22 +13,22 @@ | |||
13 | * | 13 | * |
14 | * All MC4_MISCi registers are shared between multi-cores | 14 | * All MC4_MISCi registers are shared between multi-cores |
15 | */ | 15 | */ |
16 | |||
17 | #include <linux/cpu.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
21 | #include <linux/kobject.h> | ||
22 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
23 | #include <linux/sched.h> | 18 | #include <linux/kobject.h> |
24 | #include <linux/smp.h> | 19 | #include <linux/percpu.h> |
25 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/errno.h> | ||
22 | #include <linux/sched.h> | ||
26 | #include <linux/sysfs.h> | 23 | #include <linux/sysfs.h> |
24 | #include <linux/init.h> | ||
25 | #include <linux/cpu.h> | ||
26 | #include <linux/smp.h> | ||
27 | |||
27 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
29 | #include <asm/idle.h> | ||
28 | #include <asm/mce.h> | 30 | #include <asm/mce.h> |
29 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
30 | #include <asm/percpu.h> | ||
31 | #include <asm/idle.h> | ||
32 | 32 | ||
33 | #define PFX "mce_threshold: " | 33 | #define PFX "mce_threshold: " |
34 | #define VERSION "version 1.1.1" | 34 | #define VERSION "version 1.1.1" |
@@ -48,26 +48,26 @@ | |||
48 | #define MCG_XBLK_ADDR 0xC0000400 | 48 | #define MCG_XBLK_ADDR 0xC0000400 |
49 | 49 | ||
50 | struct threshold_block { | 50 | struct threshold_block { |
51 | unsigned int block; | 51 | unsigned int block; |
52 | unsigned int bank; | 52 | unsigned int bank; |
53 | unsigned int cpu; | 53 | unsigned int cpu; |
54 | u32 address; | 54 | u32 address; |
55 | u16 interrupt_enable; | 55 | u16 interrupt_enable; |
56 | u16 threshold_limit; | 56 | u16 threshold_limit; |
57 | struct kobject kobj; | 57 | struct kobject kobj; |
58 | struct list_head miscj; | 58 | struct list_head miscj; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | /* defaults used early on boot */ | 61 | /* defaults used early on boot */ |
62 | static struct threshold_block threshold_defaults = { | 62 | static struct threshold_block threshold_defaults = { |
63 | .interrupt_enable = 0, | 63 | .interrupt_enable = 0, |
64 | .threshold_limit = THRESHOLD_MAX, | 64 | .threshold_limit = THRESHOLD_MAX, |
65 | }; | 65 | }; |
66 | 66 | ||
67 | struct threshold_bank { | 67 | struct threshold_bank { |
68 | struct kobject *kobj; | 68 | struct kobject *kobj; |
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_var_t cpus; | 70 | cpumask_var_t cpus; |
71 | }; | 71 | }; |
72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); |
73 | 73 | ||
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void); | |||
86 | */ | 86 | */ |
87 | 87 | ||
88 | struct thresh_restart { | 88 | struct thresh_restart { |
89 | struct threshold_block *b; | 89 | struct threshold_block *b; |
90 | int reset; | 90 | int reset; |
91 | u16 old_limit; | 91 | u16 old_limit; |
92 | }; | 92 | }; |
93 | 93 | ||
94 | /* must be called with correct cpu affinity */ | 94 | /* must be called with correct cpu affinity */ |
@@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr) | |||
110 | } else if (tr->old_limit) { /* change limit w/o reset */ | 110 | } else if (tr->old_limit) { /* change limit w/o reset */ |
111 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 111 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + |
112 | (tr->old_limit - tr->b->threshold_limit); | 112 | (tr->old_limit - tr->b->threshold_limit); |
113 | |||
113 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 114 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | |
114 | (new_count & THRESHOLD_MAX); | 115 | (new_count & THRESHOLD_MAX); |
115 | } | 116 | } |
@@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr) | |||
125 | /* cpu init entry point, called from mce.c with preempt off */ | 126 | /* cpu init entry point, called from mce.c with preempt off */ |
126 | void mce_amd_feature_init(struct cpuinfo_x86 *c) | 127 | void mce_amd_feature_init(struct cpuinfo_x86 *c) |
127 | { | 128 | { |
128 | unsigned int bank, block; | ||
129 | unsigned int cpu = smp_processor_id(); | 129 | unsigned int cpu = smp_processor_id(); |
130 | u8 lvt_off; | ||
131 | u32 low = 0, high = 0, address = 0; | 130 | u32 low = 0, high = 0, address = 0; |
131 | unsigned int bank, block; | ||
132 | struct thresh_restart tr; | 132 | struct thresh_restart tr; |
133 | u8 lvt_off; | ||
133 | 134 | ||
134 | for (bank = 0; bank < NR_BANKS; ++bank) { | 135 | for (bank = 0; bank < NR_BANKS; ++bank) { |
135 | for (block = 0; block < NR_BLOCKS; ++block) { | 136 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
140 | if (!address) | 141 | if (!address) |
141 | break; | 142 | break; |
142 | address += MCG_XBLK_ADDR; | 143 | address += MCG_XBLK_ADDR; |
143 | } | 144 | } else |
144 | else | ||
145 | ++address; | 145 | ++address; |
146 | 146 | ||
147 | if (rdmsr_safe(address, &low, &high)) | 147 | if (rdmsr_safe(address, &low, &high)) |
@@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
193 | */ | 193 | */ |
194 | static void amd_threshold_interrupt(void) | 194 | static void amd_threshold_interrupt(void) |
195 | { | 195 | { |
196 | u32 low = 0, high = 0, address = 0; | ||
196 | unsigned int bank, block; | 197 | unsigned int bank, block; |
197 | struct mce m; | 198 | struct mce m; |
198 | u32 low = 0, high = 0, address = 0; | ||
199 | 199 | ||
200 | mce_setup(&m); | 200 | mce_setup(&m); |
201 | 201 | ||
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void) | |||
204 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) | 204 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) |
205 | continue; | 205 | continue; |
206 | for (block = 0; block < NR_BLOCKS; ++block) { | 206 | for (block = 0; block < NR_BLOCKS; ++block) { |
207 | if (block == 0) | 207 | if (block == 0) { |
208 | address = MSR_IA32_MC0_MISC + bank * 4; | 208 | address = MSR_IA32_MC0_MISC + bank * 4; |
209 | else if (block == 1) { | 209 | } else if (block == 1) { |
210 | address = (low & MASK_BLKPTR_LO) >> 21; | 210 | address = (low & MASK_BLKPTR_LO) >> 21; |
211 | if (!address) | 211 | if (!address) |
212 | break; | 212 | break; |
213 | address += MCG_XBLK_ADDR; | 213 | address += MCG_XBLK_ADDR; |
214 | } | 214 | } else { |
215 | else | ||
216 | ++address; | 215 | ++address; |
216 | } | ||
217 | 217 | ||
218 | if (rdmsr_safe(address, &low, &high)) | 218 | if (rdmsr_safe(address, &low, &high)) |
219 | break; | 219 | break; |
@@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void) | |||
229 | (high & MASK_LOCKED_HI)) | 229 | (high & MASK_LOCKED_HI)) |
230 | continue; | 230 | continue; |
231 | 231 | ||
232 | /* Log the machine check that caused the threshold | 232 | /* |
233 | event. */ | 233 | * Log the machine check that caused the threshold |
234 | * event. | ||
235 | */ | ||
234 | machine_check_poll(MCP_TIMESTAMP, | 236 | machine_check_poll(MCP_TIMESTAMP, |
235 | &__get_cpu_var(mce_poll_banks)); | 237 | &__get_cpu_var(mce_poll_banks)); |
236 | 238 | ||
@@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void) | |||
254 | 256 | ||
255 | struct threshold_attr { | 257 | struct threshold_attr { |
256 | struct attribute attr; | 258 | struct attribute attr; |
257 | ssize_t(*show) (struct threshold_block *, char *); | 259 | ssize_t (*show) (struct threshold_block *, char *); |
258 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); | 260 | ssize_t (*store) (struct threshold_block *, const char *, size_t count); |
259 | }; | 261 | }; |
260 | 262 | ||
261 | #define SHOW_FIELDS(name) \ | 263 | #define SHOW_FIELDS(name) \ |
262 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ | 264 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ |
263 | { \ | 265 | { \ |
264 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 266 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ |
265 | } | 267 | } |
266 | SHOW_FIELDS(interrupt_enable) | 268 | SHOW_FIELDS(interrupt_enable) |
267 | SHOW_FIELDS(threshold_limit) | 269 | SHOW_FIELDS(threshold_limit) |
268 | 270 | ||
269 | static ssize_t store_interrupt_enable(struct threshold_block *b, | 271 | static ssize_t |
270 | const char *buf, size_t count) | 272 | store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) |
271 | { | 273 | { |
272 | char *end; | ||
273 | struct thresh_restart tr; | 274 | struct thresh_restart tr; |
274 | unsigned long new = simple_strtoul(buf, &end, 0); | 275 | unsigned long new; |
275 | if (end == buf) | 276 | |
277 | if (strict_strtoul(buf, 0, &new) < 0) | ||
276 | return -EINVAL; | 278 | return -EINVAL; |
279 | |||
277 | b->interrupt_enable = !!new; | 280 | b->interrupt_enable = !!new; |
278 | 281 | ||
279 | tr.b = b; | 282 | tr.b = b; |
280 | tr.reset = 0; | 283 | tr.reset = 0; |
281 | tr.old_limit = 0; | 284 | tr.old_limit = 0; |
285 | |||
282 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 286 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
283 | 287 | ||
284 | return end - buf; | 288 | return size; |
285 | } | 289 | } |
286 | 290 | ||
287 | static ssize_t store_threshold_limit(struct threshold_block *b, | 291 | static ssize_t |
288 | const char *buf, size_t count) | 292 | store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) |
289 | { | 293 | { |
290 | char *end; | ||
291 | struct thresh_restart tr; | 294 | struct thresh_restart tr; |
292 | unsigned long new = simple_strtoul(buf, &end, 0); | 295 | unsigned long new; |
293 | if (end == buf) | 296 | |
297 | if (strict_strtoul(buf, 0, &new) < 0) | ||
294 | return -EINVAL; | 298 | return -EINVAL; |
299 | |||
295 | if (new > THRESHOLD_MAX) | 300 | if (new > THRESHOLD_MAX) |
296 | new = THRESHOLD_MAX; | 301 | new = THRESHOLD_MAX; |
297 | if (new < 1) | 302 | if (new < 1) |
298 | new = 1; | 303 | new = 1; |
304 | |||
299 | tr.old_limit = b->threshold_limit; | 305 | tr.old_limit = b->threshold_limit; |
300 | b->threshold_limit = new; | 306 | b->threshold_limit = new; |
301 | tr.b = b; | 307 | tr.b = b; |
@@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
303 | 309 | ||
304 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 310 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
305 | 311 | ||
306 | return end - buf; | 312 | return size; |
307 | } | 313 | } |
308 | 314 | ||
309 | struct threshold_block_cross_cpu { | 315 | struct threshold_block_cross_cpu { |
310 | struct threshold_block *tb; | 316 | struct threshold_block *tb; |
311 | long retval; | 317 | long retval; |
312 | }; | 318 | }; |
313 | 319 | ||
314 | static void local_error_count_handler(void *_tbcc) | 320 | static void local_error_count_handler(void *_tbcc) |
@@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b, | |||
338 | return 1; | 344 | return 1; |
339 | } | 345 | } |
340 | 346 | ||
341 | #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ | 347 | #define RW_ATTR(val) \ |
342 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 348 | static struct threshold_attr val = { \ |
343 | .show = _show, \ | 349 | .attr = {.name = __stringify(val), .mode = 0644 }, \ |
344 | .store = _store, \ | 350 | .show = show_## val, \ |
351 | .store = store_## val, \ | ||
345 | }; | 352 | }; |
346 | 353 | ||
347 | #define RW_ATTR(name) \ | ||
348 | static struct threshold_attr name = \ | ||
349 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) | ||
350 | |||
351 | RW_ATTR(interrupt_enable); | 354 | RW_ATTR(interrupt_enable); |
352 | RW_ATTR(threshold_limit); | 355 | RW_ATTR(threshold_limit); |
353 | RW_ATTR(error_count); | 356 | RW_ATTR(error_count); |
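For reference, the new RW_ATTR(threshold_limit) above expands to roughly this (a sketch, not part of the patch):

static struct threshold_attr threshold_limit = {
	.attr	= { .name = "threshold_limit", .mode = 0644 },
	.show	= show_threshold_limit,
	.store	= store_threshold_limit,
};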
@@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = { | |||
359 | NULL | 362 | NULL |
360 | }; | 363 | }; |
361 | 364 | ||
362 | #define to_block(k) container_of(k, struct threshold_block, kobj) | 365 | #define to_block(k) container_of(k, struct threshold_block, kobj) |
363 | #define to_attr(a) container_of(a, struct threshold_attr, attr) | 366 | #define to_attr(a) container_of(a, struct threshold_attr, attr) |
364 | 367 | ||
365 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | 368 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
366 | { | 369 | { |
367 | struct threshold_block *b = to_block(kobj); | 370 | struct threshold_block *b = to_block(kobj); |
368 | struct threshold_attr *a = to_attr(attr); | 371 | struct threshold_attr *a = to_attr(attr); |
369 | ssize_t ret; | 372 | ssize_t ret; |
373 | |||
370 | ret = a->show ? a->show(b, buf) : -EIO; | 374 | ret = a->show ? a->show(b, buf) : -EIO; |
375 | |||
371 | return ret; | 376 | return ret; |
372 | } | 377 | } |
373 | 378 | ||
@@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
377 | struct threshold_block *b = to_block(kobj); | 382 | struct threshold_block *b = to_block(kobj); |
378 | struct threshold_attr *a = to_attr(attr); | 383 | struct threshold_attr *a = to_attr(attr); |
379 | ssize_t ret; | 384 | ssize_t ret; |
385 | |||
380 | ret = a->store ? a->store(b, buf, count) : -EIO; | 386 | ret = a->store ? a->store(b, buf, count) : -EIO; |
387 | |||
381 | return ret; | 388 | return ret; |
382 | } | 389 | } |
383 | 390 | ||
384 | static struct sysfs_ops threshold_ops = { | 391 | static struct sysfs_ops threshold_ops = { |
385 | .show = show, | 392 | .show = show, |
386 | .store = store, | 393 | .store = store, |
387 | }; | 394 | }; |
388 | 395 | ||
389 | static struct kobj_type threshold_ktype = { | 396 | static struct kobj_type threshold_ktype = { |
390 | .sysfs_ops = &threshold_ops, | 397 | .sysfs_ops = &threshold_ops, |
391 | .default_attrs = default_attrs, | 398 | .default_attrs = default_attrs, |
392 | }; | 399 | }; |
393 | 400 | ||
394 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | 401 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, |
@@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
396 | unsigned int block, | 403 | unsigned int block, |
397 | u32 address) | 404 | u32 address) |
398 | { | 405 | { |
399 | int err; | ||
400 | u32 low, high; | ||
401 | struct threshold_block *b = NULL; | 406 | struct threshold_block *b = NULL; |
407 | u32 low, high; | ||
408 | int err; | ||
402 | 409 | ||
403 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) | 410 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) |
404 | return 0; | 411 | return 0; |
@@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
421 | if (!b) | 428 | if (!b) |
422 | return -ENOMEM; | 429 | return -ENOMEM; |
423 | 430 | ||
424 | b->block = block; | 431 | b->block = block; |
425 | b->bank = bank; | 432 | b->bank = bank; |
426 | b->cpu = cpu; | 433 | b->cpu = cpu; |
427 | b->address = address; | 434 | b->address = address; |
428 | b->interrupt_enable = 0; | 435 | b->interrupt_enable = 0; |
429 | b->threshold_limit = THRESHOLD_MAX; | 436 | b->threshold_limit = THRESHOLD_MAX; |
430 | 437 | ||
431 | INIT_LIST_HEAD(&b->miscj); | 438 | INIT_LIST_HEAD(&b->miscj); |
432 | 439 | ||
433 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) | 440 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) { |
434 | list_add(&b->miscj, | 441 | list_add(&b->miscj, |
435 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); | 442 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); |
436 | else | 443 | } else { |
437 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; | 444 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; |
445 | } | ||
438 | 446 | ||
439 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, | 447 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
440 | per_cpu(threshold_banks, cpu)[bank]->kobj, | 448 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
@@ -447,8 +455,9 @@ recurse: | |||
447 | if (!address) | 455 | if (!address) |
448 | return 0; | 456 | return 0; |
449 | address += MCG_XBLK_ADDR; | 457 | address += MCG_XBLK_ADDR; |
450 | } else | 458 | } else { |
451 | ++address; | 459 | ++address; |
460 | } | ||
452 | 461 | ||
453 | err = allocate_threshold_blocks(cpu, bank, ++block, address); | 462 | err = allocate_threshold_blocks(cpu, bank, ++block, address); |
454 | if (err) | 463 | if (err) |
@@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
500 | if (!b) | 509 | if (!b) |
501 | goto out; | 510 | goto out; |
502 | 511 | ||
503 | err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, | 512 | err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, |
504 | b->kobj, name); | 513 | b->kobj, name); |
505 | if (err) | 514 | if (err) |
506 | goto out; | 515 | goto out; |
507 | 516 | ||
508 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 517 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); |
509 | per_cpu(threshold_banks, cpu)[bank] = b; | 518 | per_cpu(threshold_banks, cpu)[bank] = b; |
519 | |||
510 | goto out; | 520 | goto out; |
511 | } | 521 | } |
512 | #endif | 522 | #endif |
@@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
522 | goto out; | 532 | goto out; |
523 | } | 533 | } |
524 | 534 | ||
525 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); | 535 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); |
526 | if (!b->kobj) | 536 | if (!b->kobj) |
527 | goto out_free; | 537 | goto out_free; |
528 | 538 | ||
@@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
542 | if (i == cpu) | 552 | if (i == cpu) |
543 | continue; | 553 | continue; |
544 | 554 | ||
545 | err = sysfs_create_link(&per_cpu(device_mce, i).kobj, | 555 | err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, |
546 | b->kobj, name); | 556 | b->kobj, name); |
547 | if (err) | 557 | if (err) |
548 | goto out; | 558 | goto out; |
@@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
605 | 615 | ||
606 | static void threshold_remove_bank(unsigned int cpu, int bank) | 616 | static void threshold_remove_bank(unsigned int cpu, int bank) |
607 | { | 617 | { |
608 | int i = 0; | ||
609 | struct threshold_bank *b; | 618 | struct threshold_bank *b; |
610 | char name[32]; | 619 | char name[32]; |
620 | int i = 0; | ||
611 | 621 | ||
612 | b = per_cpu(threshold_banks, cpu)[bank]; | 622 | b = per_cpu(threshold_banks, cpu)[bank]; |
613 | |||
614 | if (!b) | 623 | if (!b) |
615 | return; | 624 | return; |
616 | |||
617 | if (!b->blocks) | 625 | if (!b->blocks) |
618 | goto free_out; | 626 | goto free_out; |
619 | 627 | ||
@@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
622 | #ifdef CONFIG_SMP | 630 | #ifdef CONFIG_SMP |
623 | /* sibling symlink */ | 631 | /* sibling symlink */ |
624 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 632 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
625 | sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); | 633 | sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); |
626 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 634 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
635 | |||
627 | return; | 636 | return; |
628 | } | 637 | } |
629 | #endif | 638 | #endif |
@@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
633 | if (i == cpu) | 642 | if (i == cpu) |
634 | continue; | 643 | continue; |
635 | 644 | ||
636 | sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); | 645 | sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); |
637 | per_cpu(threshold_banks, i)[bank] = NULL; | 646 | per_cpu(threshold_banks, i)[bank] = NULL; |
638 | } | 647 | } |
639 | 648 | ||
@@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu) | |||
659 | } | 668 | } |
660 | 669 | ||
661 | /* get notified when a cpu comes on/off */ | 670 | /* get notified when a cpu comes on/off */ |
662 | static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, | 671 | static void __cpuinit |
663 | unsigned int cpu) | 672 | amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) |
664 | { | 673 | { |
665 | if (cpu >= NR_CPUS) | ||
666 | return; | ||
667 | |||
668 | switch (action) { | 674 | switch (action) { |
669 | case CPU_ONLINE: | 675 | case CPU_ONLINE: |
670 | case CPU_ONLINE_FROZEN: | 676 | case CPU_ONLINE_FROZEN: |
@@ -686,11 +692,12 @@ static __init int threshold_init_device(void) | |||
686 | /* to hit CPUs online before the notifier is up */ | 692 | /* to hit CPUs online before the notifier is up */ |
687 | for_each_online_cpu(lcpu) { | 693 | for_each_online_cpu(lcpu) { |
688 | int err = threshold_create_device(lcpu); | 694 | int err = threshold_create_device(lcpu); |
695 | |||
689 | if (err) | 696 | if (err) |
690 | return err; | 697 | return err; |
691 | } | 698 | } |
692 | threshold_cpu_callback = amd_64_threshold_cpu_callback; | 699 | threshold_cpu_callback = amd_64_threshold_cpu_callback; |
700 | |||
693 | return 0; | 701 | return 0; |
694 | } | 702 | } |
695 | |||
696 | device_initcall(threshold_init_device); | 703 | device_initcall(threshold_init_device); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index cef3ee30744b..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -8,85 +8,10 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <asm/processor.h> | ||
12 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | ||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/mce.h> | 14 | #include <asm/mce.h> |
15 | #include <asm/hw_irq.h> | ||
16 | #include <asm/idle.h> | ||
17 | #include <asm/therm_throt.h> | ||
18 | #include <asm/apic.h> | ||
19 | |||
20 | asmlinkage void smp_thermal_interrupt(void) | ||
21 | { | ||
22 | __u64 msr_val; | ||
23 | |||
24 | ack_APIC_irq(); | ||
25 | |||
26 | exit_idle(); | ||
27 | irq_enter(); | ||
28 | |||
29 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
30 | if (therm_throt_process(msr_val & 1)) | ||
31 | mce_log_therm_throt_event(msr_val); | ||
32 | |||
33 | inc_irq_stat(irq_thermal_count); | ||
34 | irq_exit(); | ||
35 | } | ||
36 | |||
37 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
38 | { | ||
39 | u32 l, h; | ||
40 | int tm2 = 0; | ||
41 | unsigned int cpu = smp_processor_id(); | ||
42 | |||
43 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
44 | return; | ||
45 | |||
46 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
47 | return; | ||
48 | |||
49 | /* first check if TM1 is already enabled by the BIOS, in which | ||
50 | * case there might be some SMM goo which handles it, so we can't even | ||
51 | * put a handler since it might be delivered via SMI already. | ||
52 | */ | ||
53 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
54 | h = apic_read(APIC_LVTTHMR); | ||
55 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
56 | printk(KERN_DEBUG | ||
57 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
58 | return; | ||
59 | } | ||
60 | |||
61 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
62 | tm2 = 1; | ||
63 | |||
64 | if (h & APIC_VECTOR_MASK) { | ||
65 | printk(KERN_DEBUG | ||
66 | "CPU%d: Thermal LVT vector (%#x) already " | ||
67 | "installed\n", cpu, (h & APIC_VECTOR_MASK)); | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | h = THERMAL_APIC_VECTOR; | ||
72 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); | ||
73 | apic_write(APIC_LVTTHMR, h); | ||
74 | |||
75 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
76 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); | ||
77 | |||
78 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
79 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
80 | |||
81 | l = apic_read(APIC_LVTTHMR); | ||
82 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
83 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
84 | cpu, tm2 ? "TM2" : "TM1"); | ||
85 | |||
86 | /* enable thermal throttle processing */ | ||
87 | atomic_set(&therm_throt_en, 1); | ||
88 | return; | ||
89 | } | ||
90 | 15 | ||
91 | /* | 16 | /* |
92 | * Support for Intel Correct Machine Check Interrupts. This allows | 17 | * Support for Intel Correct Machine Check Interrupts. This allows |
@@ -109,6 +34,9 @@ static int cmci_supported(int *banks) | |||
109 | { | 34 | { |
110 | u64 cap; | 35 | u64 cap; |
111 | 36 | ||
37 | if (mce_cmci_disabled || mce_ignore_ce) | ||
38 | return 0; | ||
39 | |||
112 | /* | 40 | /* |
113 | * Vendor check is not strictly needed, but the initial | 41 | * Vendor check is not strictly needed, but the initial |
114 | * initialization is vendor keyed and this | 42 | * initialization is vendor keyed and this |
@@ -132,7 +60,7 @@ static int cmci_supported(int *banks) | |||
132 | static void intel_threshold_interrupt(void) | 60 | static void intel_threshold_interrupt(void) |
133 | { | 61 | { |
134 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | 62 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
135 | mce_notify_user(); | 63 | mce_notify_irq(); |
136 | } | 64 | } |
137 | 65 | ||
138 | static void print_update(char *type, int *hdr, int num) | 66 | static void print_update(char *type, int *hdr, int num) |
@@ -248,7 +176,7 @@ void cmci_rediscover(int dying) | |||
248 | return; | 176 | return; |
250 | cpumask_copy(old, &current->cpus_allowed); | 177 | cpumask_copy(old, &current->cpus_allowed);
250 | 178 | ||
251 | for_each_online_cpu (cpu) { | 179 | for_each_online_cpu(cpu) { |
252 | if (cpu == dying) | 180 | if (cpu == dying) |
253 | continue; | 181 | continue; |
254 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | 182 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
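The CMCI capability test that cmci_supported() performs boils down to one MCG_CAP bit. A hedged sketch (the full function also honours the new mce_cmci_disabled/mce_ignore_ce switches and an Intel vendor check; the example_* name is invented):

static int example_has_cmci(unsigned *banks)
{
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);

	return !!(cap & MCG_CMCI_P);	/* bit 10: CMCI supported */
}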
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index a74af128efc9..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -6,25 +6,23 @@ | |||
6 | * This file contains routines to check for non-fatal MCEs every 15s | 6 | * This file contains routines to check for non-fatal MCEs every 15s |
7 | * | 7 | * |
8 | */ | 8 | */ |
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/jiffies.h> | ||
14 | #include <linux/workqueue.h> | ||
15 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
16 | #include <linux/smp.h> | 10 | #include <linux/workqueue.h> |
11 | #include <linux/jiffies.h> | ||
12 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/types.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/smp.h> | ||
18 | 17 | ||
19 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
20 | #include <asm/system.h> | 19 | #include <asm/system.h> |
20 | #include <asm/mce.h> | ||
21 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
22 | 22 | ||
23 | #include "mce.h" | 23 | static int firstbank; |
24 | 24 | ||
25 | static int firstbank; | 25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ |
26 | |||
27 | #define MCE_RATE 15*HZ /* timer rate is 15s */ | ||
28 | 26 | ||
29 | static void mce_checkregs(void *info) | 27 | static void mce_checkregs(void *info) |
30 | { | 28 | { |
@@ -34,23 +32,24 @@ static void mce_checkregs(void *info) | |||
34 | for (i = firstbank; i < nr_mce_banks; i++) { | 32 | for (i = firstbank; i < nr_mce_banks; i++) { |
35 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | 33 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
36 | 34 | ||
37 | if (high & (1<<31)) { | 35 | if (!(high & (1<<31))) |
38 | printk(KERN_INFO "MCE: The hardware reports a non " | 36 | continue; |
39 | "fatal, correctable incident occurred on " | 37 | |
40 | "CPU %d.\n", | 38 | printk(KERN_INFO "MCE: The hardware reports a non fatal, " |
39 | "correctable incident occurred on CPU %d.\n", | ||
41 | smp_processor_id()); | 40 | smp_processor_id()); |
42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | 41 | |
43 | 42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | |
44 | /* | 43 | |
45 | * Scrub the error so we don't pick it up in MCE_RATE | 44 | /* |
46 | * seconds time. | 45 | * Scrub the error so we don't pick it up in MCE_RATE |
47 | */ | 46 | * seconds time: |
48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | 47 | */ |
49 | 48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | |
50 | /* Serialize */ | 49 | |
51 | wmb(); | 50 | /* Serialize: */ |
52 | add_taint(TAINT_MACHINE_CHECK); | 51 | wmb(); |
53 | } | 52 | add_taint(TAINT_MACHINE_CHECK); |
54 | } | 53 | } |
55 | } | 54 | } |
56 | 55 | ||
@@ -77,16 +76,17 @@ static int __init init_nonfatal_mce_checker(void) | |||
77 | 76 | ||
78 | /* Some Athlons misbehave when we frob bank 0 */ | 77 | /* Some Athlons misbehave when we frob bank 0 */ |
79 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 78 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
80 | boot_cpu_data.x86 == 6) | 79 | boot_cpu_data.x86 == 6) |
81 | firstbank = 1; | 80 | firstbank = 1; |
82 | else | 81 | else |
83 | firstbank = 0; | 82 | firstbank = 0; |
84 | 83 | ||
85 | /* | 84 | /* |
86 | * Check for non-fatal errors every MCE_RATE s | 85 | * Check for non-fatal errors every MCE_RATE s |
87 | */ | 86 | */ |
88 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | 87 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); |
89 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | 88 | printk(KERN_INFO "Machine check exception polling timer started.\n"); |
89 | |||
90 | return 0; | 90 | return 0; |
91 | } | 91 | } |
92 | module_init(init_nonfatal_mce_checker); | 92 | module_init(init_nonfatal_mce_checker); |
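The 15-second poll above is driven by a self-rearming delayed work item; the handler itself falls outside this hunk, so here is only a minimal sketch of the pattern, with invented example_* names:

static void example_poll(struct work_struct *work);
static DECLARE_DELAYED_WORK(example_work, example_poll);

static void example_poll(struct work_struct *work)
{
	/* Check every bank on every CPU, then re-arm for the next interval. */
	on_each_cpu(mce_checkregs, NULL, 1);
	schedule_delayed_work(&example_work, round_jiffies_relative(MCE_RATE));
}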
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f53bdcbaf382..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -1,21 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * P4 specific Machine Check Exception Reporting | 2 | * P4 specific Machine Check Exception Reporting |
3 | */ | 3 | */ |
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
8 | #include <linux/interrupt.h> | 5 | #include <linux/types.h> |
6 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
10 | 8 | ||
11 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 10 | #include <asm/mce.h> |
13 | #include <asm/msr.h> | 11 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | ||
15 | |||
16 | #include <asm/therm_throt.h> | ||
17 | |||
18 | #include "mce.h" | ||
19 | 12 | ||
20 | /* as supported by the P4/Xeon family */ | 13 | /* as supported by the P4/Xeon family */ |
21 | struct intel_mce_extended_msrs { | 14 | struct intel_mce_extended_msrs { |
@@ -34,98 +27,8 @@ struct intel_mce_extended_msrs { | |||
34 | 27 | ||
35 | static int mce_num_extended_msrs; | 28 | static int mce_num_extended_msrs; |
36 | 29 | ||
37 | |||
38 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | __u64 msr_val; | ||
50 | |||
51 | ack_APIC_irq(); | ||
52 | |||
53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
54 | therm_throt_process(msr_val & 0x1); | ||
55 | } | ||
56 | |||
57 | /* Thermal interrupt handler for this CPU setup */ | ||
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | ||
59 | |||
60 | void smp_thermal_interrupt(struct pt_regs *regs) | ||
61 | { | ||
62 | irq_enter(); | ||
63 | vendor_thermal_interrupt(regs); | ||
64 | __get_cpu_var(irq_stat).irq_thermal_count++; | ||
65 | irq_exit(); | ||
66 | } | ||
67 | |||
68 | /* P4/Xeon Thermal regulation detect and init */ | ||
69 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
70 | { | ||
71 | u32 l, h; | ||
72 | unsigned int cpu = smp_processor_id(); | ||
73 | |||
74 | /* Thermal monitoring */ | ||
75 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
76 | return; /* -ENODEV */ | ||
77 | |||
78 | /* Clock modulation */ | ||
79 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
80 | return; /* -ENODEV */ | ||
81 | |||
82 | /* first check if its enabled already, in which case there might | ||
83 | * be some SMM goo which handles it, so we can't even put a handler | ||
84 | * since it might be delivered via SMI already -zwanem. | ||
85 | */ | ||
86 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
87 | h = apic_read(APIC_LVTTHMR); | ||
88 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | ||
90 | cpu); | ||
91 | return; /* -EBUSY */ | ||
92 | } | ||
93 | |||
94 | /* check whether a vector already exists, temporarily masked? */ | ||
95 | if (h & APIC_VECTOR_MASK) { | ||
96 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | ||
97 | "installed\n", | ||
98 | cpu, (h & APIC_VECTOR_MASK)); | ||
99 | return; /* -EBUSY */ | ||
100 | } | ||
101 | |||
102 | /* The temperature transition interrupt handler setup */ | ||
103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | ||
104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | ||
105 | apic_write(APIC_LVTTHMR, h); | ||
106 | |||
107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | ||
109 | |||
110 | /* ok we're good to go... */ | ||
111 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
112 | |||
113 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
114 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
115 | |||
116 | l = apic_read(APIC_LVTTHMR); | ||
117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | ||
119 | |||
120 | /* enable thermal throttle processing */ | ||
121 | atomic_set(&therm_throt_en, 1); | ||
122 | return; | ||
123 | } | ||
124 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
125 | |||
126 | |||
127 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
128 | static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
129 | { | 32 | { |
130 | u32 h; | 33 | u32 h; |
131 | 34 | ||
@@ -143,9 +46,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
143 | 46 | ||
144 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 47 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
145 | { | 48 | { |
146 | int recover = 1; | ||
147 | u32 alow, ahigh, high, low; | 49 | u32 alow, ahigh, high, low; |
148 | u32 mcgstl, mcgsth; | 50 | u32 mcgstl, mcgsth; |
51 | int recover = 1; | ||
149 | int i; | 52 | int i; |
150 | 53 | ||
151 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 54 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
@@ -157,7 +60,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
157 | 60 | ||
158 | if (mce_num_extended_msrs > 0) { | 61 | if (mce_num_extended_msrs > 0) { |
159 | struct intel_mce_extended_msrs dbg; | 62 | struct intel_mce_extended_msrs dbg; |
63 | |||
160 | intel_get_extended_msrs(&dbg); | 64 | intel_get_extended_msrs(&dbg); |
65 | |||
161 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" | 66 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" |
162 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" | 67 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" |
163 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | 68 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", |
@@ -171,6 +76,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
171 | if (high & (1<<31)) { | 76 | if (high & (1<<31)) { |
172 | char misc[20]; | 77 | char misc[20]; |
173 | char addr[24]; | 78 | char addr[24]; |
79 | |||
174 | misc[0] = addr[0] = '\0'; | 80 | misc[0] = addr[0] = '\0'; |
175 | if (high & (1<<29)) | 81 | if (high & (1<<29)) |
176 | recover |= 1; | 82 | recover |= 1; |
@@ -196,6 +102,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
196 | panic("Unable to continue"); | 102 | panic("Unable to continue"); |
197 | 103 | ||
198 | printk(KERN_EMERG "Attempting to continue.\n"); | 104 | printk(KERN_EMERG "Attempting to continue.\n"); |
105 | |||
199 | /* | 106 | /* |
200 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 107 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
201 | * recoverable/continuable. This will allow BIOS to look at the MSRs | 108 | * recoverable/continuable. This will allow BIOS to look at the MSRs |
@@ -217,7 +124,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
217 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 124 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
218 | } | 125 | } |
219 | 126 | ||
220 | |||
221 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | 127 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) |
222 | { | 128 | { |
223 | u32 l, h; | 129 | u32 l, h; |
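The bank-scanning loop in intel_machine_check() above keys off individual bits in the high half of each MSR_IA32_MCi_STATUS register: bit 31 marks the bank as holding a valid logged error, bits 29 and 25 widen the "recover" state, and bits 27/26 say whether MCi_MISC and MCi_ADDR carry extra data. A minimal user-space sketch of that decoding follows; the macro names and the sample value are illustrative, not taken from this patch.

#include <stdint.h>
#include <stdio.h>

/* High 32 bits of an IA32 MCi_STATUS register (illustrative names). */
#define MCI_HI_VAL   (1u << 31)   /* bank holds a valid logged error  */
#define MCI_HI_UC    (1u << 29)   /* uncorrected error (recover |= 1) */
#define MCI_HI_MISCV (1u << 27)   /* MCi_MISC holds additional info   */
#define MCI_HI_ADDRV (1u << 26)   /* MCi_ADDR holds a fault address   */
#define MCI_HI_PCC   (1u << 25)   /* context corrupt (recover |= 2)   */

int main(void)
{
	uint32_t high = 0xa4000000u;  /* hypothetical sample value */

	if (!(high & MCI_HI_VAL))
		return 0;             /* nothing logged in this bank */

	printf("uncorrected=%d pcc=%d miscv=%d addrv=%d\n",
	       !!(high & MCI_HI_UC), !!(high & MCI_HI_PCC),
	       !!(high & MCI_HI_MISCV), !!(high & MCI_HI_ADDRV));
	return 0;
}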
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index c9f77ea69edc..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -2,52 +2,67 @@ | |||
2 | * P5 specific Machine Check Exception Reporting | 2 | * P5 specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
11 | 10 | ||
12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | 15 | ||
16 | #include "mce.h" | 16 | /* By default disabled */ |
17 | int mce_p5_enabled __read_mostly; | ||
17 | 18 | ||
18 | /* Machine check handler for Pentium class Intel */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
19 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
20 | { | 21 | { |
21 | u32 loaddr, hi, lotype; | 22 | u32 loaddr, hi, lotype; |
23 | |||
22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 24 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
23 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | 25 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); |
24 | printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); | 26 | |
25 | if (lotype&(1<<5)) | 27 | printk(KERN_EMERG |
26 | printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); | 28 | "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", |
29 | smp_processor_id(), loaddr, lotype); | ||
30 | |||
31 | if (lotype & (1<<5)) { | ||
32 | printk(KERN_EMERG | ||
33 | "CPU#%d: Possible thermal failure (CPU on fire ?).\n", | ||
34 | smp_processor_id()); | ||
35 | } | ||
36 | |||
27 | add_taint(TAINT_MACHINE_CHECK); | 37 | add_taint(TAINT_MACHINE_CHECK); |
28 | } | 38 | } |
29 | 39 | ||
30 | /* Set up machine check reporting for processors with Intel style MCE */ | 40 | /* Set up machine check reporting for processors with Intel style MCE: */ |
31 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | 41 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) |
32 | { | 42 | { |
33 | u32 l, h; | 43 | u32 l, h; |
34 | 44 | ||
35 | /*Check for MCE support */ | 45 | /* Default P5 to off as it's often misconnected: */ |
36 | if (!cpu_has(c, X86_FEATURE_MCE)) | 46 | if (!mce_p5_enabled) |
37 | return; | 47 | return; |
38 | 48 | ||
39 | /* Default P5 to off as it's often misconnected */ | 49 | /* Check for MCE support: */ |
40 | if (mce_disabled != -1) | 50 | if (!cpu_has(c, X86_FEATURE_MCE)) |
41 | return; | 51 | return; |
52 | |||
42 | machine_check_vector = pentium_machine_check; | 53 | machine_check_vector = pentium_machine_check; |
54 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
43 | wmb(); | 55 | wmb(); |
44 | 56 | ||
45 | /* Read registers before enabling */ | 57 | /* Read registers before enabling: */ |
46 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); | 58 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); |
47 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); | 59 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); |
48 | printk(KERN_INFO "Intel old style machine check architecture supported.\n"); | 60 | printk(KERN_INFO |
61 | "Intel old style machine check architecture supported.\n"); | ||
49 | 62 | ||
50 | /* Enable MCE */ | 63 | /* Enable MCE: */ |
51 | set_in_cr4(X86_CR4_MCE); | 64 | set_in_cr4(X86_CR4_MCE); |
52 | printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); | 65 | printk(KERN_INFO |
66 | "Intel old style machine check reporting enabled on CPU#%d.\n", | ||
67 | smp_processor_id()); | ||
53 | } | 68 | } |
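With this change the P5 path is gated by the explicit mce_p5_enabled flag (default 0) instead of inspecting mce_disabled, so intel_p5_mcheck_init() returns early unless the shared MCE setup code opts the CPU in first. Below is a hedged user-space model of that gate; only the mce_p5_enabled name comes from the hunk above, everything else is illustrative.

#include <stdio.h>

static int mce_p5_enabled;     /* defaults to 0, i.e. disabled */
static int cpu_has_mce = 1;    /* stands in for cpu_has(c, X86_FEATURE_MCE) */

static void p5_init_model(void)
{
	if (!mce_p5_enabled) {
		printf("P5 machine check left disabled\n");
		return;
	}
	if (!cpu_has_mce)
		return;
	printf("P5 machine check handler installed\n");
}

int main(void)
{
	p5_init_model();           /* off by default */
	mce_p5_enabled = 1;        /* a core-MCE caller would set this first */
	p5_init_model();
	return 0;
}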
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 2ac52d7b434b..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -2,25 +2,23 @@ | |||
2 | * P6 specific Machine Check Exception Reporting | 2 | * P6 specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
11 | 10 | ||
12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | 15 | ||
16 | #include "mce.h" | ||
17 | |||
18 | /* Machine Check Handler For PII/PIII */ | 16 | /* Machine Check Handler For PII/PIII */ |
19 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 17 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
20 | { | 18 | { |
21 | int recover = 1; | ||
22 | u32 alow, ahigh, high, low; | 19 | u32 alow, ahigh, high, low; |
23 | u32 mcgstl, mcgsth; | 20 | u32 mcgstl, mcgsth; |
21 | int recover = 1; | ||
24 | int i; | 22 | int i; |
25 | 23 | ||
26 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
@@ -35,12 +33,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
35 | if (high & (1<<31)) { | 33 | if (high & (1<<31)) { |
36 | char misc[20]; | 34 | char misc[20]; |
37 | char addr[24]; | 35 | char addr[24]; |
38 | misc[0] = addr[0] = '\0'; | 36 | |
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
39 | if (high & (1<<29)) | 40 | if (high & (1<<29)) |
40 | recover |= 1; | 41 | recover |= 1; |
41 | if (high & (1<<25)) | 42 | if (high & (1<<25)) |
42 | recover |= 2; | 43 | recover |= 2; |
43 | high &= ~(1<<31); | 44 | high &= ~(1<<31); |
45 | |||
44 | if (high & (1<<27)) { | 46 | if (high & (1<<27)) { |
45 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
46 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | 48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
@@ -49,6 +51,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
49 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
50 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | 52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
51 | } | 53 | } |
54 | |||
52 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | 55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
53 | smp_processor_id(), i, high, low, misc, addr); | 56 | smp_processor_id(), i, high, low, misc, addr); |
54 | } | 57 | } |
@@ -63,16 +66,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
63 | /* | 66 | /* |
64 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 67 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
65 | * recoverable/continuable. This will allow BIOS to look at the MSRs | 68 | * recoverable/continuable. This will allow BIOS to look at the MSRs |
66 | * for errors if the OS could not log the error. | 69 | * for errors if the OS could not log the error: |
67 | */ | 70 | */ |
68 | for (i = 0; i < nr_mce_banks; i++) { | 71 | for (i = 0; i < nr_mce_banks; i++) { |
69 | unsigned int msr; | 72 | unsigned int msr; |
73 | |||
70 | msr = MSR_IA32_MC0_STATUS+i*4; | 74 | msr = MSR_IA32_MC0_STATUS+i*4; |
71 | rdmsr(msr, low, high); | 75 | rdmsr(msr, low, high); |
72 | if (high & (1<<31)) { | 76 | if (high & (1<<31)) { |
73 | /* Clear it */ | 77 | /* Clear it: */ |
74 | wrmsr(msr, 0UL, 0UL); | 78 | wrmsr(msr, 0UL, 0UL); |
75 | /* Serialize */ | 79 | /* Serialize: */ |
76 | wmb(); | 80 | wmb(); |
77 | add_taint(TAINT_MACHINE_CHECK); | 81 | add_taint(TAINT_MACHINE_CHECK); |
78 | } | 82 | } |
@@ -81,7 +85,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
81 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 85 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
82 | } | 86 | } |
83 | 87 | ||
84 | /* Set up machine check reporting for processors with Intel style MCE */ | 88 | /* Set up machine check reporting for processors with Intel style MCE: */ |
85 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | 89 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) |
86 | { | 90 | { |
87 | u32 l, h; | 91 | u32 l, h; |
@@ -97,6 +101,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | |||
97 | 101 | ||
98 | /* Ok machine check is available */ | 102 | /* Ok machine check is available */ |
99 | machine_check_vector = intel_machine_check; | 103 | machine_check_vector = intel_machine_check; |
104 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
100 | wmb(); | 105 | wmb(); |
101 | 106 | ||
102 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | 107 | printk(KERN_INFO "Intel machine check architecture supported.\n"); |
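Both 32-bit handlers above index bank registers with a stride of four (MSR_IA32_MC0_STATUS + i*4, and likewise for MISC and ADDR) because each machine-check bank occupies four consecutive MSRs: CTL, STATUS, ADDR, MISC. A small stand-alone sketch of that addressing, using the standard architectural base values (0x400..0x403):

#include <stdio.h>

#define MSR_IA32_MC0_CTL	0x400
#define MSR_IA32_MC0_STATUS	0x401
#define MSR_IA32_MC0_ADDR	0x402
#define MSR_IA32_MC0_MISC	0x403

static unsigned int mc_status_msr(int bank) { return MSR_IA32_MC0_STATUS + bank * 4; }
static unsigned int mc_addr_msr(int bank)   { return MSR_IA32_MC0_ADDR + bank * 4; }
static unsigned int mc_misc_msr(int bank)   { return MSR_IA32_MC0_MISC + bank * 4; }

int main(void)
{
	int bank;

	for (bank = 0; bank < 4; bank++)
		printf("bank %d: STATUS=%#x ADDR=%#x MISC=%#x\n", bank,
		       mc_status_msr(bank), mc_addr_msr(bank), mc_misc_msr(bank));
	return 0;
}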
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d5ae2243f0b9..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * | ||
3 | * Thermal throttle event support code (such as syslog messaging and rate | 2 | * Thermal throttle event support code (such as syslog messaging and rate |
4 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | 3 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). |
4 | * | ||
5 | * This allows consistent reporting of CPU thermal throttle events. | 5 | * This allows consistent reporting of CPU thermal throttle events. |
6 | * | 6 | * |
7 | * Maintains a counter in /sys that keeps track of the number of thermal | 7 | * Maintains a counter in /sys that keeps track of the number of thermal |
@@ -13,43 +13,53 @@ | |||
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
14 | * Inspired by Ross Biro's and Al Borchers' counter code. | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15 | */ | 15 | */ |
16 | 16 | #include <linux/interrupt.h> | |
17 | #include <linux/notifier.h> | ||
18 | #include <linux/jiffies.h> | ||
19 | #include <linux/kernel.h> | ||
17 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
18 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/types.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/smp.h> | ||
19 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
20 | #include <asm/cpu.h> | 26 | |
21 | #include <linux/notifier.h> | 27 | #include <asm/processor.h> |
22 | #include <linux/jiffies.h> | 28 | #include <asm/system.h> |
23 | #include <asm/therm_throt.h> | 29 | #include <asm/apic.h> |
30 | #include <asm/idle.h> | ||
31 | #include <asm/mce.h> | ||
32 | #include <asm/msr.h> | ||
24 | 33 | ||
25 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
26 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
27 | 36 | ||
28 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
29 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | 39 | |
40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | ||
31 | 41 | ||
32 | #ifdef CONFIG_SYSFS | 42 | #ifdef CONFIG_SYSFS |
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | 43 | #define define_therm_throt_sysdev_one_ro(_name) \ |
34 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 44 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
35 | 45 | ||
36 | #define define_therm_throt_sysdev_show_func(name) \ | 46 | #define define_therm_throt_sysdev_show_func(name) \ |
37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 47 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ |
38 | struct sysdev_attribute *attr, \ | 48 | struct sysdev_attribute *attr, \ |
39 | char *buf) \ | 49 | char *buf) \ |
40 | { \ | 50 | { \ |
41 | unsigned int cpu = dev->id; \ | 51 | unsigned int cpu = dev->id; \ |
42 | ssize_t ret; \ | 52 | ssize_t ret; \ |
43 | \ | 53 | \ |
44 | preempt_disable(); /* CPU hotplug */ \ | 54 | preempt_disable(); /* CPU hotplug */ \ |
45 | if (cpu_online(cpu)) \ | 55 | if (cpu_online(cpu)) \ |
46 | ret = sprintf(buf, "%lu\n", \ | 56 | ret = sprintf(buf, "%lu\n", \ |
47 | per_cpu(thermal_throttle_##name, cpu)); \ | 57 | per_cpu(thermal_throttle_##name, cpu)); \ |
48 | else \ | 58 | else \ |
49 | ret = 0; \ | 59 | ret = 0; \ |
50 | preempt_enable(); \ | 60 | preempt_enable(); \ |
51 | \ | 61 | \ |
52 | return ret; \ | 62 | return ret; \ |
53 | } | 63 | } |
54 | 64 | ||
55 | define_therm_throt_sysdev_show_func(count); | 65 | define_therm_throt_sysdev_show_func(count); |
@@ -61,8 +71,8 @@ static struct attribute *thermal_throttle_attrs[] = { | |||
61 | }; | 71 | }; |
62 | 72 | ||
63 | static struct attribute_group thermal_throttle_attr_group = { | 73 | static struct attribute_group thermal_throttle_attr_group = { |
64 | .attrs = thermal_throttle_attrs, | 74 | .attrs = thermal_throttle_attrs, |
65 | .name = "thermal_throttle" | 75 | .name = "thermal_throttle" |
66 | }; | 76 | }; |
67 | #endif /* CONFIG_SYSFS */ | 77 | #endif /* CONFIG_SYSFS */ |
68 | 78 | ||
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
82 | * 1 : Event should be logged further, and a message has been | 92 | * 1 : Event should be logged further, and a message has been |
83 | * printed to the syslog. | 93 | * printed to the syslog. |
84 | */ | 94 | */ |
85 | int therm_throt_process(int curr) | 95 | static int therm_throt_process(int curr) |
86 | { | 96 | { |
87 | unsigned int cpu = smp_processor_id(); | 97 | unsigned int cpu = smp_processor_id(); |
88 | __u64 tmp_jiffs = get_jiffies_64(); | 98 | __u64 tmp_jiffs = get_jiffies_64(); |
@@ -110,10 +120,11 @@ int therm_throt_process(int curr) | |||
110 | } | 120 | } |
111 | 121 | ||
112 | #ifdef CONFIG_SYSFS | 122 | #ifdef CONFIG_SYSFS |
113 | /* Add/Remove thermal_throttle interface for CPU device */ | 123 | /* Add/Remove thermal_throttle interface for CPU device: */ |
114 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 124 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
115 | { | 125 | { |
116 | return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 126 | return sysfs_create_group(&sys_dev->kobj, |
127 | &thermal_throttle_attr_group); | ||
117 | } | 128 | } |
118 | 129 | ||
119 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 130 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
@@ -121,19 +132,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | |||
121 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 132 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); |
122 | } | 133 | } |
123 | 134 | ||
124 | /* Mutex protecting device creation against CPU hotplug */ | 135 | /* Mutex protecting device creation against CPU hotplug: */ |
125 | static DEFINE_MUTEX(therm_cpu_lock); | 136 | static DEFINE_MUTEX(therm_cpu_lock); |
126 | 137 | ||
127 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 138 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
128 | static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | 139 | static __cpuinit int |
129 | unsigned long action, | 140 | thermal_throttle_cpu_callback(struct notifier_block *nfb, |
130 | void *hcpu) | 141 | unsigned long action, |
142 | void *hcpu) | ||
131 | { | 143 | { |
132 | unsigned int cpu = (unsigned long)hcpu; | 144 | unsigned int cpu = (unsigned long)hcpu; |
133 | struct sys_device *sys_dev; | 145 | struct sys_device *sys_dev; |
134 | int err = 0; | 146 | int err = 0; |
135 | 147 | ||
136 | sys_dev = get_cpu_sysdev(cpu); | 148 | sys_dev = get_cpu_sysdev(cpu); |
149 | |||
137 | switch (action) { | 150 | switch (action) { |
138 | case CPU_UP_PREPARE: | 151 | case CPU_UP_PREPARE: |
139 | case CPU_UP_PREPARE_FROZEN: | 152 | case CPU_UP_PREPARE_FROZEN: |
@@ -183,6 +196,94 @@ static __init int thermal_throttle_init_device(void) | |||
183 | 196 | ||
184 | return 0; | 197 | return 0; |
185 | } | 198 | } |
186 | |||
187 | device_initcall(thermal_throttle_init_device); | 199 | device_initcall(thermal_throttle_init_device); |
200 | |||
188 | #endif /* CONFIG_SYSFS */ | 201 | #endif /* CONFIG_SYSFS */ |
202 | |||
203 | /* Thermal transition interrupt handler */ | ||
204 | static void intel_thermal_interrupt(void) | ||
205 | { | ||
206 | __u64 msr_val; | ||
207 | |||
208 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
209 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
210 | mce_log_therm_throt_event(msr_val); | ||
211 | } | ||
212 | |||
213 | static void unexpected_thermal_interrupt(void) | ||
214 | { | ||
215 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
216 | smp_processor_id()); | ||
217 | add_taint(TAINT_MACHINE_CHECK); | ||
218 | } | ||
219 | |||
220 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | ||
221 | |||
222 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | ||
223 | { | ||
224 | exit_idle(); | ||
225 | irq_enter(); | ||
226 | inc_irq_stat(irq_thermal_count); | ||
227 | smp_thermal_vector(); | ||
228 | irq_exit(); | ||
229 | /* Ack only at the end to avoid potential reentry */ | ||
230 | ack_APIC_irq(); | ||
231 | } | ||
232 | |||
233 | void intel_init_thermal(struct cpuinfo_x86 *c) | ||
234 | { | ||
235 | unsigned int cpu = smp_processor_id(); | ||
236 | int tm2 = 0; | ||
237 | u32 l, h; | ||
238 | |||
239 | /* Thermal monitoring depends on ACPI and clock modulation */ | ||
240 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
241 | return; | ||
242 | |||
243 | /* | ||
244 | * First check if it's enabled already, in which case there might | ||
245 | * be some SMM goo which handles it, so we can't even put a handler | ||
246 | * since it might be delivered via SMI already: | ||
247 | */ | ||
248 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
249 | h = apic_read(APIC_LVTTHMR); | ||
250 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
251 | printk(KERN_DEBUG | ||
252 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
257 | tm2 = 1; | ||
258 | |||
259 | /* Check whether a vector already exists */ | ||
260 | if (h & APIC_VECTOR_MASK) { | ||
261 | printk(KERN_DEBUG | ||
262 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
263 | cpu, (h & APIC_VECTOR_MASK)); | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | /* We'll mask the thermal vector in the lapic till we're ready: */ | ||
268 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | ||
269 | apic_write(APIC_LVTTHMR, h); | ||
270 | |||
271 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
272 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
273 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
274 | |||
275 | smp_thermal_vector = intel_thermal_interrupt; | ||
276 | |||
277 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
278 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
279 | |||
280 | /* Unmask the thermal vector: */ | ||
281 | l = apic_read(APIC_LVTTHMR); | ||
282 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
283 | |||
284 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
285 | cpu, tm2 ? "TM2" : "TM1"); | ||
286 | |||
287 | /* enable thermal throttle processing */ | ||
288 | atomic_set(&therm_throt_en, 1); | ||
289 | } | ||
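therm_throt_process(), now static, keeps the existing rate limit: every PROCHOT event bumps the per-CPU thermal_throttle_count, but a syslog message is emitted at most once per CHECK_INTERVAL (300 * HZ, i.e. five minutes). A rough user-space model of that throttling with jiffies replaced by a plain tick counter; the kernel function's exact bookkeeping may differ in detail.

#include <stdio.h>

#define HZ		1000
#define CHECK_INTERVAL	(300 * HZ)	/* same five-minute window */

static unsigned long long next_check;	/* per-CPU in the kernel */
static unsigned long throttle_count;

/* Returns 1 if the event should be logged, 0 if it is rate-limited. */
static int therm_throt_model(unsigned long long now, int prochot)
{
	if (prochot)
		throttle_count++;

	if (now < next_check)
		return 0;		/* still inside the quiet window */

	next_check = now + CHECK_INTERVAL;
	return prochot;
}

int main(void)
{
	unsigned long long t;

	for (t = 0; t < 4ULL * CHECK_INTERVAL; t += CHECK_INTERVAL / 2)
		printf("t=%llu log=%d count=%lu\n",
		       t, therm_throt_model(t, 1), throttle_count);
	return 0;
}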
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 23ee9e730f78..d746df2909c9 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
@@ -17,7 +17,7 @@ static void default_threshold_interrupt(void) | |||
17 | 17 | ||
18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; | 18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; |
19 | 19 | ||
20 | asmlinkage void mce_threshold_interrupt(void) | 20 | asmlinkage void smp_threshold_interrupt(void) |
21 | { | 21 | { |
22 | exit_idle(); | 22 | exit_idle(); |
23 | irq_enter(); | 23 | irq_enter(); |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2a043d89811d..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -2,19 +2,17 @@ | |||
2 | * IDT Winchip specific Machine Check Exception Reporting | 2 | * IDT Winchip specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | 9 | ||
11 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 11 | #include <asm/system.h> |
12 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | 14 | ||
15 | #include "mce.h" | 15 | /* Machine check handler for WinChip C6: */ |
16 | |||
17 | /* Machine check handler for WinChip C6 */ | ||
18 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 17 | { |
20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 18 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
@@ -25,12 +23,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) | |||
25 | void winchip_mcheck_init(struct cpuinfo_x86 *c) | 23 | void winchip_mcheck_init(struct cpuinfo_x86 *c) |
26 | { | 24 | { |
27 | u32 lo, hi; | 25 | u32 lo, hi; |
26 | |||
28 | machine_check_vector = winchip_machine_check; | 27 | machine_check_vector = winchip_machine_check; |
28 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
29 | wmb(); | 29 | wmb(); |
30 | |||
30 | rdmsr(MSR_IDT_FCR1, lo, hi); | 31 | rdmsr(MSR_IDT_FCR1, lo, hi); |
31 | lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ | 32 | lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ |
32 | lo &= ~(1<<4); /* Enable MCE */ | 33 | lo &= ~(1<<4); /* Enable MCE */ |
33 | wrmsr(MSR_IDT_FCR1, lo, hi); | 34 | wrmsr(MSR_IDT_FCR1, lo, hi); |
35 | |||
34 | set_in_cr4(X86_CR4_MCE); | 36 | set_in_cr4(X86_CR4_MCE); |
35 | printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); | 37 | |
38 | printk(KERN_INFO | ||
39 | "Winchip machine check reporting enabled on CPU#0.\n"); | ||
36 | } | 40 | } |