aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck
diff options
context:
space:
mode:
authorJiri Kosina <jkosina@suse.cz>2010-06-16 12:08:13 -0400
committerJiri Kosina <jkosina@suse.cz>2010-06-16 12:08:13 -0400
commitf1bbbb6912662b9f6070c5bfc4ca9eb1f06a9d5b (patch)
treec2c130a74be25b0b2dff992e1a195e2728bdaadd /arch/x86/kernel/cpu/mcheck
parentfd0961ff67727482bb20ca7e8ea97b83e9de2ddb (diff)
parent7e27d6e778cd87b6f2415515d7127eba53fe5d02 (diff)
Merge branch 'master' into for-next
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c138
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h23
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c89
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
5 files changed, 242 insertions, 12 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 4ac6d48fe11b..bb34b03af252 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
7obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o 7obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
8 8
9obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o 9obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
10
11obj-$(CONFIG_ACPI_APEI) += mce-apei.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
new file mode 100644
index 000000000000..745b54f9be89
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -0,0 +1,138 @@
1/*
2 * Bridge between MCE and APEI
3 *
4 * On some machines, corrected memory errors are reported via APEI
5 * generic hardware error source (GHES) instead of corrected Machine
6 * Check. These corrected memory errors can be reported to user space
7 * through /dev/mcelog via faking a corrected Machine Check, so that
8 * the error memory page can be offlined by /sbin/mcelog if the error
9 * count for one page is beyond the threshold.
10 *
11 * For fatal MCE, save MCE record into persistent storage via ERST, so
12 * that the MCE record can be logged after reboot via ERST.
13 *
14 * Copyright 2010 Intel Corp.
15 * Author: Huang Ying <ying.huang@intel.com>
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public License version
19 * 2 as published by the Free Software Foundation.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 * GNU General Public License for more details.
25 *
26 * You should have received a copy of the GNU General Public License
27 * along with this program; if not, write to the Free Software
28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29 */
30
31#include <linux/kernel.h>
32#include <linux/acpi.h>
33#include <linux/cper.h>
34#include <acpi/apei.h>
35#include <asm/mce.h>
36
37#include "mce-internal.h"
38
39void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
40{
41 struct mce m;
42
43 /* Only corrected MC is reported */
44 if (!corrected)
45 return;
46
47 mce_setup(&m);
48 m.bank = 1;
49 /* Fake a memory read corrected error with unknown channel */
50 m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f;
51 m.addr = mem_err->physical_addr;
52 mce_log(&m);
53 mce_notify_irq();
54}
55EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
56
57#define CPER_CREATOR_MCE \
58 UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
59 0x64, 0x90, 0xb8, 0x9d)
60#define CPER_SECTION_TYPE_MCE \
61 UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
62 0x04, 0x4a, 0x38, 0xfc)
63
64/*
65 * MCE record as handed to ERST: a CPER record header, one section
66 * descriptor and the struct mce payload. The CPER specification (in UEFI
67 * specification 2.3 appendix N) requires the record to be byte-packed. */
68struct cper_mce_record {
69 struct cper_record_header hdr; /* record header; record_length is set to cover the whole struct */
70 struct cper_section_descriptor sec_hdr; /* describes the single section, i.e. the mce member below */
71 struct mce mce; /* section body: copy of the machine check record */
72} __packed;
73
74int apei_write_mce(struct mce *m)
75{
76 struct cper_mce_record rcd;
77
78 memset(&rcd, 0, sizeof(rcd));
79 memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
80 rcd.hdr.revision = CPER_RECORD_REV;
81 rcd.hdr.signature_end = CPER_SIG_END;
82 rcd.hdr.section_count = 1;
83 rcd.hdr.error_severity = CPER_SER_FATAL;
84 /* timestamp, platform_id, partition_id are all invalid */
85 rcd.hdr.validation_bits = 0;
86 rcd.hdr.record_length = sizeof(rcd);
87 rcd.hdr.creator_id = CPER_CREATOR_MCE;
88 rcd.hdr.notification_type = CPER_NOTIFY_MCE;
89 rcd.hdr.record_id = cper_next_record_id();
90 rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
91
92 rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
93 rcd.sec_hdr.section_length = sizeof(rcd.mce);
94 rcd.sec_hdr.revision = CPER_SEC_REV;
95 /* fru_id and fru_text is invalid */
96 rcd.sec_hdr.validation_bits = 0;
97 rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
98 rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
99 rcd.sec_hdr.section_severity = CPER_SER_FATAL;
100
101 memcpy(&rcd.mce, m, sizeof(*m));
102
103 return erst_write(&rcd.hdr);
104}
105
106ssize_t apei_read_mce(struct mce *m, u64 *record_id)
107{
108 struct cper_mce_record rcd;
109 ssize_t len;
110
111 len = erst_read_next(&rcd.hdr, sizeof(rcd));
112 if (len <= 0)
113 return len;
114 /* Can not skip other records in storage via ERST unless clear them */
115 else if (len != sizeof(rcd) ||
116 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
117 if (printk_ratelimit())
118 pr_warning(
119 "MCE-APEI: Can not skip the unknown record in ERST");
120 return -EIO;
121 }
122
123 memcpy(m, &rcd.mce, sizeof(*m));
124 *record_id = rcd.hdr.record_id;
125
126 return sizeof(*m);
127}
128
/*
 * Check whether there is any record in ERST: hands back the result of
 * erst_get_record_count() as an int.
 */
int apei_check_mce(void)
{
	int nr_records = erst_get_record_count();

	return nr_records;
}
134
135int apei_clear_mce(u64 record_id)
136{
137 return erst_clear(record_id);
138}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 32996f9fab67..fefcc69ee8b5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
28 28
29extern struct mce_bank *mce_banks; 29extern struct mce_bank *mce_banks;
30 30
31#ifdef CONFIG_ACPI_APEI /* mce-apei.o is built: use the real MCE <-> ERST bridge */
32int apei_write_mce(struct mce *m);
33ssize_t apei_read_mce(struct mce *m, u64 *record_id);
34int apei_check_mce(void);
35int apei_clear_mce(u64 record_id);
36#else /* !CONFIG_ACPI_APEI: inline stubs so callers need no #ifdef */
37static inline int apei_write_mce(struct mce *m)
38{
39 return -EINVAL; /* nothing can be written */
40}
41static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
42{
43 return 0; /* nothing read; *m and *record_id are left untouched */
44}
45static inline int apei_check_mce(void)
46{
47 return 0; /* zero: callers testing the result see no pending record */
48}
49static inline int apei_clear_mce(u64 record_id)
50{
51 return -EINVAL; /* nothing can be cleared */
52}
53#endif /* CONFIG_ACPI_APEI */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a355ddcc64b..18cc42562250 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -36,6 +36,7 @@
36#include <linux/fs.h> 36#include <linux/fs.h>
37#include <linux/mm.h> 37#include <linux/mm.h>
38#include <linux/debugfs.h> 38#include <linux/debugfs.h>
39#include <linux/edac_mce.h>
39 40
40#include <asm/processor.h> 41#include <asm/processor.h>
41#include <asm/hw_irq.h> 42#include <asm/hw_irq.h>
@@ -169,6 +170,15 @@ void mce_log(struct mce *mce)
169 entry = rcu_dereference_check_mce(mcelog.next); 170 entry = rcu_dereference_check_mce(mcelog.next);
170 for (;;) { 171 for (;;) {
171 /* 172 /*
173 * If edac_mce is enabled, it will check the error type
174 * and will process it, if it is a known error.
175 * Otherwise, the error will be sent through mcelog
176 * interface
177 */
178 if (edac_mce_parse(mce))
179 return;
180
181 /*
172 * When the buffer fills up discard new entries. 182 * When the buffer fills up discard new entries.
173 * Assume that the earlier errors are the more 183 * Assume that the earlier errors are the more
174 * interesting ones: 184 * interesting ones:
@@ -264,7 +274,7 @@ static void wait_for_panic(void)
264 274
265static void mce_panic(char *msg, struct mce *final, char *exp) 275static void mce_panic(char *msg, struct mce *final, char *exp)
266{ 276{
267 int i; 277 int i, apei_err = 0;
268 278
269 if (!fake_panic) { 279 if (!fake_panic) {
270 /* 280 /*
@@ -287,8 +297,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
287 struct mce *m = &mcelog.entry[i]; 297 struct mce *m = &mcelog.entry[i];
288 if (!(m->status & MCI_STATUS_VAL)) 298 if (!(m->status & MCI_STATUS_VAL))
289 continue; 299 continue;
290 if (!(m->status & MCI_STATUS_UC)) 300 if (!(m->status & MCI_STATUS_UC)) {
291 print_mce(m); 301 print_mce(m);
302 if (!apei_err)
303 apei_err = apei_write_mce(m);
304 }
292 } 305 }
293 /* Now print uncorrected but with the final one last */ 306 /* Now print uncorrected but with the final one last */
294 for (i = 0; i < MCE_LOG_LEN; i++) { 307 for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -297,11 +310,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
297 continue; 310 continue;
298 if (!(m->status & MCI_STATUS_UC)) 311 if (!(m->status & MCI_STATUS_UC))
299 continue; 312 continue;
300 if (!final || memcmp(m, final, sizeof(struct mce))) 313 if (!final || memcmp(m, final, sizeof(struct mce))) {
301 print_mce(m); 314 print_mce(m);
315 if (!apei_err)
316 apei_err = apei_write_mce(m);
317 }
302 } 318 }
303 if (final) 319 if (final) {
304 print_mce(final); 320 print_mce(final);
321 if (!apei_err)
322 apei_err = apei_write_mce(final);
323 }
305 if (cpu_missing) 324 if (cpu_missing)
306 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); 325 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
307 print_mce_tail(); 326 print_mce_tail();
@@ -1493,6 +1512,43 @@ static void collect_tscs(void *data)
1493 rdtscll(cpu_tsc[smp_processor_id()]); 1512 rdtscll(cpu_tsc[smp_processor_id()]);
1494} 1513}
1495 1514
1515static int mce_apei_read_done;
1516
1517/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
1518static int __mce_read_apei(char __user **ubuf, size_t usize)
1519{
1520 int rc;
1521 u64 record_id;
1522 struct mce m;
1523
1524 if (usize < sizeof(struct mce))
1525 return -EINVAL;
1526
1527 rc = apei_read_mce(&m, &record_id);
1528 /* Error or no more MCE record */
1529 if (rc <= 0) {
1530 mce_apei_read_done = 1;
1531 return rc;
1532 }
1533 rc = -EFAULT;
1534 if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
1535 return rc;
1536 /*
1537 * In fact, we should have cleared the record after that has
1538 * been flushed to the disk or sent to network in
1539 * /sbin/mcelog, but we have no interface to support that now,
1540 * so just clear it to avoid duplication.
1541 */
1542 rc = apei_clear_mce(record_id);
1543 if (rc) {
1544 mce_apei_read_done = 1;
1545 return rc;
1546 }
1547 *ubuf += sizeof(struct mce);
1548
1549 return 0;
1550}
1551
1496static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, 1552static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1497 loff_t *off) 1553 loff_t *off)
1498{ 1554{
@@ -1506,15 +1562,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1506 return -ENOMEM; 1562 return -ENOMEM;
1507 1563
1508 mutex_lock(&mce_read_mutex); 1564 mutex_lock(&mce_read_mutex);
1565
1566 if (!mce_apei_read_done) {
1567 err = __mce_read_apei(&buf, usize);
1568 if (err || buf != ubuf)
1569 goto out;
1570 }
1571
1509 next = rcu_dereference_check_mce(mcelog.next); 1572 next = rcu_dereference_check_mce(mcelog.next);
1510 1573
1511 /* Only supports full reads right now */ 1574 /* Only supports full reads right now */
1512 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 1575 err = -EINVAL;
1513 mutex_unlock(&mce_read_mutex); 1576 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
1514 kfree(cpu_tsc); 1577 goto out;
1515
1516 return -EINVAL;
1517 }
1518 1578
1519 err = 0; 1579 err = 0;
1520 prev = 0; 1580 prev = 0;
@@ -1562,10 +1622,15 @@ timeout:
1562 memset(&mcelog.entry[i], 0, sizeof(struct mce)); 1622 memset(&mcelog.entry[i], 0, sizeof(struct mce));
1563 } 1623 }
1564 } 1624 }
1625
1626 if (err)
1627 err = -EFAULT;
1628
1629out:
1565 mutex_unlock(&mce_read_mutex); 1630 mutex_unlock(&mce_read_mutex);
1566 kfree(cpu_tsc); 1631 kfree(cpu_tsc);
1567 1632
1568 return err ? -EFAULT : buf - ubuf; 1633 return err ? err : buf - ubuf;
1569} 1634}
1570 1635
1571static unsigned int mce_poll(struct file *file, poll_table *wait) 1636static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1573,6 +1638,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
1573 poll_wait(file, &mce_wait, wait); 1638 poll_wait(file, &mce_wait, wait);
1574 if (rcu_dereference_check_mce(mcelog.next)) 1639 if (rcu_dereference_check_mce(mcelog.next))
1575 return POLLIN | POLLRDNORM; 1640 return POLLIN | POLLRDNORM;
1641 if (!mce_apei_read_done && apei_check_mce())
1642 return POLLIN | POLLRDNORM;
1576 return 0; 1643 return 0;
1577} 1644}
1578 1645
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 81c499eceb21..e1a0a3bf9716 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb,
190 mutex_unlock(&therm_cpu_lock); 190 mutex_unlock(&therm_cpu_lock);
191 break; 191 break;
192 } 192 }
193 return err ? NOTIFY_BAD : NOTIFY_OK; 193 return notifier_from_errno(err);
194} 194}
195 195
196static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = 196static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata =