aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorHuang Ying <ying.huang@intel.com>2010-05-18 02:35:22 -0400
committerLen Brown <len.brown@intel.com>2010-05-19 22:41:40 -0400
commit482908b49ebfa453dd0455910c951c750567c05d (patch)
treebafbeb8e02d235e5438fafe69039caf0bb208785 /arch/x86/kernel
parenta08f82d08053fb6e3aa3635c2c26456d96337c8b (diff)
ACPI, APEI, Use ERST for persistent storage of MCE
Traditionally, fatal MCE will cause Linux print error log to console then reboot. Because MCE registers will preserve their content after warm reboot, the hardware error can be logged to disk or network after reboot. But system may fail to warm reboot, then you may lose the hardware error log. ERST can help here. Through saving the hardware error log into flash via ERST before go panic, the hardware error log can be gotten from the flash after system boot successful again. The fatal MCE processing procedure with ERST involved is as follow: - Hardware detect error, MCE raised - MCE read MCE registers, check error severity (fatal), prepare error record - Write MCE error record into flash via ERST - Go panic, then trigger system reboot - System reboot, /sbin/mcelog run, it reads /dev/mcelog to check flash for error record of previous boot via ERST, and output and clear them if available - /sbin/mcelog logs error records into disk or network ERST only accepts CPER record format, but there is no pre-defined CPER section can accommodate all information in struct mce, so a customized section type is defined to hold struct mce inside a CPER record as an error section. Signed-off-by: Huang Ying <ying.huang@intel.com> Signed-off-by: Andi Kleen <ak@linux.intel.com> Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-apei.c86
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h23
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c79
3 files changed, 177 insertions, 11 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 4eccd1fadb1..745b54f9be8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -8,6 +8,9 @@
8 * the error memory page can be offlined by /sbin/mcelog if the error 8 * the error memory page can be offlined by /sbin/mcelog if the error
9 * count for one page is beyond the threshold. 9 * count for one page is beyond the threshold.
10 * 10 *
11 * For fatal MCE, save MCE record into persistent storage via ERST, so
12 * that the MCE record can be logged after reboot via ERST.
13 *
11 * Copyright 2010 Intel Corp. 14 * Copyright 2010 Intel Corp.
12 * Author: Huang Ying <ying.huang@intel.com> 15 * Author: Huang Ying <ying.huang@intel.com>
13 * 16 *
@@ -50,3 +53,86 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err)
50 mce_notify_irq(); 53 mce_notify_irq();
51} 54}
52EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); 55EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
56
57#define CPER_CREATOR_MCE \
58 UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
59 0x64, 0x90, 0xb8, 0x9d)
60#define CPER_SECTION_TYPE_MCE \
61 UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
62 0x04, 0x4a, 0x38, 0xfc)
63
64/*
65 * CPER specification (in UEFI specification 2.3 appendix N) requires
66 * byte-packed.
67 */
68struct cper_mce_record {
69 struct cper_record_header hdr;
70 struct cper_section_descriptor sec_hdr;
71 struct mce mce;
72} __packed;
73
74int apei_write_mce(struct mce *m)
75{
76 struct cper_mce_record rcd;
77
78 memset(&rcd, 0, sizeof(rcd));
79 memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE);
80 rcd.hdr.revision = CPER_RECORD_REV;
81 rcd.hdr.signature_end = CPER_SIG_END;
82 rcd.hdr.section_count = 1;
83 rcd.hdr.error_severity = CPER_SER_FATAL;
84 /* timestamp, platform_id, partition_id are all invalid */
85 rcd.hdr.validation_bits = 0;
86 rcd.hdr.record_length = sizeof(rcd);
87 rcd.hdr.creator_id = CPER_CREATOR_MCE;
88 rcd.hdr.notification_type = CPER_NOTIFY_MCE;
89 rcd.hdr.record_id = cper_next_record_id();
90 rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR;
91
92 rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd;
93 rcd.sec_hdr.section_length = sizeof(rcd.mce);
94 rcd.sec_hdr.revision = CPER_SEC_REV;
95 /* fru_id and fru_text is invalid */
96 rcd.sec_hdr.validation_bits = 0;
97 rcd.sec_hdr.flags = CPER_SEC_PRIMARY;
98 rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE;
99 rcd.sec_hdr.section_severity = CPER_SER_FATAL;
100
101 memcpy(&rcd.mce, m, sizeof(*m));
102
103 return erst_write(&rcd.hdr);
104}
105
106ssize_t apei_read_mce(struct mce *m, u64 *record_id)
107{
108 struct cper_mce_record rcd;
109 ssize_t len;
110
111 len = erst_read_next(&rcd.hdr, sizeof(rcd));
112 if (len <= 0)
113 return len;
114 /* Can not skip other records in storage via ERST unless clear them */
115 else if (len != sizeof(rcd) ||
116 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
117 if (printk_ratelimit())
118 pr_warning(
119 "MCE-APEI: Can not skip the unknown record in ERST");
120 return -EIO;
121 }
122
123 memcpy(m, &rcd.mce, sizeof(*m));
124 *record_id = rcd.hdr.record_id;
125
126 return sizeof(*m);
127}
128
129/* Check whether there is record in ERST */
130int apei_check_mce(void)
131{
132 return erst_get_record_count();
133}
134
135int apei_clear_mce(u64 record_id)
136{
137 return erst_clear(record_id);
138}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 32996f9fab6..fefcc69ee8b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,3 +28,26 @@ extern int mce_ser;
28 28
29extern struct mce_bank *mce_banks; 29extern struct mce_bank *mce_banks;
30 30
31#ifdef CONFIG_ACPI_APEI
32int apei_write_mce(struct mce *m);
33ssize_t apei_read_mce(struct mce *m, u64 *record_id);
34int apei_check_mce(void);
35int apei_clear_mce(u64 record_id);
36#else
37static inline int apei_write_mce(struct mce *m)
38{
39 return -EINVAL;
40}
41static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id)
42{
43 return 0;
44}
45static inline int apei_check_mce(void)
46{
47 return 0;
48}
49static inline int apei_clear_mce(u64 record_id)
50{
51 return -EINVAL;
52}
53#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 8a6f0afa767..09535ca9b9d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -264,7 +264,7 @@ static void wait_for_panic(void)
264 264
265static void mce_panic(char *msg, struct mce *final, char *exp) 265static void mce_panic(char *msg, struct mce *final, char *exp)
266{ 266{
267 int i; 267 int i, apei_err = 0;
268 268
269 if (!fake_panic) { 269 if (!fake_panic) {
270 /* 270 /*
@@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
287 struct mce *m = &mcelog.entry[i]; 287 struct mce *m = &mcelog.entry[i];
288 if (!(m->status & MCI_STATUS_VAL)) 288 if (!(m->status & MCI_STATUS_VAL))
289 continue; 289 continue;
290 if (!(m->status & MCI_STATUS_UC)) 290 if (!(m->status & MCI_STATUS_UC)) {
291 print_mce(m); 291 print_mce(m);
292 if (!apei_err)
293 apei_err = apei_write_mce(m);
294 }
292 } 295 }
293 /* Now print uncorrected but with the final one last */ 296 /* Now print uncorrected but with the final one last */
294 for (i = 0; i < MCE_LOG_LEN; i++) { 297 for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
297 continue; 300 continue;
298 if (!(m->status & MCI_STATUS_UC)) 301 if (!(m->status & MCI_STATUS_UC))
299 continue; 302 continue;
300 if (!final || memcmp(m, final, sizeof(struct mce))) 303 if (!final || memcmp(m, final, sizeof(struct mce))) {
301 print_mce(m); 304 print_mce(m);
305 if (!apei_err)
306 apei_err = apei_write_mce(m);
307 }
302 } 308 }
303 if (final) 309 if (final) {
304 print_mce(final); 310 print_mce(final);
311 if (!apei_err)
312 apei_err = apei_write_mce(final);
313 }
305 if (cpu_missing) 314 if (cpu_missing)
306 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); 315 printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n");
307 print_mce_tail(); 316 print_mce_tail();
@@ -1493,6 +1502,43 @@ static void collect_tscs(void *data)
1493 rdtscll(cpu_tsc[smp_processor_id()]); 1502 rdtscll(cpu_tsc[smp_processor_id()]);
1494} 1503}
1495 1504
1505static int mce_apei_read_done;
1506
1507/* Collect MCE record of previous boot in persistent storage via APEI ERST. */
1508static int __mce_read_apei(char __user **ubuf, size_t usize)
1509{
1510 int rc;
1511 u64 record_id;
1512 struct mce m;
1513
1514 if (usize < sizeof(struct mce))
1515 return -EINVAL;
1516
1517 rc = apei_read_mce(&m, &record_id);
1518 /* Error or no more MCE record */
1519 if (rc <= 0) {
1520 mce_apei_read_done = 1;
1521 return rc;
1522 }
1523 rc = -EFAULT;
1524 if (copy_to_user(*ubuf, &m, sizeof(struct mce)))
1525 return rc;
1526 /*
1527 * In fact, we should have cleared the record after that has
1528 * been flushed to the disk or sent to network in
1529 * /sbin/mcelog, but we have no interface to support that now,
1530 * so just clear it to avoid duplication.
1531 */
1532 rc = apei_clear_mce(record_id);
1533 if (rc) {
1534 mce_apei_read_done = 1;
1535 return rc;
1536 }
1537 *ubuf += sizeof(struct mce);
1538
1539 return 0;
1540}
1541
1496static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, 1542static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1497 loff_t *off) 1543 loff_t *off)
1498{ 1544{
@@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
1506 return -ENOMEM; 1552 return -ENOMEM;
1507 1553
1508 mutex_lock(&mce_read_mutex); 1554 mutex_lock(&mce_read_mutex);
1555
1556 if (!mce_apei_read_done) {
1557 err = __mce_read_apei(&buf, usize);
1558 if (err || buf != ubuf)
1559 goto out;
1560 }
1561
1509 next = rcu_dereference_check_mce(mcelog.next); 1562 next = rcu_dereference_check_mce(mcelog.next);
1510 1563
1511 /* Only supports full reads right now */ 1564 /* Only supports full reads right now */
1512 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 1565 err = -EINVAL;
1513 mutex_unlock(&mce_read_mutex); 1566 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
1514 kfree(cpu_tsc); 1567 goto out;
1515
1516 return -EINVAL;
1517 }
1518 1568
1519 err = 0; 1569 err = 0;
1520 prev = 0; 1570 prev = 0;
@@ -1562,10 +1612,15 @@ timeout:
1562 memset(&mcelog.entry[i], 0, sizeof(struct mce)); 1612 memset(&mcelog.entry[i], 0, sizeof(struct mce));
1563 } 1613 }
1564 } 1614 }
1615
1616 if (err)
1617 err = -EFAULT;
1618
1619out:
1565 mutex_unlock(&mce_read_mutex); 1620 mutex_unlock(&mce_read_mutex);
1566 kfree(cpu_tsc); 1621 kfree(cpu_tsc);
1567 1622
1568 return err ? -EFAULT : buf - ubuf; 1623 return err ? err : buf - ubuf;
1569} 1624}
1570 1625
1571static unsigned int mce_poll(struct file *file, poll_table *wait) 1626static unsigned int mce_poll(struct file *file, poll_table *wait)
@@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait)
1573 poll_wait(file, &mce_wait, wait); 1628 poll_wait(file, &mce_wait, wait);
1574 if (rcu_dereference_check_mce(mcelog.next)) 1629 if (rcu_dereference_check_mce(mcelog.next))
1575 return POLLIN | POLLRDNORM; 1630 return POLLIN | POLLRDNORM;
1631 if (!mce_apei_read_done && apei_check_mce())
1632 return POLLIN | POLLRDNORM;
1576 return 0; 1633 return 0;
1577} 1634}
1578 1635