diff options
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-apei.c | 86 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 79 |
3 files changed, 177 insertions, 11 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 4eccd1fadb14..745b54f9be89 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -8,6 +8,9 @@ | |||
8 | * the error memory page can be offlined by /sbin/mcelog if the error | 8 | * the error memory page can be offlined by /sbin/mcelog if the error |
9 | * count for one page is beyond the threshold. | 9 | * count for one page is beyond the threshold. |
10 | * | 10 | * |
11 | * For fatal MCE, save MCE record into persistent storage via ERST, so | ||
12 | * that the MCE record can be logged after reboot via ERST. | ||
13 | * | ||
11 | * Copyright 2010 Intel Corp. | 14 | * Copyright 2010 Intel Corp. |
12 | * Author: Huang Ying <ying.huang@intel.com> | 15 | * Author: Huang Ying <ying.huang@intel.com> |
13 | * | 16 | * |
@@ -50,3 +53,86 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | |||
50 | mce_notify_irq(); | 53 | mce_notify_irq(); |
51 | } | 54 | } |
52 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | 55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); |
56 | |||
57 | #define CPER_CREATOR_MCE \ | ||
58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
59 | 0x64, 0x90, 0xb8, 0x9d) | ||
60 | #define CPER_SECTION_TYPE_MCE \ | ||
61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
62 | 0x04, 0x4a, 0x38, 0xfc) | ||
63 | |||
64 | /* | ||
65 | * CPER specification (in UEFI specification 2.3 appendix N) requires | ||
66 | * byte-packed. | ||
67 | */ | ||
68 | struct cper_mce_record { | ||
69 | struct cper_record_header hdr; | ||
70 | struct cper_section_descriptor sec_hdr; | ||
71 | struct mce mce; | ||
72 | } __packed; | ||
73 | |||
74 | int apei_write_mce(struct mce *m) | ||
75 | { | ||
76 | struct cper_mce_record rcd; | ||
77 | |||
78 | memset(&rcd, 0, sizeof(rcd)); | ||
79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
82 | rcd.hdr.section_count = 1; | ||
83 | rcd.hdr.error_severity = CPER_SER_FATAL; | ||
84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
85 | rcd.hdr.validation_bits = 0; | ||
86 | rcd.hdr.record_length = sizeof(rcd); | ||
87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
89 | rcd.hdr.record_id = cper_next_record_id(); | ||
90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
91 | |||
92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
95 | /* fru_id and fru_text is invalid */ | ||
96 | rcd.sec_hdr.validation_bits = 0; | ||
97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
99 | rcd.sec_hdr.section_severity = CPER_SER_FATAL; | ||
100 | |||
101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
102 | |||
103 | return erst_write(&rcd.hdr); | ||
104 | } | ||
105 | |||
106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
107 | { | ||
108 | struct cper_mce_record rcd; | ||
109 | ssize_t len; | ||
110 | |||
111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
112 | if (len <= 0) | ||
113 | return len; | ||
114 | /* Can not skip other records in storage via ERST unless clear them */ | ||
115 | else if (len != sizeof(rcd) || | ||
116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
117 | if (printk_ratelimit()) | ||
118 | pr_warning( | ||
119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
120 | return -EIO; | ||
121 | } | ||
122 | |||
123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
124 | *record_id = rcd.hdr.record_id; | ||
125 | |||
126 | return sizeof(*m); | ||
127 | } | ||
128 | |||
129 | /* Check whether there is record in ERST */ | ||
130 | int apei_check_mce(void) | ||
131 | { | ||
132 | return erst_get_record_count(); | ||
133 | } | ||
134 | |||
135 | int apei_clear_mce(u64 record_id) | ||
136 | { | ||
137 | return erst_clear(record_id); | ||
138 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -28,3 +28,26 @@ extern int mce_ser; | |||
28 | 28 | ||
29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
30 | 30 | ||
31 | #ifdef CONFIG_ACPI_APEI | ||
32 | int apei_write_mce(struct mce *m); | ||
33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
34 | int apei_check_mce(void); | ||
35 | int apei_clear_mce(u64 record_id); | ||
36 | #else | ||
37 | static inline int apei_write_mce(struct mce *m) | ||
38 | { | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | static inline int apei_check_mce(void) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | static inline int apei_clear_mce(u64 record_id) | ||
50 | { | ||
51 | return -EINVAL; | ||
52 | } | ||
53 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..09535ca9b9d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -264,7 +264,7 @@ static void wait_for_panic(void) | |||
264 | 264 | ||
265 | static void mce_panic(char *msg, struct mce *final, char *exp) | 265 | static void mce_panic(char *msg, struct mce *final, char *exp) |
266 | { | 266 | { |
267 | int i; | 267 | int i, apei_err = 0; |
268 | 268 | ||
269 | if (!fake_panic) { | 269 | if (!fake_panic) { |
270 | /* | 270 | /* |
@@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
287 | struct mce *m = &mcelog.entry[i]; | 287 | struct mce *m = &mcelog.entry[i]; |
288 | if (!(m->status & MCI_STATUS_VAL)) | 288 | if (!(m->status & MCI_STATUS_VAL)) |
289 | continue; | 289 | continue; |
290 | if (!(m->status & MCI_STATUS_UC)) | 290 | if (!(m->status & MCI_STATUS_UC)) { |
291 | print_mce(m); | 291 | print_mce(m); |
292 | if (!apei_err) | ||
293 | apei_err = apei_write_mce(m); | ||
294 | } | ||
292 | } | 295 | } |
293 | /* Now print uncorrected but with the final one last */ | 296 | /* Now print uncorrected but with the final one last */ |
294 | for (i = 0; i < MCE_LOG_LEN; i++) { | 297 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
297 | continue; | 300 | continue; |
298 | if (!(m->status & MCI_STATUS_UC)) | 301 | if (!(m->status & MCI_STATUS_UC)) |
299 | continue; | 302 | continue; |
300 | if (!final || memcmp(m, final, sizeof(struct mce))) | 303 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
301 | print_mce(m); | 304 | print_mce(m); |
305 | if (!apei_err) | ||
306 | apei_err = apei_write_mce(m); | ||
307 | } | ||
302 | } | 308 | } |
303 | if (final) | 309 | if (final) { |
304 | print_mce(final); | 310 | print_mce(final); |
311 | if (!apei_err) | ||
312 | apei_err = apei_write_mce(final); | ||
313 | } | ||
305 | if (cpu_missing) | 314 | if (cpu_missing) |
306 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 315 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); |
307 | print_mce_tail(); | 316 | print_mce_tail(); |
@@ -1493,6 +1502,43 @@ static void collect_tscs(void *data) | |||
1493 | rdtscll(cpu_tsc[smp_processor_id()]); | 1502 | rdtscll(cpu_tsc[smp_processor_id()]); |
1494 | } | 1503 | } |
1495 | 1504 | ||
1505 | static int mce_apei_read_done; | ||
1506 | |||
1507 | /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ | ||
1508 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
1509 | { | ||
1510 | int rc; | ||
1511 | u64 record_id; | ||
1512 | struct mce m; | ||
1513 | |||
1514 | if (usize < sizeof(struct mce)) | ||
1515 | return -EINVAL; | ||
1516 | |||
1517 | rc = apei_read_mce(&m, &record_id); | ||
1518 | /* Error or no more MCE record */ | ||
1519 | if (rc <= 0) { | ||
1520 | mce_apei_read_done = 1; | ||
1521 | return rc; | ||
1522 | } | ||
1523 | rc = -EFAULT; | ||
1524 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
1525 | return rc; | ||
1526 | /* | ||
1527 | * In fact, we should have cleared the record after that has | ||
1528 | * been flushed to the disk or sent to network in | ||
1529 | * /sbin/mcelog, but we have no interface to support that now, | ||
1530 | * so just clear it to avoid duplication. | ||
1531 | */ | ||
1532 | rc = apei_clear_mce(record_id); | ||
1533 | if (rc) { | ||
1534 | mce_apei_read_done = 1; | ||
1535 | return rc; | ||
1536 | } | ||
1537 | *ubuf += sizeof(struct mce); | ||
1538 | |||
1539 | return 0; | ||
1540 | } | ||
1541 | |||
1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1542 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1497 | loff_t *off) | 1543 | loff_t *off) |
1498 | { | 1544 | { |
@@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1506 | return -ENOMEM; | 1552 | return -ENOMEM; |
1507 | 1553 | ||
1508 | mutex_lock(&mce_read_mutex); | 1554 | mutex_lock(&mce_read_mutex); |
1555 | |||
1556 | if (!mce_apei_read_done) { | ||
1557 | err = __mce_read_apei(&buf, usize); | ||
1558 | if (err || buf != ubuf) | ||
1559 | goto out; | ||
1560 | } | ||
1561 | |||
1509 | next = rcu_dereference_check_mce(mcelog.next); | 1562 | next = rcu_dereference_check_mce(mcelog.next); |
1510 | 1563 | ||
1511 | /* Only supports full reads right now */ | 1564 | /* Only supports full reads right now */ |
1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1565 | err = -EINVAL; |
1513 | mutex_unlock(&mce_read_mutex); | 1566 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) |
1514 | kfree(cpu_tsc); | 1567 | goto out; |
1515 | |||
1516 | return -EINVAL; | ||
1517 | } | ||
1518 | 1568 | ||
1519 | err = 0; | 1569 | err = 0; |
1520 | prev = 0; | 1570 | prev = 0; |
@@ -1562,10 +1612,15 @@ timeout: | |||
1562 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1612 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
1563 | } | 1613 | } |
1564 | } | 1614 | } |
1615 | |||
1616 | if (err) | ||
1617 | err = -EFAULT; | ||
1618 | |||
1619 | out: | ||
1565 | mutex_unlock(&mce_read_mutex); | 1620 | mutex_unlock(&mce_read_mutex); |
1566 | kfree(cpu_tsc); | 1621 | kfree(cpu_tsc); |
1567 | 1622 | ||
1568 | return err ? -EFAULT : buf - ubuf; | 1623 | return err ? err : buf - ubuf; |
1569 | } | 1624 | } |
1570 | 1625 | ||
1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1626 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
@@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
1573 | poll_wait(file, &mce_wait, wait); | 1628 | poll_wait(file, &mce_wait, wait); |
1574 | if (rcu_dereference_check_mce(mcelog.next)) | 1629 | if (rcu_dereference_check_mce(mcelog.next)) |
1575 | return POLLIN | POLLRDNORM; | 1630 | return POLLIN | POLLRDNORM; |
1631 | if (!mce_apei_read_done && apei_check_mce()) | ||
1632 | return POLLIN | POLLRDNORM; | ||
1576 | return 0; | 1633 | return 0; |
1577 | } | 1634 | } |
1578 | 1635 | ||