diff options
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-apei.c | 86 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 23 | ||||
| -rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 79 |
3 files changed, 177 insertions, 11 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 4eccd1fadb14..745b54f9be89 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
| @@ -8,6 +8,9 @@ | |||
| 8 | * the error memory page can be offlined by /sbin/mcelog if the error | 8 | * the error memory page can be offlined by /sbin/mcelog if the error |
| 9 | * count for one page is beyond the threshold. | 9 | * count for one page is beyond the threshold. |
| 10 | * | 10 | * |
| 11 | * For a fatal MCE, save the MCE record into persistent storage via ERST, so | ||
| 12 | * that the record can be read back and logged after reboot via ERST. | ||
| 13 | * | ||
| 11 | * Copyright 2010 Intel Corp. | 14 | * Copyright 2010 Intel Corp. |
| 12 | * Author: Huang Ying <ying.huang@intel.com> | 15 | * Author: Huang Ying <ying.huang@intel.com> |
| 13 | * | 16 | * |
| @@ -50,3 +53,86 @@ void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | |||
| 50 | mce_notify_irq(); | 53 | mce_notify_irq(); |
| 51 | } | 54 | } |
| 52 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | 55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); |
| 56 | |||
| 57 | #define CPER_CREATOR_MCE \ | ||
| 58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
| 59 | 0x64, 0x90, 0xb8, 0x9d) | ||
| 60 | #define CPER_SECTION_TYPE_MCE \ | ||
| 61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
| 62 | 0x04, 0x4a, 0x38, 0xfc) | ||
| 63 | |||
| 64 | /* | ||
| 65 | * The CPER specification (UEFI specification 2.3, appendix N) requires | ||
| 66 | * records to be byte-packed. | ||
| 67 | */ | ||
| 68 | struct cper_mce_record { | ||
| 69 | struct cper_record_header hdr; | ||
| 70 | struct cper_section_descriptor sec_hdr; | ||
| 71 | struct mce mce; | ||
| 72 | } __packed; | ||
| 73 | |||
| 74 | int apei_write_mce(struct mce *m) | ||
| 75 | { | ||
| 76 | struct cper_mce_record rcd; | ||
| 77 | |||
| 78 | memset(&rcd, 0, sizeof(rcd)); | ||
| 79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
| 80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
| 81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
| 82 | rcd.hdr.section_count = 1; | ||
| 83 | rcd.hdr.error_severity = CPER_SER_FATAL; | ||
| 84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
| 85 | rcd.hdr.validation_bits = 0; | ||
| 86 | rcd.hdr.record_length = sizeof(rcd); | ||
| 87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
| 88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
| 89 | rcd.hdr.record_id = cper_next_record_id(); | ||
| 90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
| 91 | |||
| 92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
| 93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
| 94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
| 95 | /* fru_id and fru_text are invalid */ | ||
| 96 | rcd.sec_hdr.validation_bits = 0; | ||
| 97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
| 98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
| 99 | rcd.sec_hdr.section_severity = CPER_SER_FATAL; | ||
| 100 | |||
| 101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
| 102 | |||
| 103 | return erst_write(&rcd.hdr); | ||
| 104 | } | ||
| 105 | |||
| 106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
| 107 | { | ||
| 108 | struct cper_mce_record rcd; | ||
| 109 | ssize_t len; | ||
| 110 | |||
| 111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
| 112 | if (len <= 0) | ||
| 113 | return len; | ||
| 114 | /* Cannot skip other records in storage via ERST without clearing them */ | ||
| 115 | else if (len != sizeof(rcd) || | ||
| 116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
| 117 | if (printk_ratelimit()) | ||
| 118 | pr_warning( | ||
| 119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
| 120 | return -EIO; | ||
| 121 | } | ||
| 122 | |||
| 123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
| 124 | *record_id = rcd.hdr.record_id; | ||
| 125 | |||
| 126 | return sizeof(*m); | ||
| 127 | } | ||
| 128 | |||
| 129 | /* Check whether there is any record in ERST */ | ||
| 130 | int apei_check_mce(void) | ||
| 131 | { | ||
| 132 | return erst_get_record_count(); | ||
| 133 | } | ||
| 134 | |||
| 135 | int apei_clear_mce(u64 record_id) | ||
| 136 | { | ||
| 137 | return erst_clear(record_id); | ||
| 138 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
| @@ -28,3 +28,26 @@ extern int mce_ser; | |||
| 28 | 28 | ||
| 29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
| 30 | 30 | ||
| 31 | #ifdef CONFIG_ACPI_APEI | ||
| 32 | int apei_write_mce(struct mce *m); | ||
| 33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
| 34 | int apei_check_mce(void); | ||
| 35 | int apei_clear_mce(u64 record_id); | ||
| 36 | #else | ||
| 37 | static inline int apei_write_mce(struct mce *m) | ||
| 38 | { | ||
| 39 | return -EINVAL; | ||
| 40 | } | ||
| 41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
| 42 | { | ||
| 43 | return 0; | ||
| 44 | } | ||
| 45 | static inline int apei_check_mce(void) | ||
| 46 | { | ||
| 47 | return 0; | ||
| 48 | } | ||
| 49 | static inline int apei_clear_mce(u64 record_id) | ||
| 50 | { | ||
| 51 | return -EINVAL; | ||
| 52 | } | ||
| 53 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8a6f0afa767e..09535ca9b9d7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
| @@ -264,7 +264,7 @@ static void wait_for_panic(void) | |||
| 264 | 264 | ||
| 265 | static void mce_panic(char *msg, struct mce *final, char *exp) | 265 | static void mce_panic(char *msg, struct mce *final, char *exp) |
| 266 | { | 266 | { |
| 267 | int i; | 267 | int i, apei_err = 0; |
| 268 | 268 | ||
| 269 | if (!fake_panic) { | 269 | if (!fake_panic) { |
| 270 | /* | 270 | /* |
| @@ -287,8 +287,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 287 | struct mce *m = &mcelog.entry[i]; | 287 | struct mce *m = &mcelog.entry[i]; |
| 288 | if (!(m->status & MCI_STATUS_VAL)) | 288 | if (!(m->status & MCI_STATUS_VAL)) |
| 289 | continue; | 289 | continue; |
| 290 | if (!(m->status & MCI_STATUS_UC)) | 290 | if (!(m->status & MCI_STATUS_UC)) { |
| 291 | print_mce(m); | 291 | print_mce(m); |
| 292 | if (!apei_err) | ||
| 293 | apei_err = apei_write_mce(m); | ||
| 294 | } | ||
| 292 | } | 295 | } |
| 293 | /* Now print uncorrected but with the final one last */ | 296 | /* Now print uncorrected but with the final one last */ |
| 294 | for (i = 0; i < MCE_LOG_LEN; i++) { | 297 | for (i = 0; i < MCE_LOG_LEN; i++) { |
| @@ -297,11 +300,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
| 297 | continue; | 300 | continue; |
| 298 | if (!(m->status & MCI_STATUS_UC)) | 301 | if (!(m->status & MCI_STATUS_UC)) |
| 299 | continue; | 302 | continue; |
| 300 | if (!final || memcmp(m, final, sizeof(struct mce))) | 303 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
| 301 | print_mce(m); | 304 | print_mce(m); |
| 305 | if (!apei_err) | ||
| 306 | apei_err = apei_write_mce(m); | ||
| 307 | } | ||
| 302 | } | 308 | } |
| 303 | if (final) | 309 | if (final) { |
| 304 | print_mce(final); | 310 | print_mce(final); |
| 311 | if (!apei_err) | ||
| 312 | apei_err = apei_write_mce(final); | ||
| 313 | } | ||
| 305 | if (cpu_missing) | 314 | if (cpu_missing) |
| 306 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 315 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); |
| 307 | print_mce_tail(); | 316 | print_mce_tail(); |
| @@ -1493,6 +1502,43 @@ static void collect_tscs(void *data) | |||
| 1493 | rdtscll(cpu_tsc[smp_processor_id()]); | 1502 | rdtscll(cpu_tsc[smp_processor_id()]); |
| 1494 | } | 1503 | } |
| 1495 | 1504 | ||
| 1505 | static int mce_apei_read_done; | ||
| 1506 | |||
| 1507 | /* Collect MCE records of the previous boot from persistent storage via APEI ERST. */ | ||
| 1508 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
| 1509 | { | ||
| 1510 | int rc; | ||
| 1511 | u64 record_id; | ||
| 1512 | struct mce m; | ||
| 1513 | |||
| 1514 | if (usize < sizeof(struct mce)) | ||
| 1515 | return -EINVAL; | ||
| 1516 | |||
| 1517 | rc = apei_read_mce(&m, &record_id); | ||
| 1518 | /* Error or no more MCE record */ | ||
| 1519 | if (rc <= 0) { | ||
| 1520 | mce_apei_read_done = 1; | ||
| 1521 | return rc; | ||
| 1522 | } | ||
| 1523 | rc = -EFAULT; | ||
| 1524 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
| 1525 | return rc; | ||
| 1526 | /* | ||
| 1527 | * Ideally, the record would be cleared only after /sbin/mcelog | ||
| 1528 | * has flushed it to disk or sent it over the network, but there | ||
| 1529 | * is no interface to support that yet, so just clear it here to | ||
| 1530 | * avoid duplication. | ||
| 1531 | */ | ||
| 1532 | rc = apei_clear_mce(record_id); | ||
| 1533 | if (rc) { | ||
| 1534 | mce_apei_read_done = 1; | ||
| 1535 | return rc; | ||
| 1536 | } | ||
| 1537 | *ubuf += sizeof(struct mce); | ||
| 1538 | |||
| 1539 | return 0; | ||
| 1540 | } | ||
| 1541 | |||
| 1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1542 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
| 1497 | loff_t *off) | 1543 | loff_t *off) |
| 1498 | { | 1544 | { |
| @@ -1506,15 +1552,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
| 1506 | return -ENOMEM; | 1552 | return -ENOMEM; |
| 1507 | 1553 | ||
| 1508 | mutex_lock(&mce_read_mutex); | 1554 | mutex_lock(&mce_read_mutex); |
| 1555 | |||
| 1556 | if (!mce_apei_read_done) { | ||
| 1557 | err = __mce_read_apei(&buf, usize); | ||
| 1558 | if (err || buf != ubuf) | ||
| 1559 | goto out; | ||
| 1560 | } | ||
| 1561 | |||
| 1509 | next = rcu_dereference_check_mce(mcelog.next); | 1562 | next = rcu_dereference_check_mce(mcelog.next); |
| 1510 | 1563 | ||
| 1511 | /* Only supports full reads right now */ | 1564 | /* Only supports full reads right now */ |
| 1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1565 | err = -EINVAL; |
| 1513 | mutex_unlock(&mce_read_mutex); | 1566 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) |
| 1514 | kfree(cpu_tsc); | 1567 | goto out; |
| 1515 | |||
| 1516 | return -EINVAL; | ||
| 1517 | } | ||
| 1518 | 1568 | ||
| 1519 | err = 0; | 1569 | err = 0; |
| 1520 | prev = 0; | 1570 | prev = 0; |
| @@ -1562,10 +1612,15 @@ timeout: | |||
| 1562 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1612 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
| 1563 | } | 1613 | } |
| 1564 | } | 1614 | } |
| 1615 | |||
| 1616 | if (err) | ||
| 1617 | err = -EFAULT; | ||
| 1618 | |||
| 1619 | out: | ||
| 1565 | mutex_unlock(&mce_read_mutex); | 1620 | mutex_unlock(&mce_read_mutex); |
| 1566 | kfree(cpu_tsc); | 1621 | kfree(cpu_tsc); |
| 1567 | 1622 | ||
| 1568 | return err ? -EFAULT : buf - ubuf; | 1623 | return err ? err : buf - ubuf; |
| 1569 | } | 1624 | } |
| 1570 | 1625 | ||
| 1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1626 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
| @@ -1573,6 +1628,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
| 1573 | poll_wait(file, &mce_wait, wait); | 1628 | poll_wait(file, &mce_wait, wait); |
| 1574 | if (rcu_dereference_check_mce(mcelog.next)) | 1629 | if (rcu_dereference_check_mce(mcelog.next)) |
| 1575 | return POLLIN | POLLRDNORM; | 1630 | return POLLIN | POLLRDNORM; |
| 1631 | if (!mce_apei_read_done && apei_check_mce()) | ||
| 1632 | return POLLIN | POLLRDNORM; | ||
| 1576 | return 0; | 1633 | return 0; |
| 1577 | } | 1634 | } |
| 1578 | 1635 | ||
