diff options
author | Jiri Kosina <jkosina@suse.cz> | 2010-06-16 12:08:13 -0400 |
---|---|---|
committer | Jiri Kosina <jkosina@suse.cz> | 2010-06-16 12:08:13 -0400 |
commit | f1bbbb6912662b9f6070c5bfc4ca9eb1f06a9d5b (patch) | |
tree | c2c130a74be25b0b2dff992e1a195e2728bdaadd /arch/x86/kernel/cpu/mcheck | |
parent | fd0961ff67727482bb20ca7e8ea97b83e9de2ddb (diff) | |
parent | 7e27d6e778cd87b6f2415515d7127eba53fe5d02 (diff) |
Merge branch 'master' into for-next
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-apei.c | 138 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 23 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 89 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 2 |
5 files changed, 242 insertions, 12 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 4ac6d48fe11b..bb34b03af252 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -7,3 +7,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | |||
7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
8 | 8 | ||
9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | 9 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o |
10 | |||
11 | obj-$(CONFIG_ACPI_APEI) += mce-apei.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c new file mode 100644 index 000000000000..745b54f9be89 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -0,0 +1,138 @@ | |||
1 | /* | ||
2 | * Bridge between MCE and APEI | ||
3 | * | ||
4 | * On some machine, corrected memory errors are reported via APEI | ||
5 | * generic hardware error source (GHES) instead of corrected Machine | ||
6 | * Check. These corrected memory errors can be reported to user space | ||
7 | * through /dev/mcelog via faking a corrected Machine Check, so that | ||
8 | * the error memory page can be offlined by /sbin/mcelog if the error | ||
9 | * count for one page is beyond the threshold. | ||
10 | * | ||
11 | * For fatal MCE, save MCE record into persistent storage via ERST, so | ||
12 | * that the MCE record can be logged after reboot via ERST. | ||
13 | * | ||
14 | * Copyright 2010 Intel Corp. | ||
15 | * Author: Huang Ying <ying.huang@intel.com> | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or | ||
18 | * modify it under the terms of the GNU General Public License version | ||
19 | * 2 as published by the Free Software Foundation. | ||
20 | * | ||
21 | * This program is distributed in the hope that it will be useful, | ||
22 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
23 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
24 | * GNU General Public License for more details. | ||
25 | * | ||
26 | * You should have received a copy of the GNU General Public License | ||
27 | * along with this program; if not, write to the Free Software | ||
28 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
29 | */ | ||
30 | |||
31 | #include <linux/kernel.h> | ||
32 | #include <linux/acpi.h> | ||
33 | #include <linux/cper.h> | ||
34 | #include <acpi/apei.h> | ||
35 | #include <asm/mce.h> | ||
36 | |||
37 | #include "mce-internal.h" | ||
38 | |||
39 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | ||
40 | { | ||
41 | struct mce m; | ||
42 | |||
43 | /* Only corrected MC is reported */ | ||
44 | if (!corrected) | ||
45 | return; | ||
46 | |||
47 | mce_setup(&m); | ||
48 | m.bank = 1; | ||
49 | /* Fake a memory read corrected error with unknown channel */ | ||
50 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | ||
51 | m.addr = mem_err->physical_addr; | ||
52 | mce_log(&m); | ||
53 | mce_notify_irq(); | ||
54 | } | ||
55 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | ||
56 | |||
57 | #define CPER_CREATOR_MCE \ | ||
58 | UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ | ||
59 | 0x64, 0x90, 0xb8, 0x9d) | ||
60 | #define CPER_SECTION_TYPE_MCE \ | ||
61 | UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ | ||
62 | 0x04, 0x4a, 0x38, 0xfc) | ||
63 | |||
64 | /* | ||
65 | * CPER specification (in UEFI specification 2.3 appendix N) requires | ||
66 | * byte-packed. | ||
67 | */ | ||
68 | struct cper_mce_record { | ||
69 | struct cper_record_header hdr; | ||
70 | struct cper_section_descriptor sec_hdr; | ||
71 | struct mce mce; | ||
72 | } __packed; | ||
73 | |||
74 | int apei_write_mce(struct mce *m) | ||
75 | { | ||
76 | struct cper_mce_record rcd; | ||
77 | |||
78 | memset(&rcd, 0, sizeof(rcd)); | ||
79 | memcpy(rcd.hdr.signature, CPER_SIG_RECORD, CPER_SIG_SIZE); | ||
80 | rcd.hdr.revision = CPER_RECORD_REV; | ||
81 | rcd.hdr.signature_end = CPER_SIG_END; | ||
82 | rcd.hdr.section_count = 1; | ||
83 | rcd.hdr.error_severity = CPER_SER_FATAL; | ||
84 | /* timestamp, platform_id, partition_id are all invalid */ | ||
85 | rcd.hdr.validation_bits = 0; | ||
86 | rcd.hdr.record_length = sizeof(rcd); | ||
87 | rcd.hdr.creator_id = CPER_CREATOR_MCE; | ||
88 | rcd.hdr.notification_type = CPER_NOTIFY_MCE; | ||
89 | rcd.hdr.record_id = cper_next_record_id(); | ||
90 | rcd.hdr.flags = CPER_HW_ERROR_FLAGS_PREVERR; | ||
91 | |||
92 | rcd.sec_hdr.section_offset = (void *)&rcd.mce - (void *)&rcd; | ||
93 | rcd.sec_hdr.section_length = sizeof(rcd.mce); | ||
94 | rcd.sec_hdr.revision = CPER_SEC_REV; | ||
95 | /* fru_id and fru_text is invalid */ | ||
96 | rcd.sec_hdr.validation_bits = 0; | ||
97 | rcd.sec_hdr.flags = CPER_SEC_PRIMARY; | ||
98 | rcd.sec_hdr.section_type = CPER_SECTION_TYPE_MCE; | ||
99 | rcd.sec_hdr.section_severity = CPER_SER_FATAL; | ||
100 | |||
101 | memcpy(&rcd.mce, m, sizeof(*m)); | ||
102 | |||
103 | return erst_write(&rcd.hdr); | ||
104 | } | ||
105 | |||
106 | ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
107 | { | ||
108 | struct cper_mce_record rcd; | ||
109 | ssize_t len; | ||
110 | |||
111 | len = erst_read_next(&rcd.hdr, sizeof(rcd)); | ||
112 | if (len <= 0) | ||
113 | return len; | ||
114 | /* Can not skip other records in storage via ERST unless clear them */ | ||
115 | else if (len != sizeof(rcd) || | ||
116 | uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) { | ||
117 | if (printk_ratelimit()) | ||
118 | pr_warning( | ||
119 | "MCE-APEI: Can not skip the unknown record in ERST"); | ||
120 | return -EIO; | ||
121 | } | ||
122 | |||
123 | memcpy(m, &rcd.mce, sizeof(*m)); | ||
124 | *record_id = rcd.hdr.record_id; | ||
125 | |||
126 | return sizeof(*m); | ||
127 | } | ||
128 | |||
129 | /* Check whether there is record in ERST */ | ||
130 | int apei_check_mce(void) | ||
131 | { | ||
132 | return erst_get_record_count(); | ||
133 | } | ||
134 | |||
135 | int apei_clear_mce(u64 record_id) | ||
136 | { | ||
137 | return erst_clear(record_id); | ||
138 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 32996f9fab67..fefcc69ee8b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -28,3 +28,26 @@ extern int mce_ser; | |||
28 | 28 | ||
29 | extern struct mce_bank *mce_banks; | 29 | extern struct mce_bank *mce_banks; |
30 | 30 | ||
31 | #ifdef CONFIG_ACPI_APEI | ||
32 | int apei_write_mce(struct mce *m); | ||
33 | ssize_t apei_read_mce(struct mce *m, u64 *record_id); | ||
34 | int apei_check_mce(void); | ||
35 | int apei_clear_mce(u64 record_id); | ||
36 | #else | ||
37 | static inline int apei_write_mce(struct mce *m) | ||
38 | { | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | static inline ssize_t apei_read_mce(struct mce *m, u64 *record_id) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | static inline int apei_check_mce(void) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | static inline int apei_clear_mce(u64 record_id) | ||
50 | { | ||
51 | return -EINVAL; | ||
52 | } | ||
53 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 7a355ddcc64b..18cc42562250 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/fs.h> | 36 | #include <linux/fs.h> |
37 | #include <linux/mm.h> | 37 | #include <linux/mm.h> |
38 | #include <linux/debugfs.h> | 38 | #include <linux/debugfs.h> |
39 | #include <linux/edac_mce.h> | ||
39 | 40 | ||
40 | #include <asm/processor.h> | 41 | #include <asm/processor.h> |
41 | #include <asm/hw_irq.h> | 42 | #include <asm/hw_irq.h> |
@@ -169,6 +170,15 @@ void mce_log(struct mce *mce) | |||
169 | entry = rcu_dereference_check_mce(mcelog.next); | 170 | entry = rcu_dereference_check_mce(mcelog.next); |
170 | for (;;) { | 171 | for (;;) { |
171 | /* | 172 | /* |
173 | * If edac_mce is enabled, it will check the error type | ||
174 | * and will process it, if it is a known error. | ||
175 | * Otherwise, the error will be sent through mcelog | ||
176 | * interface | ||
177 | */ | ||
178 | if (edac_mce_parse(mce)) | ||
179 | return; | ||
180 | |||
181 | /* | ||
172 | * When the buffer fills up discard new entries. | 182 | * When the buffer fills up discard new entries. |
173 | * Assume that the earlier errors are the more | 183 | * Assume that the earlier errors are the more |
174 | * interesting ones: | 184 | * interesting ones: |
@@ -264,7 +274,7 @@ static void wait_for_panic(void) | |||
264 | 274 | ||
265 | static void mce_panic(char *msg, struct mce *final, char *exp) | 275 | static void mce_panic(char *msg, struct mce *final, char *exp) |
266 | { | 276 | { |
267 | int i; | 277 | int i, apei_err = 0; |
268 | 278 | ||
269 | if (!fake_panic) { | 279 | if (!fake_panic) { |
270 | /* | 280 | /* |
@@ -287,8 +297,11 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
287 | struct mce *m = &mcelog.entry[i]; | 297 | struct mce *m = &mcelog.entry[i]; |
288 | if (!(m->status & MCI_STATUS_VAL)) | 298 | if (!(m->status & MCI_STATUS_VAL)) |
289 | continue; | 299 | continue; |
290 | if (!(m->status & MCI_STATUS_UC)) | 300 | if (!(m->status & MCI_STATUS_UC)) { |
291 | print_mce(m); | 301 | print_mce(m); |
302 | if (!apei_err) | ||
303 | apei_err = apei_write_mce(m); | ||
304 | } | ||
292 | } | 305 | } |
293 | /* Now print uncorrected but with the final one last */ | 306 | /* Now print uncorrected but with the final one last */ |
294 | for (i = 0; i < MCE_LOG_LEN; i++) { | 307 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -297,11 +310,17 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
297 | continue; | 310 | continue; |
298 | if (!(m->status & MCI_STATUS_UC)) | 311 | if (!(m->status & MCI_STATUS_UC)) |
299 | continue; | 312 | continue; |
300 | if (!final || memcmp(m, final, sizeof(struct mce))) | 313 | if (!final || memcmp(m, final, sizeof(struct mce))) { |
301 | print_mce(m); | 314 | print_mce(m); |
315 | if (!apei_err) | ||
316 | apei_err = apei_write_mce(m); | ||
317 | } | ||
302 | } | 318 | } |
303 | if (final) | 319 | if (final) { |
304 | print_mce(final); | 320 | print_mce(final); |
321 | if (!apei_err) | ||
322 | apei_err = apei_write_mce(final); | ||
323 | } | ||
305 | if (cpu_missing) | 324 | if (cpu_missing) |
306 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | 325 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); |
307 | print_mce_tail(); | 326 | print_mce_tail(); |
@@ -1493,6 +1512,43 @@ static void collect_tscs(void *data) | |||
1493 | rdtscll(cpu_tsc[smp_processor_id()]); | 1512 | rdtscll(cpu_tsc[smp_processor_id()]); |
1494 | } | 1513 | } |
1495 | 1514 | ||
1515 | static int mce_apei_read_done; | ||
1516 | |||
1517 | /* Collect MCE record of previous boot in persistent storage via APEI ERST. */ | ||
1518 | static int __mce_read_apei(char __user **ubuf, size_t usize) | ||
1519 | { | ||
1520 | int rc; | ||
1521 | u64 record_id; | ||
1522 | struct mce m; | ||
1523 | |||
1524 | if (usize < sizeof(struct mce)) | ||
1525 | return -EINVAL; | ||
1526 | |||
1527 | rc = apei_read_mce(&m, &record_id); | ||
1528 | /* Error or no more MCE record */ | ||
1529 | if (rc <= 0) { | ||
1530 | mce_apei_read_done = 1; | ||
1531 | return rc; | ||
1532 | } | ||
1533 | rc = -EFAULT; | ||
1534 | if (copy_to_user(*ubuf, &m, sizeof(struct mce))) | ||
1535 | return rc; | ||
1536 | /* | ||
1537 | * In fact, we should have cleared the record after that has | ||
1538 | * been flushed to the disk or sent to network in | ||
1539 | * /sbin/mcelog, but we have no interface to support that now, | ||
1540 | * so just clear it to avoid duplication. | ||
1541 | */ | ||
1542 | rc = apei_clear_mce(record_id); | ||
1543 | if (rc) { | ||
1544 | mce_apei_read_done = 1; | ||
1545 | return rc; | ||
1546 | } | ||
1547 | *ubuf += sizeof(struct mce); | ||
1548 | |||
1549 | return 0; | ||
1550 | } | ||
1551 | |||
1496 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | 1552 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, |
1497 | loff_t *off) | 1553 | loff_t *off) |
1498 | { | 1554 | { |
@@ -1506,15 +1562,19 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
1506 | return -ENOMEM; | 1562 | return -ENOMEM; |
1507 | 1563 | ||
1508 | mutex_lock(&mce_read_mutex); | 1564 | mutex_lock(&mce_read_mutex); |
1565 | |||
1566 | if (!mce_apei_read_done) { | ||
1567 | err = __mce_read_apei(&buf, usize); | ||
1568 | if (err || buf != ubuf) | ||
1569 | goto out; | ||
1570 | } | ||
1571 | |||
1509 | next = rcu_dereference_check_mce(mcelog.next); | 1572 | next = rcu_dereference_check_mce(mcelog.next); |
1510 | 1573 | ||
1511 | /* Only supports full reads right now */ | 1574 | /* Only supports full reads right now */ |
1512 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | 1575 | err = -EINVAL; |
1513 | mutex_unlock(&mce_read_mutex); | 1576 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) |
1514 | kfree(cpu_tsc); | 1577 | goto out; |
1515 | |||
1516 | return -EINVAL; | ||
1517 | } | ||
1518 | 1578 | ||
1519 | err = 0; | 1579 | err = 0; |
1520 | prev = 0; | 1580 | prev = 0; |
@@ -1562,10 +1622,15 @@ timeout: | |||
1562 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | 1622 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); |
1563 | } | 1623 | } |
1564 | } | 1624 | } |
1625 | |||
1626 | if (err) | ||
1627 | err = -EFAULT; | ||
1628 | |||
1629 | out: | ||
1565 | mutex_unlock(&mce_read_mutex); | 1630 | mutex_unlock(&mce_read_mutex); |
1566 | kfree(cpu_tsc); | 1631 | kfree(cpu_tsc); |
1567 | 1632 | ||
1568 | return err ? -EFAULT : buf - ubuf; | 1633 | return err ? err : buf - ubuf; |
1569 | } | 1634 | } |
1570 | 1635 | ||
1571 | static unsigned int mce_poll(struct file *file, poll_table *wait) | 1636 | static unsigned int mce_poll(struct file *file, poll_table *wait) |
@@ -1573,6 +1638,8 @@ static unsigned int mce_poll(struct file *file, poll_table *wait) | |||
1573 | poll_wait(file, &mce_wait, wait); | 1638 | poll_wait(file, &mce_wait, wait); |
1574 | if (rcu_dereference_check_mce(mcelog.next)) | 1639 | if (rcu_dereference_check_mce(mcelog.next)) |
1575 | return POLLIN | POLLRDNORM; | 1640 | return POLLIN | POLLRDNORM; |
1641 | if (!mce_apei_read_done && apei_check_mce()) | ||
1642 | return POLLIN | POLLRDNORM; | ||
1576 | return 0; | 1643 | return 0; |
1577 | } | 1644 | } |
1578 | 1645 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 81c499eceb21..e1a0a3bf9716 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -190,7 +190,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
190 | mutex_unlock(&therm_cpu_lock); | 190 | mutex_unlock(&therm_cpu_lock); |
191 | break; | 191 | break; |
192 | } | 192 | } |
193 | return err ? NOTIFY_BAD : NOTIFY_OK; | 193 | return notifier_from_errno(err); |
194 | } | 194 | } |
195 | 195 | ||
196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = | 196 | static struct notifier_block thermal_throttle_cpu_notifier __cpuinitdata = |