 arch/x86/Kconfig                                                     |   1 +
 arch/x86/Makefile                                                    |   2 +
 arch/x86/include/asm/mce.h                                           |   8 +-
 arch/x86/include/uapi/asm/mce.h                                      |   3 +-
 arch/x86/kernel/cpu/mcheck/Makefile                                  |   2 +-
 arch/x86/kernel/cpu/mcheck/mce-apei.c                                |   1 -
 arch/x86/kernel/cpu/mcheck/mce-genpool.c                             |  99 ++++++++
 arch/x86/kernel/cpu/mcheck/mce-internal.h                            |  14 ++
 arch/x86/kernel/cpu/mcheck/mce.c                                     | 231 ++++++---------
 arch/x86/kernel/cpu/mcheck/mce_intel.c                               |  61 +++--
 arch/x86/kernel/process.c                                            |   2 +
 arch/x86/kernel/smp.c                                                |   2 +
 arch/x86/ras/Kconfig                                                 |  11 +
 arch/x86/ras/Makefile                                                |   2 +
 arch/x86/ras/mce_amd_inj.c (renamed from drivers/edac/mce_amd_inj.c) |   6 +-
 drivers/edac/Kconfig                                                 |  10 -
 drivers/edac/Makefile                                                |   1 -
 drivers/ras/Kconfig                                                  |  37 ++-
 18 files changed, 329 insertions(+), 164 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b3a1a5d77d92..06dbb5da90c6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	select GENERIC_ALLOCATOR
 	default y
 	---help---
 	  Machine Check support allows the processor to notify the
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 118e6debc483..0f38418719ab 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -212,6 +212,8 @@ drivers-$(CONFIG_PM) += arch/x86/power/
 
 drivers-$(CONFIG_FB) += arch/x86/video/
 
+drivers-$(CONFIG_RAS) += arch/x86/ras/
+
 ####
 # boot loader support. Several targets are kept for legacy purposes
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 982dfc3679ad..2dbc0bf2b9f3 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -151,10 +151,12 @@ extern int mce_p5_enabled;
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_clear(struct cpuinfo_x86 *c);
 void mcheck_vendor_init_severity(void);
 #else
 static inline int mcheck_init(void) { return 0; }
 static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {}
 static inline void mcheck_vendor_init_severity(void) {}
 #endif
 
@@ -181,20 +183,18 @@ DECLARE_PER_CPU(struct device *, mce_device);
 
 #ifdef CONFIG_X86_MCE_INTEL
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void mce_intel_feature_clear(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
 void cmci_rediscover(void);
 void cmci_recheck(void);
-void lmce_clear(void);
-void lmce_enable(void);
 #else
 static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { }
 static inline void cmci_clear(void) {}
 static inline void cmci_reenable(void) {}
 static inline void cmci_rediscover(void) {}
 static inline void cmci_recheck(void) {}
-static inline void lmce_clear(void) {}
-static inline void lmce_enable(void) {}
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index a0eab85ce7b8..76880ede9a35 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -15,7 +15,8 @@ struct mce {
 	__u64 time;	/* wall time_t when error was detected */
 	__u8  cpuvendor;	/* cpu vendor as encoded in system.h */
 	__u8  inject_flags;	/* software inject flags */
-	__u16 pad;
+	__u8  severity;
+	__u8  usable_addr;
 	__u32 cpuid;	/* CPUID 1 EAX */
 	__u8  cs;	/* code segment */
 	__u8  bank;	/* machine check bank */
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index bb34b03af252..a3311c886194 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,4 +1,4 @@
-obj-y				= mce.o mce-severity.o
+obj-y				= mce.o mce-severity.o mce-genpool.o
 
 obj-$(CONFIG_X86_ANCIENT_MCE)	+= winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index a1aef9533154..34c89a3e8260 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
-	mce_notify_irq();
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
new file mode 100644
index 000000000000..0a850100c594
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -0,0 +1,99 @@
+/*
+ * MCE event pool management in MCE context
+ *
+ * Copyright (C) 2015 Intel Corp.
+ * Author: Chen, Gong <gong.chen@linux.intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/genalloc.h>
+#include <linux/llist.h>
+#include "mce-internal.h"
+
+/*
+ * printk() is not safe in MCE context. This is a lock-less memory allocator
+ * used to save error information organized in a lock-less list.
+ *
+ * This memory pool is only to be used to save MCE records in MCE context.
+ * MCE events are rare, so a fixed size memory pool should be enough. Use
+ * 2 pages to save MCE events for now (~80 MCE records at most).
+ */
+#define MCE_POOLSZ	(2 * PAGE_SIZE)
+
+static struct gen_pool *mce_evt_pool;
+static LLIST_HEAD(mce_event_llist);
+static char gen_pool_buf[MCE_POOLSZ];
+
+void mce_gen_pool_process(void)
+{
+	struct llist_node *head;
+	struct mce_evt_llist *node;
+	struct mce *mce;
+
+	head = llist_del_all(&mce_event_llist);
+	if (!head)
+		return;
+
+	head = llist_reverse_order(head);
+	llist_for_each_entry(node, head, llnode) {
+		mce = &node->mce;
+		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
+	}
+}
+
+bool mce_gen_pool_empty(void)
+{
+	return llist_empty(&mce_event_llist);
+}
+
+int mce_gen_pool_add(struct mce *mce)
+{
+	struct mce_evt_llist *node;
+
+	if (!mce_evt_pool)
+		return -EINVAL;
+
+	node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
+	if (!node) {
+		pr_warn_ratelimited("MCE records pool full!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(&node->mce, mce, sizeof(*mce));
+	llist_add(&node->llnode, &mce_event_llist);
+
+	return 0;
+}
+
+static int mce_gen_pool_create(void)
+{
+	struct gen_pool *tmpp;
+	int ret = -ENOMEM;
+
+	tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
+	if (!tmpp)
+		goto out;
+
+	ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
+	if (ret) {
+		gen_pool_destroy(tmpp);
+		goto out;
+	}
+
+	mce_evt_pool = tmpp;
+
+out:
+	return ret;
+}
+
+int mce_gen_pool_init(void)
+{
+	/* Just init mce_gen_pool once. */
+	if (mce_evt_pool)
+		return 0;
+
+	return mce_gen_pool_create();
+}
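
A back-of-the-envelope check of the "~80 MCE records" estimate in the comment above, assuming 4K pages and a struct mce just under 100 bytes (the exact size is config-dependent):

	MCE_POOLSZ              = 2 * PAGE_SIZE = 2 * 4096 = 8192 bytes
	sizeof(mce_evt_llist)  ~= sizeof(llist_node) + sizeof(struct mce)
	                       ~= 8 + ~88           ~= 96 bytes
	8192 / 96              ~= 85 records (naive upper bound)

Note that gen_pool_create() above is given a minimum allocation order of ilog2(sizeof(struct mce_evt_llist)) — 64-byte chunks for a ~96-byte node — so each allocation rounds up to two chunks (128 bytes) and the effective capacity is closer to 8192 / 128 = 64 records.
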
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fe32074b865b..547720efd923 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -13,6 +13,8 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #define ATTR_LEN		16
 #define INITIAL_CHECK_INTERVAL	5 * 60 /* 5 minutes */
 
@@ -24,6 +26,16 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
+struct mce_evt_llist {
+	struct llist_node llnode;
+	struct mce mce;
+};
+
+void mce_gen_pool_process(void);
+bool mce_gen_pool_empty(void);
+int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_init(void);
+
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
 	return -EINVAL;
 }
 #endif
+
+void mce_inject_log(struct mce *m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3d6b5269fb2e..0f8f21c8284a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -52,11 +52,11 @@
 
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
-#define rcu_dereference_check_mce(p) \
+#define mce_log_get_idx_check(p) \
 ({ \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
 			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious rcu_dereference_check_mce() usage"); \
+			 "suspicious mce_log_get_idx_check() usage"); \
 	smp_load_acquire(&(p)); \
 })
 
@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;
 
-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);
 
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
 	/* Emit the trace record: */
 	trace_mce_record(mce);
 
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+	if (!mce_gen_pool_add(mce))
+		irq_work_queue(&mce_irq_work);
 
 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference_check_mce(mcelog.next);
+		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {
 
 			/*
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-static void drain_mcelog_buffer(void)
+void mce_inject_log(struct mce *m)
 {
-	unsigned int next, i, prev = 0;
-
-	next = ACCESS_ONCE(mcelog.next);
-
-	do {
-		struct mce *m;
-
-		/* drain what was logged during boot */
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			unsigned retries = 1;
-
-			m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2*retries))
-					retries++;
-
-				cpu_relax();
-
-				if (!m->finished && retries >= 4) {
-					pr_err("skipping error being logged currently!\n");
-					break;
-				}
-			}
-			smp_rmb();
-			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-		}
-
-		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
+	mutex_lock(&mce_chrdev_read_mutex);
+	mce_log(m);
+	mutex_unlock(&mce_chrdev_read_mutex);
 }
+EXPORT_SYMBOL_GPL(mce_inject_log);
 
+static struct notifier_block mce_srao_nb;
 
 void mce_register_decode_chain(struct notifier_block *nb)
 {
+	/* Ensure SRAO notifier has the highest priority in the decode chain. */
+	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+		nb->priority -= 1;
+
 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-	drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
 
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16	/* we use one entry less */
-
-struct mce_ring {
-	unsigned short start;
-	unsigned short end;
-	unsigned long ring[MCE_RING_SIZE];
-};
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-	return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-	struct mce_ring *r;
-	int ret = 0;
-
-	*pfn = 0;
-	get_cpu();
-	r = this_cpu_ptr(&mce_ring);
-	if (r->start == r->end)
-		goto out;
-	*pfn = r->ring[r->start];
-	r->start = (r->start + 1) % MCE_RING_SIZE;
-	ret = 1;
-out:
-	put_cpu();
-	return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-	unsigned next;
-
-	next = (r->end + 1) % MCE_RING_SIZE;
-	if (next == r->start)
-		return -1;
-	r->ring[r->end] = pfn;
-	wmb();
-	r->end = next;
-	return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
 
 static void mce_schedule_work(void)
 {
-	if (!mce_ring_empty())
-		schedule_work(this_cpu_ptr(&mce_work));
+	if (!mce_gen_pool_empty() && keventd_up())
+		schedule_work(&mce_work);
 }
 
-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
 	mce_notify_irq();
@@ -551,8 +472,29 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}
 
-	irq_work_queue(this_cpu_ptr(&mce_irq_work));
+	irq_work_queue(&mce_irq_work);
+}
+
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	unsigned long pfn;
+
+	if (!mce)
+		return NOTIFY_DONE;
+
+	if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+		pfn = mce->addr >> PAGE_SHIFT;
+		memory_failure(pfn, MCE_VECTOR, 0);
+	}
+
+	return NOTIFY_OK;
 }
+static struct notifier_block mce_srao_nb = {
+	.notifier_call	= srao_decode_notifier,
+	.priority	= INT_MAX,
+};
 
 /*
  * Read ADDR and MISC registers.
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 */
 		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
 			if (m.status & MCI_STATUS_ADDRV) {
-				mce_ring_add(m.addr >> PAGE_SHIFT);
-				mce_schedule_work();
+				m.severity = severity;
+				m.usable_addr = mce_usable_address(&m);
+
+				if (!mce_gen_pool_add(&m))
+					mce_schedule_work();
 			}
 		}
 
@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 		mce_read_aux(&m, i);
 
-		/*
-		 * Action optional error. Queue address for later processing.
-		 * When the ring overflows we just ignore the AO error.
-		 * RED-PEN add some logging mechanism when
-		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
-		 */
-		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
-			mce_ring_add(m.addr >> PAGE_SHIFT);
+		/* assuming valid severity level != 0 */
+		m.severity = severity;
+		m.usable_addr = mce_usable_address(&m);
 
 		mce_log(&m);
 
@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 /*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
- * placed into the "ring").
+ * placed into the genpool).
  */
 static void mce_process_work(struct work_struct *dummy)
 {
-	unsigned long pfn;
-
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR, 0);
+	mce_gen_pool_process();
 }
 
 #ifdef CONFIG_X86_MCE_INTEL
@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_clear(c);
+		break;
+	default:
+		break;
+	}
+}
+
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	if (mce_gen_pool_init()) {
+		mca_cfg.disabled = true;
+		pr_emerg("Couldn't allocate MCE records pool!\n");
+		return;
+	}
+
 	machine_check_vector = do_machine_check;
 
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
-	INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-	init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
+}
+
+/*
+ * Called for each booted CPU to clear some machine checks opt-ins
+ */
+void mcheck_cpu_clear(struct cpuinfo_x86 *c)
+{
+	if (mca_cfg.disabled)
+		return;
+
+	if (!mce_available(c))
+		return;
+
+	/*
+	 * Possibly to clear general settings generic to x86
+	 * __mcheck_cpu_clear_generic(c);
+	 */
+	__mcheck_cpu_clear_vendor(c);
+
 }
 
 /*
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}
 
-	next = rcu_dereference_check_mce(mcelog.next);
+	next = mce_log_get_idx_check(mcelog.next);
 
 	/* Only supports full reads right now */
 	err = -EINVAL;
@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&mce_srao_nb);
 	mcheck_vendor_init_severity();
 
+	INIT_WORK(&mce_work, mce_process_work);
+	init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
 	return 0;
 }
 
@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void)
 
 	return 0;
 }
-late_initcall(mcheck_debugfs_init);
+#else
+static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
+
+static int __init mcheck_late_init(void)
+{
+	mcheck_debugfs_init();
+
+	/*
+	 * Flush out everything that has been logged during early boot, now that
+	 * everything has been initialized (workqueues, decoders, ...).
+	 */
+	mce_schedule_work();
+
+	return 0;
+}
+late_initcall(mcheck_late_init);
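
With x86_mce_decoder_chain now declared extern in mce-internal.h and records draining through mce_gen_pool_process(), consumers such as EDAC keep using mce_register_decode_chain() exactly as before; each queued struct mce reaches the registered callbacks from work-queue context. A minimal sketch of such a consumer — not part of this patch, the demo_* names are illustrative:

	#include <linux/notifier.h>
	#include <linux/kernel.h>
	#include <asm/mce.h>

	/* Runs once per struct mce drained from the genpool. */
	static int demo_mce_notify(struct notifier_block *nb,
				   unsigned long val, void *data)
	{
		struct mce *m = data;

		if (!m)
			return NOTIFY_DONE;

		pr_info("demo: CPU%u bank %u status 0x%llx\n",
			m->extcpu, m->bank, m->status);

		return NOTIFY_OK;
	}

	static struct notifier_block demo_mce_nb = {
		.notifier_call	= demo_mce_notify,
		/* INT_MAX is reserved for mce_srao_nb, see mce_register_decode_chain() */
		.priority	= 1,
	};

	/* in the consumer's init path: mce_register_decode_chain(&demo_mce_nb); */
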
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 844f56c5616d..1e8bb6c94f14 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
+static void cmci_toggle_interrupt_mode(bool on)
+{
+	unsigned long flags, *owned;
+	int bank;
+	u64 val;
+
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+	owned = this_cpu_ptr(mce_banks_owned);
+	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+		if (on)
+			val |= MCI_CTL2_CMCI_EN;
+		else
+			val &= ~MCI_CTL2_CMCI_EN;
+
+		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+	}
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 	 */
 	if (!atomic_read(&cmci_storm_on_cpus)) {
 		__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-		cmci_reenable();
+		cmci_toggle_interrupt_mode(true);
 		cmci_recheck();
 	}
 	return CMCI_POLL_INTERVAL;
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 	}
 }
 
-static void cmci_storm_disable_banks(void)
-{
-	unsigned long flags, *owned;
-	int bank;
-	u64 val;
-
-	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-	owned = this_cpu_ptr(mce_banks_owned);
-	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-		val &= ~MCI_CTL2_CMCI_EN;
-		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-	}
-	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
 	if (cnt <= CMCI_STORM_THRESHOLD)
 		return false;
 
-	cmci_storm_disable_banks();
+	cmci_toggle_interrupt_mode(false);
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
 	mce_timer_kick(CMCI_STORM_INTERVAL);
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
 		return;
 
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
-	mce_notify_irq();
 }
 
 /*
@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }
 
-void intel_init_lmce(void)
+static void intel_init_lmce(void)
 {
 	u64 val;
 
@@ -448,9 +452,26 @@ void intel_init_lmce(void)
 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
 }
 
+static void intel_clear_lmce(void)
+{
+	u64 val;
+
+	if (!lmce_supported())
+		return;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+	val &= ~MCG_EXT_CTL_LMCE_EN;
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 	intel_init_cmci();
 	intel_init_lmce();
 }
+
+void mce_intel_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c27cad726765..d83740ab85b0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -29,6 +29,7 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
+#include <asm/mce.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy)
 	 */
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
 	for (;;)
 		halt();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 15aaa69bbb5e..12c8286206ce 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
 finish:
 	local_irq_save(flags);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 	local_irq_restore(flags);
 }
 
248 250
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig
new file mode 100644
index 000000000000..10fea5fc821e
--- /dev/null
+++ b/arch/x86/ras/Kconfig
@@ -0,0 +1,11 @@
+config AMD_MCE_INJ
+	tristate "Simple MCE injection interface for AMD processors"
+	depends on RAS && EDAC_DECODE_MCE && DEBUG_FS
+	default n
+	help
+	  This is a simple debugfs interface to inject MCEs and test different
+	  aspects of the MCE handling code.
+
+	  WARNING: Do not even assume this interface is staying stable!
+
+
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile
new file mode 100644
index 000000000000..dd2c98b84037
--- /dev/null
+++ b/arch/x86/ras/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_AMD_MCE_INJ)	+= mce_amd_inj.o
+
diff --git a/drivers/edac/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c
index 4c73e4d03d46..17e35b5bf779 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/arch/x86/ras/mce_amd_inj.c
@@ -6,7 +6,7 @@
  * This file may be distributed under the terms of the GNU General Public
  * License version 2.
  *
- * Copyright (c) 2010-14:  Borislav Petkov <bp@alien8.de>
+ * Copyright (c) 2010-15:  Borislav Petkov <bp@alien8.de>
  *			   Advanced Micro Devices Inc.
  */
 
@@ -19,7 +19,7 @@
 #include <linux/uaccess.h>
 #include <asm/mce.h>
 
-#include "mce_amd.h"
+#include "../kernel/cpu/mcheck/mce-internal.h"
 
 /*
  * Collect all the MCi_XXX settings
@@ -195,7 +195,7 @@ static void do_inject(void)
 		i_mce.status |= MCI_STATUS_MISCV;
 
 	if (inj_type == SW_INJ) {
-		amd_decode_mce(NULL, 0, &i_mce);
+		mce_inject_log(&i_mce);
 		return;
 	}
 
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 8677ead2a8e1..ef25000a5bc6 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -61,16 +61,6 @@ config EDAC_DECODE_MCE
 	  which occur really early upon boot, before the module infrastructure
 	  has been initialized.
 
-config EDAC_MCE_INJ
-	tristate "Simple MCE injection interface"
-	depends on EDAC_DECODE_MCE && DEBUG_FS
-	default n
-	help
-	  This is a simple debugfs interface to inject MCEs and test different
-	  aspects of the MCE handling code.
-
-	  WARNING: Do not even assume this interface is staying stable!
-
 config EDAC_MM_EDAC
 	tristate "Main Memory EDAC (Error Detection And Correction) reporting"
 	select RAS
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 28ef2a519f65..ae3c5f3ce405 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -17,7 +17,6 @@ edac_core-y += edac_pci.o edac_pci_sysfs.o
 endif
 
 obj-$(CONFIG_EDAC_GHES)			+= ghes_edac.o
-obj-$(CONFIG_EDAC_MCE_INJ)		+= mce_amd_inj.o
 
 edac_mce_amd-y				:= mce_amd.o
 obj-$(CONFIG_EDAC_DECODE_MCE)		+= edac_mce_amd.o
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index f9da613052c2..4c3c67d13254 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -1,2 +1,35 @@
-config RAS
-	bool
+menuconfig RAS
+	bool "Reliability, Availability and Serviceability (RAS) features"
+	help
+	  Reliability, availability and serviceability (RAS) is a computer
+	  hardware engineering term. Computers designed with higher levels
+	  of RAS have a multitude of features that protect data integrity
+	  and help them stay available for long periods of time without
+	  failure.
+
+	  Reliability can be defined as the probability that the system will
+	  produce correct outputs up to some given time. Reliability is
+	  enhanced by features that help to avoid, detect and repair hardware
+	  faults.
+
+	  Availability is the probability a system is operational at a given
+	  time, i.e. the amount of time a device is actually operating as the
+	  percentage of total time it should be operating.
+
+	  Serviceability or maintainability is the simplicity and speed with
+	  which a system can be repaired or maintained; if the time to repair
+	  a failed system increases, then availability will decrease.
+
+	  Note that Reliability and Availability are distinct concepts:
+	  Reliability is a measure of the ability of a system to function
+	  correctly, including avoiding data corruption, whereas Availability
+	  measures how often it is available for use, even though it may not
+	  be functioning correctly. For example, a server may run forever and
+	  so have ideal availability, but may be unreliable, with frequent
+	  data corruption.
+
+if RAS
+
+source arch/x86/ras/Kconfig
+
+endif