diff options
-rw-r--r-- | arch/x86/Kconfig | 1 | ||||
-rw-r--r-- | arch/x86/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 8 | ||||
-rw-r--r-- | arch/x86/include/uapi/asm/mce.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-apei.c | 1 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-genpool.c | 99 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 231 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 61 | ||||
-rw-r--r-- | arch/x86/kernel/process.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/x86/ras/Kconfig | 11 | ||||
-rw-r--r-- | arch/x86/ras/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/ras/mce_amd_inj.c (renamed from drivers/edac/mce_amd_inj.c) | 6 | ||||
-rw-r--r-- | drivers/edac/Kconfig | 10 | ||||
-rw-r--r-- | drivers/edac/Makefile | 1 | ||||
-rw-r--r-- | drivers/ras/Kconfig | 37 |
18 files changed, 329 insertions, 164 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b3a1a5d77d92..06dbb5da90c6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -955,6 +955,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | |||
955 | 955 | ||
956 | config X86_MCE | 956 | config X86_MCE |
957 | bool "Machine Check / overheating reporting" | 957 | bool "Machine Check / overheating reporting" |
958 | select GENERIC_ALLOCATOR | ||
958 | default y | 959 | default y |
959 | ---help--- | 960 | ---help--- |
960 | Machine Check support allows the processor to notify the | 961 | Machine Check support allows the processor to notify the |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 118e6debc483..0f38418719ab 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -212,6 +212,8 @@ drivers-$(CONFIG_PM) += arch/x86/power/ | |||
212 | 212 | ||
213 | drivers-$(CONFIG_FB) += arch/x86/video/ | 213 | drivers-$(CONFIG_FB) += arch/x86/video/ |
214 | 214 | ||
215 | drivers-$(CONFIG_RAS) += arch/x86/ras/ | ||
216 | |||
215 | #### | 217 | #### |
216 | # boot loader support. Several targets are kept for legacy purposes | 218 | # boot loader support. Several targets are kept for legacy purposes |
217 | 219 | ||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 982dfc3679ad..2dbc0bf2b9f3 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -151,10 +151,12 @@ extern int mce_p5_enabled; | |||
151 | #ifdef CONFIG_X86_MCE | 151 | #ifdef CONFIG_X86_MCE |
152 | int mcheck_init(void); | 152 | int mcheck_init(void); |
153 | void mcheck_cpu_init(struct cpuinfo_x86 *c); | 153 | void mcheck_cpu_init(struct cpuinfo_x86 *c); |
154 | void mcheck_cpu_clear(struct cpuinfo_x86 *c); | ||
154 | void mcheck_vendor_init_severity(void); | 155 | void mcheck_vendor_init_severity(void); |
155 | #else | 156 | #else |
156 | static inline int mcheck_init(void) { return 0; } | 157 | static inline int mcheck_init(void) { return 0; } |
157 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} | 158 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} |
159 | static inline void mcheck_cpu_clear(struct cpuinfo_x86 *c) {} | ||
158 | static inline void mcheck_vendor_init_severity(void) {} | 160 | static inline void mcheck_vendor_init_severity(void) {} |
159 | #endif | 161 | #endif |
160 | 162 | ||
@@ -181,20 +183,18 @@ DECLARE_PER_CPU(struct device *, mce_device); | |||
181 | 183 | ||
182 | #ifdef CONFIG_X86_MCE_INTEL | 184 | #ifdef CONFIG_X86_MCE_INTEL |
183 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | 185 | void mce_intel_feature_init(struct cpuinfo_x86 *c); |
186 | void mce_intel_feature_clear(struct cpuinfo_x86 *c); | ||
184 | void cmci_clear(void); | 187 | void cmci_clear(void); |
185 | void cmci_reenable(void); | 188 | void cmci_reenable(void); |
186 | void cmci_rediscover(void); | 189 | void cmci_rediscover(void); |
187 | void cmci_recheck(void); | 190 | void cmci_recheck(void); |
188 | void lmce_clear(void); | ||
189 | void lmce_enable(void); | ||
190 | #else | 191 | #else |
191 | static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } | 192 | static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } |
193 | static inline void mce_intel_feature_clear(struct cpuinfo_x86 *c) { } | ||
192 | static inline void cmci_clear(void) {} | 194 | static inline void cmci_clear(void) {} |
193 | static inline void cmci_reenable(void) {} | 195 | static inline void cmci_reenable(void) {} |
194 | static inline void cmci_rediscover(void) {} | 196 | static inline void cmci_rediscover(void) {} |
195 | static inline void cmci_recheck(void) {} | 197 | static inline void cmci_recheck(void) {} |
196 | static inline void lmce_clear(void) {} | ||
197 | static inline void lmce_enable(void) {} | ||
198 | #endif | 198 | #endif |
199 | 199 | ||
200 | #ifdef CONFIG_X86_MCE_AMD | 200 | #ifdef CONFIG_X86_MCE_AMD |
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h index a0eab85ce7b8..76880ede9a35 100644 --- a/arch/x86/include/uapi/asm/mce.h +++ b/arch/x86/include/uapi/asm/mce.h | |||
@@ -15,7 +15,8 @@ struct mce { | |||
15 | __u64 time; /* wall time_t when error was detected */ | 15 | __u64 time; /* wall time_t when error was detected */ |
16 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ | 16 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ |
17 | __u8 inject_flags; /* software inject flags */ | 17 | __u8 inject_flags; /* software inject flags */ |
18 | __u16 pad; | 18 | __u8 severity; |
19 | __u8 usable_addr; | ||
19 | __u32 cpuid; /* CPUID 1 EAX */ | 20 | __u32 cpuid; /* CPUID 1 EAX */ |
20 | __u8 cs; /* code segment */ | 21 | __u8 cs; /* code segment */ |
21 | __u8 bank; /* machine check bank */ | 22 | __u8 bank; /* machine check bank */ |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index bb34b03af252..a3311c886194 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y = mce.o mce-severity.o | 1 | obj-y = mce.o mce-severity.o mce-genpool.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 3 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index a1aef9533154..34c89a3e8260 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) | |||
57 | 57 | ||
58 | m.addr = mem_err->physical_addr; | 58 | m.addr = mem_err->physical_addr; |
59 | mce_log(&m); | 59 | mce_log(&m); |
60 | mce_notify_irq(); | ||
61 | } | 60 | } |
62 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); | 61 | EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); |
63 | 62 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c new file mode 100644 index 000000000000..0a850100c594 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c | |||
@@ -0,0 +1,99 @@ | |||
1 | /* | ||
2 | * MCE event pool management in MCE context | ||
3 | * | ||
4 | * Copyright (C) 2015 Intel Corp. | ||
5 | * Author: Chen, Gong <gong.chen@linux.intel.com> | ||
6 | * | ||
7 | * This file is licensed under GPLv2. | ||
8 | */ | ||
9 | #include <linux/smp.h> | ||
10 | #include <linux/mm.h> | ||
11 | #include <linux/genalloc.h> | ||
12 | #include <linux/llist.h> | ||
13 | #include "mce-internal.h" | ||
14 | |||
15 | /* | ||
16 | * printk() is not safe in MCE context. This is a lock-less memory allocator | ||
17 | * used to save error information organized in a lock-less list. | ||
18 | * | ||
19 | * This memory pool is only to be used to save MCE records in MCE context. | ||
20 | * MCE events are rare, so a fixed size memory pool should be enough. Use | ||
21 | * 2 pages to save MCE events for now (~80 MCE records at most). | ||
22 | */ | ||
23 | #define MCE_POOLSZ (2 * PAGE_SIZE) | ||
24 | |||
25 | static struct gen_pool *mce_evt_pool; | ||
26 | static LLIST_HEAD(mce_event_llist); | ||
27 | static char gen_pool_buf[MCE_POOLSZ]; | ||
28 | |||
29 | void mce_gen_pool_process(void) | ||
30 | { | ||
31 | struct llist_node *head; | ||
32 | struct mce_evt_llist *node; | ||
33 | struct mce *mce; | ||
34 | |||
35 | head = llist_del_all(&mce_event_llist); | ||
36 | if (!head) | ||
37 | return; | ||
38 | |||
39 | head = llist_reverse_order(head); | ||
40 | llist_for_each_entry(node, head, llnode) { | ||
41 | mce = &node->mce; | ||
42 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); | ||
43 | gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); | ||
44 | } | ||
45 | } | ||
46 | |||
47 | bool mce_gen_pool_empty(void) | ||
48 | { | ||
49 | return llist_empty(&mce_event_llist); | ||
50 | } | ||
51 | |||
52 | int mce_gen_pool_add(struct mce *mce) | ||
53 | { | ||
54 | struct mce_evt_llist *node; | ||
55 | |||
56 | if (!mce_evt_pool) | ||
57 | return -EINVAL; | ||
58 | |||
59 | node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node)); | ||
60 | if (!node) { | ||
61 | pr_warn_ratelimited("MCE records pool full!\n"); | ||
62 | return -ENOMEM; | ||
63 | } | ||
64 | |||
65 | memcpy(&node->mce, mce, sizeof(*mce)); | ||
66 | llist_add(&node->llnode, &mce_event_llist); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int mce_gen_pool_create(void) | ||
72 | { | ||
73 | struct gen_pool *tmpp; | ||
74 | int ret = -ENOMEM; | ||
75 | |||
76 | tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1); | ||
77 | if (!tmpp) | ||
78 | goto out; | ||
79 | |||
80 | ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1); | ||
81 | if (ret) { | ||
82 | gen_pool_destroy(tmpp); | ||
83 | goto out; | ||
84 | } | ||
85 | |||
86 | mce_evt_pool = tmpp; | ||
87 | |||
88 | out: | ||
89 | return ret; | ||
90 | } | ||
91 | |||
92 | int mce_gen_pool_init(void) | ||
93 | { | ||
94 | /* Just init mce_gen_pool once. */ | ||
95 | if (mce_evt_pool) | ||
96 | return 0; | ||
97 | |||
98 | return mce_gen_pool_create(); | ||
99 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index fe32074b865b..547720efd923 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -13,6 +13,8 @@ enum severity_level { | |||
13 | MCE_PANIC_SEVERITY, | 13 | MCE_PANIC_SEVERITY, |
14 | }; | 14 | }; |
15 | 15 | ||
16 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
17 | |||
16 | #define ATTR_LEN 16 | 18 | #define ATTR_LEN 16 |
17 | #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ | 19 | #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ |
18 | 20 | ||
@@ -24,6 +26,16 @@ struct mce_bank { | |||
24 | char attrname[ATTR_LEN]; /* attribute name */ | 26 | char attrname[ATTR_LEN]; /* attribute name */ |
25 | }; | 27 | }; |
26 | 28 | ||
29 | struct mce_evt_llist { | ||
30 | struct llist_node llnode; | ||
31 | struct mce mce; | ||
32 | }; | ||
33 | |||
34 | void mce_gen_pool_process(void); | ||
35 | bool mce_gen_pool_empty(void); | ||
36 | int mce_gen_pool_add(struct mce *mce); | ||
37 | int mce_gen_pool_init(void); | ||
38 | |||
27 | extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); | 39 | extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp); |
28 | struct dentry *mce_get_debugfs_dir(void); | 40 | struct dentry *mce_get_debugfs_dir(void); |
29 | 41 | ||
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id) | |||
67 | return -EINVAL; | 79 | return -EINVAL; |
68 | } | 80 | } |
69 | #endif | 81 | #endif |
82 | |||
83 | void mce_inject_log(struct mce *m); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3d6b5269fb2e..0f8f21c8284a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -52,11 +52,11 @@ | |||
52 | 52 | ||
53 | static DEFINE_MUTEX(mce_chrdev_read_mutex); | 53 | static DEFINE_MUTEX(mce_chrdev_read_mutex); |
54 | 54 | ||
55 | #define rcu_dereference_check_mce(p) \ | 55 | #define mce_log_get_idx_check(p) \ |
56 | ({ \ | 56 | ({ \ |
57 | RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ | 57 | RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ |
58 | !lockdep_is_held(&mce_chrdev_read_mutex), \ | 58 | !lockdep_is_held(&mce_chrdev_read_mutex), \ |
59 | "suspicious rcu_dereference_check_mce() usage"); \ | 59 | "suspicious mce_log_get_idx_check() usage"); \ |
60 | smp_load_acquire(&(p)); \ | 60 | smp_load_acquire(&(p)); \ |
61 | }) | 61 | }) |
62 | 62 | ||
@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
110 | */ | 110 | */ |
111 | mce_banks_t mce_banks_ce_disabled; | 111 | mce_banks_t mce_banks_ce_disabled; |
112 | 112 | ||
113 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 113 | static struct work_struct mce_work; |
114 | static struct irq_work mce_irq_work; | ||
114 | 115 | ||
115 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | 116 | static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); |
117 | static int mce_usable_address(struct mce *m); | ||
116 | 118 | ||
117 | /* | 119 | /* |
118 | * CPU/chipset specific EDAC code can register a notifier call here to print | 120 | * CPU/chipset specific EDAC code can register a notifier call here to print |
119 | * MCE errors in a human-readable form. | 121 | * MCE errors in a human-readable form. |
120 | */ | 122 | */ |
121 | static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | 123 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); |
122 | 124 | ||
123 | /* Do initial initialization of a struct mce */ | 125 | /* Do initial initialization of a struct mce */ |
124 | void mce_setup(struct mce *m) | 126 | void mce_setup(struct mce *m) |
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce) | |||
157 | /* Emit the trace record: */ | 159 | /* Emit the trace record: */ |
158 | trace_mce_record(mce); | 160 | trace_mce_record(mce); |
159 | 161 | ||
160 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); | 162 | if (!mce_gen_pool_add(mce)) |
163 | irq_work_queue(&mce_irq_work); | ||
161 | 164 | ||
162 | mce->finished = 0; | 165 | mce->finished = 0; |
163 | wmb(); | 166 | wmb(); |
164 | for (;;) { | 167 | for (;;) { |
165 | entry = rcu_dereference_check_mce(mcelog.next); | 168 | entry = mce_log_get_idx_check(mcelog.next); |
166 | for (;;) { | 169 | for (;;) { |
167 | 170 | ||
168 | /* | 171 | /* |
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce) | |||
196 | set_bit(0, &mce_need_notify); | 199 | set_bit(0, &mce_need_notify); |
197 | } | 200 | } |
198 | 201 | ||
199 | static void drain_mcelog_buffer(void) | 202 | void mce_inject_log(struct mce *m) |
200 | { | 203 | { |
201 | unsigned int next, i, prev = 0; | 204 | mutex_lock(&mce_chrdev_read_mutex); |
202 | 205 | mce_log(m); | |
203 | next = ACCESS_ONCE(mcelog.next); | 206 | mutex_unlock(&mce_chrdev_read_mutex); |
204 | |||
205 | do { | ||
206 | struct mce *m; | ||
207 | |||
208 | /* drain what was logged during boot */ | ||
209 | for (i = prev; i < next; i++) { | ||
210 | unsigned long start = jiffies; | ||
211 | unsigned retries = 1; | ||
212 | |||
213 | m = &mcelog.entry[i]; | ||
214 | |||
215 | while (!m->finished) { | ||
216 | if (time_after_eq(jiffies, start + 2*retries)) | ||
217 | retries++; | ||
218 | |||
219 | cpu_relax(); | ||
220 | |||
221 | if (!m->finished && retries >= 4) { | ||
222 | pr_err("skipping error being logged currently!\n"); | ||
223 | break; | ||
224 | } | ||
225 | } | ||
226 | smp_rmb(); | ||
227 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | ||
228 | } | ||
229 | |||
230 | memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m)); | ||
231 | prev = next; | ||
232 | next = cmpxchg(&mcelog.next, prev, 0); | ||
233 | } while (next != prev); | ||
234 | } | 207 | } |
208 | EXPORT_SYMBOL_GPL(mce_inject_log); | ||
235 | 209 | ||
210 | static struct notifier_block mce_srao_nb; | ||
236 | 211 | ||
237 | void mce_register_decode_chain(struct notifier_block *nb) | 212 | void mce_register_decode_chain(struct notifier_block *nb) |
238 | { | 213 | { |
214 | /* Ensure SRAO notifier has the highest priority in the decode chain. */ | ||
215 | if (nb != &mce_srao_nb && nb->priority == INT_MAX) | ||
216 | nb->priority -= 1; | ||
217 | |||
239 | atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); | 218 | atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); |
240 | drain_mcelog_buffer(); | ||
241 | } | 219 | } |
242 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); | 220 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); |
243 | 221 | ||
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs) | |||
461 | } | 439 | } |
462 | } | 440 | } |
463 | 441 | ||
464 | /* | ||
465 | * Simple lockless ring to communicate PFNs from the exception handler with the | ||
466 | * process context work function. This is vastly simplified because there's | ||
467 | * only a single reader and a single writer. | ||
468 | */ | ||
469 | #define MCE_RING_SIZE 16 /* we use one entry less */ | ||
470 | |||
471 | struct mce_ring { | ||
472 | unsigned short start; | ||
473 | unsigned short end; | ||
474 | unsigned long ring[MCE_RING_SIZE]; | ||
475 | }; | ||
476 | static DEFINE_PER_CPU(struct mce_ring, mce_ring); | ||
477 | |||
478 | /* Runs with CPU affinity in workqueue */ | ||
479 | static int mce_ring_empty(void) | ||
480 | { | ||
481 | struct mce_ring *r = this_cpu_ptr(&mce_ring); | ||
482 | |||
483 | return r->start == r->end; | ||
484 | } | ||
485 | |||
486 | static int mce_ring_get(unsigned long *pfn) | ||
487 | { | ||
488 | struct mce_ring *r; | ||
489 | int ret = 0; | ||
490 | |||
491 | *pfn = 0; | ||
492 | get_cpu(); | ||
493 | r = this_cpu_ptr(&mce_ring); | ||
494 | if (r->start == r->end) | ||
495 | goto out; | ||
496 | *pfn = r->ring[r->start]; | ||
497 | r->start = (r->start + 1) % MCE_RING_SIZE; | ||
498 | ret = 1; | ||
499 | out: | ||
500 | put_cpu(); | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | /* Always runs in MCE context with preempt off */ | ||
505 | static int mce_ring_add(unsigned long pfn) | ||
506 | { | ||
507 | struct mce_ring *r = this_cpu_ptr(&mce_ring); | ||
508 | unsigned next; | ||
509 | |||
510 | next = (r->end + 1) % MCE_RING_SIZE; | ||
511 | if (next == r->start) | ||
512 | return -1; | ||
513 | r->ring[r->end] = pfn; | ||
514 | wmb(); | ||
515 | r->end = next; | ||
516 | return 0; | ||
517 | } | ||
518 | |||
519 | int mce_available(struct cpuinfo_x86 *c) | 442 | int mce_available(struct cpuinfo_x86 *c) |
520 | { | 443 | { |
521 | if (mca_cfg.disabled) | 444 | if (mca_cfg.disabled) |
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c) | |||
525 | 448 | ||
526 | static void mce_schedule_work(void) | 449 | static void mce_schedule_work(void) |
527 | { | 450 | { |
528 | if (!mce_ring_empty()) | 451 | if (!mce_gen_pool_empty() && keventd_up()) |
529 | schedule_work(this_cpu_ptr(&mce_work)); | 452 | schedule_work(&mce_work); |
530 | } | 453 | } |
531 | 454 | ||
532 | static DEFINE_PER_CPU(struct irq_work, mce_irq_work); | ||
533 | |||
534 | static void mce_irq_work_cb(struct irq_work *entry) | 455 | static void mce_irq_work_cb(struct irq_work *entry) |
535 | { | 456 | { |
536 | mce_notify_irq(); | 457 | mce_notify_irq(); |
@@ -551,8 +472,29 @@ static void mce_report_event(struct pt_regs *regs) | |||
551 | return; | 472 | return; |
552 | } | 473 | } |
553 | 474 | ||
554 | irq_work_queue(this_cpu_ptr(&mce_irq_work)); | 475 | irq_work_queue(&mce_irq_work); |
476 | } | ||
477 | |||
478 | static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, | ||
479 | void *data) | ||
480 | { | ||
481 | struct mce *mce = (struct mce *)data; | ||
482 | unsigned long pfn; | ||
483 | |||
484 | if (!mce) | ||
485 | return NOTIFY_DONE; | ||
486 | |||
487 | if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) { | ||
488 | pfn = mce->addr >> PAGE_SHIFT; | ||
489 | memory_failure(pfn, MCE_VECTOR, 0); | ||
490 | } | ||
491 | |||
492 | return NOTIFY_OK; | ||
555 | } | 493 | } |
494 | static struct notifier_block mce_srao_nb = { | ||
495 | .notifier_call = srao_decode_notifier, | ||
496 | .priority = INT_MAX, | ||
497 | }; | ||
556 | 498 | ||
557 | /* | 499 | /* |
558 | * Read ADDR and MISC registers. | 500 | * Read ADDR and MISC registers. |
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
672 | */ | 614 | */ |
673 | if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { | 615 | if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { |
674 | if (m.status & MCI_STATUS_ADDRV) { | 616 | if (m.status & MCI_STATUS_ADDRV) { |
675 | mce_ring_add(m.addr >> PAGE_SHIFT); | 617 | m.severity = severity; |
676 | mce_schedule_work(); | 618 | m.usable_addr = mce_usable_address(&m); |
619 | |||
620 | if (!mce_gen_pool_add(&m)) | ||
621 | mce_schedule_work(); | ||
677 | } | 622 | } |
678 | } | 623 | } |
679 | 624 | ||
@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
1143 | 1088 | ||
1144 | mce_read_aux(&m, i); | 1089 | mce_read_aux(&m, i); |
1145 | 1090 | ||
1146 | /* | 1091 | /* assuming valid severity level != 0 */ |
1147 | * Action optional error. Queue address for later processing. | 1092 | m.severity = severity; |
1148 | * When the ring overflows we just ignore the AO error. | 1093 | m.usable_addr = mce_usable_address(&m); |
1149 | * RED-PEN add some logging mechanism when | ||
1150 | * usable_address or mce_add_ring fails. | ||
1151 | * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0 | ||
1152 | */ | ||
1153 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) | ||
1154 | mce_ring_add(m.addr >> PAGE_SHIFT); | ||
1155 | 1094 | ||
1156 | mce_log(&m); | 1095 | mce_log(&m); |
1157 | 1096 | ||
@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags) | |||
1247 | /* | 1186 | /* |
1248 | * Action optional processing happens here (picking up | 1187 | * Action optional processing happens here (picking up |
1249 | * from the list of faulting pages that do_machine_check() | 1188 | * from the list of faulting pages that do_machine_check() |
1250 | * placed into the "ring"). | 1189 | * placed into the genpool). |
1251 | */ | 1190 | */ |
1252 | static void mce_process_work(struct work_struct *dummy) | 1191 | static void mce_process_work(struct work_struct *dummy) |
1253 | { | 1192 | { |
1254 | unsigned long pfn; | 1193 | mce_gen_pool_process(); |
1255 | |||
1256 | while (mce_ring_get(&pfn)) | ||
1257 | memory_failure(pfn, MCE_VECTOR, 0); | ||
1258 | } | 1194 | } |
1259 | 1195 | ||
1260 | #ifdef CONFIG_X86_MCE_INTEL | 1196 | #ifdef CONFIG_X86_MCE_INTEL |
@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1678 | } | 1614 | } |
1679 | } | 1615 | } |
1680 | 1616 | ||
1617 | static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) | ||
1618 | { | ||
1619 | switch (c->x86_vendor) { | ||
1620 | case X86_VENDOR_INTEL: | ||
1621 | mce_intel_feature_clear(c); | ||
1622 | break; | ||
1623 | default: | ||
1624 | break; | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1681 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) | 1628 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) |
1682 | { | 1629 | { |
1683 | unsigned long iv = check_interval * HZ; | 1630 | unsigned long iv = check_interval * HZ; |
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) | |||
1731 | return; | 1678 | return; |
1732 | } | 1679 | } |
1733 | 1680 | ||
1681 | if (mce_gen_pool_init()) { | ||
1682 | mca_cfg.disabled = true; | ||
1683 | pr_emerg("Couldn't allocate MCE records pool!\n"); | ||
1684 | return; | ||
1685 | } | ||
1686 | |||
1734 | machine_check_vector = do_machine_check; | 1687 | machine_check_vector = do_machine_check; |
1735 | 1688 | ||
1736 | __mcheck_cpu_init_generic(); | 1689 | __mcheck_cpu_init_generic(); |
1737 | __mcheck_cpu_init_vendor(c); | 1690 | __mcheck_cpu_init_vendor(c); |
1738 | __mcheck_cpu_init_timer(); | 1691 | __mcheck_cpu_init_timer(); |
1739 | INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work); | 1692 | } |
1740 | init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb); | 1693 | |
1694 | /* | ||
1695 | * Called for each booted CPU to clear some machine checks opt-ins | ||
1696 | */ | ||
1697 | void mcheck_cpu_clear(struct cpuinfo_x86 *c) | ||
1698 | { | ||
1699 | if (mca_cfg.disabled) | ||
1700 | return; | ||
1701 | |||
1702 | if (!mce_available(c)) | ||
1703 | return; | ||
1704 | |||
1705 | /* | ||
1706 | * Possibly to clear general settings generic to x86 | ||
1707 | * __mcheck_cpu_clear_generic(c); | ||
1708 | */ | ||
1709 | __mcheck_cpu_clear_vendor(c); | ||
1710 | |||
1741 | } | 1711 | } |
1742 | 1712 | ||
1743 | /* | 1713 | /* |
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, | |||
1850 | goto out; | 1820 | goto out; |
1851 | } | 1821 | } |
1852 | 1822 | ||
1853 | next = rcu_dereference_check_mce(mcelog.next); | 1823 | next = mce_log_get_idx_check(mcelog.next); |
1854 | 1824 | ||
1855 | /* Only supports full reads right now */ | 1825 | /* Only supports full reads right now */ |
1856 | err = -EINVAL; | 1826 | err = -EINVAL; |
@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable); | |||
2056 | int __init mcheck_init(void) | 2026 | int __init mcheck_init(void) |
2057 | { | 2027 | { |
2058 | mcheck_intel_therm_init(); | 2028 | mcheck_intel_therm_init(); |
2029 | mce_register_decode_chain(&mce_srao_nb); | ||
2059 | mcheck_vendor_init_severity(); | 2030 | mcheck_vendor_init_severity(); |
2060 | 2031 | ||
2032 | INIT_WORK(&mce_work, mce_process_work); | ||
2033 | init_irq_work(&mce_irq_work, mce_irq_work_cb); | ||
2034 | |||
2061 | return 0; | 2035 | return 0; |
2062 | } | 2036 | } |
2063 | 2037 | ||
@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void) | |||
2591 | 2565 | ||
2592 | return 0; | 2566 | return 0; |
2593 | } | 2567 | } |
2594 | late_initcall(mcheck_debugfs_init); | 2568 | #else |
2569 | static int __init mcheck_debugfs_init(void) { return -EINVAL; } | ||
2595 | #endif | 2570 | #endif |
2571 | |||
2572 | static int __init mcheck_late_init(void) | ||
2573 | { | ||
2574 | mcheck_debugfs_init(); | ||
2575 | |||
2576 | /* | ||
2577 | * Flush out everything that has been logged during early boot, now that | ||
2578 | * everything has been initialized (workqueues, decoders, ...). | ||
2579 | */ | ||
2580 | mce_schedule_work(); | ||
2581 | |||
2582 | return 0; | ||
2583 | } | ||
2584 | late_initcall(mcheck_late_init); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 844f56c5616d..1e8bb6c94f14 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu) | |||
146 | per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; | 146 | per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE; |
147 | } | 147 | } |
148 | 148 | ||
149 | static void cmci_toggle_interrupt_mode(bool on) | ||
150 | { | ||
151 | unsigned long flags, *owned; | ||
152 | int bank; | ||
153 | u64 val; | ||
154 | |||
155 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | ||
156 | owned = this_cpu_ptr(mce_banks_owned); | ||
157 | for_each_set_bit(bank, owned, MAX_NR_BANKS) { | ||
158 | rdmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
159 | |||
160 | if (on) | ||
161 | val |= MCI_CTL2_CMCI_EN; | ||
162 | else | ||
163 | val &= ~MCI_CTL2_CMCI_EN; | ||
164 | |||
165 | wrmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
166 | } | ||
167 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
168 | } | ||
169 | |||
149 | unsigned long cmci_intel_adjust_timer(unsigned long interval) | 170 | unsigned long cmci_intel_adjust_timer(unsigned long interval) |
150 | { | 171 | { |
151 | if ((this_cpu_read(cmci_backoff_cnt) > 0) && | 172 | if ((this_cpu_read(cmci_backoff_cnt) > 0) && |
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) | |||
175 | */ | 196 | */ |
176 | if (!atomic_read(&cmci_storm_on_cpus)) { | 197 | if (!atomic_read(&cmci_storm_on_cpus)) { |
177 | __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); | 198 | __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE); |
178 | cmci_reenable(); | 199 | cmci_toggle_interrupt_mode(true); |
179 | cmci_recheck(); | 200 | cmci_recheck(); |
180 | } | 201 | } |
181 | return CMCI_POLL_INTERVAL; | 202 | return CMCI_POLL_INTERVAL; |
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval) | |||
186 | } | 207 | } |
187 | } | 208 | } |
188 | 209 | ||
189 | static void cmci_storm_disable_banks(void) | ||
190 | { | ||
191 | unsigned long flags, *owned; | ||
192 | int bank; | ||
193 | u64 val; | ||
194 | |||
195 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | ||
196 | owned = this_cpu_ptr(mce_banks_owned); | ||
197 | for_each_set_bit(bank, owned, MAX_NR_BANKS) { | ||
198 | rdmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
199 | val &= ~MCI_CTL2_CMCI_EN; | ||
200 | wrmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
201 | } | ||
202 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
203 | } | ||
204 | |||
205 | static bool cmci_storm_detect(void) | 210 | static bool cmci_storm_detect(void) |
206 | { | 211 | { |
207 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); | 212 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); |
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void) | |||
223 | if (cnt <= CMCI_STORM_THRESHOLD) | 228 | if (cnt <= CMCI_STORM_THRESHOLD) |
224 | return false; | 229 | return false; |
225 | 230 | ||
226 | cmci_storm_disable_banks(); | 231 | cmci_toggle_interrupt_mode(false); |
227 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); | 232 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); |
228 | r = atomic_add_return(1, &cmci_storm_on_cpus); | 233 | r = atomic_add_return(1, &cmci_storm_on_cpus); |
229 | mce_timer_kick(CMCI_STORM_INTERVAL); | 234 | mce_timer_kick(CMCI_STORM_INTERVAL); |
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void) | |||
246 | return; | 251 | return; |
247 | 252 | ||
248 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); | 253 | machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)); |
249 | mce_notify_irq(); | ||
250 | } | 254 | } |
251 | 255 | ||
252 | /* | 256 | /* |
@@ -435,7 +439,7 @@ static void intel_init_cmci(void) | |||
435 | cmci_recheck(); | 439 | cmci_recheck(); |
436 | } | 440 | } |
437 | 441 | ||
438 | void intel_init_lmce(void) | 442 | static void intel_init_lmce(void) |
439 | { | 443 | { |
440 | u64 val; | 444 | u64 val; |
441 | 445 | ||
@@ -448,9 +452,26 @@ void intel_init_lmce(void) | |||
448 | wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); | 452 | wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN); |
449 | } | 453 | } |
450 | 454 | ||
455 | static void intel_clear_lmce(void) | ||
456 | { | ||
457 | u64 val; | ||
458 | |||
459 | if (!lmce_supported()) | ||
460 | return; | ||
461 | |||
462 | rdmsrl(MSR_IA32_MCG_EXT_CTL, val); | ||
463 | val &= ~MCG_EXT_CTL_LMCE_EN; | ||
464 | wrmsrl(MSR_IA32_MCG_EXT_CTL, val); | ||
465 | } | ||
466 | |||
451 | void mce_intel_feature_init(struct cpuinfo_x86 *c) | 467 | void mce_intel_feature_init(struct cpuinfo_x86 *c) |
452 | { | 468 | { |
453 | intel_init_thermal(c); | 469 | intel_init_thermal(c); |
454 | intel_init_cmci(); | 470 | intel_init_cmci(); |
455 | intel_init_lmce(); | 471 | intel_init_lmce(); |
456 | } | 472 | } |
473 | |||
474 | void mce_intel_feature_clear(struct cpuinfo_x86 *c) | ||
475 | { | ||
476 | intel_clear_lmce(); | ||
477 | } | ||
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c27cad726765..d83740ab85b0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/debugreg.h> | 29 | #include <asm/debugreg.h> |
30 | #include <asm/nmi.h> | 30 | #include <asm/nmi.h> |
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <asm/mce.h> | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, | 35 | * per-CPU TSS segments. Threads are completely 'soft' on Linux, |
@@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy) | |||
319 | */ | 320 | */ |
320 | set_cpu_online(smp_processor_id(), false); | 321 | set_cpu_online(smp_processor_id(), false); |
321 | disable_local_APIC(); | 322 | disable_local_APIC(); |
323 | mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); | ||
322 | 324 | ||
323 | for (;;) | 325 | for (;;) |
324 | halt(); | 326 | halt(); |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 15aaa69bbb5e..12c8286206ce 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/nmi.h> | 32 | #include <asm/nmi.h> |
33 | #include <asm/mce.h> | ||
33 | #include <asm/trace/irq_vectors.h> | 34 | #include <asm/trace/irq_vectors.h> |
34 | /* | 35 | /* |
35 | * Some notes on x86 processor bugs affecting SMP operation: | 36 | * Some notes on x86 processor bugs affecting SMP operation: |
@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait) | |||
243 | finish: | 244 | finish: |
244 | local_irq_save(flags); | 245 | local_irq_save(flags); |
245 | disable_local_APIC(); | 246 | disable_local_APIC(); |
247 | mcheck_cpu_clear(this_cpu_ptr(&cpu_info)); | ||
246 | local_irq_restore(flags); | 248 | local_irq_restore(flags); |
247 | } | 249 | } |
248 | 250 | ||
diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig new file mode 100644 index 000000000000..10fea5fc821e --- /dev/null +++ b/arch/x86/ras/Kconfig | |||
@@ -0,0 +1,11 @@ | |||
1 | config AMD_MCE_INJ | ||
2 | tristate "Simple MCE injection interface for AMD processors" | ||
3 | depends on RAS && EDAC_DECODE_MCE && DEBUG_FS | ||
4 | default n | ||
5 | help | ||
6 | This is a simple debugfs interface to inject MCEs and test different | ||
7 | aspects of the MCE handling code. | ||
8 | |||
9 | WARNING: Do not even assume this interface is staying stable! | ||
10 | |||
11 | |||
diff --git a/arch/x86/ras/Makefile b/arch/x86/ras/Makefile new file mode 100644 index 000000000000..dd2c98b84037 --- /dev/null +++ b/arch/x86/ras/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-$(CONFIG_AMD_MCE_INJ) += mce_amd_inj.o | ||
2 | |||
diff --git a/drivers/edac/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 4c73e4d03d46..17e35b5bf779 100644 --- a/drivers/edac/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c | |||
@@ -6,7 +6,7 @@ | |||
6 | * This file may be distributed under the terms of the GNU General Public | 6 | * This file may be distributed under the terms of the GNU General Public |
7 | * License version 2. | 7 | * License version 2. |
8 | * | 8 | * |
9 | * Copyright (c) 2010-14: Borislav Petkov <bp@alien8.de> | 9 | * Copyright (c) 2010-15: Borislav Petkov <bp@alien8.de> |
10 | * Advanced Micro Devices Inc. | 10 | * Advanced Micro Devices Inc. |
11 | */ | 11 | */ |
12 | 12 | ||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/uaccess.h> | 19 | #include <linux/uaccess.h> |
20 | #include <asm/mce.h> | 20 | #include <asm/mce.h> |
21 | 21 | ||
22 | #include "mce_amd.h" | 22 | #include "../kernel/cpu/mcheck/mce-internal.h" |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * Collect all the MCi_XXX settings | 25 | * Collect all the MCi_XXX settings |
@@ -195,7 +195,7 @@ static void do_inject(void) | |||
195 | i_mce.status |= MCI_STATUS_MISCV; | 195 | i_mce.status |= MCI_STATUS_MISCV; |
196 | 196 | ||
197 | if (inj_type == SW_INJ) { | 197 | if (inj_type == SW_INJ) { |
198 | amd_decode_mce(NULL, 0, &i_mce); | 198 | mce_inject_log(&i_mce); |
199 | return; | 199 | return; |
200 | } | 200 | } |
201 | 201 | ||
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 8677ead2a8e1..ef25000a5bc6 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig | |||
@@ -61,16 +61,6 @@ config EDAC_DECODE_MCE | |||
61 | which occur really early upon boot, before the module infrastructure | 61 | which occur really early upon boot, before the module infrastructure |
62 | has been initialized. | 62 | has been initialized. |
63 | 63 | ||
64 | config EDAC_MCE_INJ | ||
65 | tristate "Simple MCE injection interface" | ||
66 | depends on EDAC_DECODE_MCE && DEBUG_FS | ||
67 | default n | ||
68 | help | ||
69 | This is a simple debugfs interface to inject MCEs and test different | ||
70 | aspects of the MCE handling code. | ||
71 | |||
72 | WARNING: Do not even assume this interface is staying stable! | ||
73 | |||
74 | config EDAC_MM_EDAC | 64 | config EDAC_MM_EDAC |
75 | tristate "Main Memory EDAC (Error Detection And Correction) reporting" | 65 | tristate "Main Memory EDAC (Error Detection And Correction) reporting" |
76 | select RAS | 66 | select RAS |
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 28ef2a519f65..ae3c5f3ce405 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile | |||
@@ -17,7 +17,6 @@ edac_core-y += edac_pci.o edac_pci_sysfs.o | |||
17 | endif | 17 | endif |
18 | 18 | ||
19 | obj-$(CONFIG_EDAC_GHES) += ghes_edac.o | 19 | obj-$(CONFIG_EDAC_GHES) += ghes_edac.o |
20 | obj-$(CONFIG_EDAC_MCE_INJ) += mce_amd_inj.o | ||
21 | 20 | ||
22 | edac_mce_amd-y := mce_amd.o | 21 | edac_mce_amd-y := mce_amd.o |
23 | obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o | 22 | obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o |
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig index f9da613052c2..4c3c67d13254 100644 --- a/drivers/ras/Kconfig +++ b/drivers/ras/Kconfig | |||
@@ -1,2 +1,35 @@ | |||
1 | config RAS | 1 | menuconfig RAS |
2 | bool | 2 | bool "Reliability, Availability and Serviceability (RAS) features" |
3 | help | ||
4 | Reliability, availability and serviceability (RAS) is a computer | ||
5 | hardware engineering term. Computers designed with higher levels | ||
6 | of RAS have a multitude of features that protect data integrity | ||
7 | and help them stay available for long periods of time without | ||
8 | failure. | ||
9 | |||
10 | Reliability can be defined as the probability that the system will | ||
11 | produce correct outputs up to some given time. Reliability is | ||
12 | enhanced by features that help to avoid, detect and repair hardware | ||
13 | faults. | ||
14 | |||
15 | Availability is the probability a system is operational at a given | ||
16 | time, i.e. the amount of time a device is actually operating as the | ||
17 | percentage of total time it should be operating. | ||
18 | |||
19 | Serviceability or maintainability is the simplicity and speed with | ||
20 | which a system can be repaired or maintained; if the time to repair | ||
21 | a failed system increases, then availability will decrease. | ||
22 | |||
23 | Note that Reliability and Availability are distinct concepts: | ||
24 | Reliability is a measure of the ability of a system to function | ||
25 | correctly, including avoiding data corruption, whereas Availability | ||
26 | measures how often it is available for use, even though it may not | ||
27 | be functioning correctly. For example, a server may run forever and | ||
28 | so have ideal availability, but may be unreliable, with frequent | ||
29 | data corruption. | ||
30 | |||
31 | if RAS | ||
32 | |||
33 | source arch/x86/ras/Kconfig | ||
34 | |||
35 | endif | ||