author    Linus Torvalds <torvalds@linux-foundation.org>  2015-08-31 23:20:30 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-08-31 23:20:30 -0400
commit    3959df1dfb9538498ec3372a2d390bc7fbdbfac2 (patch)
tree      31da3a7082c78cf5efbfe37ba27b1b79bbe69f63 /arch/x86/kernel
parent    41d859a83c567a9c9f50a34082cc64aab0abb0cd (diff)
parent    6c36dfe949187dc2729abfad4b083758ac5c2e0e (diff)
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Ingo Molnar:
 "MCE handling updates, but also some generic drivers/edac/ changes to
  better organize the Kconfig space"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/ras: Move AMD MCE injector to arch/x86/ras/
  x86/mce: Add a wrapper around mce_log() for injection
  x86/mce: Rename rcu_dereference_check_mce() to mce_log_get_idx_check()
  RAS: Add a menuconfig option with descriptive text
  x86/mce: Reenable CMCI banks when swiching back to interrupt mode
  x86/mce: Clear Local MCE opt-in before kexec
  x86/mce: Remove unused function declarations
  x86/mce: Kill drain_mcelog_buffer()
  x86/mce: Avoid potential deadlock due to printk() in MCE context
  x86/mce: Remove the MCE ring for Action Optional errors
  x86/mce: Don't use percpu workqueues
  x86/mce: Provide a lockless memory pool to save error records
  x86/mce: Reuse one of the u16 padding fields in 'struct mce'
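The core of this series replaces the per-CPU PFN ring and the direct notifier call in mce_log() with a preallocated, lock-less record pool that is drained later from process context (see mce-genpool.c in the diff below). As a rough illustration of that producer/consumer shape only, here is a minimal userspace sketch using C11 atomics in place of the kernel's genalloc/llist primitives; all names in it are hypothetical and it is not part of the patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for 'struct mce' (hypothetical fields) */
struct rec {
	int bank;
	unsigned long addr;
};

#define POOL_SIZE 16	/* fixed pool: nothing is allocated in the hot path */

struct node {
	struct rec rec;
	struct node *next;	/* link in the pending list */
	_Atomic bool in_use;	/* crude slot allocator */
};

static struct node pool[POOL_SIZE];
static _Atomic(struct node *) pending;	/* lock-less LIFO, like the kernel's llist */

/* Producer: callable from a context that must not sleep, allocate or printk */
static int rec_add(const struct rec *r)
{
	for (int i = 0; i < POOL_SIZE; i++) {
		if (atomic_exchange(&pool[i].in_use, true))
			continue;	/* slot already taken, try the next one */
		pool[i].rec = *r;
		/* push with a CAS loop, analogous to llist_add() */
		struct node *old = atomic_load(&pending);
		do {
			pool[i].next = old;
		} while (!atomic_compare_exchange_weak(&pending, &old, &pool[i]));
		return 0;
	}
	return -1;	/* pool full: the record is dropped */
}

/* Consumer: runs later where printing/sleeping is fine (cf. mce_gen_pool_process) */
static void rec_process(void)
{
	/* grab the whole list at once, analogous to llist_del_all() */
	struct node *head = atomic_exchange(&pending, NULL);

	while (head) {	/* note: LIFO order; the kernel reverses the list first */
		struct node *next = head->next;
		printf("record: bank=%d addr=%#lx\n", head->rec.bank, head->rec.addr);
		atomic_store(&head->in_use, false);	/* give the slot back */
		head = next;
	}
}

int main(void)
{
	struct rec r = { .bank = 3, .addr = 0x1000 };

	rec_add(&r);
	rec_process();
	return 0;
}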
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/cpu/mcheck/Makefile         2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-apei.c       1
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-genpool.c   99
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-internal.h  14
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c          231
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c     61
-rw-r--r--  arch/x86/kernel/process.c                   2
-rw-r--r--  arch/x86/kernel/smp.c                       2
8 files changed, 269 insertions, 143 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index bb34b03af252..a3311c886194 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,4 +1,4 @@
-obj-y = mce.o mce-severity.o
+obj-y = mce.o mce-severity.o mce-genpool.o
 
 obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
 obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index a1aef9533154..34c89a3e8260 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -57,7 +57,6 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
 
 	m.addr = mem_err->physical_addr;
 	mce_log(&m);
-	mce_notify_irq();
 }
 EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
new file mode 100644
index 000000000000..0a850100c594
--- /dev/null
+++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c
@@ -0,0 +1,99 @@
+/*
+ * MCE event pool management in MCE context
+ *
+ * Copyright (C) 2015 Intel Corp.
+ * Author: Chen, Gong <gong.chen@linux.intel.com>
+ *
+ * This file is licensed under GPLv2.
+ */
+#include <linux/smp.h>
+#include <linux/mm.h>
+#include <linux/genalloc.h>
+#include <linux/llist.h>
+#include "mce-internal.h"
+
+/*
+ * printk() is not safe in MCE context. This is a lock-less memory allocator
+ * used to save error information organized in a lock-less list.
+ *
+ * This memory pool is only to be used to save MCE records in MCE context.
+ * MCE events are rare, so a fixed size memory pool should be enough. Use
+ * 2 pages to save MCE events for now (~80 MCE records at most).
+ */
+#define MCE_POOLSZ	(2 * PAGE_SIZE)
+
+static struct gen_pool *mce_evt_pool;
+static LLIST_HEAD(mce_event_llist);
+static char gen_pool_buf[MCE_POOLSZ];
+
+void mce_gen_pool_process(void)
+{
+	struct llist_node *head;
+	struct mce_evt_llist *node;
+	struct mce *mce;
+
+	head = llist_del_all(&mce_event_llist);
+	if (!head)
+		return;
+
+	head = llist_reverse_order(head);
+	llist_for_each_entry(node, head, llnode) {
+		mce = &node->mce;
+		atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+		gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node));
+	}
+}
+
+bool mce_gen_pool_empty(void)
+{
+	return llist_empty(&mce_event_llist);
+}
+
+int mce_gen_pool_add(struct mce *mce)
+{
+	struct mce_evt_llist *node;
+
+	if (!mce_evt_pool)
+		return -EINVAL;
+
+	node = (void *)gen_pool_alloc(mce_evt_pool, sizeof(*node));
+	if (!node) {
+		pr_warn_ratelimited("MCE records pool full!\n");
+		return -ENOMEM;
+	}
+
+	memcpy(&node->mce, mce, sizeof(*mce));
+	llist_add(&node->llnode, &mce_event_llist);
+
+	return 0;
+}
+
+static int mce_gen_pool_create(void)
+{
+	struct gen_pool *tmpp;
+	int ret = -ENOMEM;
+
+	tmpp = gen_pool_create(ilog2(sizeof(struct mce_evt_llist)), -1);
+	if (!tmpp)
+		goto out;
+
+	ret = gen_pool_add(tmpp, (unsigned long)gen_pool_buf, MCE_POOLSZ, -1);
+	if (ret) {
+		gen_pool_destroy(tmpp);
+		goto out;
+	}
+
+	mce_evt_pool = tmpp;
+
+out:
+	return ret;
+}
+
+int mce_gen_pool_init(void)
+{
+	/* Just init mce_gen_pool once. */
+	if (mce_evt_pool)
+		return 0;
+
+	return mce_gen_pool_create();
+}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index fe32074b865b..547720efd923 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -13,6 +13,8 @@ enum severity_level {
 	MCE_PANIC_SEVERITY,
 };
 
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
 #define ATTR_LEN		16
 #define INITIAL_CHECK_INTERVAL	5 * 60	/* 5 minutes */
 
@@ -24,6 +26,16 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
+struct mce_evt_llist {
+	struct llist_node llnode;
+	struct mce mce;
+};
+
+void mce_gen_pool_process(void);
+bool mce_gen_pool_empty(void);
+int mce_gen_pool_add(struct mce *mce);
+int mce_gen_pool_init(void);
+
 extern int (*mce_severity)(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
@@ -67,3 +79,5 @@ static inline int apei_clear_mce(u64 record_id)
 	return -EINVAL;
 }
 #endif
+
+void mce_inject_log(struct mce *m);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 3d6b5269fb2e..0f8f21c8284a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -52,11 +52,11 @@
 
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
-#define rcu_dereference_check_mce(p) \
+#define mce_log_get_idx_check(p) \
 ({ \
 	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
 			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious rcu_dereference_check_mce() usage"); \
+			 "suspicious mce_log_get_idx_check() usage"); \
 	smp_load_acquire(&(p)); \
 })
 
@@ -110,15 +110,17 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
  */
 mce_banks_t mce_banks_ce_disabled;
 
-static DEFINE_PER_CPU(struct work_struct, mce_work);
+static struct work_struct mce_work;
+static struct irq_work mce_irq_work;
 
 static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs);
+static int mce_usable_address(struct mce *m);
 
 /*
  * CPU/chipset specific EDAC code can register a notifier call here to print
  * MCE errors in a human-readable form.
  */
-static ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 
 /* Do initial initialization of a struct mce */
 void mce_setup(struct mce *m)
@@ -157,12 +159,13 @@ void mce_log(struct mce *mce)
 	/* Emit the trace record: */
 	trace_mce_record(mce);
 
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce);
+	if (!mce_gen_pool_add(mce))
+		irq_work_queue(&mce_irq_work);
 
 	mce->finished = 0;
 	wmb();
 	for (;;) {
-		entry = rcu_dereference_check_mce(mcelog.next);
+		entry = mce_log_get_idx_check(mcelog.next);
 		for (;;) {
 
 			/*
@@ -196,48 +199,23 @@ void mce_log(struct mce *mce)
 	set_bit(0, &mce_need_notify);
 }
 
-static void drain_mcelog_buffer(void)
+void mce_inject_log(struct mce *m)
 {
-	unsigned int next, i, prev = 0;
-
-	next = ACCESS_ONCE(mcelog.next);
-
-	do {
-		struct mce *m;
-
-		/* drain what was logged during boot */
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			unsigned retries = 1;
-
-			m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2*retries))
-					retries++;
-
-				cpu_relax();
-
-				if (!m->finished && retries >= 4) {
-					pr_err("skipping error being logged currently!\n");
-					break;
-				}
-			}
-			smp_rmb();
-			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
-		}
-
-		memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
+	mutex_lock(&mce_chrdev_read_mutex);
+	mce_log(m);
+	mutex_unlock(&mce_chrdev_read_mutex);
 }
+EXPORT_SYMBOL_GPL(mce_inject_log);
 
+static struct notifier_block mce_srao_nb;
 
 void mce_register_decode_chain(struct notifier_block *nb)
 {
+	/* Ensure SRAO notifier has the highest priority in the decode chain. */
+	if (nb != &mce_srao_nb && nb->priority == INT_MAX)
+		nb->priority -= 1;
+
 	atomic_notifier_chain_register(&x86_mce_decoder_chain, nb);
-	drain_mcelog_buffer();
 }
 EXPORT_SYMBOL_GPL(mce_register_decode_chain);
 
@@ -461,61 +439,6 @@ static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
 	}
 }
 
-/*
- * Simple lockless ring to communicate PFNs from the exception handler with the
- * process context work function. This is vastly simplified because there's
- * only a single reader and a single writer.
- */
-#define MCE_RING_SIZE 16	/* we use one entry less */
-
-struct mce_ring {
-	unsigned short start;
-	unsigned short end;
-	unsigned long ring[MCE_RING_SIZE];
-};
-static DEFINE_PER_CPU(struct mce_ring, mce_ring);
-
-/* Runs with CPU affinity in workqueue */
-static int mce_ring_empty(void)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-
-	return r->start == r->end;
-}
-
-static int mce_ring_get(unsigned long *pfn)
-{
-	struct mce_ring *r;
-	int ret = 0;
-
-	*pfn = 0;
-	get_cpu();
-	r = this_cpu_ptr(&mce_ring);
-	if (r->start == r->end)
-		goto out;
-	*pfn = r->ring[r->start];
-	r->start = (r->start + 1) % MCE_RING_SIZE;
-	ret = 1;
-out:
-	put_cpu();
-	return ret;
-}
-
-/* Always runs in MCE context with preempt off */
-static int mce_ring_add(unsigned long pfn)
-{
-	struct mce_ring *r = this_cpu_ptr(&mce_ring);
-	unsigned next;
-
-	next = (r->end + 1) % MCE_RING_SIZE;
-	if (next == r->start)
-		return -1;
-	r->ring[r->end] = pfn;
-	wmb();
-	r->end = next;
-	return 0;
-}
-
 int mce_available(struct cpuinfo_x86 *c)
 {
 	if (mca_cfg.disabled)
@@ -525,12 +448,10 @@ int mce_available(struct cpuinfo_x86 *c)
 
 static void mce_schedule_work(void)
 {
-	if (!mce_ring_empty())
-		schedule_work(this_cpu_ptr(&mce_work));
+	if (!mce_gen_pool_empty() && keventd_up())
+		schedule_work(&mce_work);
 }
 
-static DEFINE_PER_CPU(struct irq_work, mce_irq_work);
-
 static void mce_irq_work_cb(struct irq_work *entry)
 {
 	mce_notify_irq();
@@ -551,8 +472,29 @@ static void mce_report_event(struct pt_regs *regs)
 		return;
 	}
 
-	irq_work_queue(this_cpu_ptr(&mce_irq_work));
+	irq_work_queue(&mce_irq_work);
+}
+
+static int srao_decode_notifier(struct notifier_block *nb, unsigned long val,
+				void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	unsigned long pfn;
+
+	if (!mce)
+		return NOTIFY_DONE;
+
+	if (mce->usable_addr && (mce->severity == MCE_AO_SEVERITY)) {
+		pfn = mce->addr >> PAGE_SHIFT;
+		memory_failure(pfn, MCE_VECTOR, 0);
+	}
+
+	return NOTIFY_OK;
 }
+static struct notifier_block mce_srao_nb = {
+	.notifier_call	= srao_decode_notifier,
+	.priority	= INT_MAX,
+};
 
 /*
  * Read ADDR and MISC registers.
@@ -672,8 +614,11 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		 */
 		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
 			if (m.status & MCI_STATUS_ADDRV) {
-				mce_ring_add(m.addr >> PAGE_SHIFT);
-				mce_schedule_work();
+				m.severity = severity;
+				m.usable_addr = mce_usable_address(&m);
+
+				if (!mce_gen_pool_add(&m))
+					mce_schedule_work();
 			}
 		}
 
@@ -1143,15 +1088,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 
 		mce_read_aux(&m, i);
 
-		/*
-		 * Action optional error. Queue address for later processing.
-		 * When the ring overflows we just ignore the AO error.
-		 * RED-PEN add some logging mechanism when
-		 * usable_address or mce_add_ring fails.
-		 * RED-PEN don't ignore overflow for mca_cfg.tolerant == 0
-		 */
-		if (severity == MCE_AO_SEVERITY && mce_usable_address(&m))
-			mce_ring_add(m.addr >> PAGE_SHIFT);
+		/* assuming valid severity level != 0 */
+		m.severity = severity;
+		m.usable_addr = mce_usable_address(&m);
 
 		mce_log(&m);
 
@@ -1247,14 +1186,11 @@ int memory_failure(unsigned long pfn, int vector, int flags)
 /*
  * Action optional processing happens here (picking up
  * from the list of faulting pages that do_machine_check()
- * placed into the "ring").
+ * placed into the genpool).
  */
 static void mce_process_work(struct work_struct *dummy)
 {
-	unsigned long pfn;
-
-	while (mce_ring_get(&pfn))
-		memory_failure(pfn, MCE_VECTOR, 0);
+	mce_gen_pool_process();
 }
 
 #ifdef CONFIG_X86_MCE_INTEL
@@ -1678,6 +1614,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
+static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c)
+{
+	switch (c->x86_vendor) {
+	case X86_VENDOR_INTEL:
+		mce_intel_feature_clear(c);
+		break;
+	default:
+		break;
+	}
+}
+
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
 	unsigned long iv = check_interval * HZ;
@@ -1731,13 +1678,36 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 		return;
 	}
 
+	if (mce_gen_pool_init()) {
+		mca_cfg.disabled = true;
+		pr_emerg("Couldn't allocate MCE records pool!\n");
+		return;
+	}
+
 	machine_check_vector = do_machine_check;
 
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(c);
 	__mcheck_cpu_init_timer();
-	INIT_WORK(this_cpu_ptr(&mce_work), mce_process_work);
-	init_irq_work(this_cpu_ptr(&mce_irq_work), &mce_irq_work_cb);
+}
+
+/*
+ * Called for each booted CPU to clear some machine checks opt-ins
+ */
+void mcheck_cpu_clear(struct cpuinfo_x86 *c)
+{
+	if (mca_cfg.disabled)
+		return;
+
+	if (!mce_available(c))
+		return;
+
+	/*
+	 * Possibly to clear general settings generic to x86
+	 * __mcheck_cpu_clear_generic(c);
+	 */
+	__mcheck_cpu_clear_vendor(c);
+
 }
 
 /*
@@ -1850,7 +1820,7 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
 		goto out;
 	}
 
-	next = rcu_dereference_check_mce(mcelog.next);
+	next = mce_log_get_idx_check(mcelog.next);
 
 	/* Only supports full reads right now */
 	err = -EINVAL;
@@ -2056,8 +2026,12 @@ __setup("mce", mcheck_enable);
 int __init mcheck_init(void)
 {
 	mcheck_intel_therm_init();
+	mce_register_decode_chain(&mce_srao_nb);
 	mcheck_vendor_init_severity();
 
+	INIT_WORK(&mce_work, mce_process_work);
+	init_irq_work(&mce_irq_work, mce_irq_work_cb);
+
 	return 0;
 }
 
@@ -2591,5 +2565,20 @@ static int __init mcheck_debugfs_init(void)
 
 	return 0;
 }
-late_initcall(mcheck_debugfs_init);
+#else
+static int __init mcheck_debugfs_init(void) { return -EINVAL; }
 #endif
+
+static int __init mcheck_late_init(void)
+{
+	mcheck_debugfs_init();
+
+	/*
+	 * Flush out everything that has been logged during early boot, now that
+	 * everything has been initialized (workqueues, decoders, ...).
+	 */
+	mce_schedule_work();
+
+	return 0;
+}
+late_initcall(mcheck_late_init);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 844f56c5616d..1e8bb6c94f14 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -146,6 +146,27 @@ void mce_intel_hcpu_update(unsigned long cpu)
 	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
 }
 
+static void cmci_toggle_interrupt_mode(bool on)
+{
+	unsigned long flags, *owned;
+	int bank;
+	u64 val;
+
+	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+	owned = this_cpu_ptr(mce_banks_owned);
+	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
+		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
+
+		if (on)
+			val |= MCI_CTL2_CMCI_EN;
+		else
+			val &= ~MCI_CTL2_CMCI_EN;
+
+		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
+	}
+	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+}
+
 unsigned long cmci_intel_adjust_timer(unsigned long interval)
 {
 	if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
@@ -175,7 +196,7 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 		 */
 		if (!atomic_read(&cmci_storm_on_cpus)) {
 			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
-			cmci_reenable();
+			cmci_toggle_interrupt_mode(true);
 			cmci_recheck();
 		}
 		return CMCI_POLL_INTERVAL;
@@ -186,22 +207,6 @@ unsigned long cmci_intel_adjust_timer(unsigned long interval)
 	}
 }
 
-static void cmci_storm_disable_banks(void)
-{
-	unsigned long flags, *owned;
-	int bank;
-	u64 val;
-
-	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
-	owned = this_cpu_ptr(mce_banks_owned);
-	for_each_set_bit(bank, owned, MAX_NR_BANKS) {
-		rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
-		val &= ~MCI_CTL2_CMCI_EN;
-		wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
-	}
-	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-}
-
 static bool cmci_storm_detect(void)
 {
 	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
@@ -223,7 +228,7 @@ static bool cmci_storm_detect(void)
 	if (cnt <= CMCI_STORM_THRESHOLD)
 		return false;
 
-	cmci_storm_disable_banks();
+	cmci_toggle_interrupt_mode(false);
 	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
 	r = atomic_add_return(1, &cmci_storm_on_cpus);
 	mce_timer_kick(CMCI_STORM_INTERVAL);
@@ -246,7 +251,6 @@ static void intel_threshold_interrupt(void)
 		return;
 
 	machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
-	mce_notify_irq();
 }
 
 /*
@@ -435,7 +439,7 @@ static void intel_init_cmci(void)
 	cmci_recheck();
 }
 
-void intel_init_lmce(void)
+static void intel_init_lmce(void)
 {
 	u64 val;
 
@@ -448,9 +452,26 @@ void intel_init_lmce(void)
 	wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
 }
 
+static void intel_clear_lmce(void)
+{
+	u64 val;
+
+	if (!lmce_supported())
+		return;
+
+	rdmsrl(MSR_IA32_MCG_EXT_CTL, val);
+	val &= ~MCG_EXT_CTL_LMCE_EN;
+	wrmsrl(MSR_IA32_MCG_EXT_CTL, val);
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
 	intel_init_thermal(c);
 	intel_init_cmci();
 	intel_init_lmce();
 }
+
+void mce_intel_feature_clear(struct cpuinfo_x86 *c)
+{
+	intel_clear_lmce();
+}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c27cad726765..d83740ab85b0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -29,6 +29,7 @@
 #include <asm/debugreg.h>
 #include <asm/nmi.h>
 #include <asm/tlbflush.h>
+#include <asm/mce.h>
 
 /*
  * per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -319,6 +320,7 @@ void stop_this_cpu(void *dummy)
 	 */
 	set_cpu_online(smp_processor_id(), false);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 
 	for (;;)
 		halt();
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 15aaa69bbb5e..12c8286206ce 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -30,6 +30,7 @@
 #include <asm/proto.h>
 #include <asm/apic.h>
 #include <asm/nmi.h>
+#include <asm/mce.h>
 #include <asm/trace/irq_vectors.h>
 /*
  * Some notes on x86 processor bugs affecting SMP operation:
@@ -243,6 +244,7 @@ static void native_stop_other_cpus(int wait)
 finish:
 	local_irq_save(flags);
 	disable_local_APIC();
+	mcheck_cpu_clear(this_cpu_ptr(&cpu_info));
 	local_irq_restore(flags);
 }
 