diff options
author | Vishal Verma <vishal.l.verma@intel.com> | 2017-04-18 14:42:35 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2017-04-27 03:10:39 -0400 |
commit | f8bc0881fe95496ddf3f8a16808d9860db207941 (patch) | |
tree | 72d8b3bd7ca801e389295120fc3bc3ff61d8b8f4 | |
parent | 6966a6579e1bc7b3486bed395e7d6875912a9a9b (diff) |
x86/mce: Make the MCE notifier a blocking one
commit 0dc9c639e6553e39c13b2c0d54c8a1b098cb95e2 upstream.
The NFIT MCE handler callback (for handling media errors on NVDIMMs)
takes a mutex to add the location of a memory error to a list. But since
the notifier call chain for machine checks (x86_mce_decoder_chain) is
atomic, we get a lockdep splat like:
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:620
in_atomic(): 1, irqs_disabled(): 0, pid: 4, name: kworker/0:0
[..]
Call Trace:
dump_stack
___might_sleep
__might_sleep
mutex_lock_nested
? __lock_acquire
nfit_handle_mce
notifier_call_chain
atomic_notifier_call_chain
? atomic_notifier_call_chain
mce_gen_pool_process
Convert the notifier to a blocking one which gets to run only in process
context.
Boris: remove the notifier call in atomic context in print_mce(). For
now, let's print the MCEs on the atomic path so that we can make sure
they go out and get logged at least.
Fixes: 6839a6d96f4e ("nfit: do an ARS scrub on hitting a latent media error")
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: linux-edac <linux-edac@vger.kernel.org>
Cc: x86-ml <x86@kernel.org>
Link: http://lkml.kernel.org/r/20170411224457.24777-1-vishal.l.verma@intel.com
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-genpool.c | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 16 |
3 files changed, 5 insertions, 15 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 93d824ec3120..040af1939460 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c | |||
@@ -85,7 +85,7 @@ void mce_gen_pool_process(void) | |||
85 | head = llist_reverse_order(head); | 85 | head = llist_reverse_order(head); |
86 | llist_for_each_entry_safe(node, tmp, head, llnode) { | 86 | llist_for_each_entry_safe(node, tmp, head, llnode) { |
87 | mce = &node->mce; | 87 | mce = &node->mce; |
88 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); | 88 | blocking_notifier_call_chain(&x86_mce_decoder_chain, 0, mce); |
89 | gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); | 89 | gen_pool_free(mce_evt_pool, (unsigned long)node, sizeof(*node)); |
90 | } | 90 | } |
91 | } | 91 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index cd74a3f00aea..de20902ecf23 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -13,7 +13,7 @@ enum severity_level { | |||
13 | MCE_PANIC_SEVERITY, | 13 | MCE_PANIC_SEVERITY, |
14 | }; | 14 | }; |
15 | 15 | ||
16 | extern struct atomic_notifier_head x86_mce_decoder_chain; | 16 | extern struct blocking_notifier_head x86_mce_decoder_chain; |
17 | 17 | ||
18 | #define ATTR_LEN 16 | 18 | #define ATTR_LEN 16 |
19 | #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ | 19 | #define INITIAL_CHECK_INTERVAL 5 * 60 /* 5 minutes */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index a7fdf453d895..22cda29d654e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -120,7 +120,7 @@ static void (*quirk_no_way_out)(int bank, struct mce *m, struct pt_regs *regs); | |||
120 | * CPU/chipset specific EDAC code can register a notifier call here to print | 120 | * CPU/chipset specific EDAC code can register a notifier call here to print |
121 | * MCE errors in a human-readable form. | 121 | * MCE errors in a human-readable form. |
122 | */ | 122 | */ |
123 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | 123 | BLOCKING_NOTIFIER_HEAD(x86_mce_decoder_chain); |
124 | 124 | ||
125 | /* Do initial initialization of a struct mce */ | 125 | /* Do initial initialization of a struct mce */ |
126 | void mce_setup(struct mce *m) | 126 | void mce_setup(struct mce *m) |
@@ -213,13 +213,13 @@ void mce_register_decode_chain(struct notifier_block *nb) | |||
213 | if (nb != &mce_srao_nb && nb->priority == INT_MAX) | 213 | if (nb != &mce_srao_nb && nb->priority == INT_MAX) |
214 | nb->priority -= 1; | 214 | nb->priority -= 1; |
215 | 215 | ||
216 | atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); | 216 | blocking_notifier_chain_register(&x86_mce_decoder_chain, nb); |
217 | } | 217 | } |
218 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); | 218 | EXPORT_SYMBOL_GPL(mce_register_decode_chain); |
219 | 219 | ||
220 | void mce_unregister_decode_chain(struct notifier_block *nb) | 220 | void mce_unregister_decode_chain(struct notifier_block *nb) |
221 | { | 221 | { |
222 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, nb); | 222 | blocking_notifier_chain_unregister(&x86_mce_decoder_chain, nb); |
223 | } | 223 | } |
224 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); | 224 | EXPORT_SYMBOL_GPL(mce_unregister_decode_chain); |
225 | 225 | ||
@@ -272,8 +272,6 @@ struct mca_msr_regs msr_ops = { | |||
272 | 272 | ||
273 | static void print_mce(struct mce *m) | 273 | static void print_mce(struct mce *m) |
274 | { | 274 | { |
275 | int ret = 0; | ||
276 | |||
277 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", | 275 | pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n", |
278 | m->extcpu, m->mcgstatus, m->bank, m->status); | 276 | m->extcpu, m->mcgstatus, m->bank, m->status); |
279 | 277 | ||
@@ -309,14 +307,6 @@ static void print_mce(struct mce *m) | |||
309 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, | 307 | m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid, |
310 | cpu_data(m->extcpu).microcode); | 308 | cpu_data(m->extcpu).microcode); |
311 | 309 | ||
312 | /* | ||
313 | * Print out human-readable details about the MCE error, | ||
314 | * (if the CPU has an implementation for that) | ||
315 | */ | ||
316 | ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); | ||
317 | if (ret == NOTIFY_STOP) | ||
318 | return; | ||
319 | |||
320 | pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); | 310 | pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n"); |
321 | } | 311 | } |
322 | 312 | ||