diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2009-10-07 07:20:38 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-10-12 06:24:45 -0400 |
commit | fb2531953fd8855abdcf458459020fd382c5deca (patch) | |
tree | 2967323398aba0369bff02225f0e9d4d00db0c35 | |
parent | d93a8f829fe1d2f3002f2c6ddb553d12db420412 (diff) |
mce, edac: Use an atomic notifier for MCE decoding
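Add an atomic notifier chain which ensures proper locking when conveying
MCE info to EDAC for decoding. A default notifier is registered with
negative priority; a decoder registered by EDAC has higher priority, so it
is called first and stops the chain, thereby overriding the default.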
Note: make sure we register the default decoder only once since
mcheck_init() runs on each CPU.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20091003065752.GA8935@liondog.tnic>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/include/asm/mce.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 29 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.c | 21 |
3 files changed, 34 insertions, 19 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364f..227a72df6441 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -108,6 +108,8 @@ struct mce_log { | |||
108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
110 | 110 | ||
111 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
112 | |||
111 | #ifdef __KERNEL__ | 113 | #ifdef __KERNEL__ |
112 | 114 | ||
113 | #include <linux/percpu.h> | 115 | #include <linux/percpu.h> |
@@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | |||
213 | void intel_init_thermal(struct cpuinfo_x86 *c); | 215 | void intel_init_thermal(struct cpuinfo_x86 *c); |
214 | 216 | ||
215 | void mce_log_therm_throt_event(__u64 status); | 217 | void mce_log_therm_throt_event(__u64 status); |
216 | |||
217 | #endif /* __KERNEL__ */ | 218 | #endif /* __KERNEL__ */ |
218 | #endif /* _ASM_X86_MCE_H */ | 219 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b1598a9436d0..15ba9c972d7a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 85 | static DEFINE_PER_CPU(struct mce, mces_seen); |
86 | static int cpu_missing; | 86 | static int cpu_missing; |
87 | 87 | ||
88 | static void default_decode_mce(struct mce *m) | 88 | /* |
89 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
90 | * MCE errors in a human-readable form. | ||
91 | */ | ||
92 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
93 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
94 | |||
95 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
96 | void *data) | ||
89 | { | 97 | { |
90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 98 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); |
91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 99 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); |
100 | |||
101 | return NOTIFY_STOP; | ||
92 | } | 102 | } |
93 | 103 | ||
94 | /* | 104 | static struct notifier_block mce_dec_nb = { |
95 | * CPU/chipset specific EDAC code can register a callback here to print | 105 | .notifier_call = default_decode_mce, |
96 | * MCE errors in a human-readable form: | 106 | .priority = -1, |
97 | */ | 107 | }; |
98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
100 | 108 | ||
101 | /* MCA banks polled by the period polling timer for corrected events */ | 109 | /* MCA banks polled by the period polling timer for corrected events */ |
102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 110 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
@@ -204,9 +212,9 @@ static void print_mce(struct mce *m) | |||
204 | 212 | ||
205 | /* | 213 | /* |
206 | * Print out human-readable details about the MCE error, | 214 | * Print out human-readable details about the MCE error, |
207 | * (if the CPU has an implementation for that): | 215 | * (if the CPU has an implementation for that) |
208 | */ | 216 | */ |
209 | x86_mce_decode_callback(m); | 217 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
210 | } | 218 | } |
211 | 219 | ||
212 | static void print_mce_head(void) | 220 | static void print_mce_head(void) |
@@ -1420,6 +1428,9 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | |||
1420 | mce_cpu_features(c); | 1428 | mce_cpu_features(c); |
1421 | mce_init_timer(); | 1429 | mce_init_timer(); |
1422 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1430 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1431 | |||
1432 | if (raw_smp_processor_id() == 0) | ||
1433 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1423 | } | 1434 | } |
1424 | 1435 | ||
1425 | /* | 1436 | /* |
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 713ed7d37247..689cc6a6214d 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c | |||
@@ -3,7 +3,6 @@ | |||
3 | 3 | ||
4 | static bool report_gart_errors; | 4 | static bool report_gart_errors; |
5 | static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); | 5 | static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); |
6 | static void (*orig_mce_callback)(struct mce *m); | ||
7 | 6 | ||
8 | void amd_report_gart_errors(bool v) | 7 | void amd_report_gart_errors(bool v) |
9 | { | 8 | { |
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec) | |||
363 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); | 362 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); |
364 | } | 363 | } |
365 | 364 | ||
366 | static void amd_decode_mce(struct mce *m) | 365 | static int amd_decode_mce(struct notifier_block *nb, unsigned long val, |
366 | void *data) | ||
367 | { | 367 | { |
368 | struct mce *m = (struct mce *)data; | ||
368 | struct err_regs regs; | 369 | struct err_regs regs; |
369 | int node, ecc; | 370 | int node, ecc; |
370 | 371 | ||
@@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m) | |||
420 | } | 421 | } |
421 | 422 | ||
422 | amd_decode_err_code(m->status & 0xffff); | 423 | amd_decode_err_code(m->status & 0xffff); |
424 | |||
425 | return NOTIFY_STOP; | ||
423 | } | 426 | } |
424 | 427 | ||
428 | static struct notifier_block amd_mce_dec_nb = { | ||
429 | .notifier_call = amd_decode_mce, | ||
430 | }; | ||
431 | |||
425 | static int __init mce_amd_init(void) | 432 | static int __init mce_amd_init(void) |
426 | { | 433 | { |
427 | /* | 434 | /* |
428 | * We can decode MCEs for Opteron and later CPUs: | 435 | * We can decode MCEs for Opteron and later CPUs: |
429 | */ | 436 | */ |
430 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | 437 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && |
431 | (boot_cpu_data.x86 >= 0xf)) { | 438 | (boot_cpu_data.x86 >= 0xf)) |
432 | /* safe the default decode mce callback */ | 439 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); |
433 | orig_mce_callback = x86_mce_decode_callback; | ||
434 | |||
435 | x86_mce_decode_callback = amd_decode_mce; | ||
436 | } | ||
437 | 440 | ||
438 | return 0; | 441 | return 0; |
439 | } | 442 | } |
@@ -442,7 +445,7 @@ early_initcall(mce_amd_init); | |||
442 | #ifdef MODULE | 445 | #ifdef MODULE |
443 | static void __exit mce_amd_exit(void) | 446 | static void __exit mce_amd_exit(void) |
444 | { | 447 | { |
445 | x86_mce_decode_callback = orig_mce_callback; | 448 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); |
446 | } | 449 | } |
447 | 450 | ||
448 | MODULE_DESCRIPTION("AMD MCE decoder"); | 451 | MODULE_DESCRIPTION("AMD MCE decoder"); |