aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/mcheck/mce.c
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2016-11-10 12:44:45 -0500
committerThomas Gleixner <tglx@linutronix.de>2016-11-16 03:34:18 -0500
commit39f152ffbfedb42b57b6e0c896eeae51dbe83b7a (patch)
tree2a39554918703a15fc6869afbfd77a83adb5236c /arch/x86/kernel/cpu/mcheck/mce.c
parent4d7b02d58c4000597d08930193d7aed81fba6b7c (diff)
x86/mcheck: Reorganize the hotplug callbacks
Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let it become an independent early hotplug callback. The main problem here was that the init on the boot CPU may happen too late (device_initcall_sync(mcheck_init_device)) and nobody wanted to risk receiving an MCE event at boot time leading to a shutdown (if the MCE feature is not yet enabled). Here is attempt two: the timing stays as-is but the ordering of the functions is changed: - mcheck_cpu_init() (which is run from identify_cpu()) will set up the timer struct but won't fire the timer. This is moved to CPU_ONLINE since its cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the timer early in the shutdown phase, it should be okay to start it late in the bring up phase. - CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in mce_disable_cpu(). If a failure occurs it would be re-enabled on all vendor CPUs (including Intel where it was not disabled during shutdown). To keep this working I am moving it to CPU_ONLINE. smp_call_function_single() is dropped because the notifier runs nowadays on the target CPU. - CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device() but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and mce_device_remove()). In order to keep this symmetrical I am moving the clean up from CPU_DEAD to CPU_DOWN_PREPARE. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Acked-by: Borislav Petkov <bp@alien8.de> Cc: Tony Luck <tony.luck@intel.com> Cc: rt@linutronix.de Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/20161110174447.11848-6-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c36
1 files changed, 20 insertions, 16 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 052b5e05c3c4..a524faa51400 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1745,6 +1745,14 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1745 add_timer_on(t, cpu); 1745 add_timer_on(t, cpu);
1746} 1746}
1747 1747
1748static void __mcheck_cpu_setup_timer(void)
1749{
1750 struct timer_list *t = this_cpu_ptr(&mce_timer);
1751 unsigned int cpu = smp_processor_id();
1752
1753 setup_pinned_timer(t, mce_timer_fn, cpu);
1754}
1755
1748static void __mcheck_cpu_init_timer(void) 1756static void __mcheck_cpu_init_timer(void)
1749{ 1757{
1750 struct timer_list *t = this_cpu_ptr(&mce_timer); 1758 struct timer_list *t = this_cpu_ptr(&mce_timer);
@@ -1796,7 +1804,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
1796 __mcheck_cpu_init_generic(); 1804 __mcheck_cpu_init_generic();
1797 __mcheck_cpu_init_vendor(c); 1805 __mcheck_cpu_init_vendor(c);
1798 __mcheck_cpu_init_clear_banks(); 1806 __mcheck_cpu_init_clear_banks();
1799 __mcheck_cpu_init_timer(); 1807 __mcheck_cpu_setup_timer();
1800} 1808}
1801 1809
1802/* 1810/*
@@ -2470,28 +2478,25 @@ static void mce_device_remove(unsigned int cpu)
2470} 2478}
2471 2479
2472/* Make sure there are no machine checks on offlined CPUs. */ 2480/* Make sure there are no machine checks on offlined CPUs. */
2473static void mce_disable_cpu(void *h) 2481static void mce_disable_cpu(void)
2474{ 2482{
2475 unsigned long action = *(unsigned long *)h;
2476
2477 if (!mce_available(raw_cpu_ptr(&cpu_info))) 2483 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2478 return; 2484 return;
2479 2485
2480 if (!(action & CPU_TASKS_FROZEN)) 2486 if (!cpuhp_tasks_frozen)
2481 cmci_clear(); 2487 cmci_clear();
2482 2488
2483 vendor_disable_error_reporting(); 2489 vendor_disable_error_reporting();
2484} 2490}
2485 2491
2486static void mce_reenable_cpu(void *h) 2492static void mce_reenable_cpu(void)
2487{ 2493{
2488 unsigned long action = *(unsigned long *)h;
2489 int i; 2494 int i;
2490 2495
2491 if (!mce_available(raw_cpu_ptr(&cpu_info))) 2496 if (!mce_available(raw_cpu_ptr(&cpu_info)))
2492 return; 2497 return;
2493 2498
2494 if (!(action & CPU_TASKS_FROZEN)) 2499 if (!cpuhp_tasks_frozen)
2495 cmci_reenable(); 2500 cmci_reenable();
2496 for (i = 0; i < mca_cfg.banks; i++) { 2501 for (i = 0; i < mca_cfg.banks; i++) {
2497 struct mce_bank *b = &mce_banks[i]; 2502 struct mce_bank *b = &mce_banks[i];
@@ -2510,6 +2515,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2510 2515
2511 switch (action & ~CPU_TASKS_FROZEN) { 2516 switch (action & ~CPU_TASKS_FROZEN) {
2512 case CPU_ONLINE: 2517 case CPU_ONLINE:
2518 case CPU_DOWN_FAILED:
2513 2519
2514 mce_device_create(cpu); 2520 mce_device_create(cpu);
2515 2521
@@ -2517,11 +2523,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2517 mce_device_remove(cpu); 2523 mce_device_remove(cpu);
2518 return NOTIFY_BAD; 2524 return NOTIFY_BAD;
2519 } 2525 }
2520 2526 mce_reenable_cpu();
2527 mce_start_timer(cpu, t);
2521 break; 2528 break;
2522 case CPU_DEAD: 2529 case CPU_DEAD:
2523 mce_threshold_remove_device(cpu);
2524 mce_device_remove(cpu);
2525 mce_intel_hcpu_update(cpu); 2530 mce_intel_hcpu_update(cpu);
2526 2531
2527 /* intentionally ignoring frozen here */ 2532 /* intentionally ignoring frozen here */
@@ -2529,12 +2534,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2529 cmci_rediscover(); 2534 cmci_rediscover();
2530 break; 2535 break;
2531 case CPU_DOWN_PREPARE: 2536 case CPU_DOWN_PREPARE:
2532 smp_call_function_single(cpu, mce_disable_cpu, &action, 1); 2537 mce_disable_cpu();
2533 del_timer_sync(t); 2538 del_timer_sync(t);
2534 break; 2539
2535 case CPU_DOWN_FAILED: 2540 mce_threshold_remove_device(cpu);
2536 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 2541 mce_device_remove(cpu);
2537 mce_start_timer(cpu, t);
2538 break; 2542 break;
2539 } 2543 }
2540 2544