diff options
author | Sebastian Andrzej Siewior <bigeasy@linutronix.de> | 2016-11-10 12:44:45 -0500 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2016-11-16 03:34:18 -0500 |
commit | 39f152ffbfedb42b57b6e0c896eeae51dbe83b7a (patch) | |
tree | 2a39554918703a15fc6869afbfd77a83adb5236c /arch/x86/kernel/cpu/mcheck/mce.c | |
parent | 4d7b02d58c4000597d08930193d7aed81fba6b7c (diff) |
x86/mcheck: Reorganize the hotplug callbacks
Initially I wanted to remove mcheck_cpu_init() from identify_cpu() and let it
become an independent early hotplug callback. The main problem here was that
the init on the boot CPU may happen too late
(device_initcall_sync(mcheck_init_device)) and nobody wanted to risk receiving
an MCE event at boot time leading to a shutdown (if the MCE feature is not yet
enabled).
Here is attempt two: the timing stays as-is but the ordering of the functions
is changed:
- mcheck_cpu_init() (which is run from identify_cpu()) will set up the timer
struct but won't fire the timer. This is moved to CPU_ONLINE since its
cleanup part is in CPU_DOWN_PREPARE. So if it is okay to stop the timer early
in the shutdown phase, it should be okay to start it late in the bring up phase.
- CPU_DOWN_PREPARE disables the MCE feature flags for !INTEL CPUs in
mce_disable_cpu(). If a failure occurs it would be re-enabled on all vendor
CPUs (including Intel where it was not disabled during shutdown). To keep this
working I am moving it to CPU_ONLINE. smp_call_function_single() is dropped
because the notifier nowadays runs on the target CPU.
- CPU_ONLINE is invoking mce_device_create() + mce_threshold_create_device()
but its cleanup part is in CPU_DEAD (mce_threshold_remove_device() and
mce_device_remove()). In order to keep this symmetrical I am moving the clean
up from CPU_DEAD to CPU_DOWN_PREPARE.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Acked-by: Borislav Petkov <bp@alien8.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: rt@linutronix.de
Cc: linux-edac@vger.kernel.org
Link: http://lkml.kernel.org/r/20161110174447.11848-6-bigeasy@linutronix.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck/mce.c')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 36 |
1 files changed, 20 insertions, 16 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 052b5e05c3c4..a524faa51400 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -1745,6 +1745,14 @@ static void mce_start_timer(unsigned int cpu, struct timer_list *t) | |||
1745 | add_timer_on(t, cpu); | 1745 | add_timer_on(t, cpu); |
1746 | } | 1746 | } |
1747 | 1747 | ||
1748 | static void __mcheck_cpu_setup_timer(void) | ||
1749 | { | ||
1750 | struct timer_list *t = this_cpu_ptr(&mce_timer); | ||
1751 | unsigned int cpu = smp_processor_id(); | ||
1752 | |||
1753 | setup_pinned_timer(t, mce_timer_fn, cpu); | ||
1754 | } | ||
1755 | |||
1748 | static void __mcheck_cpu_init_timer(void) | 1756 | static void __mcheck_cpu_init_timer(void) |
1749 | { | 1757 | { |
1750 | struct timer_list *t = this_cpu_ptr(&mce_timer); | 1758 | struct timer_list *t = this_cpu_ptr(&mce_timer); |
@@ -1796,7 +1804,7 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c) | |||
1796 | __mcheck_cpu_init_generic(); | 1804 | __mcheck_cpu_init_generic(); |
1797 | __mcheck_cpu_init_vendor(c); | 1805 | __mcheck_cpu_init_vendor(c); |
1798 | __mcheck_cpu_init_clear_banks(); | 1806 | __mcheck_cpu_init_clear_banks(); |
1799 | __mcheck_cpu_init_timer(); | 1807 | __mcheck_cpu_setup_timer(); |
1800 | } | 1808 | } |
1801 | 1809 | ||
1802 | /* | 1810 | /* |
@@ -2470,28 +2478,25 @@ static void mce_device_remove(unsigned int cpu) | |||
2470 | } | 2478 | } |
2471 | 2479 | ||
2472 | /* Make sure there are no machine checks on offlined CPUs. */ | 2480 | /* Make sure there are no machine checks on offlined CPUs. */ |
2473 | static void mce_disable_cpu(void *h) | 2481 | static void mce_disable_cpu(void) |
2474 | { | 2482 | { |
2475 | unsigned long action = *(unsigned long *)h; | ||
2476 | |||
2477 | if (!mce_available(raw_cpu_ptr(&cpu_info))) | 2483 | if (!mce_available(raw_cpu_ptr(&cpu_info))) |
2478 | return; | 2484 | return; |
2479 | 2485 | ||
2480 | if (!(action & CPU_TASKS_FROZEN)) | 2486 | if (!cpuhp_tasks_frozen) |
2481 | cmci_clear(); | 2487 | cmci_clear(); |
2482 | 2488 | ||
2483 | vendor_disable_error_reporting(); | 2489 | vendor_disable_error_reporting(); |
2484 | } | 2490 | } |
2485 | 2491 | ||
2486 | static void mce_reenable_cpu(void *h) | 2492 | static void mce_reenable_cpu(void) |
2487 | { | 2493 | { |
2488 | unsigned long action = *(unsigned long *)h; | ||
2489 | int i; | 2494 | int i; |
2490 | 2495 | ||
2491 | if (!mce_available(raw_cpu_ptr(&cpu_info))) | 2496 | if (!mce_available(raw_cpu_ptr(&cpu_info))) |
2492 | return; | 2497 | return; |
2493 | 2498 | ||
2494 | if (!(action & CPU_TASKS_FROZEN)) | 2499 | if (!cpuhp_tasks_frozen) |
2495 | cmci_reenable(); | 2500 | cmci_reenable(); |
2496 | for (i = 0; i < mca_cfg.banks; i++) { | 2501 | for (i = 0; i < mca_cfg.banks; i++) { |
2497 | struct mce_bank *b = &mce_banks[i]; | 2502 | struct mce_bank *b = &mce_banks[i]; |
@@ -2510,6 +2515,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2510 | 2515 | ||
2511 | switch (action & ~CPU_TASKS_FROZEN) { | 2516 | switch (action & ~CPU_TASKS_FROZEN) { |
2512 | case CPU_ONLINE: | 2517 | case CPU_ONLINE: |
2518 | case CPU_DOWN_FAILED: | ||
2513 | 2519 | ||
2514 | mce_device_create(cpu); | 2520 | mce_device_create(cpu); |
2515 | 2521 | ||
@@ -2517,11 +2523,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2517 | mce_device_remove(cpu); | 2523 | mce_device_remove(cpu); |
2518 | return NOTIFY_BAD; | 2524 | return NOTIFY_BAD; |
2519 | } | 2525 | } |
2520 | 2526 | mce_reenable_cpu(); | |
2527 | mce_start_timer(cpu, t); | ||
2521 | break; | 2528 | break; |
2522 | case CPU_DEAD: | 2529 | case CPU_DEAD: |
2523 | mce_threshold_remove_device(cpu); | ||
2524 | mce_device_remove(cpu); | ||
2525 | mce_intel_hcpu_update(cpu); | 2530 | mce_intel_hcpu_update(cpu); |
2526 | 2531 | ||
2527 | /* intentionally ignoring frozen here */ | 2532 | /* intentionally ignoring frozen here */ |
@@ -2529,12 +2534,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
2529 | cmci_rediscover(); | 2534 | cmci_rediscover(); |
2530 | break; | 2535 | break; |
2531 | case CPU_DOWN_PREPARE: | 2536 | case CPU_DOWN_PREPARE: |
2532 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | 2537 | mce_disable_cpu(); |
2533 | del_timer_sync(t); | 2538 | del_timer_sync(t); |
2534 | break; | 2539 | |
2535 | case CPU_DOWN_FAILED: | 2540 | mce_threshold_remove_device(cpu); |
2536 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 2541 | mce_device_remove(cpu); |
2537 | mce_start_timer(cpu, t); | ||
2538 | break; | 2542 | break; |
2539 | } | 2543 | } |
2540 | 2544 | ||