author     Vitaly Kuznetsov <vkuznets@redhat.com>    2017-08-03 06:58:18 -0400
committer  Ingo Molnar <mingo@kernel.org>            2017-08-10 11:02:47 -0400
commit     10e66760fa8ee11f254a69433fc132d04758a5fc
tree       5feb4401244f2371d3f46a9ac752a211da1f6455
parent     e93c17301ac55321fc18e0f8316e924e58a83c8c
x86/smpboot: Unbreak CPU0 hotplug
Onlining CPU0 after a preceding offlining hangs. The trace shows that CPU0
is stuck in check_tsc_sync_target(), waiting for the source CPU to run
check_tsc_sync_source(), but this never happens. The source CPU, in turn,
is stuck in synchronize_sched(), which is called from
native_cpu_up() -> do_boot_cpu() -> unregister_nmi_handler().
So this is a classic ABBA deadlock, caused by the use of synchronize_sched()
in unregister_nmi_handler(): each CPU waits for an event that only the other
CPU can produce.
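For illustration, a minimal user-space sketch of the same deadlock shape
(plain C11 threads and atomics, not kernel code; the flag names are
hypothetical stand-ins for the kernel synchronization points):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int source_ready;  /* stands in for check_tsc_sync_source() running   */
static atomic_int target_passed; /* stands in for CPU0 passing a quiescent state    */

static void *target_cpu(void *arg) /* CPU0 spinning in check_tsc_sync_target()      */
{
	while (!atomic_load(&source_ready))
		;                  /* spins forever: the source never arrives        */
	atomic_store(&target_passed, 1);
	return NULL;
}

static void *source_cpu(void *arg) /* boot CPU inside unregister_nmi_handler()      */
{
	while (!atomic_load(&target_passed))
		;                  /* synchronize_sched() analogue: waits on CPU0    */
	atomic_store(&source_ready, 1); /* would run check_tsc_sync_source() here    */
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, target_cpu, NULL);
	pthread_create(&b, NULL, source_cpu, NULL);
	pthread_join(a, NULL);     /* never returns: the classic ABBA hang           */
	pthread_join(b, NULL);
	puts("unreachable");
	return 0;
}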
Fix the bug by moving the unregister_nmi_handler() call from do_boot_cpu()
to native_cpu_up(), after CPU onlining is done.
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170803105818.9934-1-vkuznets@redhat.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--   arch/x86/kernel/smpboot.c   30
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index b474c8de7fba..54b9e89d4d6b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -971,7 +971,8 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
  * Returns zero if CPU booted OK, else error code from
  * ->wakeup_secondary_cpu.
  */
-static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
+static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
+		       int *cpu0_nmi_registered)
 {
 	volatile u32 *trampoline_status =
 		(volatile u32 *) __va(real_mode_header->trampoline_status);
@@ -979,7 +980,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long start_ip = real_mode_header->trampoline_start;
 
 	unsigned long boot_error = 0;
-	int cpu0_nmi_registered = 0;
 	unsigned long timeout;
 
 	idle->thread.sp = (unsigned long)task_pt_regs(idle);
@@ -1035,7 +1035,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
 	else
 		boot_error = wakeup_cpu_via_init_nmi(cpu, start_ip, apicid,
-						     &cpu0_nmi_registered);
+						     cpu0_nmi_registered);
 
 	if (!boot_error) {
 		/*
@@ -1080,12 +1080,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 		 */
 		smpboot_restore_warm_reset_vector();
 	}
-	/*
-	 * Clean up the nmi handler. Do this after the callin and callout sync
-	 * to avoid impact of possible long unregister time.
-	 */
-	if (cpu0_nmi_registered)
-		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
 
 	return boot_error;
 }
@@ -1093,8 +1087,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int apicid = apic->cpu_present_to_apicid(cpu);
+	int cpu0_nmi_registered = 0;
 	unsigned long flags;
-	int err;
+	int err, ret = 0;
 
 	WARN_ON(irqs_disabled());
 
@@ -1131,10 +1126,11 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 
 	common_cpu_up(cpu, tidle);
 
-	err = do_boot_cpu(apicid, cpu, tidle);
+	err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
 	if (err) {
 		pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
-		return -EIO;
+		ret = -EIO;
+		goto unreg_nmi;
 	}
 
 	/*
@@ -1150,7 +1146,15 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
 		touch_nmi_watchdog();
 	}
 
-	return 0;
+unreg_nmi:
+	/*
+	 * Clean up the nmi handler. Do this after the callin and callout sync
+	 * to avoid impact of possible long unregister time.
+	 */
+	if (cpu0_nmi_registered)
+		unregister_nmi_handler(NMI_LOCAL, "wake_cpu0");
+
+	return ret;
 }
 
 /**
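The shape of the fix is a common kernel idiom: the callee reports whether it
acquired a resource, and the caller releases it on a single exit path once
all synchronization has completed. A stand-alone sketch of this
caller-owned-cleanup pattern (hypothetical names, not the kernel code):

#include <stdio.h>

/* Hypothetical stand-ins for register/unregister_nmi_handler(). */
static int  register_handler(void)   { puts("handler registered");   return 0; }
static void unregister_handler(void) { puts("handler unregistered"); }

/* The callee reports whether it registered; it no longer cleans up itself. */
static int do_work(int *registered)
{
	if (register_handler() == 0)
		*registered = 1;
	/* ... work that may still fail after registration ... */
	return 0;
}

int bring_up(void)
{
	int registered = 0;
	int err, ret = 0;

	err = do_work(&registered);
	if (err) {
		ret = -1;
		goto unreg;        /* mirrors the goto unreg_nmi path above */
	}
	/* ... wait for the new CPU to come fully online ... */
unreg:
	/*
	 * Cleanup runs last, after all synchronization is done, so the
	 * potentially slow unregister can no longer block the online path.
	 */
	if (registered)
		unregister_handler();
	return ret;
}

int main(void)
{
	return bring_up();
}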