summaryrefslogtreecommitdiffstats
path: root/kernel/cpu.c
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2017-07-04 16:20:23 -0400
committerThomas Gleixner <tglx@linutronix.de>2017-07-06 04:55:10 -0400
commit9cd4f1a4e7a858849e889a081a99adff83e08e4c (patch)
treeed7ff2e6ba5d575798febc2ed14124e52de9ef40 /kernel/cpu.c
parent4422d80ed7d4bdb2d6e9fb890c66c3d9250ba694 (diff)
smp/hotplug: Move unparking of percpu threads to the control CPU
Vikram reported the following backtrace: BUG: scheduling while atomic: swapper/7/0/0x00000002 CPU: 7 PID: 0 Comm: swapper/7 Not tainted 4.9.32-perf+ #680 schedule schedule_hrtimeout_range_clock schedule_hrtimeout wait_task_inactive __kthread_bind_mask __kthread_bind __kthread_unpark kthread_unpark cpuhp_online_idle cpu_startup_entry secondary_start_kernel He analyzed correctly that a parked cpu hotplug thread of an offlined CPU was still on the runqueue when the CPU came back online and tried to unpark it. This causes the thread which invoked kthread_unpark() to call wait_task_inactive() and subsequently schedule() with preemption disabled. His proposed workaround was to "make sure" that a parked thread has scheduled out when the CPU goes offline, so the situation cannot happen. But that's still wrong because the root cause is not the fact that the percpu thread is still on the runqueue and neither that preemption is disabled, which could be simply solved by enabling preemption before calling kthread_unpark(). The real issue is that the calling thread is the idle task of the upcoming CPU, which is not supposed to call anything which might sleep. The moron, who wrote that code, missed completely that kthread_unpark() might end up in schedule(). The solution is simpler than expected. The thread which controls the hotplug operation is waiting for the CPU to call complete() on the hotplug state completion. So the idle task of the upcoming CPU can set its state to CPUHP_AP_ONLINE_IDLE and invoke complete(). This in turn wakes the control task on a different CPU, which then can safely do the unpark and kick the now unparked hotplug thread of the upcoming CPU to complete the bringup to the final target state. Control CPU AP bringup_cpu(); __cpu_up() ------------> bringup_ap(); bringup_wait_for_ap() wait_for_completion(); cpuhp_online_idle(); <------------ complete(); unpark(AP->stopper); unpark(AP->hotplugthread); while(1) do_idle(); kick(AP->hotplugthread); wait_for_completion(); hotplug_thread() run_online_callbacks(); complete(); Fixes: 8df3e07e7f21 ("cpu/hotplug: Let upcoming cpu bring itself fully up") Reported-by: Vikram Mulukutla <markivx@codeaurora.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Sebastian Sewior <bigeasy@linutronix.de> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Tejun Heo <tj@kernel.org> Cc: Andrew Morton <akpm@linux-foundation.org> Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1707042218020.2131@nanos Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r--kernel/cpu.c37
1 files changed, 19 insertions, 18 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index b03a32595cfe..ab860453841d 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -271,11 +271,25 @@ void cpu_hotplug_enable(void)
271EXPORT_SYMBOL_GPL(cpu_hotplug_enable); 271EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
272#endif /* CONFIG_HOTPLUG_CPU */ 272#endif /* CONFIG_HOTPLUG_CPU */
273 273
274static void __cpuhp_kick_ap_work(struct cpuhp_cpu_state *st);
275
274static int bringup_wait_for_ap(unsigned int cpu) 276static int bringup_wait_for_ap(unsigned int cpu)
275{ 277{
276 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); 278 struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
277 279
280 /* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
278 wait_for_completion(&st->done); 281 wait_for_completion(&st->done);
282 BUG_ON(!cpu_online(cpu));
283
284 /* Unpark the stopper thread and the hotplug thread of the target cpu */
285 stop_machine_unpark(cpu);
286 kthread_unpark(st->thread);
287
288 /* Should we go further up ? */
289 if (st->target > CPUHP_AP_ONLINE_IDLE) {
290 __cpuhp_kick_ap_work(st);
291 wait_for_completion(&st->done);
292 }
279 return st->result; 293 return st->result;
280} 294}
281 295
@@ -296,9 +310,7 @@ static int bringup_cpu(unsigned int cpu)
296 irq_unlock_sparse(); 310 irq_unlock_sparse();
297 if (ret) 311 if (ret)
298 return ret; 312 return ret;
299 ret = bringup_wait_for_ap(cpu); 313 return bringup_wait_for_ap(cpu);
300 BUG_ON(!cpu_online(cpu));
301 return ret;
302} 314}
303 315
304/* 316/*
@@ -767,31 +779,20 @@ void notify_cpu_starting(unsigned int cpu)
767} 779}
768 780
769/* 781/*
770 * Called from the idle task. We need to set active here, so we can kick off 782 * Called from the idle task. Wake up the controlling task which brings the
771 * the stopper thread and unpark the smpboot threads. If the target state is 783 * stopper and the hotplug thread of the upcoming CPU up and then delegates
772 * beyond CPUHP_AP_ONLINE_IDLE we kick cpuhp thread and let it bring up the 784 * the rest of the online bringup to the hotplug thread.
773 * cpu further.
774 */ 785 */
775void cpuhp_online_idle(enum cpuhp_state state) 786void cpuhp_online_idle(enum cpuhp_state state)
776{ 787{
777 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state); 788 struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
778 unsigned int cpu = smp_processor_id();
779 789
780 /* Happens for the boot cpu */ 790 /* Happens for the boot cpu */
781 if (state != CPUHP_AP_ONLINE_IDLE) 791 if (state != CPUHP_AP_ONLINE_IDLE)
782 return; 792 return;
783 793
784 st->state = CPUHP_AP_ONLINE_IDLE; 794 st->state = CPUHP_AP_ONLINE_IDLE;
785 795 complete(&st->done);
786 /* Unpark the stopper thread and the hotplug thread of this cpu */
787 stop_machine_unpark(cpu);
788 kthread_unpark(st->thread);
789
790 /* Should we go further up ? */
791 if (st->target > CPUHP_AP_ONLINE_IDLE)
792 __cpuhp_kick_ap_work(st);
793 else
794 complete(&st->done);
795} 796}
796 797
797/* Requires cpu_add_remove_lock to be held */ 798/* Requires cpu_add_remove_lock to be held */