summaryrefslogtreecommitdiffstats
path: root/kernel/cpu.c
diff options
context:
space:
mode:
authorSebastian Andrzej Siewior <bigeasy@linutronix.de>2016-04-08 08:40:15 -0400
committerThomas Gleixner <tglx@linutronix.de>2016-04-22 03:49:49 -0400
commit3b9d6da67e11ca8f78fde887918983523a36b0fa (patch)
tree00f22715eb1c6b773ac5166e61a545874a0d39f0 /kernel/cpu.c
parentc3b46c73264b03000d1e18b22f5caf63332547c9 (diff)
cpu/hotplug: Fix rollback during error-out in __cpu_disable()
The recent introduction of the hotplug thread, which invokes the callbacks on the plugged cpu, caused the following regression: If takedown_cpu() fails, then we run into several issues: 1) The rollback of the target cpu states is not invoked. That leaves the smp threads and the hotplug thread in disabled state. 2) notify_online() is executed due to a missing skip_onerr flag. That causes both CPU_DOWN_FAILED and CPU_ONLINE notifications to be invoked, which confuses quite a few notifiers. 3) The CPU_DOWN_FAILED notification is not invoked on the target CPU. That's not an issue per se, but it is inconsistent and in consequence blocks the patches which rely on these states being invoked on the target CPU and not on the controlling cpu. It also does not preserve the strict call order on rollback, which is problematic for the ongoing state machine conversion as well. To fix this we add a rollback flag to the remote callback machinery and invoke the rollback, including the CPU_DOWN_FAILED notification, on the remote cpu. Further, mark the notify online state with 'skip_onerr' so we don't get a double invocation. This workaround will go away once we have moved the unplug invocation to the target cpu itself. [ tglx: Massaged changelog and moved the CPU_DOWN_FAILED notification to the target cpu ] Fixes: 4cb28ced23c4 ("cpu/hotplug: Create hotplug threads") Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: linux-s390@vger.kernel.org Cc: rt@linutronix.de Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Anna-Maria Gleixner <anna-maria@linutronix.de> Link: http://lkml.kernel.org/r/20160408124015.GA21960@linutronix.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/cpu.c')
-rw-r--r--kernel/cpu.c33
1 files changed, 26 insertions, 7 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ea42e8da861..3e3f6e49eabb 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -36,6 +36,7 @@
36 * @target: The target state 36 * @target: The target state
37 * @thread: Pointer to the hotplug thread 37 * @thread: Pointer to the hotplug thread
38 * @should_run: Thread should execute 38 * @should_run: Thread should execute
39 * @rollback: Perform a rollback
39 * @cb_stat: The state for a single callback (install/uninstall) 40 * @cb_stat: The state for a single callback (install/uninstall)
40 * @cb: Single callback function (install/uninstall) 41 * @cb: Single callback function (install/uninstall)
41 * @result: Result of the operation 42 * @result: Result of the operation
@@ -47,6 +48,7 @@ struct cpuhp_cpu_state {
47#ifdef CONFIG_SMP 48#ifdef CONFIG_SMP
48 struct task_struct *thread; 49 struct task_struct *thread;
49 bool should_run; 50 bool should_run;
51 bool rollback;
50 enum cpuhp_state cb_state; 52 enum cpuhp_state cb_state;
51 int (*cb)(unsigned int cpu); 53 int (*cb)(unsigned int cpu);
52 int result; 54 int result;
@@ -301,6 +303,11 @@ static int cpu_notify(unsigned long val, unsigned int cpu)
301 return __cpu_notify(val, cpu, -1, NULL); 303 return __cpu_notify(val, cpu, -1, NULL);
302} 304}
303 305
306static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
307{
308 BUG_ON(cpu_notify(val, cpu));
309}
310
304/* Notifier wrappers for transitioning to state machine */ 311/* Notifier wrappers for transitioning to state machine */
305static int notify_prepare(unsigned int cpu) 312static int notify_prepare(unsigned int cpu)
306{ 313{
@@ -477,6 +484,16 @@ static void cpuhp_thread_fun(unsigned int cpu)
477 } else { 484 } else {
478 ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb); 485 ret = cpuhp_invoke_callback(cpu, st->cb_state, st->cb);
479 } 486 }
487 } else if (st->rollback) {
488 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
489
490 undo_cpu_down(cpu, st, cpuhp_ap_states);
491 /*
492 * This is a momentary workaround to keep the notifier users
493 * happy. Will go away once we got rid of the notifiers.
494 */
495 cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
496 st->rollback = false;
480 } else { 497 } else {
481 /* Cannot happen .... */ 498 /* Cannot happen .... */
482 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE); 499 BUG_ON(st->state < CPUHP_AP_ONLINE_IDLE);
@@ -636,11 +653,6 @@ static inline void check_for_tasks(int dead_cpu)
636 read_unlock(&tasklist_lock); 653 read_unlock(&tasklist_lock);
637} 654}
638 655
639static void cpu_notify_nofail(unsigned long val, unsigned int cpu)
640{
641 BUG_ON(cpu_notify(val, cpu));
642}
643
644static int notify_down_prepare(unsigned int cpu) 656static int notify_down_prepare(unsigned int cpu)
645{ 657{
646 int err, nr_calls = 0; 658 int err, nr_calls = 0;
@@ -721,9 +733,10 @@ static int takedown_cpu(unsigned int cpu)
721 */ 733 */
722 err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu)); 734 err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
723 if (err) { 735 if (err) {
724 /* CPU didn't die: tell everyone. Can't complain. */ 736 /* CPU refused to die */
725 cpu_notify_nofail(CPU_DOWN_FAILED, cpu);
726 irq_unlock_sparse(); 737 irq_unlock_sparse();
738 /* Unpark the hotplug thread so we can rollback there */
739 kthread_unpark(per_cpu_ptr(&cpuhp_state, cpu)->thread);
727 return err; 740 return err;
728 } 741 }
729 BUG_ON(cpu_online(cpu)); 742 BUG_ON(cpu_online(cpu));
@@ -832,6 +845,11 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
832 * to do the further cleanups. 845 * to do the further cleanups.
833 */ 846 */
834 ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target); 847 ret = cpuhp_down_callbacks(cpu, st, cpuhp_bp_states, target);
848 if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
849 st->target = prev_state;
850 st->rollback = true;
851 cpuhp_kick_ap_work(cpu);
852 }
835 853
836 hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE; 854 hasdied = prev_state != st->state && st->state == CPUHP_OFFLINE;
837out: 855out:
@@ -1249,6 +1267,7 @@ static struct cpuhp_step cpuhp_ap_states[] = {
1249 .name = "notify:online", 1267 .name = "notify:online",
1250 .startup = notify_online, 1268 .startup = notify_online,
1251 .teardown = notify_down_prepare, 1269 .teardown = notify_down_prepare,
1270 .skip_onerr = true,
1252 }, 1271 },
1253#endif 1272#endif
1254 /* 1273 /*