2 files changed, 168 insertions, 0 deletions
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 4260e8594bd7..4744ef915acd 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -95,6 +95,8 @@ enum {
                                        * Called on the new cpu, just before
                                        * enabling interrupts. Must not sleep,
                                        * must not fail */
+#define CPU_BROKEN              0x000C /* CPU (unsigned)v did not die properly,
+                                        * perhaps due to preemption. */
 /* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
 * operation in progress
@@ -271,4 +273,14 @@ void arch_cpu_idle_enter(void);
 void arch_cpu_idle_exit(void);
 void arch_cpu_idle_dead(void);
+DECLARE_PER_CPU(bool, cpu_dead_idle);
+int cpu_report_state(int cpu);
+int cpu_check_up_prepare(int cpu);
+void cpu_set_state_online(int cpu);
+#ifdef CONFIG_HOTPLUG_CPU
+bool cpu_wait_death(unsigned int cpu, int seconds);
+bool cpu_report_death(void);
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/smpboot.c b/kernel/smpboot.c
index 40190f28db35..c697f73d82d6 100644
--- a/kernel/smpboot.c
+++ b/kernel/smpboot.c
@@ -4,6 +4,7 @@
 #include <linux/cpu.h>
 #include <linux/err.h>
 #include <linux/smp.h>
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
@@ -314,3 +315,158 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
        put_online_cpus();
 }
 EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
+static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
+/*
+ * Called to poll specified CPU's state, for example, when waiting for
+ * a CPU to come online.
+ *
+ * Returns the current value of @cpu's per-CPU cpu_hotplug_state, which
+ * starts out as CPU_POST_DEAD and is subsequently updated by the
+ * functions below.  The value is only a snapshot: nothing here
+ * prevents the state from changing right after it has been read.
+ */
+int cpu_report_state(int cpu)
+{
+        return atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+}
+/*
+ * If CPU has died properly, set its state to CPU_UP_PREPARE and
+ * return success.  Otherwise, return -EBUSY if the CPU died after
+ * cpu_wait_death() timed out.  And yet otherwise again, return -EAGAIN
+ * if cpu_wait_death() timed out and the CPU still hasn't gotten around
+ * to dying.  In the latter two cases, the CPU might not be set up
+ * properly, but it is up to the arch-specific code to decide.
+ * Finally, -EIO indicates an unanticipated problem.
+ *
+ * When CONFIG_HOTPLUG_CPU is not enabled no checking is done at all:
+ * the state is unconditionally set to CPU_UP_PREPARE and 0 is returned.
+ *
+ * The state is modified only on the paths that return 0; the -EBUSY,
+ * -EAGAIN and -EIO paths leave it untouched.
+ *
+ * Note that it is permissible to omit this call entirely, as is
+ * done in architectures that do no CPU-hotplug error checking.
+ */
+int cpu_check_up_prepare(int cpu)
+{
+        if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
+                atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
+                return 0;
+        }
+        switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) {
+        case CPU_POST_DEAD:
+                /*
+                 * The CPU died properly, so just start it up again.
+                 * CPU_POST_DEAD is also the initial value of
+                 * cpu_hotplug_state, so a CPU whose state has never
+                 * been changed takes this path as well.
+                 */
+                atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
+                return 0;
+        case CPU_DEAD_FROZEN:
+                /*
+                 * Timeout during CPU death, so let caller know.
+                 * The outgoing CPU completed its processing, but after
+                 * cpu_wait_death() timed out and reported the error. The
+                 * caller is free to proceed, in which case the state
+                 * will be reset properly by cpu_set_state_online().
+                 * Proceeding despite this -EBUSY return makes sense
+                 * for systems where the outgoing CPUs take themselves
+                 * offline, with no post-death manipulation required from
+                 * a surviving CPU.
+                 */
+                return -EBUSY;
+        case CPU_BROKEN:
+                /*
+                 * The most likely reason we got here is that there was
+                 * a timeout during CPU death, and the outgoing CPU never
+                 * did complete its processing.  This could happen on
+                 * a virtualized system if the outgoing VCPU gets preempted
+                 * for longer than the timeout passed to cpu_wait_death(),
+                 * and the user attempts to immediately online that same
+                 * CPU.  Trying again later might return -EBUSY above,
+                 * hence -EAGAIN.
+                 */
+                return -EAGAIN;
+        default:
+                /*
+                 * Should not happen.  Famous last words.
+                 * Reached for every state other than the three handled
+                 * above, for example CPU_ONLINE, CPU_UP_PREPARE, or a
+                 * CPU_DEAD that cpu_wait_death() has not yet advanced
+                 * to CPU_POST_DEAD.
+                 */
+                return -EIO;
+        }
+}
+/*
+ * Mark the specified CPU online.
+ *
+ * Unconditionally overwrites @cpu's hotplug state with CPU_ONLINE,
+ * whatever it was before; the previous state returned by atomic_xchg()
+ * is deliberately discarded.
+ * NOTE(review): atomic_xchg() rather than atomic_set() is presumably
+ * used for its memory-ordering guarantees -- confirm.
+ *
+ * Note that it is permissible to omit this call entirely, as is
+ * done in architectures that do no CPU-hotplug error checking.
+ */
+void cpu_set_state_online(int cpu)
+{
+        (void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE);
+}
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Wait for the specified CPU to exit the idle loop and die.
+ *
+ * Polls @cpu's hotplug state until it reads CPU_DEAD or until a budget
+ * of @seconds * HZ jiffies has been used up.  The state is checked once
+ * immediately, then after a short udelay(5) busy-wait, and from then on
+ * after each sleep.  Sleeps start at one jiffy and grow by 10% (rounded
+ * up, so by at least one jiffy) per iteration.  The budget is charged
+ * with the requested sleep lengths, not with measured elapsed time, and
+ * it is checked only after sleeping, so at least one one-jiffy sleep
+ * takes place if the CPU is not yet dead, even for @seconds <= 0.
+ *
+ * Returns true, after advancing the state from CPU_DEAD to
+ * CPU_POST_DEAD, if the outgoing CPU reported its death in time.
+ * Otherwise sets the state to CPU_BROKEN and returns false.
+ *
+ * This function sleeps (see might_sleep() and
+ * schedule_timeout_uninterruptible() below), so it must be called from
+ * a context in which sleeping is allowed.
+ */
+bool cpu_wait_death(unsigned int cpu, int seconds)
+{
+        int jf_left = seconds * HZ;
+        int oldstate;
+        bool ret = true;
+        int sleep_jf = 1;
+        might_sleep();
+        /* The outgoing CPU will normally get done quite quickly. */
+        if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
+                goto update_state;
+        udelay(5);
+        /* But if the outgoing CPU dawdles, wait increasingly long times. */
+        while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) {
+                schedule_timeout_uninterruptible(sleep_jf);
+                jf_left -= sleep_jf;
+                if (jf_left <= 0)
+                        break;
+                sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
+        }
+update_state:
+        oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+        if (oldstate == CPU_DEAD) {
+                /* Outgoing CPU died normally, update state. */
+                smp_mb(); /* atomic_read() before update. */
+                atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
+        } else {
+                /*
+                 * Outgoing CPU still hasn't died, set state accordingly.
+                 * If the state changed between the atomic_read() above
+                 * and this cmpxchg (for example because the outgoing CPU
+                 * has just reported CPU_DEAD), start over at update_state
+                 * so that a late death report is not overwritten with
+                 * CPU_BROKEN.
+                 */
+                if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+                                   oldstate, CPU_BROKEN) != oldstate)
+                        goto update_state;
+                ret = false;
+        }
+        return ret;
+}
+/*
+ * Called by the outgoing CPU to report its successful death.  Return
+ * false if this report follows the surviving CPU's timing out.
+ *
+ * The state that is updated is that of the calling CPU, as given by
+ * smp_processor_id().  If the current state is CPU_BROKEN, meaning that
+ * cpu_wait_death() has already given up, the new state is
+ * CPU_DEAD_FROZEN and false is returned; for any other current state
+ * the new state is CPU_DEAD and true is returned.  The update is done
+ * in an atomic_cmpxchg() loop, so if the state changes between the
+ * read and the update, the new state is recomputed from the fresh
+ * value.
+ *
+ * A separate "CPU_DEAD_FROZEN" is used when the surviving CPU
+ * timed out.  This approach allows architectures to omit calls to
+ * cpu_check_up_prepare() and cpu_set_state_online() without defeating
+ * the next cpu_wait_death()'s polling loop.
+ */
+bool cpu_report_death(void)
+{
+        int oldstate;
+        int newstate;
+        int cpu = smp_processor_id();
+        do {
+                oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
+                if (oldstate != CPU_BROKEN)
+                        newstate = CPU_DEAD;
+                else
+                        newstate = CPU_DEAD_FROZEN;
+        } while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
+                                oldstate, newstate) != oldstate);
+        return newstate == CPU_DEAD;
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */

diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4260e8594bd7..4744ef915acd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h
@@ -95,6 +95,8 @@ enum {
95	* Called on the new cpu, just before	95	* Called on the new cpu, just before
96	* enabling interrupts. Must not sleep,	96	* enabling interrupts. Must not sleep,
97	* must not fail */	97	* must not fail */
		98	#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly,
		99	* perhaps due to preemption. */
98		100
99	/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend	101	/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend
100	* operation in progress	102	* operation in progress
@@ -271,4 +273,14 @@ void arch_cpu_idle_enter(void);
271	void arch_cpu_idle_exit(void);	273	void arch_cpu_idle_exit(void);
272	void arch_cpu_idle_dead(void);	274	void arch_cpu_idle_dead(void);
273		275
		276	DECLARE_PER_CPU(bool, cpu_dead_idle);
		277
		278	int cpu_report_state(int cpu);
		279	int cpu_check_up_prepare(int cpu);
		280	void cpu_set_state_online(int cpu);
		281	#ifdef CONFIG_HOTPLUG_CPU
		282	bool cpu_wait_death(unsigned int cpu, int seconds);
		283	bool cpu_report_death(void);
		284	#endif /* #ifdef CONFIG_HOTPLUG_CPU */
		285
274	#endif /* _LINUX_CPU_H_ */	286	#endif /* _LINUX_CPU_H_ */


diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 40190f28db35..c697f73d82d6 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c
@@ -4,6 +4,7 @@
4	#include <linux/cpu.h>	4	#include <linux/cpu.h>
5	#include <linux/err.h>	5	#include <linux/err.h>
6	#include <linux/smp.h>	6	#include <linux/smp.h>
		7	#include <linux/delay.h>
7	#include <linux/init.h>	8	#include <linux/init.h>
8	#include <linux/list.h>	9	#include <linux/list.h>
9	#include <linux/slab.h>	10	#include <linux/slab.h>
@@ -314,3 +315,158 @@ void smpboot_unregister_percpu_thread(struct smp_hotplug_thread *plug_thread)
314	put_online_cpus();	315	put_online_cpus();
315	}	316	}
316	EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);	317	EXPORT_SYMBOL_GPL(smpboot_unregister_percpu_thread);
		318
		319	static DEFINE_PER_CPU(atomic_t, cpu_hotplug_state) = ATOMIC_INIT(CPU_POST_DEAD);
		320
		321	/*
		322	* Called to poll specified CPU's state, for example, when waiting for
		323	* a CPU to come online.
		324	*/
		325	int cpu_report_state(int cpu)
		326	{
		327	return atomic_read(&per_cpu(cpu_hotplug_state, cpu));
		328	}
		329
		330	/*
		331	* If CPU has died properly, set its state to CPU_UP_PREPARE and
		332	* return success. Otherwise, return -EBUSY if the CPU died after
		333	* cpu_wait_death() timed out. And yet otherwise again, return -EAGAIN
		334	* if cpu_wait_death() timed out and the CPU still hasn't gotten around
		335	* to dying. In the latter two cases, the CPU might not be set up
		336	* properly, but it is up to the arch-specific code to decide.
		337	* Finally, -EIO indicates an unanticipated problem.
		338	*
		339	* Note that it is permissible to omit this call entirely, as is
		340	* done in architectures that do no CPU-hotplug error checking.
		341	*/
		342	int cpu_check_up_prepare(int cpu)
		343	{
		344	if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) {
		345	atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
		346	return 0;
		347	}
		348
		349	switch (atomic_read(&per_cpu(cpu_hotplug_state, cpu))) {
		350
		351	case CPU_POST_DEAD:
		352
		353	/* The CPU died properly, so just start it up again. */
		354	atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_UP_PREPARE);
		355	return 0;
		356
		357	case CPU_DEAD_FROZEN:
		358
		359	/*
		360	* Timeout during CPU death, so let caller know.
		361	* The outgoing CPU completed its processing, but after
		362	* cpu_wait_death() timed out and reported the error. The
		363	* caller is free to proceed, in which case the state
		364	* will be reset properly by cpu_set_state_online().
		365	* Proceeding despite this -EBUSY return makes sense
		366	* for systems where the outgoing CPUs take themselves
		367	* offline, with no post-death manipulation required from
		368	* a surviving CPU.
		369	*/
		370	return -EBUSY;
		371
		372	case CPU_BROKEN:
		373
		374	/*
		375	* The most likely reason we got here is that there was
		376	* a timeout during CPU death, and the outgoing CPU never
		377	* did complete its processing. This could happen on
		378	* a virtualized system if the outgoing VCPU gets preempted
		379	* for more than five seconds, and the user attempts to
		380	* immediately online that same CPU. Trying again later
		381	* might return -EBUSY above, hence -EAGAIN.
		382	*/
		383	return -EAGAIN;
		384
		385	default:
		386
		387	/* Should not happen. Famous last words. */
		388	return -EIO;
		389	}
		390	}
		391
		392	/*
		393	* Mark the specified CPU online.
		394	*
		395	* Note that it is permissible to omit this call entirely, as is
		396	* done in architectures that do no CPU-hotplug error checking.
		397	*/
		398	void cpu_set_state_online(int cpu)
		399	{
		400	(void)atomic_xchg(&per_cpu(cpu_hotplug_state, cpu), CPU_ONLINE);
		401	}
		402
		403	#ifdef CONFIG_HOTPLUG_CPU
		404
		405	/*
		406	* Wait for the specified CPU to exit the idle loop and die.
		407	*/
		408	bool cpu_wait_death(unsigned int cpu, int seconds)
		409	{
		410	int jf_left = seconds * HZ;
		411	int oldstate;
		412	bool ret = true;
		413	int sleep_jf = 1;
		414
		415	might_sleep();
		416
		417	/* The outgoing CPU will normally get done quite quickly. */
		418	if (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) == CPU_DEAD)
		419	goto update_state;
		420	udelay(5);
		421
		422	/* But if the outgoing CPU dawdles, wait increasingly long times. */
		423	while (atomic_read(&per_cpu(cpu_hotplug_state, cpu)) != CPU_DEAD) {
		424	schedule_timeout_uninterruptible(sleep_jf);
		425	jf_left -= sleep_jf;
		426	if (jf_left <= 0)
		427	break;
		428	sleep_jf = DIV_ROUND_UP(sleep_jf * 11, 10);
		429	}
		430	update_state:
		431	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
		432	if (oldstate == CPU_DEAD) {
		433	/* Outgoing CPU died normally, update state. */
		434	smp_mb(); /* atomic_read() before update. */
		435	atomic_set(&per_cpu(cpu_hotplug_state, cpu), CPU_POST_DEAD);
		436	} else {
		437	/* Outgoing CPU still hasn't died, set state accordingly. */
		438	if (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
		439	oldstate, CPU_BROKEN) != oldstate)
		440	goto update_state;
		441	ret = false;
		442	}
		443	return ret;
		444	}
		445
		446	/*
		447	* Called by the outgoing CPU to report its successful death. Return
		448	* false if this report follows the surviving CPU's timing out.
		449	*
		450	* A separate "CPU_DEAD_FROZEN" is used when the surviving CPU
		451	* timed out. This approach allows architectures to omit calls to
		452	* cpu_check_up_prepare() and cpu_set_state_online() without defeating
		453	* the next cpu_wait_death()'s polling loop.
		454	*/
		455	bool cpu_report_death(void)
		456	{
		457	int oldstate;
		458	int newstate;
		459	int cpu = smp_processor_id();
		460
		461	do {
		462	oldstate = atomic_read(&per_cpu(cpu_hotplug_state, cpu));
		463	if (oldstate != CPU_BROKEN)
		464	newstate = CPU_DEAD;
		465	else
		466	newstate = CPU_DEAD_FROZEN;
		467	} while (atomic_cmpxchg(&per_cpu(cpu_hotplug_state, cpu),
		468	oldstate, newstate) != oldstate);
		469	return newstate == CPU_DEAD;
		470	}
		471
		472	#endif /* #ifdef CONFIG_HOTPLUG_CPU */