about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2008-01-25 15:08:25 -0500
committerIngo Molnar <mingo@elte.hu>2008-01-25 15:08:25 -0500
commiteaf649e9fe6685f4c5a392cd0e16df5fd6660b7c (patch)
tree05fb08fc2e8bf9e87e9892130f4bd6e147d5a69d
parente260be673a15b6125068270e0216a3bfbfc12f87 (diff)
Preempt-RCU: CPU Hotplug handling
This patch allows preemptible RCU to tolerate CPU-hotplug operations. It accomplishes this by maintaining a local copy of a map of online CPUs, which it accesses under its own lock.

Signed-off-by: Gautham R Shenoy <ego@in.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/rcupreempt.c147
1 file changed, 142 insertions, 5 deletions
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index a5aabb1677f8..987cfb7ade89 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -147,6 +147,8 @@ static char *rcu_try_flip_state_names[] =
147 { "idle", "waitack", "waitzero", "waitmb" }; 147 { "idle", "waitack", "waitzero", "waitmb" };
148#endif /* #ifdef CONFIG_RCU_TRACE */ 148#endif /* #ifdef CONFIG_RCU_TRACE */
149 149
150static cpumask_t rcu_cpu_online_map __read_mostly = CPU_MASK_NONE;
151
150/* 152/*
151 * Enum and per-CPU flag to determine when each CPU has seen 153 * Enum and per-CPU flag to determine when each CPU has seen
152 * the most recent counter flip. 154 * the most recent counter flip.
@@ -445,7 +447,7 @@ rcu_try_flip_idle(void)
445 447
446 /* Now ask each CPU for acknowledgement of the flip. */ 448 /* Now ask each CPU for acknowledgement of the flip. */
447 449
448 for_each_possible_cpu(cpu) 450 for_each_cpu_mask(cpu, rcu_cpu_online_map)
449 per_cpu(rcu_flip_flag, cpu) = rcu_flipped; 451 per_cpu(rcu_flip_flag, cpu) = rcu_flipped;
450 452
451 return 1; 453 return 1;
@@ -461,7 +463,7 @@ rcu_try_flip_waitack(void)
461 int cpu; 463 int cpu;
462 464
463 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1); 465 RCU_TRACE_ME(rcupreempt_trace_try_flip_a1);
464 for_each_possible_cpu(cpu) 466 for_each_cpu_mask(cpu, rcu_cpu_online_map)
465 if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) { 467 if (per_cpu(rcu_flip_flag, cpu) != rcu_flip_seen) {
466 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1); 468 RCU_TRACE_ME(rcupreempt_trace_try_flip_ae1);
467 return 0; 469 return 0;
@@ -492,7 +494,7 @@ rcu_try_flip_waitzero(void)
492 /* Check to see if the sum of the "last" counters is zero. */ 494 /* Check to see if the sum of the "last" counters is zero. */
493 495
494 RCU_TRACE_ME(rcupreempt_trace_try_flip_z1); 496 RCU_TRACE_ME(rcupreempt_trace_try_flip_z1);
495 for_each_possible_cpu(cpu) 497 for_each_cpu_mask(cpu, rcu_cpu_online_map)
496 sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx]; 498 sum += RCU_DATA_CPU(cpu)->rcu_flipctr[lastidx];
497 if (sum != 0) { 499 if (sum != 0) {
498 RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1); 500 RCU_TRACE_ME(rcupreempt_trace_try_flip_ze1);
@@ -507,7 +509,7 @@ rcu_try_flip_waitzero(void)
507 smp_mb(); /* ^^^^^^^^^^^^ */ 509 smp_mb(); /* ^^^^^^^^^^^^ */
508 510
509 /* Call for a memory barrier from each CPU. */ 511 /* Call for a memory barrier from each CPU. */
510 for_each_possible_cpu(cpu) 512 for_each_cpu_mask(cpu, rcu_cpu_online_map)
511 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed; 513 per_cpu(rcu_mb_flag, cpu) = rcu_mb_needed;
512 514
513 RCU_TRACE_ME(rcupreempt_trace_try_flip_z2); 515 RCU_TRACE_ME(rcupreempt_trace_try_flip_z2);
@@ -525,7 +527,7 @@ rcu_try_flip_waitmb(void)
525 int cpu; 527 int cpu;
526 528
527 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1); 529 RCU_TRACE_ME(rcupreempt_trace_try_flip_m1);
528 for_each_possible_cpu(cpu) 530 for_each_cpu_mask(cpu, rcu_cpu_online_map)
529 if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) { 531 if (per_cpu(rcu_mb_flag, cpu) != rcu_mb_done) {
530 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1); 532 RCU_TRACE_ME(rcupreempt_trace_try_flip_me1);
531 return 0; 533 return 0;
@@ -637,6 +639,98 @@ void rcu_advance_callbacks(int cpu, int user)
637 spin_unlock_irqrestore(&rdp->lock, flags); 639 spin_unlock_irqrestore(&rdp->lock, flags);
638} 640}
639 641
642#ifdef CONFIG_HOTPLUG_CPU
643#define rcu_offline_cpu_enqueue(srclist, srctail, dstlist, dsttail) do { \
644 *dsttail = srclist; \
645 if (srclist != NULL) { \
646 dsttail = srctail; \
647 srclist = NULL; \
648 srctail = &srclist;\
649 } \
650 } while (0)
651
652void rcu_offline_cpu(int cpu)
653{
654 int i;
655 struct rcu_head *list = NULL;
656 unsigned long flags;
657 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
658 struct rcu_head **tail = &list;
659
660 /*
661 * Remove all callbacks from the newly dead CPU, retaining order.
662 * Otherwise rcu_barrier() will fail
663 */
664
665 spin_lock_irqsave(&rdp->lock, flags);
666 rcu_offline_cpu_enqueue(rdp->donelist, rdp->donetail, list, tail);
667 for (i = GP_STAGES - 1; i >= 0; i--)
668 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
669 list, tail);
670 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
671 spin_unlock_irqrestore(&rdp->lock, flags);
672 rdp->waitlistcount = 0;
673
674 /* Disengage the newly dead CPU from the grace-period computation. */
675
676 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
677 rcu_check_mb(cpu);
678 if (per_cpu(rcu_flip_flag, cpu) == rcu_flipped) {
679 smp_mb(); /* Subsequent counter accesses must see new value */
680 per_cpu(rcu_flip_flag, cpu) = rcu_flip_seen;
681 smp_mb(); /* Subsequent RCU read-side critical sections */
682 /* seen -after- acknowledgement. */
683 }
684
685 RCU_DATA_ME()->rcu_flipctr[0] += RCU_DATA_CPU(cpu)->rcu_flipctr[0];
686 RCU_DATA_ME()->rcu_flipctr[1] += RCU_DATA_CPU(cpu)->rcu_flipctr[1];
687
688 RCU_DATA_CPU(cpu)->rcu_flipctr[0] = 0;
689 RCU_DATA_CPU(cpu)->rcu_flipctr[1] = 0;
690
691 cpu_clear(cpu, rcu_cpu_online_map);
692
693 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
694
695 /*
696 * Place the removed callbacks on the current CPU's queue.
697 * Make them all start a new grace period: simple approach,
698 * in theory could starve a given set of callbacks, but
699 * you would need to be doing some serious CPU hotplugging
700 * to make this happen. If this becomes a problem, adding
701 * a synchronize_rcu() to the hotplug path would be a simple
702 * fix.
703 */
704
705 rdp = RCU_DATA_ME();
706 spin_lock_irqsave(&rdp->lock, flags);
707 *rdp->nexttail = list;
708 if (list)
709 rdp->nexttail = tail;
710 spin_unlock_irqrestore(&rdp->lock, flags);
711}
712
713void __devinit rcu_online_cpu(int cpu)
714{
715 unsigned long flags;
716
717 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
718 cpu_set(cpu, rcu_cpu_online_map);
719 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
720}
721
722#else /* #ifdef CONFIG_HOTPLUG_CPU */
723
724void rcu_offline_cpu(int cpu)
725{
726}
727
728void __devinit rcu_online_cpu(int cpu)
729{
730}
731
732#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */
733
640static void rcu_process_callbacks(struct softirq_action *unused) 734static void rcu_process_callbacks(struct softirq_action *unused)
641{ 735{
642 unsigned long flags; 736 unsigned long flags;
@@ -746,6 +840,32 @@ int rcu_pending(int cpu)
746 return 0; 840 return 0;
747} 841}
748 842
843static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
844 unsigned long action, void *hcpu)
845{
846 long cpu = (long)hcpu;
847
848 switch (action) {
849 case CPU_UP_PREPARE:
850 case CPU_UP_PREPARE_FROZEN:
851 rcu_online_cpu(cpu);
852 break;
853 case CPU_UP_CANCELED:
854 case CPU_UP_CANCELED_FROZEN:
855 case CPU_DEAD:
856 case CPU_DEAD_FROZEN:
857 rcu_offline_cpu(cpu);
858 break;
859 default:
860 break;
861 }
862 return NOTIFY_OK;
863}
864
865static struct notifier_block __cpuinitdata rcu_nb = {
866 .notifier_call = rcu_cpu_notify,
867};
868
749void __init __rcu_init(void) 869void __init __rcu_init(void)
750{ 870{
751 int cpu; 871 int cpu;
@@ -769,6 +889,23 @@ void __init __rcu_init(void)
769 rdp->rcu_flipctr[0] = 0; 889 rdp->rcu_flipctr[0] = 0;
770 rdp->rcu_flipctr[1] = 0; 890 rdp->rcu_flipctr[1] = 0;
771 } 891 }
892 register_cpu_notifier(&rcu_nb);
893
894 /*
895 * We don't need protection against CPU-Hotplug here
896 * since
897 * a) If a CPU comes online while we are iterating over the
898 * cpu_online_map below, we would only end up making a
899 * duplicate call to rcu_online_cpu() which sets the corresponding
900 * CPU's mask in the rcu_cpu_online_map.
901 *
902 * b) A CPU cannot go offline at this point in time since the user
903 * does not have access to the sysfs interface, nor do we
904 * suspend the system.
905 */
906 for_each_online_cpu(cpu)
907 rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE, (void *)(long) cpu);
908
772 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL); 909 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks, NULL);
773} 910}
774 911