Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--  kernel/rcutree_plugin.h | 135
1 file changed, 131 insertions(+), 4 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 71a4147473f9..a3638710dc67 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,6 +25,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/stop_machine.h>
 
 /*
  * Check the RCU kernel configuration parameters and print informative
@@ -773,11 +774,11 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 }
 
 /*
- * Move preemptable RCU's callbacks to ->orphan_cbs_list.
+ * Move preemptable RCU's callbacks from dying CPU to other online CPU.
  */
-static void rcu_preempt_send_cbs_to_orphanage(void)
+static void rcu_preempt_send_cbs_to_online(void)
 {
-        rcu_send_cbs_to_orphanage(&rcu_preempt_state);
+        rcu_send_cbs_to_online(&rcu_preempt_state);
 }
 
 /*
@@ -1001,7 +1002,7 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
 /*
  * Because there is no preemptable RCU, there are no callbacks to move.
  */
-static void rcu_preempt_send_cbs_to_orphanage(void)
+static void rcu_preempt_send_cbs_to_online(void)
 {
 }
 
@@ -1014,6 +1015,132 @@ static void __init __rcu_init_preempt(void)
 
 #endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */
 
+#ifndef CONFIG_SMP
+
+void synchronize_sched_expedited(void)
+{
+        cond_resched();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#else /* #ifndef CONFIG_SMP */
+
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+        /*
+         * There must be a full memory barrier on each affected CPU
+         * between the time that try_stop_cpus() is called and the
+         * time that it returns.
+         *
+         * In the current initial implementation of cpu_stop, the
+         * above condition is already met when the control reaches
+         * this point and the following smp_mb() is not strictly
+         * necessary. Do smp_mb() anyway for documentation and
+         * robustness against future implementation changes.
+         */
+        smp_mb(); /* See above comment block. */
+        return 0;
+}
+
+/*
+ * Wait for an rcu-sched grace period to elapse, but use "big hammer"
+ * approach to force grace period to end quickly. This consumes
+ * significant time on all CPUs, and is thus not recommended for
+ * any sort of common-case code.
+ *
+ * Note that it is illegal to call this function while holding any
+ * lock that is acquired by a CPU-hotplug notifier. Failing to
+ * observe this restriction will result in deadlock.
+ *
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word. Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs. If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period. We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done. If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot. In this case, our work is
+ * done for us, and we can simply return. Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
+ */
+void synchronize_sched_expedited(void)
+{
+        int firstsnap, s, snap, trycount = 0;
+
+        /* Note that atomic_inc_return() implies full memory barrier. */
+        firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+        get_online_cpus();
+
+        /*
+         * Each pass through the following loop attempts to force a
+         * context switch on each CPU.
+         */
+        while (try_stop_cpus(cpu_online_mask,
+                             synchronize_sched_expedited_cpu_stop,
+                             NULL) == -EAGAIN) {
+                put_online_cpus();
+
+                /* No joy, try again later. Or just synchronize_sched(). */
+                if (trycount++ < 10)
+                        udelay(trycount * num_online_cpus());
+                else {
+                        synchronize_sched();
+                        return;
+                }
+
+                /* Check to see if someone else did our work for us. */
+                s = atomic_read(&sync_sched_expedited_done);
+                if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
+                        smp_mb(); /* ensure test happens before caller kfree */
+                        return;
+                }
+
+                /*
+                 * Refetching sync_sched_expedited_started allows later
+                 * callers to piggyback on our grace period. We subtract
+                 * 1 to get the same token that the last incrementer got.
+                 * We retry after they started, so our grace period works
+                 * for them, and they started after our first try, so their
+                 * grace period works for us.
+                 */
+                get_online_cpus();
+                snap = atomic_read(&sync_sched_expedited_started) - 1;
+                smp_mb(); /* ensure read is before try_stop_cpus(). */
+        }
+
+        /*
+         * Everyone up to our most recent fetch is covered by our grace
+         * period. Update the counter, but only if our work is still
+         * relevant -- which it won't be if someone who started later
+         * than we did beat us to the punch.
+         */
+        do {
+                s = atomic_read(&sync_sched_expedited_done);
+                if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+                        smp_mb(); /* ensure test happens before caller kfree */
+                        break;
+                }
+        } while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
+        put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
+#endif /* #else #ifndef CONFIG_SMP */
+
 #if !defined(CONFIG_RCU_FAST_NO_HZ)
 
 /*
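
The comment block in the hunk above describes sync_sched_expedited_started and sync_sched_expedited_done as the two halves of a ticket lock. Below is a minimal, illustration-only user-space sketch of that started/done pattern, written with C11 atomics instead of the kernel's atomic_t API. All names here (expedited_started, expedited_done, at_or_after, force_all_cpus, slow_path_wait, expedited_wait_sketch) are hypothetical stand-ins -- force_all_cpus() plays the role of try_stop_cpus() and slow_path_wait() that of synchronize_sched() -- and the sketch deliberately ignores the patch's CPU-hotplug locking, exact snapshot arithmetic, and explicit memory-barrier placement.

/* Illustration only -- not code from the patch above. */
#include <limits.h>
#include <stdatomic.h>

/* Counters playing the roles of sync_sched_expedited_started/_done. */
static atomic_uint expedited_started;
static atomic_uint expedited_done;

/* Hypothetical stand-ins for try_stop_cpus() and synchronize_sched(). */
extern int  force_all_cpus(void);	/* returns 0 on success, nonzero if busy */
extern void slow_path_wait(void);

/* Wraparound-tolerant "a is at or after b", in the spirit of UINT_CMP_GE(). */
static int at_or_after(unsigned int a, unsigned int b)
{
	return UINT_MAX / 2 >= a - b;	/* unsigned subtraction wraps safely */
}

void expedited_wait_sketch(void)
{
	/* Take a ticket; in the kernel the atomic RMW also orders prior accesses. */
	unsigned int firstsnap = atomic_fetch_add(&expedited_started, 1) + 1;
	unsigned int snap = firstsnap;
	int tries = 0;

	while (force_all_cpus() != 0) {
		/* Too much contention: give up and take the slow path. */
		if (++tries > 10) {
			slow_path_wait();
			return;
		}

		/* Someone who succeeded after we took our ticket covers us. */
		if (at_or_after(atomic_load(&expedited_done), firstsnap))
			return;

		/* Refetch so callers who arrived meanwhile can piggyback on us. */
		snap = atomic_load(&expedited_started);
	}

	/*
	 * Success: publish that every ticket up to "snap" is now covered,
	 * but never move the done counter backwards past a later update.
	 */
	unsigned int s = atomic_load(&expedited_done);
	while (!at_or_after(s, snap) &&
	       !atomic_compare_exchange_weak(&expedited_done, &s, snap))
		;	/* a failed CAS reloads s, then the test is redone */
}

As in the patch, the key property is that the done counter only moves forward: a caller whose stop attempt keeps failing can still return early once it observes that done has reached its own ticket, because some later, successful caller has already provided the grace period it needed.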
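For context on the "ensure test happens before caller kfree" comments: synchronize_sched_expedited() is a drop-in for synchronize_sched(), so a typical caller unpublishes a pointer, waits for the grace period, and only then frees the old object. A hedged sketch of that caller pattern follows; struct foo, foo_mutex, global_foo_ptr and foo_replace() are hypothetical names, not code from this patch.

/* Illustration only: typical caller pattern, not part of the patch. */
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
};

static struct foo __rcu *global_foo_ptr;	/* read under preempt_disable() */
static DEFINE_MUTEX(foo_mutex);			/* serializes updaters */

static void foo_replace(struct foo *new_fp)
{
	struct foo *old_fp;

	mutex_lock(&foo_mutex);
	old_fp = rcu_dereference_protected(global_foo_ptr,
					   lockdep_is_held(&foo_mutex));
	rcu_assign_pointer(global_foo_ptr, new_fp);	/* unpublish old_fp */
	mutex_unlock(&foo_mutex);

	synchronize_sched_expedited();	/* wait out preempt-disabled readers */
	kfree(old_fp);			/* no reader can still reference old_fp */
}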