Diffstat (limited to 'kernel/rcupdate.c')

 -rw-r--r--  kernel/rcupdate.c | 190
 1 file changed, 124 insertions, 66 deletions
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c4d159a21e04..0cf8146bd585 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
+#include <linux/rcupdate.h>
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <asm/atomic.h>
@@ -45,26 +46,21 @@
 #include <linux/percpu.h>
 #include <linux/notifier.h>
 #include <linux/rcupdate.h>
-#include <linux/rcuref.h>
 #include <linux/cpu.h>
 
 /* Definition for rcupdate control block. */
-struct rcu_ctrlblk rcu_ctrlblk =
-        { .cur = -300, .completed = -300 };
-struct rcu_ctrlblk rcu_bh_ctrlblk =
-        { .cur = -300, .completed = -300 };
-
-/* Bookkeeping of the progress of the grace period */
-struct rcu_state {
-        spinlock_t lock; /* Guard this struct and writes to rcu_ctrlblk */
-        cpumask_t cpumask; /* CPUs that need to switch in order    */
-                           /* for current batch to proceed.        */
+struct rcu_ctrlblk rcu_ctrlblk = {
+        .cur = -300,
+        .completed = -300,
+        .lock = SPIN_LOCK_UNLOCKED,
+        .cpumask = CPU_MASK_NONE,
+};
+struct rcu_ctrlblk rcu_bh_ctrlblk = {
+        .cur = -300,
+        .completed = -300,
+        .lock = SPIN_LOCK_UNLOCKED,
+        .cpumask = CPU_MASK_NONE,
 };
-
-static struct rcu_state rcu_state ____cacheline_maxaligned_in_smp =
-        {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
-static struct rcu_state rcu_bh_state ____cacheline_maxaligned_in_smp =
-        {.lock = SPIN_LOCK_UNLOCKED, .cpumask = CPU_MASK_NONE };
 
 DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L };
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
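With rcu_state folded away, all per-flavor bookkeeping now lives in struct rcu_ctrlblk. A rough sketch of the resulting declaration, assuming the companion change to include/linux/rcupdate.h (field comments and the alignment attribute here are illustrative, not quoted from the header):

/* Sketch only -- the authoritative declaration is in include/linux/rcupdate.h. */
struct rcu_ctrlblk {
        long cur;               /* current batch number */
        long completed;         /* number of the last completed batch */
        int next_pending;       /* is the next batch already waiting? */
        spinlock_t lock;        /* guards writes to this struct */
        cpumask_t cpumask;      /* CPUs that still need a quiescent state */
                                /* before the current batch can complete */
} ____cacheline_maxaligned_in_smp;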
@@ -73,19 +69,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L };
 static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL};
 static int maxbatch = 10000;
 
-#ifndef __HAVE_ARCH_CMPXCHG
-/*
- * We use an array of spinlocks for the rcurefs -- similar to ones in sparc
- * 32 bit atomic_t implementations, and a hash function similar to that
- * for our refcounting needs.
- * Can't help multiprocessors which donot have cmpxchg :(
- */
-
-spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = {
-        [0 ... (RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED
-};
-#endif
-
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -116,6 +99,10 @@ void fastcall call_rcu(struct rcu_head *head,
         local_irq_restore(flags);
 }
 
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
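For reference, these per-CPU queues are fed by the usual call_rcu() pattern: embed a struct rcu_head in the protected object and free the object from the callback once a grace period has elapsed. A minimal sketch with made-up names:

#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical RCU-protected object. */
struct foo {
        int data;
        struct rcu_head rcu;            /* queued via call_rcu() below */
};

static void foo_free_rcu(struct rcu_head *head)
{
        /* Runs only after every CPU has passed through a quiescent state. */
        kfree(container_of(head, struct foo, rcu));
}

static void foo_release(struct foo *fp)
{
        /* Readers still inside rcu_read_lock() keep seeing valid memory. */
        call_rcu(&fp->rcu, foo_free_rcu);
}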
@@ -162,6 +149,42 @@ long rcu_batches_completed(void)
         return rcu_ctrlblk.completed;
 }
 
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+        if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+                complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+        int cpu = smp_processor_id();
+        struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+        struct rcu_head *head;
+
+        head = &rdp->barrier;
+        atomic_inc(&rcu_barrier_cpu_count);
+        call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+        BUG_ON(in_interrupt());
+        /* Take cpucontrol semaphore to protect against CPU hotplug */
+        down(&rcu_barrier_sema);
+        init_completion(&rcu_barrier_completion);
+        atomic_set(&rcu_barrier_cpu_count, 0);
+        on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+        wait_for_completion(&rcu_barrier_completion);
+        up(&rcu_barrier_sema);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Invoke the completed RCU callbacks. They are expected to be in
  * a per-cpu list.
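rcu_barrier() exists so that code which has posted callbacks -- typically a module about to unload -- can wait until every callback already queued on any CPU has actually run. A hedged sketch of the intended use, with a hypothetical module and cache (foo_* names are made up):

/* Illustrative module exit path only. */
static void __exit foo_exit(void)
{
        foo_unregister_everything();    /* stop queueing new callbacks */

        /*
         * Wait for all call_rcu() callbacks this module has already
         * posted; otherwise they could run after the module text and
         * foo_cache are gone.
         */
        rcu_barrier();

        kmem_cache_destroy(foo_cache);
}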
@@ -193,13 +216,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
  * This is done by rcu_start_batch. The start is not broadcasted to
  * all cpus, they must pick this up by comparing rcp->cur with
  * rdp->quiescbatch. All cpus are recorded in the
- * rcu_state.cpumask bitmap.
+ * rcu_ctrlblk.cpumask bitmap.
  * - All cpus must go through a quiescent state.
  *   Since the start of the grace period is not broadcasted, at least two
  *   calls to rcu_check_quiescent_state are required:
  *   The first call just notices that a new grace period is running. The
  *   following calls check if there was a quiescent state since the beginning
- *   of the grace period. If so, it updates rcu_state.cpumask. If
+ *   of the grace period. If so, it updates rcu_ctrlblk.cpumask. If
  *   the bitmap is empty, then the grace period is completed.
  *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
@@ -207,25 +230,29 @@ static void rcu_do_batch(struct rcu_data *rdp)
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
- * Caller must hold rcu_state.lock.
+ * Caller must hold rcu_ctrlblk.lock.
  */
-static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
-                                int next_pending)
+static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 {
-        if (next_pending)
-                rcp->next_pending = 1;
-
         if (rcp->next_pending &&
                         rcp->completed == rcp->cur) {
-                /* Can't change, since spin lock held. */
-                cpus_andnot(rsp->cpumask, cpu_online_map, nohz_cpu_mask);
-
                 rcp->next_pending = 0;
-                /* next_pending == 0 must be visible in __rcu_process_callbacks()
-                 * before it can see new value of cur.
+                /*
+                 * next_pending == 0 must be visible in
+                 * __rcu_process_callbacks() before it can see new value of cur.
                  */
                 smp_wmb();
                 rcp->cur++;
+
+                /*
+                 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
+                 * Barrier  Otherwise it can cause tickless idle CPUs to be
+                 * included in rcp->cpumask, which will extend graceperiods
+                 * unnecessarily.
+                 */
+                smp_mb();
+                cpus_andnot(rcp->cpumask, cpu_online_map, nohz_cpu_mask);
+
         }
 }
 
@@ -234,13 +261,13 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp, struct rcu_state *rsp,
  * Clear it from the cpu mask and complete the grace period if it was the last
  * cpu. Start another grace period if someone has further entries pending
  */
-static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
+static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp)
 {
-        cpu_clear(cpu, rsp->cpumask);
-        if (cpus_empty(rsp->cpumask)) {
+        cpu_clear(cpu, rcp->cpumask);
+        if (cpus_empty(rcp->cpumask)) {
                 /* batch completed ! */
                 rcp->completed = rcp->cur;
-                rcu_start_batch(rcp, rsp, 0);
+                rcu_start_batch(rcp);
         }
 }
 
@@ -250,7 +277,7 @@ static void cpu_quiet(int cpu, struct rcu_ctrlblk *rcp, struct rcu_state *rsp)
  * quiescent cycle, then indicate that it has done so.
  */
 static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
-                                        struct rcu_state *rsp, struct rcu_data *rdp)
+                                        struct rcu_data *rdp)
 {
         if (rdp->quiescbatch != rcp->cur) {
                 /* start new grace period: */
@@ -275,15 +302,15 @@ static void rcu_check_quiescent_state(struct rcu_ctrlblk *rcp,
                 return;
         rdp->qs_pending = 0;
 
-        spin_lock(&rsp->lock);
+        spin_lock(&rcp->lock);
         /*
          * rdp->quiescbatch/rcp->cur and the cpu bitmap can come out of sync
          * during cpu startup. Ignore the quiescent state.
          */
         if (likely(rdp->quiescbatch == rcp->cur))
-                cpu_quiet(rdp->cpu, rcp, rsp);
+                cpu_quiet(rdp->cpu, rcp);
 
-        spin_unlock(&rsp->lock);
+        spin_unlock(&rcp->lock);
 }
 
 
@@ -304,28 +331,29 @@ static void rcu_move_batch(struct rcu_data *this_rdp, struct rcu_head *list,
 }
 
 static void __rcu_offline_cpu(struct rcu_data *this_rdp,
-        struct rcu_ctrlblk *rcp, struct rcu_state *rsp, struct rcu_data *rdp)
+                                struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
         /* if the cpu going offline owns the grace period
          * we can block indefinitely waiting for it, so flush
          * it here
          */
-        spin_lock_bh(&rsp->lock);
+        spin_lock_bh(&rcp->lock);
         if (rcp->cur != rcp->completed)
-                cpu_quiet(rdp->cpu, rcp, rsp);
-        spin_unlock_bh(&rsp->lock);
+                cpu_quiet(rdp->cpu, rcp);
+        spin_unlock_bh(&rcp->lock);
         rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
         rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
-
+        rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
 }
+
 static void rcu_offline_cpu(int cpu)
 {
         struct rcu_data *this_rdp = &get_cpu_var(rcu_data);
         struct rcu_data *this_bh_rdp = &get_cpu_var(rcu_bh_data);
 
-        __rcu_offline_cpu(this_rdp, &rcu_ctrlblk, &rcu_state,
+        __rcu_offline_cpu(this_rdp, &rcu_ctrlblk,
                                         &per_cpu(rcu_data, cpu));
-        __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk, &rcu_bh_state,
+        __rcu_offline_cpu(this_bh_rdp, &rcu_bh_ctrlblk,
                                         &per_cpu(rcu_bh_data, cpu));
         put_cpu_var(rcu_data);
         put_cpu_var(rcu_bh_data);
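For orientation, rcu_offline_cpu() is invoked from the CPU-hotplug notifier elsewhere in this file so that a dying CPU's pending callbacks are adopted by a live CPU rather than lost. Roughly (a sketch, not the exact notifier body):

static int __devinit rcu_cpu_notify(struct notifier_block *self,
                                    unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;

        switch (action) {
        case CPU_UP_PREPARE:
                rcu_online_cpu(cpu);    /* set up this CPU's rcu_data */
                break;
        case CPU_DEAD:
                rcu_offline_cpu(cpu);   /* move its callbacks to a live CPU */
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}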
@@ -344,7 +372,7 @@ static void rcu_offline_cpu(int cpu)
  * This does the RCU processing work from tasklet context.
  */
 static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
-                                        struct rcu_state *rsp, struct rcu_data *rdp)
+                                        struct rcu_data *rdp)
 {
         if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch)) {
                 *rdp->donetail = rdp->curlist;
@@ -374,24 +402,53 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp,
 
                 if (!rcp->next_pending) {
                         /* and start it/schedule start if it's a new batch */
-                        spin_lock(&rsp->lock);
-                        rcu_start_batch(rcp, rsp, 1);
-                        spin_unlock(&rsp->lock);
+                        spin_lock(&rcp->lock);
+                        rcp->next_pending = 1;
+                        rcu_start_batch(rcp);
+                        spin_unlock(&rcp->lock);
                 }
         } else {
                 local_irq_enable();
         }
-        rcu_check_quiescent_state(rcp, rsp, rdp);
+        rcu_check_quiescent_state(rcp, rdp);
         if (rdp->donelist)
                 rcu_do_batch(rdp);
 }
 
 static void rcu_process_callbacks(unsigned long unused)
 {
-        __rcu_process_callbacks(&rcu_ctrlblk, &rcu_state,
-                                &__get_cpu_var(rcu_data));
-        __rcu_process_callbacks(&rcu_bh_ctrlblk, &rcu_bh_state,
-                                &__get_cpu_var(rcu_bh_data));
+        __rcu_process_callbacks(&rcu_ctrlblk, &__get_cpu_var(rcu_data));
+        __rcu_process_callbacks(&rcu_bh_ctrlblk, &__get_cpu_var(rcu_bh_data));
+}
+
+static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+        /* This cpu has pending rcu entries and the grace period
+         * for them has completed.
+         */
+        if (rdp->curlist && !rcu_batch_before(rcp->completed, rdp->batch))
+                return 1;
+
+        /* This cpu has no pending entries, but there are new entries */
+        if (!rdp->curlist && rdp->nxtlist)
+                return 1;
+
+        /* This cpu has finished callbacks to invoke */
+        if (rdp->donelist)
+                return 1;
+
+        /* The rcu core waits for a quiescent state from the cpu */
+        if (rdp->quiescbatch != rcp->cur || rdp->qs_pending)
+                return 1;
+
+        /* nothing to do */
+        return 0;
+}
+
+int rcu_pending(int cpu)
+{
+        return __rcu_pending(&rcu_ctrlblk, &per_cpu(rcu_data, cpu)) ||
+                __rcu_pending(&rcu_bh_ctrlblk, &per_cpu(rcu_bh_data, cpu));
 }
 
 void rcu_check_callbacks(int cpu, int user)
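rcu_pending() gives the tick a cheap test for whether this CPU has any RCU work at all, so idle CPUs can avoid scheduling the RCU tasklet. A sketch of how a timer-tick caller might use it, assuming the companion change outside this file (the real caller is update_process_times(); the wrapper name here is made up):

/* Illustrative only -- not the actual hunk in kernel/timer.c. */
static void timer_tick_rcu_hook(int user_tick)
{
        int cpu = smp_processor_id();

        if (rcu_pending(cpu))
                rcu_check_callbacks(cpu, user_tick);
}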
@@ -457,6 +514,7 @@ static struct notifier_block __devinitdata rcu_nb = {
  */
 void __init rcu_init(void)
 {
+        sema_init(&rcu_barrier_sema, 1);
         rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
                         (void *)(long)smp_processor_id());
         /* Register notifier for non-boot CPUs */