1 files changed, 566 insertions, 0 deletions
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
new file mode 100644
index 000000000000..1cee04f627eb
--- /dev/null
+++ b/kernel/rcutree_plugin.h
@@ -0,0 +1,566 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tree-based version)
+ * Internal non-public definitions that provide either classic
+ * or preemptable semantics.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright Red Hat, 2009
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Ingo Molnar <mingo@elte.hu>
+ *         Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+#ifdef CONFIG_TREE_PREEMPT_RCU
+struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
+DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); /* per-CPU preempt-RCU data */
+/*
+ * Tell them what RCU they are running: emit a single KERN_INFO line
+ * naming the preemptable hierarchical implementation.  This variant is
+ * the one compiled when CONFIG_TREE_PREEMPT_RCU is defined.
+ */
+static inline void rcu_bootup_announce(void)
+{
+        printk(KERN_INFO
+               "Experimental preemptable hierarchical RCU implementation.\n");
+}
+/*
+ * Return the number of RCU-preempt batches processed thus far
+ * for debug and statistics.  The value is the ->completed field of
+ * rcu_preempt_state, read here without taking any lock.
+ */
+long rcu_batches_completed_preempt(void)
+{
+        return rcu_preempt_state.completed;
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ * With CONFIG_TREE_PREEMPT_RCU defined this simply forwards to
+ * rcu_batches_completed_preempt().
+ */
+long rcu_batches_completed(void)
+{
+        return rcu_batches_completed_preempt();
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+/*
+ * Record a preemptable-RCU quiescent state for the specified CPU.  Note
+ * that this is a statement about the CPU only, not about the tasks that
+ * have run on it: rcu_preempt_note_context_switch() calls this even when
+ * the current task is still inside an RCU read-side critical section
+ * (after queuing that task), and there might be any number of tasks
+ * blocked while in an RCU read-side critical section.
+ *
+ * The CPU's ->completed value is copied into ->passed_quiesc_completed
+ * before ->passed_quiesc is set.
+ */
+static void rcu_preempt_qs(int cpu)
+{
+        struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+        rdp->passed_quiesc_completed = rdp->completed;
+        barrier();  /* keep the compiler from reordering the two stores */
+        rdp->passed_quiesc = 1;
+}
+/*
+ * We have entered the scheduler, and the current task might soon be
+ * context-switched away from.  If this task is in an RCU read-side
+ * critical section, we will no longer be able to rely on the CPU to
+ * record that fact, so we enqueue the task on the appropriate entry
+ * of the blocked_tasks[] array.  The task will dequeue itself when
+ * it exits the outermost enclosing RCU read-side critical section.
+ * Therefore, the current grace period cannot be permitted to complete
+ * until the blocked_tasks[] entry indexed by the low-order bit of
+ * rnp->gpnum empties.
+ *
+ * The cpu argument selects the rcu_data (and, through ->mynode, the
+ * rcu_node) on which the task is queued, and is also the CPU for which
+ * the quiescent state is recorded.  A task that already has
+ * RCU_READ_UNLOCK_BLOCKED set is not queued a second time.
+ *
+ * Caller must disable preemption.
+ */
+static void rcu_preempt_note_context_switch(int cpu)
+{
+        struct task_struct *t = current;
+        unsigned long flags;
+        int phase;  /* index into blocked_tasks[]: 0 or 1 */
+        struct rcu_data *rdp;
+        struct rcu_node *rnp;
+        if (t->rcu_read_lock_nesting &&
+            (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
+                /* Possibly blocking in an RCU read-side critical section. */
+                rdp = rcu_preempt_state.rda[cpu];
+                rnp = rdp->mynode;
+                spin_lock_irqsave(&rnp->lock, flags);
+                t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
+                t->rcu_blocked_node = rnp;
+                /*
+                 * If this CPU has already checked in, then this task
+                 * will hold up the next grace period rather than the
+                 * current grace period.  Queue the task accordingly.
+                 * If the task is queued for the current grace period
+                 * (i.e., this CPU has not yet passed through a quiescent
+                 * state for the current grace period), then as long
+                 * as that task remains queued, the current grace period
+                 * cannot end.
+                 *
+                 * Concretely: when this CPU's bit (rdp->grpmask) is
+                 * still set in rnp->qsmask, the index is the low-order
+                 * bit of rnp->gpnum; when the bit is already clear,
+                 * one is added first, which selects the other list.
+                 *
+                 * But first, note that the current CPU must still be
+                 * on line!
+                 */
+                WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
+                WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
+                phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
+                list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
+                spin_unlock_irqrestore(&rnp->lock, flags);
+        }
+        /*
+         * Either we were not in an RCU read-side critical section to
+         * begin with, or we have now recorded that critical section
+         * globally.  Either way, we can now note a quiescent state
+         * for this CPU.  Again, if we were in an RCU read-side critical
+         * section, and if that critical section was blocking the current
+         * grace period, then the fact that the task has been enqueued
+         * means that we continue to block the current grace period.
+         *
+         * Any pending RCU_READ_UNLOCK_NEED_QS request is cleared as
+         * well, with irqs disabled around the update.  Presumably this
+         * is because rcu_preempt_check_callbacks(), which sets that
+         * flag, runs with hard irqs disabled -- TODO confirm.
+         */
+        rcu_preempt_qs(cpu);
+        local_irq_save(flags);
+        t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+        local_irq_restore(flags);
+}
+/*
+ * Tree-preemptable RCU implementation for rcu_read_lock().
+ * Just increment ->rcu_read_lock_nesting, shared state will be updated
+ * if we block.  Only the current task's counter is touched; no lock is
+ * taken and no per-CPU or global RCU state is modified here.
+ */
+void __rcu_read_lock(void)
+{
+        ACCESS_ONCE(current->rcu_read_lock_nesting)++;
+        barrier();  /* needed if we ever invoke rcu_read_lock in rcutree.c */
+}
+EXPORT_SYMBOL_GPL(__rcu_read_lock);
+static void rcu_read_unlock_special(struct task_struct *t)
+{
+        int empty;  /* was the current-GP blocked list empty before dequeue? */
+        unsigned long flags;
+        unsigned long mask;
+        struct rcu_node *rnp;
+        int special;  /* snapshot of t->rcu_read_unlock_special */
+        /*
+         * Slow path of __rcu_read_unlock(), invoked for task t when its
+         * nesting count reached zero with ->rcu_read_unlock_special
+         * non-zero.  It handles two requests: RCU_READ_UNLOCK_NEED_QS
+         * (report a quiescent state for this CPU) and
+         * RCU_READ_UNLOCK_BLOCKED (remove the task from the
+         * blocked_tasks[] list it was queued on, and report upward if
+         * that emptied the list for the current grace period).
+         *
+         * NMI handlers cannot block and cannot safely manipulate state.
+         */
+        if (in_nmi())
+                return;
+        local_irq_save(flags);
+        /*
+         * If RCU core is waiting for this CPU to exit critical section,
+         * let it know that we have done so.
+         */
+        special = t->rcu_read_unlock_special;
+        if (special & RCU_READ_UNLOCK_NEED_QS) {
+                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+                rcu_preempt_qs(smp_processor_id());
+        }
+        /* Hardware IRQ handlers cannot block. */
+        if (in_irq()) {
+                local_irq_restore(flags);
+                return;
+        }
+        /* Clean up if blocked during RCU read-side critical section. */
+        if (special & RCU_READ_UNLOCK_BLOCKED) {
+                t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
+                /*
+                 * Remove this task from the list it blocked on.  The
+                 * task can migrate while we acquire the lock, but at
+                 * most one time.  So at most two passes through loop.
+                 * The loop exits holding the lock of the rcu_node that
+                 * ->rcu_blocked_node still points to.
+                 */
+                for (;;) {
+                        rnp = t->rcu_blocked_node;
+                        spin_lock(&rnp->lock);  /* irqs already disabled. */
+                        if (rnp == t->rcu_blocked_node)
+                                break;
+                        spin_unlock(&rnp->lock);  /* irqs remain disabled. */
+                }
+                empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+                list_del_init(&t->rcu_node_entry);
+                t->rcu_blocked_node = NULL;
+                /*
+                 * If this was the last task on the current list, and if
+                 * we aren't waiting on any CPUs, report the quiescent state.
+                 * "empty" was sampled before the list_del_init() above, so
+                 * !empty together with the list being empty now means that
+                 * this task's removal is what emptied the list.
+                 * Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
+                 * drop rnp->lock and restore irq.
+                 */
+                if (!empty && rnp->qsmask == 0 &&
+                    list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
+                        struct rcu_node *rnp_p;
+                        if (rnp->parent == NULL) {
+                                /* Only one rcu_node in the tree. */
+                                cpu_quiet_msk_finish(&rcu_preempt_state, flags);
+                                return;
+                        }
+                        /* Report up the rest of the hierarchy. */
+                        mask = rnp->grpmask;
+                        spin_unlock_irqrestore(&rnp->lock, flags);
+                        rnp_p = rnp->parent;
+                        spin_lock_irqsave(&rnp_p->lock, flags);
+                        WARN_ON_ONCE(rnp->qsmask);
+                        cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
+                        return;
+                }
+                spin_unlock(&rnp->lock);  /* irqs restored just below. */
+        }
+        local_irq_restore(flags);
+}
+/*
+ * Tree-preemptable RCU implementation for rcu_read_unlock().
+ * Decrement ->rcu_read_lock_nesting.  If the result is zero (outermost
+ * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
+ * invoke rcu_read_unlock_special() to clean up after a context switch
+ * in an RCU read-side critical section and other special cases.
+ * In the common case (nested unlock, or nothing special recorded) the
+ * function does nothing beyond the decrement.
+ */
+void __rcu_read_unlock(void)
+{
+        struct task_struct *t = current;
+        barrier();  /* needed if we ever invoke rcu_read_unlock in rcutree.c */
+        if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
+            unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
+                rcu_read_unlock_special(t);
+}
+EXPORT_SYMBOL_GPL(__rcu_read_unlock);
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+/*
+ * Print " P<pid>" for every task queued on the specified rcu_node's
+ * blocked_tasks[] list for the current grace period (the list indexed
+ * by the low-order bit of ->gpnum).  A lockless peek avoids taking
+ * rnp->lock when that list looks empty; otherwise the index is sampled
+ * again and the list is walked with rnp->lock held and irqs disabled.
+ */
+static void rcu_print_task_stall(struct rcu_node *rnp)
+{
+        unsigned long flags;
+        struct list_head *head;
+        struct task_struct *blocked;
+        /* Unlocked check: nothing to report if the list is empty. */
+        if (list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]))
+                return;
+        spin_lock_irqsave(&rnp->lock, flags);
+        /* ->gpnum is read again now that the lock is held. */
+        head = &rnp->blocked_tasks[rnp->gpnum & 0x1];
+        list_for_each_entry(blocked, head, rcu_node_entry)
+                printk(" P%d", blocked->pid);
+        spin_unlock_irqrestore(&rnp->lock, flags);
+}
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+/*
+ * Check that the list of blocked tasks for the newly completed grace
+ * period is in fact empty.  It is a serious bug to complete a grace
+ * period that still has RCU readers blocked!  This function must be
+ * invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
+ * must be held by the caller.
+ *
+ * Two conditions are checked, each warning at most once: the
+ * blocked_tasks[] list indexed by the low-order bit of ->gpnum must be
+ * empty, and ->qsmask must be zero.  Nothing is modified.
+ */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+        WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]));
+        WARN_ON_ONCE(rnp->qsmask);
+}
+/*
+ * Check for preempted RCU readers for the specified rcu_node structure.
+ * If the caller needs a reliable answer, it must hold the rcu_node's
+ * ->lock.
+ *
+ * Returns non-zero if the blocked_tasks[] list indexed by the low-order
+ * bit of ->gpnum is non-empty, zero otherwise.
+ */
+static int rcu_preempted_readers(struct rcu_node *rnp)
+{
+        return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
+}
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Handle tasklist migration for case in which all CPUs covered by the
+ * specified rcu_node have gone offline.  Move them up to the root
+ * rcu_node.  The reason for not just moving them to the immediate
+ * parent is to remove the need for rcu_read_unlock_special() to
+ * make more than two attempts to acquire the target rcu_node's lock.
+ *
+ * Nothing is moved (and a one-time warning is issued) if rnp is itself
+ * the root.  A one-time warning is also issued if rnp is not rdp's
+ * ->mynode yet has tasks queued on either blocked_tasks[] list.
+ *
+ * The caller must hold rnp->lock with irqs disabled.
+ */
+static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                      struct rcu_node *rnp,
+                                      struct rcu_data *rdp)
+{
+        int i;
+        struct list_head *lp;       /* source list on rnp */
+        struct list_head *lp_root;  /* same-index list on the root */
+        struct rcu_node *rnp_root = rcu_get_root(rsp);
+        struct task_struct *tp;
+        if (rnp == rnp_root) {
+                WARN_ONCE(1, "Last CPU thought to be offlined?");
+                return;  /* Shouldn't happen: at least one CPU online. */
+        }
+        WARN_ON_ONCE(rnp != rdp->mynode &&
+                     (!list_empty(&rnp->blocked_tasks[0]) ||
+                      !list_empty(&rnp->blocked_tasks[1])));
+        /*
+         * Move tasks up to root rcu_node.  Rely on the fact that the
+         * root rcu_node can be at most one ahead of the rest of the
+         * rcu_nodes in terms of ->gpnum value.  This fact allows us to
+         * move the blocked_tasks[] array directly, element by element.
+         * The root's lock is taken and released around each individual
+         * task, and each task's ->rcu_blocked_node is repointed at the
+         * root while that lock is held.
+         */
+        for (i = 0; i < 2; i++) {
+                lp = &rnp->blocked_tasks[i];
+                lp_root = &rnp_root->blocked_tasks[i];
+                while (!list_empty(lp)) {
+                        tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
+                        spin_lock(&rnp_root->lock); /* irqs already disabled */
+                        list_del(&tp->rcu_node_entry);
+                        tp->rcu_blocked_node = rnp_root;
+                        list_add(&tp->rcu_node_entry, lp_root);
+                        spin_unlock(&rnp_root->lock); /* irqs remain disabled */
+                }
+        }
+}
+/*
+ * Do CPU-offline processing for preemptable RCU by handing the CPU
+ * number and rcu_preempt_state to __rcu_offline_cpu().
+ */
+static void rcu_preempt_offline_cpu(int cpu)
+{
+        __rcu_offline_cpu(cpu, &rcu_preempt_state);
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+/*
+ * Look for a quiescent state on the current CPU.  If the running task
+ * is outside any RCU read-side critical section, record the quiescent
+ * state right away and drop any outstanding NEED_QS request.  If it is
+ * inside one and this CPU still has a quiescent state pending, flag the
+ * task so that its outermost rcu_read_unlock() reports it.  Tasks that
+ * blocked are tracked in the CPU's rcu_node structure, which is checked
+ * elsewhere.
+ *
+ * Caller must disable hard irqs.
+ */
+static void rcu_preempt_check_callbacks(int cpu)
+{
+        struct task_struct *tsk = current;
+        if (tsk->rcu_read_lock_nesting != 0) {
+                /* Still a reader: defer the report to the unlock path. */
+                if (per_cpu(rcu_preempt_data, cpu).qs_pending)
+                        tsk->rcu_read_unlock_special |=
+                                RCU_READ_UNLOCK_NEED_QS;
+                return;
+        }
+        /* Not a reader: this CPU is quiescent right now. */
+        tsk->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+        rcu_preempt_qs(cpu);
+}
+/*
+ * Process callbacks for preemptable RCU: invoke
+ * __rcu_process_callbacks() on rcu_preempt_state and on the
+ * rcu_preempt_data instance of the CPU this code is running on.
+ */
+static void rcu_preempt_process_callbacks(void)
+{
+        __rcu_process_callbacks(&rcu_preempt_state,
+                                &__get_cpu_var(rcu_preempt_data));
+}
+/*
+ * Queue a preemptable-RCU callback for invocation after a grace period.
+ * head is the caller-supplied rcu_head and func the function to be
+ * invoked with it; both are passed unchanged to __call_rcu() together
+ * with rcu_preempt_state.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+        __call_rcu(head, func, &rcu_preempt_state);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+/*
+ * Check to see if there is any immediate preemptable-RCU-related work
+ * to be done.  Returns whatever __rcu_pending() reports for
+ * rcu_preempt_state and the specified CPU's rcu_preempt_data.
+ */
+static int rcu_preempt_pending(int cpu)
+{
+        return __rcu_pending(&rcu_preempt_state,
+                             &per_cpu(rcu_preempt_data, cpu));
+}
+/*
+ * Does preemptable RCU need the CPU to stay out of dynticks mode?
+ * Returns 1 if the specified CPU's ->nxtlist is non-NULL, 0 otherwise.
+ * NOTE(review): ->nxtlist is presumably the CPU's list of queued
+ * callbacks; its definition is not visible in this file.
+ */
+static int rcu_preempt_needs_cpu(int cpu)
+{
+        return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
+}
+/*
+ * Initialize preemptable RCU's per-CPU data for the specified CPU by
+ * calling rcu_init_percpu_data() with rcu_preempt_state.
+ * NOTE(review): the meaning of the final argument (1) is defined by
+ * rcu_init_percpu_data(), which is not visible in this file.
+ */
+static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
+{
+        rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
+}
+/*
+ * Clean up after a task that exits while still inside a
+ * preemptable-RCU read-side critical section.  The nesting count is
+ * forced to one so that a single rcu_read_unlock() acts as the
+ * outermost unlock.  No warning is issued here because
+ * debug_check_no_locks_held() already does this if lockdep is enabled.
+ */
+void exit_rcu(void)
+{
+        struct task_struct *self = current;
+        if (self->rcu_read_lock_nesting != 0) {
+                self->rcu_read_lock_nesting = 1;
+                rcu_read_unlock();
+        }
+}
+#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+/*
+ * Tell them what RCU they are running: emit a single KERN_INFO line
+ * naming the (non-preemptable) hierarchical implementation.  This
+ * variant is compiled when CONFIG_TREE_PREEMPT_RCU is not defined.
+ */
+static inline void rcu_bootup_announce(void)
+{
+        printk(KERN_INFO "Hierarchical RCU implementation.\n");
+}
+/*
+ * Return the number of RCU batches processed thus far for debug & stats.
+ * Without CONFIG_TREE_PREEMPT_RCU this forwards to
+ * rcu_batches_completed_sched().
+ */
+long rcu_batches_completed(void)
+{
+        return rcu_batches_completed_sched();
+}
+EXPORT_SYMBOL_GPL(rcu_batches_completed);
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * CPUs being in quiescent states.  Empty stub standing in for the
+ * CONFIG_TREE_PREEMPT_RCU function of the same name; cpu is unused.
+ */
+static void rcu_preempt_note_context_switch(int cpu)
+{
+}
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+/*
+ * Because preemptable RCU does not exist, we never have to check for
+ * tasks blocked within RCU read-side critical sections.  Empty stub
+ * for the CONFIG_TREE_PREEMPT_RCU function of the same name; prints
+ * nothing and does not touch rnp.
+ */
+static void rcu_print_task_stall(struct rcu_node *rnp)
+{
+}
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+/*
+ * Because there is no preemptable RCU, there can be no readers blocked,
+ * so there is no need to check for blocked tasks.  So check only for
+ * bogus qsmask values: warn (at most once) if rnp->qsmask is non-zero.
+ */
+static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
+{
+        WARN_ON_ONCE(rnp->qsmask);
+}
+/*
+ * Because preemptable RCU does not exist, there are never any preempted
+ * RCU readers.  Always returns 0; rnp is not examined.
+ */
+static int rcu_preempted_readers(struct rcu_node *rnp)
+{
+        return 0;
+}
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Because preemptable RCU does not exist, it never needs to migrate
+ * tasks that were blocked within RCU read-side critical sections.
+ * Empty stub for the CONFIG_TREE_PREEMPT_RCU function of the same
+ * name; all three arguments are unused.
+ */
+static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
+                                      struct rcu_node *rnp,
+                                      struct rcu_data *rdp)
+{
+}
+/*
+ * Because preemptable RCU does not exist, it never needs CPU-offline
+ * processing.  Empty stub; cpu is unused.
+ */
+static void rcu_preempt_offline_cpu(int cpu)
+{
+}
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+/*
+ * Because preemptable RCU does not exist, it never has any callbacks
+ * to check.  Empty stub; cpu is unused.
+ *
+ * Declared static, like the CONFIG_TREE_PREEMPT_RCU variant of this
+ * function, so that the symbol has internal linkage in both
+ * configurations and no global symbol is emitted for an empty stub.
+ */
+static void rcu_preempt_check_callbacks(int cpu)
+{
+}
+/*
+ * Because preemptable RCU does not exist, it never has any callbacks
+ * to process.  Empty stub.
+ *
+ * Declared static, like the CONFIG_TREE_PREEMPT_RCU variant of this
+ * function, so that the symbol has internal linkage in both
+ * configurations and no global symbol is emitted for an empty stub.
+ */
+static void rcu_preempt_process_callbacks(void)
+{
+}
+/*
+ * In classic RCU, call_rcu() is just call_rcu_sched(): head and func
+ * are passed through unchanged.
+ */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+        call_rcu_sched(head, func);
+}
+EXPORT_SYMBOL_GPL(call_rcu);
+/*
+ * Because preemptable RCU does not exist, it never has any work to do.
+ * Always returns 0; cpu is unused.
+ */
+static int rcu_preempt_pending(int cpu)
+{
+        return 0;
+}
+/*
+ * Because preemptable RCU does not exist, it never needs any CPU.
+ * Always returns 0; cpu is unused.
+ */
+static int rcu_preempt_needs_cpu(int cpu)
+{
+        return 0;
+}
+/*
+ * Because preemptable RCU does not exist, there is no per-CPU
+ * data to initialize.  Empty stub; cpu is unused.
+ */
+static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
+{
+}
+#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */

diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h new file mode 100644 index 000000000000..1cee04f627eb --- /dev/null +++ b/kernel/rcutree_plugin.h
@@ -0,0 +1,566 @@
	1	/*
	2	* Read-Copy Update mechanism for mutual exclusion (tree-based version)
	3	* Internal non-public definitions that provide either classic
	4	* or preemptable semantics.
	5	*
	6	* This program is free software; you can redistribute it and/or modify
	7	* it under the terms of the GNU General Public License as published by
	8	* the Free Software Foundation; either version 2 of the License, or
	9	* (at your option) any later version.
	10	*
	11	* This program is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	14	* GNU General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU General Public License
	17	* along with this program; if not, write to the Free Software
	18	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	19	*
	20	* Copyright Red Hat, 2009
	21	* Copyright IBM Corporation, 2009
	22	*
	23	* Author: Ingo Molnar <mingo@elte.hu>
	24	* Paul E. McKenney <paulmck@linux.vnet.ibm.com>
	25	*/
	26
	27
	28	#ifdef CONFIG_TREE_PREEMPT_RCU
	29
	30	struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state);
	31	DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
	32
	33	/*
	34	* Tell them what RCU they are running.
	35	*/
	36	static inline void rcu_bootup_announce(void)
	37	{
	38	printk(KERN_INFO
	39	"Experimental preemptable hierarchical RCU implementation.\n");
	40	}
	41
	42	/*
	43	* Return the number of RCU-preempt batches processed thus far
	44	* for debug and statistics.
	45	*/
	46	long rcu_batches_completed_preempt(void)
	47	{
	48	return rcu_preempt_state.completed;
	49	}
	50	EXPORT_SYMBOL_GPL(rcu_batches_completed_preempt);
	51
	52	/*
	53	* Return the number of RCU batches processed thus far for debug & stats.
	54	*/
	55	long rcu_batches_completed(void)
	56	{
	57	return rcu_batches_completed_preempt();
	58	}
	59	EXPORT_SYMBOL_GPL(rcu_batches_completed);
	60
	61	/*
	62	* Record a preemptable-RCU quiescent state for the specified CPU. Note
	63	* that this just means that the task currently running on the CPU is
	64	* not in a quiescent state. There might be any number of tasks blocked
	65	* while in an RCU read-side critical section.
	66	*/
	67	static void rcu_preempt_qs(int cpu)
	68	{
	69	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
	70	rdp->passed_quiesc_completed = rdp->completed;
	71	barrier();
	72	rdp->passed_quiesc = 1;
	73	}
	74
	75	/*
	76	* We have entered the scheduler, and the current task might soon be
	77	* context-switched away from. If this task is in an RCU read-side
	78	* critical section, we will no longer be able to rely on the CPU to
	79	* record that fact, so we enqueue the task on the appropriate entry
	80	* of the blocked_tasks[] array. The task will dequeue itself when
	81	* it exits the outermost enclosing RCU read-side critical section.
	82	* Therefore, the current grace period cannot be permitted to complete
	83	* until the blocked_tasks[] entry indexed by the low-order bit of
	84	* rnp->gpnum empties.
	85	*
	86	* Caller must disable preemption.
	87	*/
	88	static void rcu_preempt_note_context_switch(int cpu)
	89	{
	90	struct task_struct *t = current;
	91	unsigned long flags;
	92	int phase;
	93	struct rcu_data *rdp;
	94	struct rcu_node *rnp;
	95
	96	if (t->rcu_read_lock_nesting &&
	97	(t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) {
	98
	99	/* Possibly blocking in an RCU read-side critical section. */
	100	rdp = rcu_preempt_state.rda[cpu];
	101	rnp = rdp->mynode;
	102	spin_lock_irqsave(&rnp->lock, flags);
	103	t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED;
	104	t->rcu_blocked_node = rnp;
	105
	106	/*
	107	* If this CPU has already checked in, then this task
	108	* will hold up the next grace period rather than the
	109	* current grace period. Queue the task accordingly.
	110	* If the task is queued for the current grace period
	111	* (i.e., this CPU has not yet passed through a quiescent
	112	* state for the current grace period), then as long
	113	* as that task remains queued, the current grace period
	114	* cannot end.
	115	*
	116	* But first, note that the current CPU must still be
	117	* on line!
	118	*/
	119	WARN_ON_ONCE((rdp->grpmask & rnp->qsmaskinit) == 0);
	120	WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
	121	phase = (rnp->gpnum + !(rnp->qsmask & rdp->grpmask)) & 0x1;
	122	list_add(&t->rcu_node_entry, &rnp->blocked_tasks[phase]);
	123	spin_unlock_irqrestore(&rnp->lock, flags);
	124	}
	125
	126	/*
	127	* Either we were not in an RCU read-side critical section to
	128	* begin with, or we have now recorded that critical section
	129	* globally. Either way, we can now note a quiescent state
	130	* for this CPU. Again, if we were in an RCU read-side critical
	131	* section, and if that critical section was blocking the current
	132	* grace period, then the fact that the task has been enqueued
	133	* means that we continue to block the current grace period.
	134	*/
	135	rcu_preempt_qs(cpu);
	136	local_irq_save(flags);
	137	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
	138	local_irq_restore(flags);
	139	}
	140
	141	/*
	142	* Tree-preemptable RCU implementation for rcu_read_lock().
	143	* Just increment ->rcu_read_lock_nesting, shared state will be updated
	144	* if we block.
	145	*/
	146	void __rcu_read_lock(void)
	147	{
	148	ACCESS_ONCE(current->rcu_read_lock_nesting)++;
	149	barrier(); /* needed if we ever invoke rcu_read_lock in rcutree.c */
	150	}
	151	EXPORT_SYMBOL_GPL(__rcu_read_lock);
	152
	153	static void rcu_read_unlock_special(struct task_struct *t)
	154	{
	155	int empty;
	156	unsigned long flags;
	157	unsigned long mask;
	158	struct rcu_node *rnp;
	159	int special;
	160
	161	/* NMI handlers cannot block and cannot safely manipulate state. */
	162	if (in_nmi())
	163	return;
	164
	165	local_irq_save(flags);
	166
	167	/*
	168	* If RCU core is waiting for this CPU to exit critical section,
	169	* let it know that we have done so.
	170	*/
	171	special = t->rcu_read_unlock_special;
	172	if (special & RCU_READ_UNLOCK_NEED_QS) {
	173	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
	174	rcu_preempt_qs(smp_processor_id());
	175	}
	176
	177	/* Hardware IRQ handlers cannot block. */
	178	if (in_irq()) {
	179	local_irq_restore(flags);
	180	return;
	181	}
	182
	183	/* Clean up if blocked during RCU read-side critical section. */
	184	if (special & RCU_READ_UNLOCK_BLOCKED) {
	185	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED;
	186
	187	/*
	188	* Remove this task from the list it blocked on. The
	189	* task can migrate while we acquire the lock, but at
	190	* most one time. So at most two passes through loop.
	191	*/
	192	for (;;) {
	193	rnp = t->rcu_blocked_node;
	194	spin_lock(&rnp->lock); /* irqs already disabled. */
	195	if (rnp == t->rcu_blocked_node)
	196	break;
	197	spin_unlock(&rnp->lock); /* irqs remain disabled. */
	198	}
	199	empty = list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
	200	list_del_init(&t->rcu_node_entry);
	201	t->rcu_blocked_node = NULL;
	202
	203	/*
	204	* If this was the last task on the current list, and if
	205	* we aren't waiting on any CPUs, report the quiescent state.
	206	* Note that both cpu_quiet_msk_finish() and cpu_quiet_msk()
	207	* drop rnp->lock and restore irq.
	208	*/
	209	if (!empty && rnp->qsmask == 0 &&
	210	list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1])) {
	211	struct rcu_node *rnp_p;
	212
	213	if (rnp->parent == NULL) {
	214	/* Only one rcu_node in the tree. */
	215	cpu_quiet_msk_finish(&rcu_preempt_state, flags);
	216	return;
	217	}
	218	/* Report up the rest of the hierarchy. */
	219	mask = rnp->grpmask;
	220	spin_unlock_irqrestore(&rnp->lock, flags);
	221	rnp_p = rnp->parent;
	222	spin_lock_irqsave(&rnp_p->lock, flags);
	223	WARN_ON_ONCE(rnp->qsmask);
	224	cpu_quiet_msk(mask, &rcu_preempt_state, rnp_p, flags);
	225	return;
	226	}
	227	spin_unlock(&rnp->lock);
	228	}
	229	local_irq_restore(flags);
	230	}
	231
	232	/*
	233	* Tree-preemptable RCU implementation for rcu_read_unlock().
	234	* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
	235	* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
	236	* invoke rcu_read_unlock_special() to clean up after a context switch
	237	* in an RCU read-side critical section and other special cases.
	238	*/
	239	void __rcu_read_unlock(void)
	240	{
	241	struct task_struct *t = current;
	242
	243	barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
	244	if (--ACCESS_ONCE(t->rcu_read_lock_nesting) == 0 &&
	245	unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
	246	rcu_read_unlock_special(t);
	247	}
	248	EXPORT_SYMBOL_GPL(__rcu_read_unlock);
	249
	250	#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
	251
	252	/*
	253	* Scan the current list of tasks blocked within RCU read-side critical
	254	* sections, printing out the tid of each.
	255	*/
	256	static void rcu_print_task_stall(struct rcu_node *rnp)
	257	{
	258	unsigned long flags;
	259	struct list_head *lp;
	260	int phase = rnp->gpnum & 0x1;
	261	struct task_struct *t;
	262
	263	if (!list_empty(&rnp->blocked_tasks[phase])) {
	264	spin_lock_irqsave(&rnp->lock, flags);
	265	phase = rnp->gpnum & 0x1; /* re-read under lock. */
	266	lp = &rnp->blocked_tasks[phase];
	267	list_for_each_entry(t, lp, rcu_node_entry)
	268	printk(" P%d", t->pid);
	269	spin_unlock_irqrestore(&rnp->lock, flags);
	270	}
	271	}
	272
	273	#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
	274
	275	/*
	276	* Check that the list of blocked tasks for the newly completed grace
	277	* period is in fact empty. It is a serious bug to complete a grace
	278	* period that still has RCU readers blocked! This function must be
	279	* invoked -before- updating this rnp's ->gpnum, and the rnp's ->lock
	280	* must be held by the caller.
	281	*/
	282	static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
	283	{
	284	WARN_ON_ONCE(!list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]));
	285	WARN_ON_ONCE(rnp->qsmask);
	286	}
	287
	288	/*
	289	* Check for preempted RCU readers for the specified rcu_node structure.
	290	* If the caller needs a reliable answer, it must hold the rcu_node's
	291	* ->lock.
	292	*/
	293	static int rcu_preempted_readers(struct rcu_node *rnp)
	294	{
	295	return !list_empty(&rnp->blocked_tasks[rnp->gpnum & 0x1]);
	296	}
	297
	298	#ifdef CONFIG_HOTPLUG_CPU
	299
	300	/*
	301	* Handle tasklist migration for case in which all CPUs covered by the
	302	* specified rcu_node have gone offline. Move them up to the root
	303	* rcu_node. The reason for not just moving them to the immediate
	304	* parent is to remove the need for rcu_read_unlock_special() to
	305	* make more than two attempts to acquire the target rcu_node's lock.
	306	*
	307	* The caller must hold rnp->lock with irqs disabled.
	308	*/
	309	static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
	310	struct rcu_node *rnp,
	311	struct rcu_data *rdp)
	312	{
	313	int i;
	314	struct list_head *lp;
	315	struct list_head *lp_root;
	316	struct rcu_node *rnp_root = rcu_get_root(rsp);
	317	struct task_struct *tp;
	318
	319	if (rnp == rnp_root) {
	320	WARN_ONCE(1, "Last CPU thought to be offlined?");
	321	return; /* Shouldn't happen: at least one CPU online. */
	322	}
	323	WARN_ON_ONCE(rnp != rdp->mynode &&
	324	(!list_empty(&rnp->blocked_tasks[0]) ||
	325	!list_empty(&rnp->blocked_tasks[1])));
	326
	327	/*
	328	* Move tasks up to root rcu_node. Rely on the fact that the
	329	* root rcu_node can be at most one ahead of the rest of the
	330	* rcu_nodes in terms of gp_num value. This fact allows us to
	331	* move the blocked_tasks[] array directly, element by element.
	332	*/
	333	for (i = 0; i < 2; i++) {
	334	lp = &rnp->blocked_tasks[i];
	335	lp_root = &rnp_root->blocked_tasks[i];
	336	while (!list_empty(lp)) {
	337	tp = list_entry(lp->next, typeof(*tp), rcu_node_entry);
	338	spin_lock(&rnp_root->lock); /* irqs already disabled */
	339	list_del(&tp->rcu_node_entry);
	340	tp->rcu_blocked_node = rnp_root;
	341	list_add(&tp->rcu_node_entry, lp_root);
	342	spin_unlock(&rnp_root->lock); /* irqs remain disabled */
	343	}
	344	}
	345	}
	346
	347	/*
	348	* Do CPU-offline processing for preemptable RCU.
	349	*/
	350	static void rcu_preempt_offline_cpu(int cpu)
	351	{
	352	__rcu_offline_cpu(cpu, &rcu_preempt_state);
	353	}
	354
	355	#endif /* #ifdef CONFIG_HOTPLUG_CPU */
	356
	357	/*
	358	* Check for a quiescent state from the current CPU. When a task blocks,
	359	* the task is recorded in the corresponding CPU's rcu_node structure,
	360	* which is checked elsewhere.
	361	*
	362	* Caller must disable hard irqs.
	363	*/
	364	static void rcu_preempt_check_callbacks(int cpu)
	365	{
	366	struct task_struct *t = current;
	367
	368	if (t->rcu_read_lock_nesting == 0) {
	369	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
	370	rcu_preempt_qs(cpu);
	371	return;
	372	}
	373	if (per_cpu(rcu_preempt_data, cpu).qs_pending)
	374	t->rcu_read_unlock_special \|= RCU_READ_UNLOCK_NEED_QS;
	375	}
	376
	377	/*
	378	* Process callbacks for preemptable RCU.
	379	*/
	380	static void rcu_preempt_process_callbacks(void)
	381	{
	382	__rcu_process_callbacks(&rcu_preempt_state,
	383	&__get_cpu_var(rcu_preempt_data));
	384	}
	385
	386	/*
	387	* Queue a preemptable-RCU callback for invocation after a grace period.
	388	*/
	389	void call_rcu(struct rcu_head head, void (func)(struct rcu_head *rcu))
	390	{
	391	__call_rcu(head, func, &rcu_preempt_state);
	392	}
	393	EXPORT_SYMBOL_GPL(call_rcu);
	394
	395	/*
	396	* Check to see if there is any immediate preemptable-RCU-related work
	397	* to be done.
	398	*/
	399	static int rcu_preempt_pending(int cpu)
	400	{
	401	return __rcu_pending(&rcu_preempt_state,
	402	&per_cpu(rcu_preempt_data, cpu));
	403	}
	404
	405	/*
	406	* Does preemptable RCU need the CPU to stay out of dynticks mode?
	407	*/
	408	static int rcu_preempt_needs_cpu(int cpu)
	409	{
	410	return !!per_cpu(rcu_preempt_data, cpu).nxtlist;
	411	}
	412
	413	/*
	414	* Initialize preemptable RCU's per-CPU data.
	415	*/
	416	static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
	417	{
	418	rcu_init_percpu_data(cpu, &rcu_preempt_state, 1);
	419	}
	420
	421	/*
	422	* Check for a task exiting while in a preemptable-RCU read-side
	423	* critical section, clean up if so. No need to issue warnings,
	424	* as debug_check_no_locks_held() already does this if lockdep
	425	* is enabled.
	426	*/
	427	void exit_rcu(void)
	428	{
	429	struct task_struct *t = current;
	430
	431	if (t->rcu_read_lock_nesting == 0)
	432	return;
	433	t->rcu_read_lock_nesting = 1;
	434	rcu_read_unlock();
	435	}
	436
	437	#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
	438
	439	/*
	440	* Tell them what RCU they are running.
	441	*/
	442	static inline void rcu_bootup_announce(void)
	443	{
	444	printk(KERN_INFO "Hierarchical RCU implementation.\n");
	445	}
	446
	/*
	 * Number of RCU batches processed so far, for debug and statistics.
	 * In this configuration the value is whatever
	 * rcu_batches_completed_sched() reports.
	 */
	long rcu_batches_completed(void)
	{
		long batches = rcu_batches_completed_sched();

		return batches;
	}
	EXPORT_SYMBOL_GPL(rcu_batches_completed);
	455
	/*
	 * Stub: without preemptable RCU there are no quiescent states to
	 * check for at context-switch time, so @cpu is ignored.
	 */
	static void rcu_preempt_note_context_switch(int cpu)
	{
		/* Nothing to do. */
	}
	463
	464	#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
	465
	/*
	 * Stub: without preemptable RCU no task can be blocked within an
	 * RCU read-side critical section, so there is nothing to print
	 * and @rnp is ignored.
	 */
	static void rcu_print_task_stall(struct rcu_node *rnp)
	{
		/* Nothing to do. */
	}
	473
	474	#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
	475
	476	/*
	477	* Because there is no preemptable RCU, there can be no readers blocked,
	478	* so there is no need to check for blocked tasks. So check only for
	479	* bogus qsmask values.
	480	*/
	481	static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
	482	{
	483	WARN_ON_ONCE(rnp->qsmask);
	484	}
	485
	/*
	 * Stub: preemptable RCU is not configured, so no reader can have
	 * been preempted.  Always returns 0; @rnp is ignored.
	 */
	static int rcu_preempted_readers(struct rcu_node *rnp)
	{
		/* Never any preempted readers. */
		return 0;
	}
	494
	495	#ifdef CONFIG_HOTPLUG_CPU
	496
	/*
	 * Stub: without preemptable RCU there are no tasks blocked within
	 * RCU read-side critical sections to migrate, so every argument is
	 * ignored.
	 */
	static void rcu_preempt_offline_tasks(struct rcu_state *rsp,
					      struct rcu_node *rnp,
					      struct rcu_data *rdp)
	{
		/* Nothing to do. */
	}
	506
	/*
	 * Stub: without preemptable RCU no CPU-offline processing is
	 * required, so @cpu is ignored.
	 */
	static void rcu_preempt_offline_cpu(int cpu)
	{
		/* Nothing to do. */
	}
	514
	515	#endif /* #ifdef CONFIG_HOTPLUG_CPU */
	516
	/*
	 * Because preemptable RCU does not exist, it never has any callbacks
	 * to check, so @cpu is ignored.  Declared static to match the linkage
	 * of the CONFIG_TREE_PREEMPT_RCU variant of this function.
	 */
	static void rcu_preempt_check_callbacks(int cpu)
	{
	}
	524
	/*
	 * Because preemptable RCU does not exist, it never has any callbacks
	 * to process.  Declared static to match the linkage of the
	 * CONFIG_TREE_PREEMPT_RCU variant of this function.
	 */
	static void rcu_preempt_process_callbacks(void)
	{
	}
	532
	/*
	 * In classic RCU, call_rcu() is just call_rcu_sched().
	 *
	 * @head: pointer to the rcu_head that is handed on to call_rcu_sched().
	 * @func: callback function, taking a struct rcu_head pointer, that is
	 *        handed on to call_rcu_sched() together with @head.
	 */
	void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
	{
		call_rcu_sched(head, func);
	}
	EXPORT_SYMBOL_GPL(call_rcu);
	541
	/*
	 * Stub: preemptable RCU is not configured, so it never has pending
	 * work.  Always returns 0; @cpu is ignored.
	 */
	static int rcu_preempt_pending(int cpu)
	{
		/* Never any work to do. */
		return 0;
	}
	549
	/*
	 * Stub: preemptable RCU is not configured, so it never needs any
	 * CPU.  Always returns 0; @cpu is ignored.
	 */
	static int rcu_preempt_needs_cpu(int cpu)
	{
		/* Never needs the CPU. */
		return 0;
	}
	557
	558	/*
	559	* Because preemptable RCU does not exist, there is no per-CPU
	560	* data to initialize.
	561	*/
	562	static void __cpuinit rcu_preempt_init_percpu_data(int cpu)
	563	{
	564	}
	565
	566	#endif /* #else #ifdef CONFIG_TREE_PREEMPT_RCU */