Diffstat (limited to 'kernel/rcu/update.c')

 -rw-r--r--  kernel/rcu/update.c | 345
 1 file changed, 344 insertions(+), 1 deletion(-)

diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
 #include <linux/hardirq.h>
 #include <linux/delay.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/tick.h>
 
 #define CREATE_TRACE_POINTS
 
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void) | |||
91 | barrier(); /* critical section before exit code. */ | 93 | barrier(); /* critical section before exit code. */ |
92 | t->rcu_read_lock_nesting = INT_MIN; | 94 | t->rcu_read_lock_nesting = INT_MIN; |
93 | barrier(); /* assign before ->rcu_read_unlock_special load */ | 95 | barrier(); /* assign before ->rcu_read_unlock_special load */ |
94 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) | 96 | if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) |
95 | rcu_read_unlock_special(t); | 97 | rcu_read_unlock_special(t); |
96 | barrier(); /* ->rcu_read_unlock_special load before assign */ | 98 | barrier(); /* ->rcu_read_unlock_special load before assign */ |
97 | t->rcu_read_lock_nesting = 0; | 99 | t->rcu_read_lock_nesting = 0; |
@@ -137,6 +139,38 @@ int notrace debug_lockdep_rcu_enabled(void)
 EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
 
 /**
+ * rcu_read_lock_held() - might we be in RCU read-side critical section?
+ *
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section.  In the absence of CONFIG_DEBUG_LOCK_ALLOC,
+ * this assumes we are in an RCU read-side critical section unless it can
+ * prove otherwise.  This is useful for debug checks in functions that
+ * require that they be called within an RCU read-side critical section.
+ *
+ * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
+ *
+ * Note that rcu_read_lock() and the matching rcu_read_unlock() must
+ * occur in the same context; for example, it is illegal to invoke
+ * rcu_read_unlock() in process context if the matching rcu_read_lock()
+ * was invoked from within an irq handler.
+ *
+ * Note that rcu_read_lock() is disallowed if the CPU is either idle or
+ * offline from an RCU perspective, so check for those as well.
+ */
+int rcu_read_lock_held(void)
+{
+	if (!debug_lockdep_rcu_enabled())
+		return 1;
+	if (!rcu_is_watching())
+		return 0;
+	if (!rcu_lockdep_current_cpu_online())
+		return 0;
+	return lock_is_held(&rcu_lock_map);
+}
+EXPORT_SYMBOL_GPL(rcu_read_lock_held);
+
+/**
  * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
  *
  * Check for bottom half being disabled, which covers both the
@@ -347,3 +381,312 @@ static int __init check_cpu_stall_init(void)
 early_initcall(check_cpu_stall_init);
 
 #endif /* #ifdef CONFIG_RCU_STALL_COMMON */
+
+#ifdef CONFIG_TASKS_RCU
+
+/*
+ * Simple variant of RCU whose quiescent states are voluntary context switch,
+ * user-space execution, and idle.  As such, grace periods can take one good
+ * long time.  There are no read-side primitives similar to rcu_read_lock()
+ * and rcu_read_unlock() because this implementation is intended to get
+ * the system into a safe state for some of the manipulations involved in
+ * tracing and the like.  Finally, this implementation does not support
+ * high call_rcu_tasks() rates from multiple CPUs.  If this is required,
+ * per-CPU callback lists will be needed.
+ */
+
+/* Global list of callbacks and associated lock. */
+static struct rcu_head *rcu_tasks_cbs_head;
+static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
+static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
+
+/* Track exiting tasks in order to allow them to be waited for. */
+DEFINE_SRCU(tasks_rcu_exit_srcu);
+
+/* Control stall timeouts.  Disable with <= 0, otherwise jiffies till stall. */
+static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
+module_param(rcu_task_stall_timeout, int, 0644);
+
+static void rcu_spawn_tasks_kthread(void);
+
+/*
+ * Post an RCU-tasks callback.  First call must be from process context
+ * after the scheduler is fully operational.
+ */
+void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
+{
+	unsigned long flags;
+	bool needwake;
+
+	rhp->next = NULL;
+	rhp->func = func;
+	raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+	needwake = !rcu_tasks_cbs_head;
+	*rcu_tasks_cbs_tail = rhp;
+	rcu_tasks_cbs_tail = &rhp->next;
+	raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+	if (needwake) {
+		rcu_spawn_tasks_kthread();
+		wake_up(&rcu_tasks_cbs_wq);
+	}
+}
+EXPORT_SYMBOL_GPL(call_rcu_tasks);
+
+/**
+ * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
+ *
+ * Control will return to the caller some time after a full rcu-tasks
+ * grace period has elapsed, in other words after all currently
+ * executing rcu-tasks read-side critical sections have completed.  These
+ * read-side critical sections are delimited by calls to schedule(),
+ * cond_resched_rcu_qs(), idle execution, userspace execution, calls
+ * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
+ *
+ * This is a very specialized primitive, intended only for a few uses in
+ * tracing and other situations requiring manipulation of function
+ * preambles and profiling hooks.  The synchronize_rcu_tasks() function
+ * is not (yet) intended for heavy use from multiple CPUs.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-tasks read-side critical section whose beginning
+ * preceded the call to synchronize_rcu_tasks().  In addition, each CPU
+ * having an RCU-tasks read-side critical section that extends beyond
+ * the return from synchronize_rcu_tasks() is guaranteed to have executed
+ * a full memory barrier after the beginning of synchronize_rcu_tasks()
+ * and before the beginning of that RCU-tasks read-side critical section.
+ * Note that these guarantees include CPUs that are offline, idle, or
+ * executing in user mode, as well as CPUs that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
+ * (but again only if the system has more than one CPU).
+ */
+void synchronize_rcu_tasks(void)
+{
+	/* Complain if the scheduler has not started. */
+	rcu_lockdep_assert(rcu_scheduler_active,
+			   "synchronize_rcu_tasks called too soon");
+
+	/* Wait for the grace period. */
+	wait_rcu_gp(call_rcu_tasks);
+}
+EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
+
+/**
+ * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
+ *
+ * Although the current implementation is guaranteed to wait, it is not
+ * obligated to do so, for example, if there are no pending callbacks.
+ */
+void rcu_barrier_tasks(void)
+{
+	/* There is only one callback queue, so this is easy.  ;-) */
+	synchronize_rcu_tasks();
+}
+EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
+
+/* See if tasks are still holding out, complain if so. */
+static void check_holdout_task(struct task_struct *t,
+			       bool needreport, bool *firstreport)
+{
+	int cpu;
+
+	if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
+	    t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
+	    !ACCESS_ONCE(t->on_rq) ||
+	    (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
+	     !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
+		ACCESS_ONCE(t->rcu_tasks_holdout) = false;
+		list_del_init(&t->rcu_tasks_holdout_list);
+		put_task_struct(t);
+		return;
+	}
+	if (!needreport)
+		return;
+	if (*firstreport) {
+		pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
+		*firstreport = false;
+	}
+	cpu = task_cpu(t);
+	pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
+		 t, ".I"[is_idle_task(t)],
+		 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
+		 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
+		 t->rcu_tasks_idle_cpu, cpu);
+	sched_show_task(t);
+}
+
+/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
+static int __noreturn rcu_tasks_kthread(void *arg)
+{
+	unsigned long flags;
+	struct task_struct *g, *t;
+	unsigned long lastreport;
+	struct rcu_head *list;
+	struct rcu_head *next;
+	LIST_HEAD(rcu_tasks_holdouts);
+
+	/* FIXME: Add housekeeping affinity. */
+
+	/*
+	 * Each pass through the following loop makes one check for
+	 * newly arrived callbacks, and, if there are some, waits for
+	 * one RCU-tasks grace period and then invokes the callbacks.
+	 * This loop is terminated by the system going down.  ;-)
+	 */
+	for (;;) {
+
+		/* Pick up any new callbacks. */
+		raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
+		list = rcu_tasks_cbs_head;
+		rcu_tasks_cbs_head = NULL;
+		rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
+		raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
+
+		/* If there were none, wait a bit and start over. */
+		if (!list) {
+			wait_event_interruptible(rcu_tasks_cbs_wq,
+						 rcu_tasks_cbs_head);
+			if (!rcu_tasks_cbs_head) {
+				WARN_ON(signal_pending(current));
+				schedule_timeout_interruptible(HZ/10);
+			}
+			continue;
+		}
+
+		/*
+		 * Wait for all pre-existing t->on_rq and t->nvcsw
+		 * transitions to complete.  Invoking synchronize_sched()
+		 * suffices because all these transitions occur with
+		 * interrupts disabled.  Without this synchronize_sched(),
+		 * a read-side critical section that started before the
+		 * grace period might be incorrectly seen as having started
+		 * after the grace period.
+		 *
+		 * This synchronize_sched() also dispenses with the
+		 * need for a memory barrier on the first store to
+		 * ->rcu_tasks_holdout, as it forces the store to happen
+		 * after the beginning of the grace period.
+		 */
+		synchronize_sched();
+
+		/*
+		 * There were callbacks, so we need to wait for an
+		 * RCU-tasks grace period.  Start off by scanning
+		 * the task list for tasks that are not already
+		 * voluntarily blocked.  Mark these tasks and make
+		 * a list of them in rcu_tasks_holdouts.
+		 */
+		rcu_read_lock();
+		for_each_process_thread(g, t) {
+			if (t != current && ACCESS_ONCE(t->on_rq) &&
+			    !is_idle_task(t)) {
+				get_task_struct(t);
+				t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
+				ACCESS_ONCE(t->rcu_tasks_holdout) = true;
+				list_add(&t->rcu_tasks_holdout_list,
+					 &rcu_tasks_holdouts);
+			}
+		}
+		rcu_read_unlock();
+
+		/*
+		 * Wait for tasks that are in the process of exiting.
+		 * This does only part of the job, ensuring that all
+		 * tasks that were previously exiting reach the point
+		 * where they have disabled preemption, allowing the
+		 * later synchronize_sched() to finish the job.
+		 */
+		synchronize_srcu(&tasks_rcu_exit_srcu);
+
+		/*
+		 * Each pass through the following loop scans the list
+		 * of holdout tasks, removing any that are no longer
+		 * holdouts.  When the list is empty, we are done.
+		 */
+		lastreport = jiffies;
+		while (!list_empty(&rcu_tasks_holdouts)) {
+			bool firstreport;
+			bool needreport;
+			int rtst;
+			struct task_struct *t1;
+
+			schedule_timeout_interruptible(HZ);
+			rtst = ACCESS_ONCE(rcu_task_stall_timeout);
+			needreport = rtst > 0 &&
+				     time_after(jiffies, lastreport + rtst);
+			if (needreport)
+				lastreport = jiffies;
+			firstreport = true;
+			WARN_ON(signal_pending(current));
+			list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
+						 rcu_tasks_holdout_list) {
+				check_holdout_task(t, needreport, &firstreport);
+				cond_resched();
+			}
+		}
+
+		/*
+		 * Because ->on_rq and ->nvcsw are not guaranteed
+		 * to have full memory barriers prior to them in the
+		 * schedule() path, memory reordering on other CPUs could
+		 * cause their RCU-tasks read-side critical sections to
+		 * extend past the end of the grace period.  However,
+		 * because these ->nvcsw updates are carried out with
+		 * interrupts disabled, we can use synchronize_sched()
+		 * to force the needed ordering on all such CPUs.
+		 *
+		 * This synchronize_sched() also confines all
+		 * ->rcu_tasks_holdout accesses to be within the grace
+		 * period, avoiding the need for memory barriers for
+		 * ->rcu_tasks_holdout accesses.
+		 *
+		 * In addition, this synchronize_sched() waits for exiting
+		 * tasks to complete their final preempt_disable() region
+		 * of execution, cleaning up after the synchronize_srcu()
+		 * above.
+		 */
+		synchronize_sched();
+
+		/* Invoke the callbacks. */
+		while (list) {
+			next = list->next;
+			local_bh_disable();
+			list->func(list);
+			local_bh_enable();
+			list = next;
+			cond_resched();
+		}
+		schedule_timeout_uninterruptible(HZ/10);
+	}
+}
+
+/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
+static void rcu_spawn_tasks_kthread(void)
+{
+	static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
+	static struct task_struct *rcu_tasks_kthread_ptr;
+	struct task_struct *t;
+
+	if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
+		smp_mb(); /* Ensure caller sees full kthread. */
+		return;
+	}
+	mutex_lock(&rcu_tasks_kthread_mutex);
+	if (rcu_tasks_kthread_ptr) {
+		mutex_unlock(&rcu_tasks_kthread_mutex);
+		return;
+	}
+	t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
+	BUG_ON(IS_ERR(t));
+	smp_mb(); /* Ensure others see full kthread. */
+	ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
+	mutex_unlock(&rcu_tasks_kthread_mutex);
+}
+
+#endif /* #ifdef CONFIG_TASKS_RCU */
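
For context, not part of the patch: the kernel-doc for rcu_read_lock_held() describes its use in debug checks. A minimal sketch follows, showing the common pattern of passing rcu_read_lock_held() as the lockdep condition to rcu_dereference_check(); the my_cfg/my_cfg_lock names are hypothetical and only for illustration.

	#include <linux/spinlock.h>
	#include <linux/rcupdate.h>
	#include <linux/lockdep.h>

	struct my_cfg {
		int val;
	};

	/* Hypothetical RCU-protected pointer and the lock that updaters hold. */
	static DEFINE_SPINLOCK(my_cfg_lock);
	static struct my_cfg __rcu *my_cfg;

	/*
	 * Accessor usable by both readers and updaters: lockdep splats unless
	 * the caller is in an RCU read-side critical section (rcu_read_lock_held())
	 * or holds my_cfg_lock.
	 */
	static struct my_cfg *my_cfg_access(void)
	{
		return rcu_dereference_check(my_cfg,
					     rcu_read_lock_held() ||
					     lockdep_is_held(&my_cfg_lock));
	}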
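
Also for context, not part of the patch: the comments above describe call_rcu_tasks() and synchronize_rcu_tasks() as tools for retiring code (for example, a tracing trampoline) that some task might still be executing. Below is a minimal sketch of such a user under assumed names; struct my_tramp and the my_tramp_* helpers are hypothetical, and only call_rcu_tasks() and synchronize_rcu_tasks() come from this patch.

	#include <linux/slab.h>
	#include <linux/rcupdate.h>

	/* Hypothetical per-call-site trampoline descriptor with an embedded rcu_head. */
	struct my_tramp {
		struct rcu_head rh;
		/* ... pointer to the executable stub, usage counts, etc. ... */
	};

	/* Invoked once no task can still be running in the trampoline. */
	static void my_tramp_free_cb(struct rcu_head *rhp)
	{
		struct my_tramp *tp = container_of(rhp, struct my_tramp, rh);

		/* A real user would also release the executable stub here. */
		kfree(tp);
	}

	/* Asynchronous retirement: queue the descriptor for a tasks-RCU grace period. */
	static void my_tramp_retire(struct my_tramp *tp)
	{
		call_rcu_tasks(&tp->rh, my_tramp_free_cb);
	}

	/* Synchronous slow path: wait out the grace period, then free directly. */
	static void my_tramp_retire_sync(struct my_tramp *tp)
	{
		synchronize_rcu_tasks();
		kfree(tp);
	}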