Diffstat (limited to 'kernel')
-rw-r--r--   kernel/cpu.c                    16
-rw-r--r--   kernel/exit.c                    3
-rw-r--r--   kernel/locking/locktorture.c   529
-rw-r--r--   kernel/rcu/rcutorture.c        278
-rw-r--r--   kernel/rcu/tiny.c               20
-rw-r--r--   kernel/rcu/tree.c              115
-rw-r--r--   kernel/rcu/tree.h               18
-rw-r--r--   kernel/rcu/tree_plugin.h       404
-rw-r--r--   kernel/rcu/update.c            345
-rw-r--r--   kernel/softirq.c                 2
-rw-r--r--   kernel/sysctl.c                  9
-rw-r--r--   kernel/torture.c                32
-rw-r--r--   kernel/workqueue.c               5
13 files changed, 1397 insertions, 379 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 81e2a388a0f6..356450f09c1f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -79,6 +79,8 @@ static struct {
 
 /* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
 #define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
+#define cpuhp_lock_acquire_tryread() \
+			lock_map_acquire_tryread(&cpu_hotplug.dep_map)
 #define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
 #define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
 
@@ -91,10 +93,22 @@ void get_online_cpus(void)
 	mutex_lock(&cpu_hotplug.lock);
 	cpu_hotplug.refcount++;
 	mutex_unlock(&cpu_hotplug.lock);
-
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
+bool try_get_online_cpus(void)
+{
+	if (cpu_hotplug.active_writer == current)
+		return true;
+	if (!mutex_trylock(&cpu_hotplug.lock))
+		return false;
+	cpuhp_lock_acquire_tryread();
+	cpu_hotplug.refcount++;
+	mutex_unlock(&cpu_hotplug.lock);
+	return true;
+}
+EXPORT_SYMBOL_GPL(try_get_online_cpus);
+
 void put_online_cpus(void)
 {
 	if (cpu_hotplug.active_writer == current)
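The new try_get_online_cpus() above gives callers a way to pin CPU hotplug without sleeping on the hotplug lock. A minimal sketch of a hypothetical caller (not part of this patch; example_slow_path() and example_touch_cpu() are made-up placeholders) might look like:

/*
 * Hypothetical caller sketch: opportunistically block CPU hotplug and
 * fall back to a hotplug-tolerant slow path when the lock is contended.
 */
static void example_update_all_cpus(void)
{
	int cpu;

	if (!try_get_online_cpus()) {
		example_slow_path();		/* hypothetical fallback */
		return;
	}
	for_each_online_cpu(cpu)
		example_touch_cpu(cpu);		/* hypothetical per-CPU work */
	put_online_cpus();
}

In the larger series, RCU's expedited grace periods use this trylock form so that they never block on, and therefore cannot deadlock with, an in-progress CPU-hotplug operation.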
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
 {
 	struct task_struct *tsk = current;
 	int group_dead;
+	TASKS_RCU(int tasks_rcu_i);
 
 	profile_task_exit(tsk);
 
@@ -775,6 +776,7 @@ void do_exit(long code)
 	 */
 	flush_ptrace_hw_breakpoint(tsk);
 
+	TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
 	exit_notify(tsk, group_dead);
 	proc_exit_connector(tsk);
 #ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
 	if (tsk->nr_dirtied)
 		__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
 	exit_rcu();
+	TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
 
 	/*
 	 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
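The TASKS_RCU() wrapper used above keeps do_exit() free of #ifdefs: its argument is compiled in only when CONFIG_TASKS_RCU is set. A sketch of the kind of definition this relies on (the real one lives in include/linux/rcupdate.h in this series) is:

/* Sketch of the conditional wrapper assumed by the do_exit() hunks above. */
#ifdef CONFIG_TASKS_RCU
#define TASKS_RCU(x) x
extern struct srcu_struct tasks_rcu_exit_srcu;
#else /* #ifdef CONFIG_TASKS_RCU */
#define TASKS_RCU(x) do { } while (0)
#endif /* #else #ifdef CONFIG_TASKS_RCU */

The __srcu_read_lock()/__srcu_read_unlock() pair brackets the late-exit window around exit_notify(), so that the RCU-tasks machinery can wait for tasks that are almost done exiting.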
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 0955b885d0dc..ec8cce259779 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -20,30 +20,20 @@
20 * Author: Paul E. McKenney <paulmck@us.ibm.com> 20 * Author: Paul E. McKenney <paulmck@us.ibm.com>
21 * Based on kernel/rcu/torture.c. 21 * Based on kernel/rcu/torture.c.
22 */ 22 */
23#include <linux/types.h>
24#include <linux/kernel.h> 23#include <linux/kernel.h>
25#include <linux/init.h>
26#include <linux/module.h> 24#include <linux/module.h>
27#include <linux/kthread.h> 25#include <linux/kthread.h>
28#include <linux/err.h>
29#include <linux/spinlock.h> 26#include <linux/spinlock.h>
27#include <linux/rwlock.h>
28#include <linux/mutex.h>
29#include <linux/rwsem.h>
30#include <linux/smp.h> 30#include <linux/smp.h>
31#include <linux/interrupt.h> 31#include <linux/interrupt.h>
32#include <linux/sched.h> 32#include <linux/sched.h>
33#include <linux/atomic.h> 33#include <linux/atomic.h>
34#include <linux/bitops.h>
35#include <linux/completion.h>
36#include <linux/moduleparam.h> 34#include <linux/moduleparam.h>
37#include <linux/percpu.h>
38#include <linux/notifier.h>
39#include <linux/reboot.h>
40#include <linux/freezer.h>
41#include <linux/cpu.h>
42#include <linux/delay.h> 35#include <linux/delay.h>
43#include <linux/stat.h>
44#include <linux/slab.h> 36#include <linux/slab.h>
45#include <linux/trace_clock.h>
46#include <asm/byteorder.h>
47#include <linux/torture.h> 37#include <linux/torture.h>
48 38
49MODULE_LICENSE("GPL"); 39MODULE_LICENSE("GPL");
@@ -51,6 +41,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
51 41
52torture_param(int, nwriters_stress, -1, 42torture_param(int, nwriters_stress, -1,
53 "Number of write-locking stress-test threads"); 43 "Number of write-locking stress-test threads");
44torture_param(int, nreaders_stress, -1,
45 "Number of read-locking stress-test threads");
54torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); 46torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
55torture_param(int, onoff_interval, 0, 47torture_param(int, onoff_interval, 0,
56 "Time between CPU hotplugs (s), 0=disable"); 48 "Time between CPU hotplugs (s), 0=disable");
@@ -66,30 +58,28 @@ torture_param(bool, verbose, true,
66static char *torture_type = "spin_lock"; 58static char *torture_type = "spin_lock";
67module_param(torture_type, charp, 0444); 59module_param(torture_type, charp, 0444);
68MODULE_PARM_DESC(torture_type, 60MODULE_PARM_DESC(torture_type,
69 "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); 61 "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
70
71static atomic_t n_lock_torture_errors;
72 62
73static struct task_struct *stats_task; 63static struct task_struct *stats_task;
74static struct task_struct **writer_tasks; 64static struct task_struct **writer_tasks;
65static struct task_struct **reader_tasks;
75 66
76static int nrealwriters_stress;
77static bool lock_is_write_held; 67static bool lock_is_write_held;
68static bool lock_is_read_held;
78 69
79struct lock_writer_stress_stats { 70struct lock_stress_stats {
80 long n_write_lock_fail; 71 long n_lock_fail;
81 long n_write_lock_acquired; 72 long n_lock_acquired;
82}; 73};
83static struct lock_writer_stress_stats *lwsa;
84 74
85#if defined(MODULE) 75#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 76#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 77#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 78#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 79#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 80int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 81module_param(torture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init"); 82MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
93 83
94/* Forward reference. */ 84/* Forward reference. */
95static void lock_torture_cleanup(void); 85static void lock_torture_cleanup(void);
@@ -102,12 +92,25 @@ struct lock_torture_ops {
102 int (*writelock)(void); 92 int (*writelock)(void);
103 void (*write_delay)(struct torture_random_state *trsp); 93 void (*write_delay)(struct torture_random_state *trsp);
104 void (*writeunlock)(void); 94 void (*writeunlock)(void);
95 int (*readlock)(void);
96 void (*read_delay)(struct torture_random_state *trsp);
97 void (*readunlock)(void);
105 unsigned long flags; 98 unsigned long flags;
106 const char *name; 99 const char *name;
107}; 100};
108 101
109static struct lock_torture_ops *cur_ops; 102struct lock_torture_cxt {
110 103 int nrealwriters_stress;
104 int nrealreaders_stress;
105 bool debug_lock;
106 atomic_t n_lock_torture_errors;
107 struct lock_torture_ops *cur_ops;
108 struct lock_stress_stats *lwsa; /* writer statistics */
109 struct lock_stress_stats *lrsa; /* reader statistics */
110};
111static struct lock_torture_cxt cxt = { 0, 0, false,
112 ATOMIC_INIT(0),
113 NULL, NULL};
111/* 114/*
112 * Definitions for lock torture testing. 115 * Definitions for lock torture testing.
113 */ 116 */
@@ -123,10 +126,10 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
123 126
124 /* We want a long delay occasionally to force massive contention. */ 127 /* We want a long delay occasionally to force massive contention. */
125 if (!(torture_random(trsp) % 128 if (!(torture_random(trsp) %
126 (nrealwriters_stress * 2000 * longdelay_us))) 129 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
127 mdelay(longdelay_us); 130 mdelay(longdelay_us);
128#ifdef CONFIG_PREEMPT 131#ifdef CONFIG_PREEMPT
129 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 132 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
130 preempt_schedule(); /* Allow test to be preempted. */ 133 preempt_schedule(); /* Allow test to be preempted. */
131#endif 134#endif
132} 135}
@@ -140,6 +143,9 @@ static struct lock_torture_ops lock_busted_ops = {
140 .writelock = torture_lock_busted_write_lock, 143 .writelock = torture_lock_busted_write_lock,
141 .write_delay = torture_lock_busted_write_delay, 144 .write_delay = torture_lock_busted_write_delay,
142 .writeunlock = torture_lock_busted_write_unlock, 145 .writeunlock = torture_lock_busted_write_unlock,
146 .readlock = NULL,
147 .read_delay = NULL,
148 .readunlock = NULL,
143 .name = "lock_busted" 149 .name = "lock_busted"
144}; 150};
145 151
@@ -160,13 +166,13 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
160 * we want a long delay occasionally to force massive contention. 166 * we want a long delay occasionally to force massive contention.
161 */ 167 */
162 if (!(torture_random(trsp) % 168 if (!(torture_random(trsp) %
163 (nrealwriters_stress * 2000 * longdelay_us))) 169 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
164 mdelay(longdelay_us); 170 mdelay(longdelay_us);
165 if (!(torture_random(trsp) % 171 if (!(torture_random(trsp) %
166 (nrealwriters_stress * 2 * shortdelay_us))) 172 (cxt.nrealwriters_stress * 2 * shortdelay_us)))
167 udelay(shortdelay_us); 173 udelay(shortdelay_us);
168#ifdef CONFIG_PREEMPT 174#ifdef CONFIG_PREEMPT
169 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 175 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
170 preempt_schedule(); /* Allow test to be preempted. */ 176 preempt_schedule(); /* Allow test to be preempted. */
171#endif 177#endif
172} 178}
@@ -180,39 +186,253 @@ static struct lock_torture_ops spin_lock_ops = {
180 .writelock = torture_spin_lock_write_lock, 186 .writelock = torture_spin_lock_write_lock,
181 .write_delay = torture_spin_lock_write_delay, 187 .write_delay = torture_spin_lock_write_delay,
182 .writeunlock = torture_spin_lock_write_unlock, 188 .writeunlock = torture_spin_lock_write_unlock,
189 .readlock = NULL,
190 .read_delay = NULL,
191 .readunlock = NULL,
183 .name = "spin_lock" 192 .name = "spin_lock"
184}; 193};
185 194
186static int torture_spin_lock_write_lock_irq(void) 195static int torture_spin_lock_write_lock_irq(void)
187__acquires(torture_spinlock_irq) 196__acquires(torture_spinlock)
188{ 197{
189 unsigned long flags; 198 unsigned long flags;
190 199
191 spin_lock_irqsave(&torture_spinlock, flags); 200 spin_lock_irqsave(&torture_spinlock, flags);
192 cur_ops->flags = flags; 201 cxt.cur_ops->flags = flags;
193 return 0; 202 return 0;
194} 203}
195 204
196static void torture_lock_spin_write_unlock_irq(void) 205static void torture_lock_spin_write_unlock_irq(void)
197__releases(torture_spinlock) 206__releases(torture_spinlock)
198{ 207{
199 spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); 208 spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
200} 209}
201 210
202static struct lock_torture_ops spin_lock_irq_ops = { 211static struct lock_torture_ops spin_lock_irq_ops = {
203 .writelock = torture_spin_lock_write_lock_irq, 212 .writelock = torture_spin_lock_write_lock_irq,
204 .write_delay = torture_spin_lock_write_delay, 213 .write_delay = torture_spin_lock_write_delay,
205 .writeunlock = torture_lock_spin_write_unlock_irq, 214 .writeunlock = torture_lock_spin_write_unlock_irq,
215 .readlock = NULL,
216 .read_delay = NULL,
217 .readunlock = NULL,
206 .name = "spin_lock_irq" 218 .name = "spin_lock_irq"
207}; 219};
208 220
221static DEFINE_RWLOCK(torture_rwlock);
222
223static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
224{
225 write_lock(&torture_rwlock);
226 return 0;
227}
228
229static void torture_rwlock_write_delay(struct torture_random_state *trsp)
230{
231 const unsigned long shortdelay_us = 2;
232 const unsigned long longdelay_ms = 100;
233
234 /* We want a short delay mostly to emulate likely code, and
235 * we want a long delay occasionally to force massive contention.
236 */
237 if (!(torture_random(trsp) %
238 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
239 mdelay(longdelay_ms);
240 else
241 udelay(shortdelay_us);
242}
243
244static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
245{
246 write_unlock(&torture_rwlock);
247}
248
249static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
250{
251 read_lock(&torture_rwlock);
252 return 0;
253}
254
255static void torture_rwlock_read_delay(struct torture_random_state *trsp)
256{
257 const unsigned long shortdelay_us = 10;
258 const unsigned long longdelay_ms = 100;
259
260 /* We want a short delay mostly to emulate likely code, and
261 * we want a long delay occasionally to force massive contention.
262 */
263 if (!(torture_random(trsp) %
264 (cxt.nrealreaders_stress * 2000 * longdelay_ms)))
265 mdelay(longdelay_ms);
266 else
267 udelay(shortdelay_us);
268}
269
270static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
271{
272 read_unlock(&torture_rwlock);
273}
274
275static struct lock_torture_ops rw_lock_ops = {
276 .writelock = torture_rwlock_write_lock,
277 .write_delay = torture_rwlock_write_delay,
278 .writeunlock = torture_rwlock_write_unlock,
279 .readlock = torture_rwlock_read_lock,
280 .read_delay = torture_rwlock_read_delay,
281 .readunlock = torture_rwlock_read_unlock,
282 .name = "rw_lock"
283};
284
285static int torture_rwlock_write_lock_irq(void) __acquires(torture_rwlock)
286{
287 unsigned long flags;
288
289 write_lock_irqsave(&torture_rwlock, flags);
290 cxt.cur_ops->flags = flags;
291 return 0;
292}
293
294static void torture_rwlock_write_unlock_irq(void)
295__releases(torture_rwlock)
296{
297 write_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
298}
299
300static int torture_rwlock_read_lock_irq(void) __acquires(torture_rwlock)
301{
302 unsigned long flags;
303
304 read_lock_irqsave(&torture_rwlock, flags);
305 cxt.cur_ops->flags = flags;
306 return 0;
307}
308
309static void torture_rwlock_read_unlock_irq(void)
310__releases(torture_rwlock)
311{
312 read_unlock_irqrestore(&torture_rwlock, cxt.cur_ops->flags);
313}
314
315static struct lock_torture_ops rw_lock_irq_ops = {
316 .writelock = torture_rwlock_write_lock_irq,
317 .write_delay = torture_rwlock_write_delay,
318 .writeunlock = torture_rwlock_write_unlock_irq,
319 .readlock = torture_rwlock_read_lock_irq,
320 .read_delay = torture_rwlock_read_delay,
321 .readunlock = torture_rwlock_read_unlock_irq,
322 .name = "rw_lock_irq"
323};
324
325static DEFINE_MUTEX(torture_mutex);
326
327static int torture_mutex_lock(void) __acquires(torture_mutex)
328{
329 mutex_lock(&torture_mutex);
330 return 0;
331}
332
333static void torture_mutex_delay(struct torture_random_state *trsp)
334{
335 const unsigned long longdelay_ms = 100;
336
337 /* We want a long delay occasionally to force massive contention. */
338 if (!(torture_random(trsp) %
339 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
340 mdelay(longdelay_ms * 5);
341 else
342 mdelay(longdelay_ms / 5);
343#ifdef CONFIG_PREEMPT
344 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
345 preempt_schedule(); /* Allow test to be preempted. */
346#endif
347}
348
349static void torture_mutex_unlock(void) __releases(torture_mutex)
350{
351 mutex_unlock(&torture_mutex);
352}
353
354static struct lock_torture_ops mutex_lock_ops = {
355 .writelock = torture_mutex_lock,
356 .write_delay = torture_mutex_delay,
357 .writeunlock = torture_mutex_unlock,
358 .readlock = NULL,
359 .read_delay = NULL,
360 .readunlock = NULL,
361 .name = "mutex_lock"
362};
363
364static DECLARE_RWSEM(torture_rwsem);
365static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
366{
367 down_write(&torture_rwsem);
368 return 0;
369}
370
371static void torture_rwsem_write_delay(struct torture_random_state *trsp)
372{
373 const unsigned long longdelay_ms = 100;
374
375 /* We want a long delay occasionally to force massive contention. */
376 if (!(torture_random(trsp) %
377 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
378 mdelay(longdelay_ms * 10);
379 else
380 mdelay(longdelay_ms / 10);
381#ifdef CONFIG_PREEMPT
382 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
383 preempt_schedule(); /* Allow test to be preempted. */
384#endif
385}
386
387static void torture_rwsem_up_write(void) __releases(torture_rwsem)
388{
389 up_write(&torture_rwsem);
390}
391
392static int torture_rwsem_down_read(void) __acquires(torture_rwsem)
393{
394 down_read(&torture_rwsem);
395 return 0;
396}
397
398static void torture_rwsem_read_delay(struct torture_random_state *trsp)
399{
400 const unsigned long longdelay_ms = 100;
401
402 /* We want a long delay occasionally to force massive contention. */
403 if (!(torture_random(trsp) %
404 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
405 mdelay(longdelay_ms * 2);
406 else
407 mdelay(longdelay_ms / 2);
408#ifdef CONFIG_PREEMPT
409 if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
410 preempt_schedule(); /* Allow test to be preempted. */
411#endif
412}
413
414static void torture_rwsem_up_read(void) __releases(torture_rwsem)
415{
416 up_read(&torture_rwsem);
417}
418
419static struct lock_torture_ops rwsem_lock_ops = {
420 .writelock = torture_rwsem_down_write,
421 .write_delay = torture_rwsem_write_delay,
422 .writeunlock = torture_rwsem_up_write,
423 .readlock = torture_rwsem_down_read,
424 .read_delay = torture_rwsem_read_delay,
425 .readunlock = torture_rwsem_up_read,
426 .name = "rwsem_lock"
427};
428
209/* 429/*
210 * Lock torture writer kthread. Repeatedly acquires and releases 430 * Lock torture writer kthread. Repeatedly acquires and releases
211 * the lock, checking for duplicate acquisitions. 431 * the lock, checking for duplicate acquisitions.
212 */ 432 */
213static int lock_torture_writer(void *arg) 433static int lock_torture_writer(void *arg)
214{ 434{
215 struct lock_writer_stress_stats *lwsp = arg; 435 struct lock_stress_stats *lwsp = arg;
216 static DEFINE_TORTURE_RANDOM(rand); 436 static DEFINE_TORTURE_RANDOM(rand);
217 437
218 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); 438 VERBOSE_TOROUT_STRING("lock_torture_writer task started");
@@ -221,14 +441,19 @@ static int lock_torture_writer(void *arg)
221 do { 441 do {
222 if ((torture_random(&rand) & 0xfffff) == 0) 442 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1); 443 schedule_timeout_uninterruptible(1);
224 cur_ops->writelock(); 444
445 cxt.cur_ops->writelock();
225 if (WARN_ON_ONCE(lock_is_write_held)) 446 if (WARN_ON_ONCE(lock_is_write_held))
226 lwsp->n_write_lock_fail++; 447 lwsp->n_lock_fail++;
227 lock_is_write_held = 1; 448 lock_is_write_held = 1;
228 lwsp->n_write_lock_acquired++; 449 if (WARN_ON_ONCE(lock_is_read_held))
229 cur_ops->write_delay(&rand); 450 lwsp->n_lock_fail++; /* rare, but... */
451
452 lwsp->n_lock_acquired++;
453 cxt.cur_ops->write_delay(&rand);
230 lock_is_write_held = 0; 454 lock_is_write_held = 0;
231 cur_ops->writeunlock(); 455 cxt.cur_ops->writeunlock();
456
232 stutter_wait("lock_torture_writer"); 457 stutter_wait("lock_torture_writer");
233 } while (!torture_must_stop()); 458 } while (!torture_must_stop());
234 torture_kthread_stopping("lock_torture_writer"); 459 torture_kthread_stopping("lock_torture_writer");
@@ -236,32 +461,66 @@ static int lock_torture_writer(void *arg)
236} 461}
237 462
238/* 463/*
464 * Lock torture reader kthread. Repeatedly acquires and releases
465 * the reader lock.
466 */
467static int lock_torture_reader(void *arg)
468{
469 struct lock_stress_stats *lrsp = arg;
470 static DEFINE_TORTURE_RANDOM(rand);
471
472 VERBOSE_TOROUT_STRING("lock_torture_reader task started");
473 set_user_nice(current, MAX_NICE);
474
475 do {
476 if ((torture_random(&rand) & 0xfffff) == 0)
477 schedule_timeout_uninterruptible(1);
478
479 cxt.cur_ops->readlock();
480 lock_is_read_held = 1;
481 if (WARN_ON_ONCE(lock_is_write_held))
482 lrsp->n_lock_fail++; /* rare, but... */
483
484 lrsp->n_lock_acquired++;
485 cxt.cur_ops->read_delay(&rand);
486 lock_is_read_held = 0;
487 cxt.cur_ops->readunlock();
488
489 stutter_wait("lock_torture_reader");
490 } while (!torture_must_stop());
491 torture_kthread_stopping("lock_torture_reader");
492 return 0;
493}
494
495/*
239 * Create an lock-torture-statistics message in the specified buffer. 496 * Create an lock-torture-statistics message in the specified buffer.
240 */ 497 */
241static void lock_torture_printk(char *page) 498static void __torture_print_stats(char *page,
499 struct lock_stress_stats *statp, bool write)
242{ 500{
243 bool fail = 0; 501 bool fail = 0;
244 int i; 502 int i, n_stress;
245 long max = 0; 503 long max = 0;
246 long min = lwsa[0].n_write_lock_acquired; 504 long min = statp[0].n_lock_acquired;
247 long long sum = 0; 505 long long sum = 0;
248 506
249 for (i = 0; i < nrealwriters_stress; i++) { 507 n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
250 if (lwsa[i].n_write_lock_fail) 508 for (i = 0; i < n_stress; i++) {
509 if (statp[i].n_lock_fail)
251 fail = true; 510 fail = true;
252 sum += lwsa[i].n_write_lock_acquired; 511 sum += statp[i].n_lock_acquired;
253 if (max < lwsa[i].n_write_lock_fail) 512 if (max < statp[i].n_lock_fail)
254 max = lwsa[i].n_write_lock_fail; 513 max = statp[i].n_lock_fail;
255 if (min > lwsa[i].n_write_lock_fail) 514 if (min > statp[i].n_lock_fail)
256 min = lwsa[i].n_write_lock_fail; 515 min = statp[i].n_lock_fail;
257 } 516 }
258 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
259 page += sprintf(page, 517 page += sprintf(page,
260 "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", 518 "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
519 write ? "Writes" : "Reads ",
261 sum, max, min, max / 2 > min ? "???" : "", 520 sum, max, min, max / 2 > min ? "???" : "",
262 fail, fail ? "!!!" : ""); 521 fail, fail ? "!!!" : "");
263 if (fail) 522 if (fail)
264 atomic_inc(&n_lock_torture_errors); 523 atomic_inc(&cxt.n_lock_torture_errors);
265} 524}
266 525
267/* 526/*
@@ -274,18 +533,35 @@ static void lock_torture_printk(char *page)
274 */ 533 */
275static void lock_torture_stats_print(void) 534static void lock_torture_stats_print(void)
276{ 535{
277 int size = nrealwriters_stress * 200 + 8192; 536 int size = cxt.nrealwriters_stress * 200 + 8192;
278 char *buf; 537 char *buf;
279 538
539 if (cxt.cur_ops->readlock)
540 size += cxt.nrealreaders_stress * 200 + 8192;
541
280 buf = kmalloc(size, GFP_KERNEL); 542 buf = kmalloc(size, GFP_KERNEL);
281 if (!buf) { 543 if (!buf) {
282 pr_err("lock_torture_stats_print: Out of memory, need: %d", 544 pr_err("lock_torture_stats_print: Out of memory, need: %d",
283 size); 545 size);
284 return; 546 return;
285 } 547 }
286 lock_torture_printk(buf); 548
549 __torture_print_stats(buf, cxt.lwsa, true);
287 pr_alert("%s", buf); 550 pr_alert("%s", buf);
288 kfree(buf); 551 kfree(buf);
552
553 if (cxt.cur_ops->readlock) {
554 buf = kmalloc(size, GFP_KERNEL);
555 if (!buf) {
556 pr_err("lock_torture_stats_print: Out of memory, need: %d",
557 size);
558 return;
559 }
560
561 __torture_print_stats(buf, cxt.lrsa, false);
562 pr_alert("%s", buf);
563 kfree(buf);
564 }
289} 565}
290 566
291/* 567/*
@@ -312,9 +588,10 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
312 const char *tag) 588 const char *tag)
313{ 589{
314 pr_alert("%s" TORTURE_FLAG 590 pr_alert("%s" TORTURE_FLAG
315 "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", 591 "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
316 torture_type, tag, nrealwriters_stress, stat_interval, verbose, 592 torture_type, tag, cxt.debug_lock ? " [debug]": "",
317 shuffle_interval, stutter, shutdown_secs, 593 cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
594 verbose, shuffle_interval, stutter, shutdown_secs,
318 onoff_interval, onoff_holdoff); 595 onoff_interval, onoff_holdoff);
319} 596}
320 597
@@ -322,46 +599,59 @@ static void lock_torture_cleanup(void)
322{ 599{
323 int i; 600 int i;
324 601
325 if (torture_cleanup()) 602 if (torture_cleanup_begin())
326 return; 603 return;
327 604
328 if (writer_tasks) { 605 if (writer_tasks) {
329 for (i = 0; i < nrealwriters_stress; i++) 606 for (i = 0; i < cxt.nrealwriters_stress; i++)
330 torture_stop_kthread(lock_torture_writer, 607 torture_stop_kthread(lock_torture_writer,
331 writer_tasks[i]); 608 writer_tasks[i]);
332 kfree(writer_tasks); 609 kfree(writer_tasks);
333 writer_tasks = NULL; 610 writer_tasks = NULL;
334 } 611 }
335 612
613 if (reader_tasks) {
614 for (i = 0; i < cxt.nrealreaders_stress; i++)
615 torture_stop_kthread(lock_torture_reader,
616 reader_tasks[i]);
617 kfree(reader_tasks);
618 reader_tasks = NULL;
619 }
620
336 torture_stop_kthread(lock_torture_stats, stats_task); 621 torture_stop_kthread(lock_torture_stats, stats_task);
337 lock_torture_stats_print(); /* -After- the stats thread is stopped! */ 622 lock_torture_stats_print(); /* -After- the stats thread is stopped! */
338 623
339 if (atomic_read(&n_lock_torture_errors)) 624 if (atomic_read(&cxt.n_lock_torture_errors))
340 lock_torture_print_module_parms(cur_ops, 625 lock_torture_print_module_parms(cxt.cur_ops,
341 "End of test: FAILURE"); 626 "End of test: FAILURE");
342 else if (torture_onoff_failures()) 627 else if (torture_onoff_failures())
343 lock_torture_print_module_parms(cur_ops, 628 lock_torture_print_module_parms(cxt.cur_ops,
344 "End of test: LOCK_HOTPLUG"); 629 "End of test: LOCK_HOTPLUG");
345 else 630 else
346 lock_torture_print_module_parms(cur_ops, 631 lock_torture_print_module_parms(cxt.cur_ops,
347 "End of test: SUCCESS"); 632 "End of test: SUCCESS");
633 torture_cleanup_end();
348} 634}
349 635
350static int __init lock_torture_init(void) 636static int __init lock_torture_init(void)
351{ 637{
352 int i; 638 int i, j;
353 int firsterr = 0; 639 int firsterr = 0;
354 static struct lock_torture_ops *torture_ops[] = { 640 static struct lock_torture_ops *torture_ops[] = {
355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 641 &lock_busted_ops,
642 &spin_lock_ops, &spin_lock_irq_ops,
643 &rw_lock_ops, &rw_lock_irq_ops,
644 &mutex_lock_ops,
645 &rwsem_lock_ops,
356 }; 646 };
357 647
358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable)) 648 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
359 return -EBUSY; 649 return -EBUSY;
360 650
361 /* Process args and tell the world that the torturer is on the job. */ 651 /* Process args and tell the world that the torturer is on the job. */
362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 652 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
363 cur_ops = torture_ops[i]; 653 cxt.cur_ops = torture_ops[i];
364 if (strcmp(torture_type, cur_ops->name) == 0) 654 if (strcmp(torture_type, cxt.cur_ops->name) == 0)
365 break; 655 break;
366 } 656 }
367 if (i == ARRAY_SIZE(torture_ops)) { 657 if (i == ARRAY_SIZE(torture_ops)) {
@@ -374,31 +664,69 @@ static int __init lock_torture_init(void)
374 torture_init_end(); 664 torture_init_end();
375 return -EINVAL; 665 return -EINVAL;
376 } 666 }
377 if (cur_ops->init) 667 if (cxt.cur_ops->init)
378 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ 668 cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
379 669
380 if (nwriters_stress >= 0) 670 if (nwriters_stress >= 0)
381 nrealwriters_stress = nwriters_stress; 671 cxt.nrealwriters_stress = nwriters_stress;
382 else 672 else
383 nrealwriters_stress = 2 * num_online_cpus(); 673 cxt.nrealwriters_stress = 2 * num_online_cpus();
384 lock_torture_print_module_parms(cur_ops, "Start of test"); 674
675#ifdef CONFIG_DEBUG_MUTEXES
676 if (strncmp(torture_type, "mutex", 5) == 0)
677 cxt.debug_lock = true;
678#endif
679#ifdef CONFIG_DEBUG_SPINLOCK
680 if ((strncmp(torture_type, "spin", 4) == 0) ||
681 (strncmp(torture_type, "rw_lock", 7) == 0))
682 cxt.debug_lock = true;
683#endif
385 684
386 /* Initialize the statistics so that each run gets its own numbers. */ 685 /* Initialize the statistics so that each run gets its own numbers. */
387 686
388 lock_is_write_held = 0; 687 lock_is_write_held = 0;
389 lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); 688 cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
390 if (lwsa == NULL) { 689 if (cxt.lwsa == NULL) {
391 VERBOSE_TOROUT_STRING("lwsa: Out of memory"); 690 VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
392 firsterr = -ENOMEM; 691 firsterr = -ENOMEM;
393 goto unwind; 692 goto unwind;
394 } 693 }
395 for (i = 0; i < nrealwriters_stress; i++) { 694 for (i = 0; i < cxt.nrealwriters_stress; i++) {
396 lwsa[i].n_write_lock_fail = 0; 695 cxt.lwsa[i].n_lock_fail = 0;
397 lwsa[i].n_write_lock_acquired = 0; 696 cxt.lwsa[i].n_lock_acquired = 0;
398 } 697 }
399 698
400 /* Start up the kthreads. */ 699 if (cxt.cur_ops->readlock) {
700 if (nreaders_stress >= 0)
701 cxt.nrealreaders_stress = nreaders_stress;
702 else {
703 /*
704 * By default distribute evenly the number of
705 * readers and writers. We still run the same number
706 * of threads as the writer-only locks default.
707 */
708 if (nwriters_stress < 0) /* user doesn't care */
709 cxt.nrealwriters_stress = num_online_cpus();
710 cxt.nrealreaders_stress = cxt.nrealwriters_stress;
711 }
712
713 lock_is_read_held = 0;
714 cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
715 if (cxt.lrsa == NULL) {
716 VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
717 firsterr = -ENOMEM;
718 kfree(cxt.lwsa);
719 goto unwind;
720 }
721
722 for (i = 0; i < cxt.nrealreaders_stress; i++) {
723 cxt.lrsa[i].n_lock_fail = 0;
724 cxt.lrsa[i].n_lock_acquired = 0;
725 }
726 }
727 lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
401 728
729 /* Prepare torture context. */
402 if (onoff_interval > 0) { 730 if (onoff_interval > 0) {
403 firsterr = torture_onoff_init(onoff_holdoff * HZ, 731 firsterr = torture_onoff_init(onoff_holdoff * HZ,
404 onoff_interval * HZ); 732 onoff_interval * HZ);
@@ -422,18 +750,51 @@ static int __init lock_torture_init(void)
422 goto unwind; 750 goto unwind;
423 } 751 }
424 752
425 writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), 753 writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
426 GFP_KERNEL); 754 GFP_KERNEL);
427 if (writer_tasks == NULL) { 755 if (writer_tasks == NULL) {
428 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); 756 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
429 firsterr = -ENOMEM; 757 firsterr = -ENOMEM;
430 goto unwind; 758 goto unwind;
431 } 759 }
432 for (i = 0; i < nrealwriters_stress; i++) { 760
433 firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], 761 if (cxt.cur_ops->readlock) {
762 reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]),
763 GFP_KERNEL);
764 if (reader_tasks == NULL) {
765 VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
766 firsterr = -ENOMEM;
767 goto unwind;
768 }
769 }
770
771 /*
772 * Create the kthreads and start torturing (oh, those poor little locks).
773 *
774 * TODO: Note that we interleave writers with readers, giving writers a
775 * slight advantage, by creating its kthread first. This can be modified
776 * for very specific needs, or even let the user choose the policy, if
777 * ever wanted.
778 */
779 for (i = 0, j = 0; i < cxt.nrealwriters_stress ||
780 j < cxt.nrealreaders_stress; i++, j++) {
781 if (i >= cxt.nrealwriters_stress)
782 goto create_reader;
783
784 /* Create writer. */
785 firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
434 writer_tasks[i]); 786 writer_tasks[i]);
435 if (firsterr) 787 if (firsterr)
436 goto unwind; 788 goto unwind;
789
790 create_reader:
791 if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress))
792 continue;
793 /* Create reader. */
794 firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j],
795 reader_tasks[j]);
796 if (firsterr)
797 goto unwind;
437 } 798 }
438 if (stat_interval > 0) { 799 if (stat_interval > 0) {
439 firsterr = torture_create_kthread(lock_torture_stats, NULL, 800 firsterr = torture_create_kthread(lock_torture_stats, NULL,
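With the ops table above now split into write-side and read-side hooks, adding another lock flavor is mostly a matter of filling in a new lock_torture_ops entry and listing it in torture_ops[] in lock_torture_init(). A sketch (not part of this patch; "example_lock" and the example_* names are made up) for a writer-only primitive:

static DEFINE_SPINLOCK(example_lock);	/* hypothetical lock under test */

static int example_write_lock(void) __acquires(example_lock)
{
	spin_lock(&example_lock);
	return 0;
}

static void example_write_unlock(void) __releases(example_lock)
{
	spin_unlock(&example_lock);
}

static struct lock_torture_ops example_lock_ops = {
	.writelock	= example_write_lock,
	.write_delay	= torture_spin_lock_write_delay,	/* reuse existing delay */
	.writeunlock	= example_write_unlock,
	.readlock	= NULL,		/* NULL read hooks: no reader kthreads */
	.read_delay	= NULL,
	.readunlock	= NULL,
	.name		= "example_lock"
};

The test would then be selected at module-load time with torture_type=example_lock, and nreaders_stress would be ignored because the read-side hooks are NULL.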
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 948a7693748e..240fa9094f83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -49,11 +49,19 @@
49#include <linux/trace_clock.h> 49#include <linux/trace_clock.h>
50#include <asm/byteorder.h> 50#include <asm/byteorder.h>
51#include <linux/torture.h> 51#include <linux/torture.h>
52#include <linux/vmalloc.h>
52 53
53MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
54MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); 55MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
55 56
56 57
58torture_param(int, cbflood_inter_holdoff, HZ,
59 "Holdoff between floods (jiffies)");
60torture_param(int, cbflood_intra_holdoff, 1,
61 "Holdoff between bursts (jiffies)");
62torture_param(int, cbflood_n_burst, 3, "# bursts in flood, zero to disable");
63torture_param(int, cbflood_n_per_burst, 20000,
64 "# callbacks per burst in flood");
57torture_param(int, fqs_duration, 0, 65torture_param(int, fqs_duration, 0,
58 "Duration of fqs bursts (us), 0 to disable"); 66 "Duration of fqs bursts (us), 0 to disable");
59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 67torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
@@ -96,10 +104,12 @@ module_param(torture_type, charp, 0444);
96MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); 104MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)");
97 105
98static int nrealreaders; 106static int nrealreaders;
107static int ncbflooders;
99static struct task_struct *writer_task; 108static struct task_struct *writer_task;
100static struct task_struct **fakewriter_tasks; 109static struct task_struct **fakewriter_tasks;
101static struct task_struct **reader_tasks; 110static struct task_struct **reader_tasks;
102static struct task_struct *stats_task; 111static struct task_struct *stats_task;
112static struct task_struct **cbflood_task;
103static struct task_struct *fqs_task; 113static struct task_struct *fqs_task;
104static struct task_struct *boost_tasks[NR_CPUS]; 114static struct task_struct *boost_tasks[NR_CPUS];
105static struct task_struct *stall_task; 115static struct task_struct *stall_task;
@@ -138,6 +148,7 @@ static long n_rcu_torture_boosts;
138static long n_rcu_torture_timers; 148static long n_rcu_torture_timers;
139static long n_barrier_attempts; 149static long n_barrier_attempts;
140static long n_barrier_successes; 150static long n_barrier_successes;
151static atomic_long_t n_cbfloods;
141static struct list_head rcu_torture_removed; 152static struct list_head rcu_torture_removed;
142 153
143static int rcu_torture_writer_state; 154static int rcu_torture_writer_state;
@@ -157,9 +168,9 @@ static int rcu_torture_writer_state;
157#else 168#else
158#define RCUTORTURE_RUNNABLE_INIT 0 169#define RCUTORTURE_RUNNABLE_INIT 0
159#endif 170#endif
160int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 171static int torture_runnable = RCUTORTURE_RUNNABLE_INIT;
161module_param(rcutorture_runnable, int, 0444); 172module_param(torture_runnable, int, 0444);
162MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); 173MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
163 174
164#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) 175#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
165#define rcu_can_boost() 1 176#define rcu_can_boost() 1
@@ -182,7 +193,7 @@ static u64 notrace rcu_trace_clock_local(void)
182#endif /* #else #ifdef CONFIG_RCU_TRACE */ 193#endif /* #else #ifdef CONFIG_RCU_TRACE */
183 194
184static unsigned long boost_starttime; /* jiffies of next boost test start. */ 195static unsigned long boost_starttime; /* jiffies of next boost test start. */
185DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 196static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
186 /* and boost task create/destroy. */ 197 /* and boost task create/destroy. */
187static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ 198static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
188static bool barrier_phase; /* Test phase. */ 199static bool barrier_phase; /* Test phase. */
@@ -242,7 +253,7 @@ struct rcu_torture_ops {
242 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 253 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
243 void (*cb_barrier)(void); 254 void (*cb_barrier)(void);
244 void (*fqs)(void); 255 void (*fqs)(void);
245 void (*stats)(char *page); 256 void (*stats)(void);
246 int irq_capable; 257 int irq_capable;
247 int can_boost; 258 int can_boost;
248 const char *name; 259 const char *name;
@@ -525,21 +536,21 @@ static void srcu_torture_barrier(void)
525 srcu_barrier(&srcu_ctl); 536 srcu_barrier(&srcu_ctl);
526} 537}
527 538
528static void srcu_torture_stats(char *page) 539static void srcu_torture_stats(void)
529{ 540{
530 int cpu; 541 int cpu;
531 int idx = srcu_ctl.completed & 0x1; 542 int idx = srcu_ctl.completed & 0x1;
532 543
533 page += sprintf(page, "%s%s per-CPU(idx=%d):", 544 pr_alert("%s%s per-CPU(idx=%d):",
534 torture_type, TORTURE_FLAG, idx); 545 torture_type, TORTURE_FLAG, idx);
535 for_each_possible_cpu(cpu) { 546 for_each_possible_cpu(cpu) {
536 long c0, c1; 547 long c0, c1;
537 548
538 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; 549 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
539 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; 550 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
540 page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); 551 pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
541 } 552 }
542 sprintf(page, "\n"); 553 pr_cont("\n");
543} 554}
544 555
545static void srcu_torture_synchronize_expedited(void) 556static void srcu_torture_synchronize_expedited(void)
@@ -601,6 +612,52 @@ static struct rcu_torture_ops sched_ops = {
601 .name = "sched" 612 .name = "sched"
602}; 613};
603 614
615#ifdef CONFIG_TASKS_RCU
616
617/*
618 * Definitions for RCU-tasks torture testing.
619 */
620
621static int tasks_torture_read_lock(void)
622{
623 return 0;
624}
625
626static void tasks_torture_read_unlock(int idx)
627{
628}
629
630static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
631{
632 call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
633}
634
635static struct rcu_torture_ops tasks_ops = {
636 .ttype = RCU_TASKS_FLAVOR,
637 .init = rcu_sync_torture_init,
638 .readlock = tasks_torture_read_lock,
639 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
640 .readunlock = tasks_torture_read_unlock,
641 .completed = rcu_no_completed,
642 .deferred_free = rcu_tasks_torture_deferred_free,
643 .sync = synchronize_rcu_tasks,
644 .exp_sync = synchronize_rcu_tasks,
645 .call = call_rcu_tasks,
646 .cb_barrier = rcu_barrier_tasks,
647 .fqs = NULL,
648 .stats = NULL,
649 .irq_capable = 1,
650 .name = "tasks"
651};
652
653#define RCUTORTURE_TASKS_OPS &tasks_ops,
654
655#else /* #ifdef CONFIG_TASKS_RCU */
656
657#define RCUTORTURE_TASKS_OPS
658
659#endif /* #else #ifdef CONFIG_TASKS_RCU */
660
604/* 661/*
605 * RCU torture priority-boost testing. Runs one real-time thread per 662 * RCU torture priority-boost testing. Runs one real-time thread per
606 * CPU for moderate bursts, repeatedly registering RCU callbacks and 663 * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -667,7 +724,7 @@ static int rcu_torture_boost(void *arg)
667 } 724 }
668 call_rcu_time = jiffies; 725 call_rcu_time = jiffies;
669 } 726 }
670 cond_resched(); 727 cond_resched_rcu_qs();
671 stutter_wait("rcu_torture_boost"); 728 stutter_wait("rcu_torture_boost");
672 if (torture_must_stop()) 729 if (torture_must_stop())
673 goto checkwait; 730 goto checkwait;
@@ -707,6 +764,58 @@ checkwait: stutter_wait("rcu_torture_boost");
707 return 0; 764 return 0;
708} 765}
709 766
767static void rcu_torture_cbflood_cb(struct rcu_head *rhp)
768{
769}
770
771/*
772 * RCU torture callback-flood kthread. Repeatedly induces bursts of calls
773 * to call_rcu() or analogous, increasing the probability of occurrence
774 * of callback-overflow corner cases.
775 */
776static int
777rcu_torture_cbflood(void *arg)
778{
779 int err = 1;
780 int i;
781 int j;
782 struct rcu_head *rhp;
783
784 if (cbflood_n_per_burst > 0 &&
785 cbflood_inter_holdoff > 0 &&
786 cbflood_intra_holdoff > 0 &&
787 cur_ops->call &&
788 cur_ops->cb_barrier) {
789 rhp = vmalloc(sizeof(*rhp) *
790 cbflood_n_burst * cbflood_n_per_burst);
791 err = !rhp;
792 }
793 if (err) {
794 VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM");
795 while (!torture_must_stop())
796 schedule_timeout_interruptible(HZ);
797 return 0;
798 }
799 VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started");
800 do {
801 schedule_timeout_interruptible(cbflood_inter_holdoff);
802 atomic_long_inc(&n_cbfloods);
803 WARN_ON(signal_pending(current));
804 for (i = 0; i < cbflood_n_burst; i++) {
805 for (j = 0; j < cbflood_n_per_burst; j++) {
806 cur_ops->call(&rhp[i * cbflood_n_per_burst + j],
807 rcu_torture_cbflood_cb);
808 }
809 schedule_timeout_interruptible(cbflood_intra_holdoff);
810 WARN_ON(signal_pending(current));
811 }
812 cur_ops->cb_barrier();
813 stutter_wait("rcu_torture_cbflood");
814 } while (!torture_must_stop());
815 torture_kthread_stopping("rcu_torture_cbflood");
816 return 0;
817}
818
710/* 819/*
711 * RCU torture force-quiescent-state kthread. Repeatedly induces 820 * RCU torture force-quiescent-state kthread. Repeatedly induces
712 * bursts of calls to force_quiescent_state(), increasing the probability 821 * bursts of calls to force_quiescent_state(), increasing the probability
@@ -1019,7 +1128,7 @@ rcu_torture_reader(void *arg)
1019 __this_cpu_inc(rcu_torture_batch[completed]); 1128 __this_cpu_inc(rcu_torture_batch[completed]);
1020 preempt_enable(); 1129 preempt_enable();
1021 cur_ops->readunlock(idx); 1130 cur_ops->readunlock(idx);
1022 cond_resched(); 1131 cond_resched_rcu_qs();
1023 stutter_wait("rcu_torture_reader"); 1132 stutter_wait("rcu_torture_reader");
1024 } while (!torture_must_stop()); 1133 } while (!torture_must_stop());
1025 if (irqreader && cur_ops->irq_capable) { 1134 if (irqreader && cur_ops->irq_capable) {
@@ -1031,10 +1140,15 @@ rcu_torture_reader(void *arg)
1031} 1140}
1032 1141
1033/* 1142/*
1034 * Create an RCU-torture statistics message in the specified buffer. 1143 * Print torture statistics. Caller must ensure that there is only
1144 * one call to this function at a given time!!! This is normally
1145 * accomplished by relying on the module system to only have one copy
1146 * of the module loaded, and then by giving the rcu_torture_stats
1147 * kthread full control (or the init/cleanup functions when rcu_torture_stats
1148 * thread is not running).
1035 */ 1149 */
1036static void 1150static void
1037rcu_torture_printk(char *page) 1151rcu_torture_stats_print(void)
1038{ 1152{
1039 int cpu; 1153 int cpu;
1040 int i; 1154 int i;
@@ -1052,55 +1166,61 @@ rcu_torture_printk(char *page)
1052 if (pipesummary[i] != 0) 1166 if (pipesummary[i] != 0)
1053 break; 1167 break;
1054 } 1168 }
1055 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); 1169
1056 page += sprintf(page, 1170 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1057 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", 1171 pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
1058 rcu_torture_current, 1172 rcu_torture_current,
1059 rcu_torture_current_version, 1173 rcu_torture_current_version,
1060 list_empty(&rcu_torture_freelist), 1174 list_empty(&rcu_torture_freelist),
1061 atomic_read(&n_rcu_torture_alloc), 1175 atomic_read(&n_rcu_torture_alloc),
1062 atomic_read(&n_rcu_torture_alloc_fail), 1176 atomic_read(&n_rcu_torture_alloc_fail),
1063 atomic_read(&n_rcu_torture_free)); 1177 atomic_read(&n_rcu_torture_free));
1064 page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", 1178 pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
1065 atomic_read(&n_rcu_torture_mberror), 1179 atomic_read(&n_rcu_torture_mberror),
1066 n_rcu_torture_boost_ktrerror, 1180 n_rcu_torture_boost_ktrerror,
1067 n_rcu_torture_boost_rterror); 1181 n_rcu_torture_boost_rterror);
1068 page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", 1182 pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
1069 n_rcu_torture_boost_failure, 1183 n_rcu_torture_boost_failure,
1070 n_rcu_torture_boosts, 1184 n_rcu_torture_boosts,
1071 n_rcu_torture_timers); 1185 n_rcu_torture_timers);
1072 page = torture_onoff_stats(page); 1186 torture_onoff_stats();
1073 page += sprintf(page, "barrier: %ld/%ld:%ld", 1187 pr_cont("barrier: %ld/%ld:%ld ",
1074 n_barrier_successes, 1188 n_barrier_successes,
1075 n_barrier_attempts, 1189 n_barrier_attempts,
1076 n_rcu_torture_barrier_error); 1190 n_rcu_torture_barrier_error);
1077 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1191 pr_cont("cbflood: %ld\n", atomic_long_read(&n_cbfloods));
1192
1193 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1078 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1194 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1079 n_rcu_torture_barrier_error != 0 || 1195 n_rcu_torture_barrier_error != 0 ||
1080 n_rcu_torture_boost_ktrerror != 0 || 1196 n_rcu_torture_boost_ktrerror != 0 ||
1081 n_rcu_torture_boost_rterror != 0 || 1197 n_rcu_torture_boost_rterror != 0 ||
1082 n_rcu_torture_boost_failure != 0 || 1198 n_rcu_torture_boost_failure != 0 ||
1083 i > 1) { 1199 i > 1) {
1084 page += sprintf(page, "!!! "); 1200 pr_cont("%s", "!!! ");
1085 atomic_inc(&n_rcu_torture_error); 1201 atomic_inc(&n_rcu_torture_error);
1086 WARN_ON_ONCE(1); 1202 WARN_ON_ONCE(1);
1087 } 1203 }
1088 page += sprintf(page, "Reader Pipe: "); 1204 pr_cont("Reader Pipe: ");
1089 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1205 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1090 page += sprintf(page, " %ld", pipesummary[i]); 1206 pr_cont(" %ld", pipesummary[i]);
1091 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1207 pr_cont("\n");
1092 page += sprintf(page, "Reader Batch: "); 1208
1209 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1210 pr_cont("Reader Batch: ");
1093 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1211 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1094 page += sprintf(page, " %ld", batchsummary[i]); 1212 pr_cont(" %ld", batchsummary[i]);
1095 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1213 pr_cont("\n");
1096 page += sprintf(page, "Free-Block Circulation: "); 1214
1215 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1216 pr_cont("Free-Block Circulation: ");
1097 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { 1217 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
1098 page += sprintf(page, " %d", 1218 pr_cont(" %d", atomic_read(&rcu_torture_wcount[i]));
1099 atomic_read(&rcu_torture_wcount[i]));
1100 } 1219 }
1101 page += sprintf(page, "\n"); 1220 pr_cont("\n");
1221
1102 if (cur_ops->stats) 1222 if (cur_ops->stats)
1103 cur_ops->stats(page); 1223 cur_ops->stats();
1104 if (rtcv_snap == rcu_torture_current_version && 1224 if (rtcv_snap == rcu_torture_current_version &&
1105 rcu_torture_current != NULL) { 1225 rcu_torture_current != NULL) {
1106 int __maybe_unused flags; 1226 int __maybe_unused flags;
@@ -1109,10 +1229,9 @@ rcu_torture_printk(char *page)
1109 1229
1110 rcutorture_get_gp_data(cur_ops->ttype, 1230 rcutorture_get_gp_data(cur_ops->ttype,
1111 &flags, &gpnum, &completed); 1231 &flags, &gpnum, &completed);
1112 page += sprintf(page, 1232 pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
1113 "??? Writer stall state %d g%lu c%lu f%#x\n", 1233 rcu_torture_writer_state,
1114 rcu_torture_writer_state, 1234 gpnum, completed, flags);
1115 gpnum, completed, flags);
1116 show_rcu_gp_kthreads(); 1235 show_rcu_gp_kthreads();
1117 rcutorture_trace_dump(); 1236 rcutorture_trace_dump();
1118 } 1237 }
@@ -1120,30 +1239,6 @@ rcu_torture_printk(char *page)
1120} 1239}
1121 1240
1122/* 1241/*
1123 * Print torture statistics. Caller must ensure that there is only
1124 * one call to this function at a given time!!! This is normally
1125 * accomplished by relying on the module system to only have one copy
1126 * of the module loaded, and then by giving the rcu_torture_stats
1127 * kthread full control (or the init/cleanup functions when rcu_torture_stats
1128 * thread is not running).
1129 */
1130static void
1131rcu_torture_stats_print(void)
1132{
1133 int size = nr_cpu_ids * 200 + 8192;
1134 char *buf;
1135
1136 buf = kmalloc(size, GFP_KERNEL);
1137 if (!buf) {
1138 pr_err("rcu-torture: Out of memory, need: %d", size);
1139 return;
1140 }
1141 rcu_torture_printk(buf);
1142 pr_alert("%s", buf);
1143 kfree(buf);
1144}
1145
1146/*
1147 * Periodically prints torture statistics, if periodic statistics printing 1242 * Periodically prints torture statistics, if periodic statistics printing
1148 * was specified via the stat_interval module parameter. 1243 * was specified via the stat_interval module parameter.
1149 */ 1244 */
@@ -1295,7 +1390,8 @@ static int rcu_torture_barrier_cbs(void *arg)
1295 if (atomic_dec_and_test(&barrier_cbs_count)) 1390 if (atomic_dec_and_test(&barrier_cbs_count))
1296 wake_up(&barrier_wq); 1391 wake_up(&barrier_wq);
1297 } while (!torture_must_stop()); 1392 } while (!torture_must_stop());
1298 cur_ops->cb_barrier(); 1393 if (cur_ops->cb_barrier != NULL)
1394 cur_ops->cb_barrier();
1299 destroy_rcu_head_on_stack(&rcu); 1395 destroy_rcu_head_on_stack(&rcu);
1300 torture_kthread_stopping("rcu_torture_barrier_cbs"); 1396 torture_kthread_stopping("rcu_torture_barrier_cbs");
1301 return 0; 1397 return 0;
@@ -1418,7 +1514,7 @@ rcu_torture_cleanup(void)
1418 int i; 1514 int i;
1419 1515
1420 rcutorture_record_test_transition(); 1516 rcutorture_record_test_transition();
1421 if (torture_cleanup()) { 1517 if (torture_cleanup_begin()) {
1422 if (cur_ops->cb_barrier != NULL) 1518 if (cur_ops->cb_barrier != NULL)
1423 cur_ops->cb_barrier(); 1519 cur_ops->cb_barrier();
1424 return; 1520 return;
@@ -1447,6 +1543,8 @@ rcu_torture_cleanup(void)
1447 1543
1448 torture_stop_kthread(rcu_torture_stats, stats_task); 1544 torture_stop_kthread(rcu_torture_stats, stats_task);
1449 torture_stop_kthread(rcu_torture_fqs, fqs_task); 1545 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1546 for (i = 0; i < ncbflooders; i++)
1547 torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
1450 if ((test_boost == 1 && cur_ops->can_boost) || 1548 if ((test_boost == 1 && cur_ops->can_boost) ||
1451 test_boost == 2) { 1549 test_boost == 2) {
1452 unregister_cpu_notifier(&rcutorture_cpu_nb); 1550 unregister_cpu_notifier(&rcutorture_cpu_nb);
@@ -1468,6 +1566,7 @@ rcu_torture_cleanup(void)
1468 "End of test: RCU_HOTPLUG"); 1566 "End of test: RCU_HOTPLUG");
1469 else 1567 else
1470 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); 1568 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
1569 torture_cleanup_end();
1471} 1570}
1472 1571
1473#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 1572#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -1534,9 +1633,10 @@ rcu_torture_init(void)
1534 int firsterr = 0; 1633 int firsterr = 0;
1535 static struct rcu_torture_ops *torture_ops[] = { 1634 static struct rcu_torture_ops *torture_ops[] = {
1536 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1635 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1636 RCUTORTURE_TASKS_OPS
1537 }; 1637 };
1538 1638
1539 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) 1639 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
1540 return -EBUSY; 1640 return -EBUSY;
1541 1641
1542 /* Process args and tell the world that the torturer is on the job. */ 1642 /* Process args and tell the world that the torturer is on the job. */
@@ -1693,6 +1793,24 @@ rcu_torture_init(void)
1693 goto unwind; 1793 goto unwind;
1694 if (object_debug) 1794 if (object_debug)
1695 rcu_test_debug_objects(); 1795 rcu_test_debug_objects();
1796 if (cbflood_n_burst > 0) {
1797 /* Create the cbflood threads */
1798 ncbflooders = (num_online_cpus() + 3) / 4;
1799 cbflood_task = kcalloc(ncbflooders, sizeof(*cbflood_task),
1800 GFP_KERNEL);
1801 if (!cbflood_task) {
1802 VERBOSE_TOROUT_ERRSTRING("out of memory");
1803 firsterr = -ENOMEM;
1804 goto unwind;
1805 }
1806 for (i = 0; i < ncbflooders; i++) {
1807 firsterr = torture_create_kthread(rcu_torture_cbflood,
1808 NULL,
1809 cbflood_task[i]);
1810 if (firsterr)
1811 goto unwind;
1812 }
1813 }
1696 rcutorture_record_test_transition(); 1814 rcutorture_record_test_transition();
1697 torture_init_end(); 1815 torture_init_end();
1698 return 0; 1816 return 0;
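Two smaller points about the rcutorture changes above. First, the long-running reader and boost loops now call cond_resched_rcu_qs() instead of cond_resched(), so that they also report quiescent states to the new RCU-tasks machinery; the helper (defined in include/linux/rcupdate.h in this series) is roughly:

/* Rough sketch of the helper used in place of cond_resched() above. */
#define cond_resched_rcu_qs() \
do { \
	rcu_note_voluntary_context_switch(current); \
	cond_resched(); \
} while (0)

Second, for a sense of the callback flood's footprint: each flooder preallocates cbflood_n_burst * cbflood_n_per_burst rcu_head structures, which with the defaults is 3 * 20000 = 60000 heads, roughly 940 KB per flooder on a typical 64-bit build (16-byte rcu_head), and ncbflooders = (num_online_cpus() + 3) / 4, i.e. one flooder per four CPUs, rounded up.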
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index d9efcc13008c..c0623fc47125 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -51,7 +51,7 @@ static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
51 51
52#include "tiny_plugin.h" 52#include "tiny_plugin.h"
53 53
54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
55static void rcu_idle_enter_common(long long newval) 55static void rcu_idle_enter_common(long long newval)
56{ 56{
57 if (newval) { 57 if (newval) {
@@ -62,7 +62,7 @@ static void rcu_idle_enter_common(long long newval)
62 } 62 }
63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"), 63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
64 rcu_dynticks_nesting, newval)); 64 rcu_dynticks_nesting, newval));
65 if (!is_idle_task(current)) { 65 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); 66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
67 67
68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), 68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
72 current->pid, current->comm, 72 current->pid, current->comm,
73 idle->pid, idle->comm); /* must be idle task! */ 73 idle->pid, idle->comm); /* must be idle task! */
74 } 74 }
75 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 75 rcu_sched_qs(); /* implies rcu_bh_inc() */
76 barrier(); 76 barrier();
77 rcu_dynticks_nesting = newval; 77 rcu_dynticks_nesting = newval;
78} 78}
@@ -114,7 +114,7 @@ void rcu_irq_exit(void)
114} 114}
115EXPORT_SYMBOL_GPL(rcu_irq_exit); 115EXPORT_SYMBOL_GPL(rcu_irq_exit);
116 116
117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ 117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
118static void rcu_idle_exit_common(long long oldval) 118static void rcu_idle_exit_common(long long oldval)
119{ 119{
120 if (oldval) { 120 if (oldval) {
@@ -123,7 +123,7 @@ static void rcu_idle_exit_common(long long oldval)
123 return; 123 return;
124 } 124 }
125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); 125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
126 if (!is_idle_task(current)) { 126 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); 127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
128 128
129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), 129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
217 * are at it, given that any rcu quiescent state is also an rcu_bh 217 * are at it, given that any rcu quiescent state is also an rcu_bh
218 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 218 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
219 */ 219 */
220void rcu_sched_qs(int cpu) 220void rcu_sched_qs(void)
221{ 221{
222 unsigned long flags; 222 unsigned long flags;
223 223
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
231/* 231/*
232 * Record an rcu_bh quiescent state. 232 * Record an rcu_bh quiescent state.
233 */ 233 */
234void rcu_bh_qs(int cpu) 234void rcu_bh_qs(void)
235{ 235{
236 unsigned long flags; 236 unsigned long flags;
237 237
@@ -251,9 +251,11 @@ void rcu_check_callbacks(int cpu, int user)
251{ 251{
252 RCU_TRACE(check_cpu_stalls()); 252 RCU_TRACE(check_cpu_stalls());
253 if (user || rcu_is_cpu_rrupt_from_idle()) 253 if (user || rcu_is_cpu_rrupt_from_idle())
254 rcu_sched_qs(cpu); 254 rcu_sched_qs();
255 else if (!in_softirq()) 255 else if (!in_softirq())
256 rcu_bh_qs(cpu); 256 rcu_bh_qs();
257 if (user)
258 rcu_note_voluntary_context_switch(current);
257} 259}
258 260
259/* 261/*
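
The new rcu_note_voluntary_context_switch(current) call lets the scheduling-clock interrupt report a quiescent state to the new RCU-tasks flavor whenever the tick interrupted user-mode execution, which is as good as a voluntary context switch for that flavor. As introduced earlier in this series it is roughly the following (sketch; the authoritative definition lives in include/linux/rcupdate.h):

    /* Sketch: clear this task's RCU-tasks holdout flag if the RCU-tasks
     * grace-period kthread is currently waiting on this task. */
    #define rcu_note_voluntary_context_switch(t) \
            do { \
                    if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
                            ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
            } while (0)
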
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6fbe3c..133e47223095 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -79,9 +79,18 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
79 * the tracing userspace tools to be able to decipher the string 79 * the tracing userspace tools to be able to decipher the string
80 * address to the matching string. 80 * address to the matching string.
81 */ 81 */
82#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ 82#ifdef CONFIG_TRACING
83# define DEFINE_RCU_TPS(sname) \
83static char sname##_varname[] = #sname; \ 84static char sname##_varname[] = #sname; \
84static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \ 85static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname;
86# define RCU_STATE_NAME(sname) sname##_varname
87#else
88# define DEFINE_RCU_TPS(sname)
89# define RCU_STATE_NAME(sname) __stringify(sname)
90#endif
91
92#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
93DEFINE_RCU_TPS(sname) \
85struct rcu_state sname##_state = { \ 94struct rcu_state sname##_state = { \
86 .level = { &sname##_state.node[0] }, \ 95 .level = { &sname##_state.node[0] }, \
87 .call = cr, \ 96 .call = cr, \
@@ -93,7 +102,7 @@ struct rcu_state sname##_state = { \
93 .orphan_donetail = &sname##_state.orphan_donelist, \ 102 .orphan_donetail = &sname##_state.orphan_donelist, \
94 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 103 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
95 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ 104 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
96 .name = sname##_varname, \ 105 .name = RCU_STATE_NAME(sname), \
97 .abbr = sabbr, \ 106 .abbr = sabbr, \
98}; \ 107}; \
99DEFINE_PER_CPU(struct rcu_data, sname##_data) 108DEFINE_PER_CPU(struct rcu_data, sname##_data)
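
The DEFINE_RCU_TPS()/RCU_STATE_NAME() split above exists because __tracepoint_string strings are only emitted (and only useful) when CONFIG_TRACING is enabled; without tracing, the state name can simply collapse to a stringified constant. Expanded by hand for the rcu_sched flavor, the two configurations reduce to roughly:

    /* CONFIG_TRACING=y: emit a named string so trace tools can resolve the address. */
    static char rcu_sched_varname[] = "rcu_sched";
    static const char *tp_rcu_sched_varname __used __tracepoint_string = rcu_sched_varname;
    /* ... .name = RCU_STATE_NAME(rcu_sched) becomes .name = rcu_sched_varname ... */

    /* CONFIG_TRACING=n: no tracepoint string at all. */
    /* ... .name = RCU_STATE_NAME(rcu_sched) becomes .name = __stringify(rcu_sched), i.e. "rcu_sched" ... */
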
@@ -188,22 +197,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
188 * one since the start of the grace period, this just sets a flag. 197 * one since the start of the grace period, this just sets a flag.
189 * The caller must have disabled preemption. 198 * The caller must have disabled preemption.
190 */ 199 */
191void rcu_sched_qs(int cpu) 200void rcu_sched_qs(void)
192{ 201{
193 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 202 if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
194 203 trace_rcu_grace_period(TPS("rcu_sched"),
195 if (rdp->passed_quiesce == 0) 204 __this_cpu_read(rcu_sched_data.gpnum),
196 trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); 205 TPS("cpuqs"));
197 rdp->passed_quiesce = 1; 206 __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
207 }
198} 208}
199 209
200void rcu_bh_qs(int cpu) 210void rcu_bh_qs(void)
201{ 211{
202 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 212 if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
203 213 trace_rcu_grace_period(TPS("rcu_bh"),
204 if (rdp->passed_quiesce == 0) 214 __this_cpu_read(rcu_bh_data.gpnum),
205 trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); 215 TPS("cpuqs"));
206 rdp->passed_quiesce = 1; 216 __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
217 }
207} 218}
208 219
209static DEFINE_PER_CPU(int, rcu_sched_qs_mask); 220static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
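
Dropping the cpu argument from rcu_sched_qs() and rcu_bh_qs() works because these functions are only ever called on the CPU whose quiescent state is being recorded, so plain per_cpu(..., cpu) accesses can become __this_cpu_read()/__this_cpu_write() operations. A minimal before/after sketch of the idiom (generic example, not the tree.c code):

    DEFINE_PER_CPU(int, example_qs_flag);

    void old_style(int cpu)                 /* caller had to pass its own CPU number */
    {
            per_cpu(example_qs_flag, cpu) = 1;
    }

    void new_style(void)                    /* caller runs with preemption disabled */
    {
            if (!__this_cpu_read(example_qs_flag))
                    __this_cpu_write(example_qs_flag, 1);
    }
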
@@ -278,7 +289,7 @@ static void rcu_momentary_dyntick_idle(void)
278void rcu_note_context_switch(int cpu) 289void rcu_note_context_switch(int cpu)
279{ 290{
280 trace_rcu_utilization(TPS("Start context switch")); 291 trace_rcu_utilization(TPS("Start context switch"));
281 rcu_sched_qs(cpu); 292 rcu_sched_qs();
282 rcu_preempt_note_context_switch(cpu); 293 rcu_preempt_note_context_switch(cpu);
283 if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) 294 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
284 rcu_momentary_dyntick_idle(); 295 rcu_momentary_dyntick_idle();
@@ -526,6 +537,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
526 atomic_inc(&rdtp->dynticks); 537 atomic_inc(&rdtp->dynticks);
527 smp_mb__after_atomic(); /* Force ordering with next sojourn. */ 538 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
528 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 539 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
540 rcu_dynticks_task_enter();
529 541
530 /* 542 /*
531 * It is illegal to enter an extended quiescent state while 543 * It is illegal to enter an extended quiescent state while
@@ -642,6 +654,7 @@ void rcu_irq_exit(void)
642static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, 654static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
643 int user) 655 int user)
644{ 656{
657 rcu_dynticks_task_exit();
645 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ 658 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
646 atomic_inc(&rdtp->dynticks); 659 atomic_inc(&rdtp->dynticks);
647 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 660 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -819,7 +832,7 @@ bool notrace __rcu_is_watching(void)
819 */ 832 */
820bool notrace rcu_is_watching(void) 833bool notrace rcu_is_watching(void)
821{ 834{
822 int ret; 835 bool ret;
823 836
824 preempt_disable(); 837 preempt_disable();
825 ret = __rcu_is_watching(); 838 ret = __rcu_is_watching();
@@ -1647,7 +1660,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1647 rnp->level, rnp->grplo, 1660 rnp->level, rnp->grplo,
1648 rnp->grphi, rnp->qsmask); 1661 rnp->grphi, rnp->qsmask);
1649 raw_spin_unlock_irq(&rnp->lock); 1662 raw_spin_unlock_irq(&rnp->lock);
1650 cond_resched(); 1663 cond_resched_rcu_qs();
1651 } 1664 }
1652 1665
1653 mutex_unlock(&rsp->onoff_mutex); 1666 mutex_unlock(&rsp->onoff_mutex);
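
The cond_resched() calls in the grace-period kthread's long loops become cond_resched_rcu_qs() so that each pass through the loop also counts as a quiescent state for RCU-tasks, not just a reschedule point. As defined by this series it is approximately the following (sketch; see include/linux/rcupdate.h for the real definition):

    /* Report a voluntary-context-switch-equivalent to RCU-tasks, then maybe yield. */
    #define cond_resched_rcu_qs() \
            do { \
                    rcu_note_voluntary_context_switch(current); \
                    cond_resched(); \
            } while (0)
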
@@ -1668,7 +1681,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1668 if (fqs_state == RCU_SAVE_DYNTICK) { 1681 if (fqs_state == RCU_SAVE_DYNTICK) {
1669 /* Collect dyntick-idle snapshots. */ 1682 /* Collect dyntick-idle snapshots. */
1670 if (is_sysidle_rcu_state(rsp)) { 1683 if (is_sysidle_rcu_state(rsp)) {
1671 isidle = 1; 1684 isidle = true;
1672 maxj = jiffies - ULONG_MAX / 4; 1685 maxj = jiffies - ULONG_MAX / 4;
1673 } 1686 }
1674 force_qs_rnp(rsp, dyntick_save_progress_counter, 1687 force_qs_rnp(rsp, dyntick_save_progress_counter,
@@ -1677,14 +1690,15 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1677 fqs_state = RCU_FORCE_QS; 1690 fqs_state = RCU_FORCE_QS;
1678 } else { 1691 } else {
1679 /* Handle dyntick-idle and offline CPUs. */ 1692 /* Handle dyntick-idle and offline CPUs. */
1680 isidle = 0; 1693 isidle = false;
1681 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); 1694 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1682 } 1695 }
1683 /* Clear flag to prevent immediate re-entry. */ 1696 /* Clear flag to prevent immediate re-entry. */
1684 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1697 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1685 raw_spin_lock_irq(&rnp->lock); 1698 raw_spin_lock_irq(&rnp->lock);
1686 smp_mb__after_unlock_lock(); 1699 smp_mb__after_unlock_lock();
1687 ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS; 1700 ACCESS_ONCE(rsp->gp_flags) =
1701 ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS;
1688 raw_spin_unlock_irq(&rnp->lock); 1702 raw_spin_unlock_irq(&rnp->lock);
1689 } 1703 }
1690 return fqs_state; 1704 return fqs_state;
@@ -1736,7 +1750,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1736 /* smp_mb() provided by prior unlock-lock pair. */ 1750 /* smp_mb() provided by prior unlock-lock pair. */
1737 nocb += rcu_future_gp_cleanup(rsp, rnp); 1751 nocb += rcu_future_gp_cleanup(rsp, rnp);
1738 raw_spin_unlock_irq(&rnp->lock); 1752 raw_spin_unlock_irq(&rnp->lock);
1739 cond_resched(); 1753 cond_resched_rcu_qs();
1740 } 1754 }
1741 rnp = rcu_get_root(rsp); 1755 rnp = rcu_get_root(rsp);
1742 raw_spin_lock_irq(&rnp->lock); 1756 raw_spin_lock_irq(&rnp->lock);
@@ -1785,8 +1799,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
1785 /* Locking provides needed memory barrier. */ 1799 /* Locking provides needed memory barrier. */
1786 if (rcu_gp_init(rsp)) 1800 if (rcu_gp_init(rsp))
1787 break; 1801 break;
1788 cond_resched(); 1802 cond_resched_rcu_qs();
1789 flush_signals(current); 1803 WARN_ON(signal_pending(current));
1790 trace_rcu_grace_period(rsp->name, 1804 trace_rcu_grace_period(rsp->name,
1791 ACCESS_ONCE(rsp->gpnum), 1805 ACCESS_ONCE(rsp->gpnum),
1792 TPS("reqwaitsig")); 1806 TPS("reqwaitsig"));
@@ -1828,11 +1842,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
1828 trace_rcu_grace_period(rsp->name, 1842 trace_rcu_grace_period(rsp->name,
1829 ACCESS_ONCE(rsp->gpnum), 1843 ACCESS_ONCE(rsp->gpnum),
1830 TPS("fqsend")); 1844 TPS("fqsend"));
1831 cond_resched(); 1845 cond_resched_rcu_qs();
1832 } else { 1846 } else {
1833 /* Deal with stray signal. */ 1847 /* Deal with stray signal. */
1834 cond_resched(); 1848 cond_resched_rcu_qs();
1835 flush_signals(current); 1849 WARN_ON(signal_pending(current));
1836 trace_rcu_grace_period(rsp->name, 1850 trace_rcu_grace_period(rsp->name,
1837 ACCESS_ONCE(rsp->gpnum), 1851 ACCESS_ONCE(rsp->gpnum),
1838 TPS("fqswaitsig")); 1852 TPS("fqswaitsig"));
@@ -1928,7 +1942,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1928{ 1942{
1929 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 1943 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1930 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 1944 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1931 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 1945 rcu_gp_kthread_wake(rsp);
1932} 1946}
1933 1947
1934/* 1948/*
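
Replacing the open-coded wake_up(&rsp->gp_wq) with rcu_gp_kthread_wake(rsp) centralizes the checks for whether a wakeup is actually useful. The hunks here show only the call sites, so treat the body below as an assumed sketch of what such a helper does rather than the exact tree.c code:

    static void rcu_gp_kthread_wake(struct rcu_state *rsp)
    {
            /* No point waking if we are the GP kthread, it has not been
             * spawned yet, or no grace-period work has been flagged. */
            if (current == rsp->gp_kthread ||
                !ACCESS_ONCE(rsp->gp_flags) ||
                !rsp->gp_kthread)
                    return;
            wake_up(&rsp->gp_wq);
    }
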
@@ -2210,8 +2224,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2210 /* Adjust any no-longer-needed kthreads. */ 2224 /* Adjust any no-longer-needed kthreads. */
2211 rcu_boost_kthread_setaffinity(rnp, -1); 2225 rcu_boost_kthread_setaffinity(rnp, -1);
2212 2226
2213 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
2214
2215 /* Exclude any attempts to start a new grace period. */ 2227 /* Exclude any attempts to start a new grace period. */
2216 mutex_lock(&rsp->onoff_mutex); 2228 mutex_lock(&rsp->onoff_mutex);
2217 raw_spin_lock_irqsave(&rsp->orphan_lock, flags); 2229 raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
@@ -2393,8 +2405,8 @@ void rcu_check_callbacks(int cpu, int user)
2393 * at least not while the corresponding CPU is online. 2405 * at least not while the corresponding CPU is online.
2394 */ 2406 */
2395 2407
2396 rcu_sched_qs(cpu); 2408 rcu_sched_qs();
2397 rcu_bh_qs(cpu); 2409 rcu_bh_qs();
2398 2410
2399 } else if (!in_softirq()) { 2411 } else if (!in_softirq()) {
2400 2412
@@ -2405,11 +2417,13 @@ void rcu_check_callbacks(int cpu, int user)
2405 * critical section, so note it. 2417 * critical section, so note it.
2406 */ 2418 */
2407 2419
2408 rcu_bh_qs(cpu); 2420 rcu_bh_qs();
2409 } 2421 }
2410 rcu_preempt_check_callbacks(cpu); 2422 rcu_preempt_check_callbacks(cpu);
2411 if (rcu_pending(cpu)) 2423 if (rcu_pending(cpu))
2412 invoke_rcu_core(); 2424 invoke_rcu_core();
2425 if (user)
2426 rcu_note_voluntary_context_switch(current);
2413 trace_rcu_utilization(TPS("End scheduler-tick")); 2427 trace_rcu_utilization(TPS("End scheduler-tick"));
2414} 2428}
2415 2429
@@ -2432,7 +2446,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2432 struct rcu_node *rnp; 2446 struct rcu_node *rnp;
2433 2447
2434 rcu_for_each_leaf_node(rsp, rnp) { 2448 rcu_for_each_leaf_node(rsp, rnp) {
2435 cond_resched(); 2449 cond_resched_rcu_qs();
2436 mask = 0; 2450 mask = 0;
2437 raw_spin_lock_irqsave(&rnp->lock, flags); 2451 raw_spin_lock_irqsave(&rnp->lock, flags);
2438 smp_mb__after_unlock_lock(); 2452 smp_mb__after_unlock_lock();
@@ -2449,7 +2463,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2449 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 2463 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2450 if ((rnp->qsmask & bit) != 0) { 2464 if ((rnp->qsmask & bit) != 0) {
2451 if ((rnp->qsmaskinit & bit) != 0) 2465 if ((rnp->qsmaskinit & bit) != 0)
2452 *isidle = 0; 2466 *isidle = false;
2453 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) 2467 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2454 mask |= bit; 2468 mask |= bit;
2455 } 2469 }
@@ -2505,9 +2519,10 @@ static void force_quiescent_state(struct rcu_state *rsp)
2505 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2519 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2506 return; /* Someone beat us to it. */ 2520 return; /* Someone beat us to it. */
2507 } 2521 }
2508 ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS; 2522 ACCESS_ONCE(rsp->gp_flags) =
2523 ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
2509 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2524 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2510 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 2525 rcu_gp_kthread_wake(rsp);
2511} 2526}
2512 2527
2513/* 2528/*
@@ -2925,11 +2940,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
2925 * restructure your code to batch your updates, and then use a single 2940 * restructure your code to batch your updates, and then use a single
2926 * synchronize_sched() instead. 2941 * synchronize_sched() instead.
2927 * 2942 *
2928 * Note that it is illegal to call this function while holding any lock
2929 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
2930 * to call this function from a CPU-hotplug notifier. Failing to observe
2931 * these restriction will result in deadlock.
2932 *
2933 * This implementation can be thought of as an application of ticket 2943 * This implementation can be thought of as an application of ticket
2934 * locking to RCU, with sync_sched_expedited_started and 2944 * locking to RCU, with sync_sched_expedited_started and
2935 * sync_sched_expedited_done taking on the roles of the halves 2945 * sync_sched_expedited_done taking on the roles of the halves
@@ -2979,7 +2989,12 @@ void synchronize_sched_expedited(void)
2979 */ 2989 */
2980 snap = atomic_long_inc_return(&rsp->expedited_start); 2990 snap = atomic_long_inc_return(&rsp->expedited_start);
2981 firstsnap = snap; 2991 firstsnap = snap;
2982 get_online_cpus(); 2992 if (!try_get_online_cpus()) {
2993 /* CPU hotplug operation in flight, fall back to normal GP. */
2994 wait_rcu_gp(call_rcu_sched);
2995 atomic_long_inc(&rsp->expedited_normal);
2996 return;
2997 }
2983 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); 2998 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
2984 2999
2985 /* 3000 /*
@@ -3026,7 +3041,12 @@ void synchronize_sched_expedited(void)
3026 * and they started after our first try, so their grace 3041 * and they started after our first try, so their grace
3027 * period works for us. 3042 * period works for us.
3028 */ 3043 */
3029 get_online_cpus(); 3044 if (!try_get_online_cpus()) {
3045 /* CPU hotplug operation in flight, use normal GP. */
3046 wait_rcu_gp(call_rcu_sched);
3047 atomic_long_inc(&rsp->expedited_normal);
3048 return;
3049 }
3030 snap = atomic_long_read(&rsp->expedited_start); 3050 snap = atomic_long_read(&rsp->expedited_start);
3031 smp_mb(); /* ensure read is before try_stop_cpus(). */ 3051 smp_mb(); /* ensure read is before try_stop_cpus(). */
3032 } 3052 }
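
Both expedited grace-period paths now share the same shape: opportunistically pin CPU hotplug with the new try_get_online_cpus() from kernel/cpu.c, and if a hotplug operation is in flight, fall back to a normal grace period instead of blocking (this is what lets the deadlock warning removed above go away). The skeleton of the pattern, with a hypothetical wrapper name:

    void expedited_gp_or_fallback(void)
    {
            if (!try_get_online_cpus()) {
                    /* Hotplug in flight: a normal grace period is always safe. */
                    wait_rcu_gp(call_rcu_sched);
                    return;
            }
            /* ... expedited machinery runs with CPUs held online ... */
            put_online_cpus();
    }
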
@@ -3442,6 +3462,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
3442 case CPU_UP_PREPARE_FROZEN: 3462 case CPU_UP_PREPARE_FROZEN:
3443 rcu_prepare_cpu(cpu); 3463 rcu_prepare_cpu(cpu);
3444 rcu_prepare_kthreads(cpu); 3464 rcu_prepare_kthreads(cpu);
3465 rcu_spawn_all_nocb_kthreads(cpu);
3445 break; 3466 break;
3446 case CPU_ONLINE: 3467 case CPU_ONLINE:
3447 case CPU_DOWN_FAILED: 3468 case CPU_DOWN_FAILED:
@@ -3489,7 +3510,7 @@ static int rcu_pm_notify(struct notifier_block *self,
3489} 3510}
3490 3511
3491/* 3512/*
3492 * Spawn the kthread that handles this RCU flavor's grace periods. 3513 * Spawn the kthreads that handle each RCU flavor's grace periods.
3493 */ 3514 */
3494static int __init rcu_spawn_gp_kthread(void) 3515static int __init rcu_spawn_gp_kthread(void)
3495{ 3516{
@@ -3498,6 +3519,7 @@ static int __init rcu_spawn_gp_kthread(void)
3498 struct rcu_state *rsp; 3519 struct rcu_state *rsp;
3499 struct task_struct *t; 3520 struct task_struct *t;
3500 3521
3522 rcu_scheduler_fully_active = 1;
3501 for_each_rcu_flavor(rsp) { 3523 for_each_rcu_flavor(rsp) {
3502 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); 3524 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
3503 BUG_ON(IS_ERR(t)); 3525 BUG_ON(IS_ERR(t));
@@ -3505,8 +3527,9 @@ static int __init rcu_spawn_gp_kthread(void)
3505 raw_spin_lock_irqsave(&rnp->lock, flags); 3527 raw_spin_lock_irqsave(&rnp->lock, flags);
3506 rsp->gp_kthread = t; 3528 rsp->gp_kthread = t;
3507 raw_spin_unlock_irqrestore(&rnp->lock, flags); 3529 raw_spin_unlock_irqrestore(&rnp->lock, flags);
3508 rcu_spawn_nocb_kthreads(rsp);
3509 } 3530 }
3531 rcu_spawn_nocb_kthreads();
3532 rcu_spawn_boost_kthreads();
3510 return 0; 3533 return 0;
3511} 3534}
3512early_initcall(rcu_spawn_gp_kthread); 3535early_initcall(rcu_spawn_gp_kthread);
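
With this reorganization a single early_initcall() now drives all of RCU's deferred kthread creation: rcu_spawn_gp_kthread() sets rcu_scheduler_fully_active, spawns the per-flavor grace-period kthreads, and only then calls the argument-less rcu_spawn_nocb_kthreads() and the new rcu_spawn_boost_kthreads(). Condensed from the hunks above:

    static int __init rcu_spawn_gp_kthread(void)
    {
            struct rcu_state *rsp;

            rcu_scheduler_fully_active = 1;
            for_each_rcu_flavor(rsp) {
                    /* kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name) and
                     * record the result in rsp->gp_kthread under rnp->lock. */
            }
            rcu_spawn_nocb_kthreads();      /* rcuo kthreads for already-online no-CBs CPUs */
            rcu_spawn_boost_kthreads();     /* boost kthreads, no-op unless CONFIG_RCU_BOOST */
            return 0;
    }
    early_initcall(rcu_spawn_gp_kthread);
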
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a86eb7bac45..d03764652d91 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -350,7 +350,7 @@ struct rcu_data {
350 int nocb_p_count_lazy; /* (approximate). */ 350 int nocb_p_count_lazy; /* (approximate). */
351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ 351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
352 struct task_struct *nocb_kthread; 352 struct task_struct *nocb_kthread;
353 bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ 353 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
354 354
355 /* The following fields are used by the leader, hence own cacheline. */ 355 /* The following fields are used by the leader, hence own cacheline. */
356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; 356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
@@ -383,6 +383,11 @@ struct rcu_data {
383#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 383#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
384#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 384#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
385 385
386/* Values for nocb_defer_wakeup field in struct rcu_data. */
387#define RCU_NOGP_WAKE_NOT 0
388#define RCU_NOGP_WAKE 1
389#define RCU_NOGP_WAKE_FORCE 2
390
386#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) 391#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
387 /* For jiffies_till_first_fqs and */ 392 /* For jiffies_till_first_fqs and */
388 /* and jiffies_till_next_fqs. */ 393 /* and jiffies_till_next_fqs. */
@@ -572,6 +577,7 @@ static void rcu_preempt_do_callbacks(void);
572static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 577static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
573 struct rcu_node *rnp); 578 struct rcu_node *rnp);
574#endif /* #ifdef CONFIG_RCU_BOOST */ 579#endif /* #ifdef CONFIG_RCU_BOOST */
580static void __init rcu_spawn_boost_kthreads(void);
575static void rcu_prepare_kthreads(int cpu); 581static void rcu_prepare_kthreads(int cpu);
576static void rcu_cleanup_after_idle(int cpu); 582static void rcu_cleanup_after_idle(int cpu);
577static void rcu_prepare_for_idle(int cpu); 583static void rcu_prepare_for_idle(int cpu);
@@ -589,10 +595,14 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
589static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 595static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
590 struct rcu_data *rdp, 596 struct rcu_data *rdp,
591 unsigned long flags); 597 unsigned long flags);
592static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); 598static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
593static void do_nocb_deferred_wakeup(struct rcu_data *rdp); 599static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
594static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); 600static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
595static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); 601static void rcu_spawn_all_nocb_kthreads(int cpu);
602static void __init rcu_spawn_nocb_kthreads(void);
603#ifdef CONFIG_RCU_NOCB_CPU
604static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
605#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
596static void __maybe_unused rcu_kick_nohz_cpu(int cpu); 606static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
597static bool init_nocb_callback_list(struct rcu_data *rdp); 607static bool init_nocb_callback_list(struct rcu_data *rdp);
598static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); 608static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
@@ -605,6 +615,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
605static void rcu_bind_gp_kthread(void); 615static void rcu_bind_gp_kthread(void);
606static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); 616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
607static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 617static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
618static void rcu_dynticks_task_enter(void);
619static void rcu_dynticks_task_exit(void);
608 620
609#endif /* #ifndef RCU_TREE_NONCORE */ 621#endif /* #ifndef RCU_TREE_NONCORE */
610 622
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a7997e272564..387dd4599344 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -85,33 +85,6 @@ static void __init rcu_bootup_announce_oddness(void)
85 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 85 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
86 if (nr_cpu_ids != NR_CPUS) 86 if (nr_cpu_ids != NR_CPUS)
87 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 87 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
88#ifdef CONFIG_RCU_NOCB_CPU
89#ifndef CONFIG_RCU_NOCB_CPU_NONE
90 if (!have_rcu_nocb_mask) {
91 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
92 have_rcu_nocb_mask = true;
93 }
94#ifdef CONFIG_RCU_NOCB_CPU_ZERO
95 pr_info("\tOffload RCU callbacks from CPU 0\n");
96 cpumask_set_cpu(0, rcu_nocb_mask);
97#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
98#ifdef CONFIG_RCU_NOCB_CPU_ALL
99 pr_info("\tOffload RCU callbacks from all CPUs\n");
100 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
101#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
102#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
103 if (have_rcu_nocb_mask) {
104 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
105 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
106 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
107 rcu_nocb_mask);
108 }
109 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
110 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
111 if (rcu_nocb_poll)
112 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
113 }
114#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
115} 88}
116 89
117#ifdef CONFIG_TREE_PREEMPT_RCU 90#ifdef CONFIG_TREE_PREEMPT_RCU
@@ -134,7 +107,7 @@ static void __init rcu_bootup_announce(void)
134 * Return the number of RCU-preempt batches processed thus far 107 * Return the number of RCU-preempt batches processed thus far
135 * for debug and statistics. 108 * for debug and statistics.
136 */ 109 */
137long rcu_batches_completed_preempt(void) 110static long rcu_batches_completed_preempt(void)
138{ 111{
139 return rcu_preempt_state.completed; 112 return rcu_preempt_state.completed;
140} 113}
@@ -155,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
155 * not in a quiescent state. There might be any number of tasks blocked 128 * not in a quiescent state. There might be any number of tasks blocked
156 * while in an RCU read-side critical section. 129 * while in an RCU read-side critical section.
157 * 130 *
158 * Unlike the other rcu_*_qs() functions, callers to this function 131 * As with the other rcu_*_qs() functions, callers to this function
159 * must disable irqs in order to protect the assignment to 132 * must disable preemption.
160 * ->rcu_read_unlock_special. 133 */
161 */ 134static void rcu_preempt_qs(void)
162static void rcu_preempt_qs(int cpu) 135{
163{ 136 if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
164 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 137 trace_rcu_grace_period(TPS("rcu_preempt"),
165 138 __this_cpu_read(rcu_preempt_data.gpnum),
166 if (rdp->passed_quiesce == 0) 139 TPS("cpuqs"));
167 trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); 140 __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
168 rdp->passed_quiesce = 1; 141 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
169 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 142 current->rcu_read_unlock_special.b.need_qs = false;
143 }
170} 144}
171 145
172/* 146/*
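
The ->rcu_read_unlock_special accesses above rely on the union rcu_special introduced by the companion sched.h change: the slow paths set or clear single bytes (.b.blocked, .b.need_qs) without atomic read-modify-write bit twiddling, while the fast paths test the whole word (.s) in one go. Roughly (sketch; see include/linux/sched.h in this series for the authoritative layout):

    union rcu_special {
            struct {
                    bool blocked;   /* Task blocked while in an RCU read-side section. */
                    bool need_qs;   /* RCU core needs a quiescent state from this task. */
            } b;                    /* Individual bits. */
            short s;                /* Whole set of bits, tested/cleared at once. */
    };
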
@@ -190,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
190 struct rcu_node *rnp; 164 struct rcu_node *rnp;
191 165
192 if (t->rcu_read_lock_nesting > 0 && 166 if (t->rcu_read_lock_nesting > 0 &&
193 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 167 !t->rcu_read_unlock_special.b.blocked) {
194 168
195 /* Possibly blocking in an RCU read-side critical section. */ 169 /* Possibly blocking in an RCU read-side critical section. */
196 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 170 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
197 rnp = rdp->mynode; 171 rnp = rdp->mynode;
198 raw_spin_lock_irqsave(&rnp->lock, flags); 172 raw_spin_lock_irqsave(&rnp->lock, flags);
199 smp_mb__after_unlock_lock(); 173 smp_mb__after_unlock_lock();
200 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 174 t->rcu_read_unlock_special.b.blocked = true;
201 t->rcu_blocked_node = rnp; 175 t->rcu_blocked_node = rnp;
202 176
203 /* 177 /*
@@ -239,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
239 : rnp->gpnum + 1); 213 : rnp->gpnum + 1);
240 raw_spin_unlock_irqrestore(&rnp->lock, flags); 214 raw_spin_unlock_irqrestore(&rnp->lock, flags);
241 } else if (t->rcu_read_lock_nesting < 0 && 215 } else if (t->rcu_read_lock_nesting < 0 &&
242 t->rcu_read_unlock_special) { 216 t->rcu_read_unlock_special.s) {
243 217
244 /* 218 /*
245 * Complete exit from RCU read-side critical section on 219 * Complete exit from RCU read-side critical section on
@@ -257,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
257 * grace period, then the fact that the task has been enqueued 231 * grace period, then the fact that the task has been enqueued
258 * means that we continue to block the current grace period. 232 * means that we continue to block the current grace period.
259 */ 233 */
260 local_irq_save(flags); 234 rcu_preempt_qs();
261 rcu_preempt_qs(cpu);
262 local_irq_restore(flags);
263} 235}
264 236
265/* 237/*
@@ -340,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
340 bool drop_boost_mutex = false; 312 bool drop_boost_mutex = false;
341#endif /* #ifdef CONFIG_RCU_BOOST */ 313#endif /* #ifdef CONFIG_RCU_BOOST */
342 struct rcu_node *rnp; 314 struct rcu_node *rnp;
343 int special; 315 union rcu_special special;
344 316
345 /* NMI handlers cannot block and cannot safely manipulate state. */ 317 /* NMI handlers cannot block and cannot safely manipulate state. */
346 if (in_nmi()) 318 if (in_nmi())
@@ -350,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
350 322
351 /* 323 /*
352 * If RCU core is waiting for this CPU to exit critical section, 324 * If RCU core is waiting for this CPU to exit critical section,
353 * let it know that we have done so. 325 * let it know that we have done so. Because irqs are disabled,
326 * t->rcu_read_unlock_special cannot change.
354 */ 327 */
355 special = t->rcu_read_unlock_special; 328 special = t->rcu_read_unlock_special;
356 if (special & RCU_READ_UNLOCK_NEED_QS) { 329 if (special.b.need_qs) {
357 rcu_preempt_qs(smp_processor_id()); 330 rcu_preempt_qs();
358 if (!t->rcu_read_unlock_special) { 331 if (!t->rcu_read_unlock_special.s) {
359 local_irq_restore(flags); 332 local_irq_restore(flags);
360 return; 333 return;
361 } 334 }
@@ -368,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
368 } 341 }
369 342
370 /* Clean up if blocked during RCU read-side critical section. */ 343 /* Clean up if blocked during RCU read-side critical section. */
371 if (special & RCU_READ_UNLOCK_BLOCKED) { 344 if (special.b.blocked) {
372 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 345 t->rcu_read_unlock_special.b.blocked = false;
373 346
374 /* 347 /*
375 * Remove this task from the list it blocked on. The 348 * Remove this task from the list it blocked on. The
@@ -653,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
653 struct task_struct *t = current; 626 struct task_struct *t = current;
654 627
655 if (t->rcu_read_lock_nesting == 0) { 628 if (t->rcu_read_lock_nesting == 0) {
656 rcu_preempt_qs(cpu); 629 rcu_preempt_qs();
657 return; 630 return;
658 } 631 }
659 if (t->rcu_read_lock_nesting > 0 && 632 if (t->rcu_read_lock_nesting > 0 &&
660 per_cpu(rcu_preempt_data, cpu).qs_pending) 633 per_cpu(rcu_preempt_data, cpu).qs_pending &&
661 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 634 !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
635 t->rcu_read_unlock_special.b.need_qs = true;
662} 636}
663 637
664#ifdef CONFIG_RCU_BOOST 638#ifdef CONFIG_RCU_BOOST
@@ -819,11 +793,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
819 * In fact, if you are using synchronize_rcu_expedited() in a loop, 793 * In fact, if you are using synchronize_rcu_expedited() in a loop,
820 * please restructure your code to batch your updates, and then use a 794 * please restructure your code to batch your updates, and then use a
821 * single synchronize_rcu() instead. 795 * single synchronize_rcu() instead.
822 *
823 * Note that it is illegal to call this function while holding any lock
824 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
825 * to call this function from a CPU-hotplug notifier. Failing to observe
826 * these restriction will result in deadlock.
827 */ 796 */
828void synchronize_rcu_expedited(void) 797void synchronize_rcu_expedited(void)
829{ 798{
@@ -845,7 +814,11 @@ void synchronize_rcu_expedited(void)
845 * being boosted. This simplifies the process of moving tasks 814 * being boosted. This simplifies the process of moving tasks
846 * from leaf to root rcu_node structures. 815 * from leaf to root rcu_node structures.
847 */ 816 */
848 get_online_cpus(); 817 if (!try_get_online_cpus()) {
818 /* CPU-hotplug operation in flight, fall back to normal GP. */
819 wait_rcu_gp(call_rcu);
820 return;
821 }
849 822
850 /* 823 /*
851 * Acquire lock, falling back to synchronize_rcu() if too many 824 * Acquire lock, falling back to synchronize_rcu() if too many
@@ -897,7 +870,8 @@ void synchronize_rcu_expedited(void)
897 870
898 /* Clean up and exit. */ 871 /* Clean up and exit. */
899 smp_mb(); /* ensure expedited GP seen before counter increment. */ 872 smp_mb(); /* ensure expedited GP seen before counter increment. */
900 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 873 ACCESS_ONCE(sync_rcu_preempt_exp_count) =
874 sync_rcu_preempt_exp_count + 1;
901unlock_mb_ret: 875unlock_mb_ret:
902 mutex_unlock(&sync_rcu_preempt_exp_mutex); 876 mutex_unlock(&sync_rcu_preempt_exp_mutex);
903mb_ret: 877mb_ret:
@@ -941,7 +915,7 @@ void exit_rcu(void)
941 return; 915 return;
942 t->rcu_read_lock_nesting = 1; 916 t->rcu_read_lock_nesting = 1;
943 barrier(); 917 barrier();
944 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; 918 t->rcu_read_unlock_special.b.blocked = true;
945 __rcu_read_unlock(); 919 __rcu_read_unlock();
946} 920}
947 921
@@ -1462,14 +1436,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1462}; 1436};
1463 1437
1464/* 1438/*
1465 * Spawn all kthreads -- called as soon as the scheduler is running. 1439 * Spawn boost kthreads -- called as soon as the scheduler is running.
1466 */ 1440 */
1467static int __init rcu_spawn_kthreads(void) 1441static void __init rcu_spawn_boost_kthreads(void)
1468{ 1442{
1469 struct rcu_node *rnp; 1443 struct rcu_node *rnp;
1470 int cpu; 1444 int cpu;
1471 1445
1472 rcu_scheduler_fully_active = 1;
1473 for_each_possible_cpu(cpu) 1446 for_each_possible_cpu(cpu)
1474 per_cpu(rcu_cpu_has_work, cpu) = 0; 1447 per_cpu(rcu_cpu_has_work, cpu) = 0;
1475 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1448 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
@@ -1479,9 +1452,7 @@ static int __init rcu_spawn_kthreads(void)
1479 rcu_for_each_leaf_node(rcu_state_p, rnp) 1452 rcu_for_each_leaf_node(rcu_state_p, rnp)
1480 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1453 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1481 } 1454 }
1482 return 0;
1483} 1455}
1484early_initcall(rcu_spawn_kthreads);
1485 1456
1486static void rcu_prepare_kthreads(int cpu) 1457static void rcu_prepare_kthreads(int cpu)
1487{ 1458{
@@ -1519,12 +1490,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1519{ 1490{
1520} 1491}
1521 1492
1522static int __init rcu_scheduler_really_started(void) 1493static void __init rcu_spawn_boost_kthreads(void)
1523{ 1494{
1524 rcu_scheduler_fully_active = 1;
1525 return 0;
1526} 1495}
1527early_initcall(rcu_scheduler_really_started);
1528 1496
1529static void rcu_prepare_kthreads(int cpu) 1497static void rcu_prepare_kthreads(int cpu)
1530{ 1498{
@@ -1625,7 +1593,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1625 1593
1626 /* Exit early if we advanced recently. */ 1594 /* Exit early if we advanced recently. */
1627 if (jiffies == rdtp->last_advance_all) 1595 if (jiffies == rdtp->last_advance_all)
1628 return 0; 1596 return false;
1629 rdtp->last_advance_all = jiffies; 1597 rdtp->last_advance_all = jiffies;
1630 1598
1631 for_each_rcu_flavor(rsp) { 1599 for_each_rcu_flavor(rsp) {
@@ -1848,7 +1816,7 @@ static int rcu_oom_notify(struct notifier_block *self,
1848 get_online_cpus(); 1816 get_online_cpus();
1849 for_each_online_cpu(cpu) { 1817 for_each_online_cpu(cpu) {
1850 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1818 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1851 cond_resched(); 1819 cond_resched_rcu_qs();
1852 } 1820 }
1853 put_online_cpus(); 1821 put_online_cpus();
1854 1822
@@ -2075,7 +2043,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2075 if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) 2043 if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
2076 return; 2044 return;
2077 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { 2045 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
2078 /* Prior xchg orders against prior callback enqueue. */ 2046 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
2079 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; 2047 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
2080 wake_up(&rdp_leader->nocb_wq); 2048 wake_up(&rdp_leader->nocb_wq);
2081 } 2049 }
@@ -2104,6 +2072,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2104 ACCESS_ONCE(*old_rhpp) = rhp; 2072 ACCESS_ONCE(*old_rhpp) = rhp;
2105 atomic_long_add(rhcount, &rdp->nocb_q_count); 2073 atomic_long_add(rhcount, &rdp->nocb_q_count);
2106 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 2074 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2075 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
2107 2076
2108 /* If we are not being polled and there is a kthread, awaken it ... */ 2077 /* If we are not being polled and there is a kthread, awaken it ... */
2109 t = ACCESS_ONCE(rdp->nocb_kthread); 2078 t = ACCESS_ONCE(rdp->nocb_kthread);
@@ -2120,16 +2089,23 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2120 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2089 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2121 TPS("WakeEmpty")); 2090 TPS("WakeEmpty"));
2122 } else { 2091 } else {
2123 rdp->nocb_defer_wakeup = true; 2092 rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
2124 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2093 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2125 TPS("WakeEmptyIsDeferred")); 2094 TPS("WakeEmptyIsDeferred"));
2126 } 2095 }
2127 rdp->qlen_last_fqs_check = 0; 2096 rdp->qlen_last_fqs_check = 0;
2128 } else if (len > rdp->qlen_last_fqs_check + qhimark) { 2097 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
2129 /* ... or if many callbacks queued. */ 2098 /* ... or if many callbacks queued. */
2130 wake_nocb_leader(rdp, true); 2099 if (!irqs_disabled_flags(flags)) {
2100 wake_nocb_leader(rdp, true);
2101 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2102 TPS("WakeOvf"));
2103 } else {
2104 rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
2105 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2106 TPS("WakeOvfIsDeferred"));
2107 }
2131 rdp->qlen_last_fqs_check = LONG_MAX / 2; 2108 rdp->qlen_last_fqs_check = LONG_MAX / 2;
2132 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
2133 } else { 2109 } else {
2134 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); 2110 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
2135 } 2111 }
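
The reworked enqueue path above defers the leader wakeup whenever it is entered with interrupts disabled, recording how urgent the wakeup is in ->nocb_defer_wakeup: RCU_NOGP_WAKE for the queue-was-empty case and RCU_NOGP_WAKE_FORCE for the queue-is-getting-long case. The deferred path later replays it, forcing the wakeup only in the latter case, as in this condensed copy of do_nocb_deferred_wakeup() from later in the patch:

    static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
    {
            int ndw;

            if (!rcu_nocb_need_deferred_wakeup(rdp))
                    return;
            ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
            ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
            wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
    }
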
@@ -2150,7 +2126,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2150{ 2126{
2151 2127
2152 if (!rcu_is_nocb_cpu(rdp->cpu)) 2128 if (!rcu_is_nocb_cpu(rdp->cpu))
2153 return 0; 2129 return false;
2154 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); 2130 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
2155 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2131 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2156 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2132 trace_rcu_kfree_callback(rdp->rsp->name, rhp,
@@ -2161,7 +2137,18 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2161 trace_rcu_callback(rdp->rsp->name, rhp, 2137 trace_rcu_callback(rdp->rsp->name, rhp,
2162 -atomic_long_read(&rdp->nocb_q_count_lazy), 2138 -atomic_long_read(&rdp->nocb_q_count_lazy),
2163 -atomic_long_read(&rdp->nocb_q_count)); 2139 -atomic_long_read(&rdp->nocb_q_count));
2164 return 1; 2140
2141 /*
2142 * If called from an extended quiescent state with interrupts
2143 * disabled, invoke the RCU core in order to allow the idle-entry
2144 * deferred-wakeup check to function.
2145 */
2146 if (irqs_disabled_flags(flags) &&
2147 !rcu_is_watching() &&
2148 cpu_online(smp_processor_id()))
2149 invoke_rcu_core();
2150
2151 return true;
2165} 2152}
2166 2153
2167/* 2154/*
@@ -2177,7 +2164,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2177 2164
2178 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ 2165 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
2179 if (!rcu_is_nocb_cpu(smp_processor_id())) 2166 if (!rcu_is_nocb_cpu(smp_processor_id()))
2180 return 0; 2167 return false;
2181 rsp->qlen = 0; 2168 rsp->qlen = 0;
2182 rsp->qlen_lazy = 0; 2169 rsp->qlen_lazy = 0;
2183 2170
@@ -2196,7 +2183,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2196 rsp->orphan_nxtlist = NULL; 2183 rsp->orphan_nxtlist = NULL;
2197 rsp->orphan_nxttail = &rsp->orphan_nxtlist; 2184 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
2198 } 2185 }
2199 return 1; 2186 return true;
2200} 2187}
2201 2188
2202/* 2189/*
@@ -2229,7 +2216,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2229 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); 2216 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
2230 if (likely(d)) 2217 if (likely(d))
2231 break; 2218 break;
2232 flush_signals(current); 2219 WARN_ON(signal_pending(current));
2233 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); 2220 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
2234 } 2221 }
2235 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); 2222 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
@@ -2288,7 +2275,7 @@ wait_again:
2288 if (!rcu_nocb_poll) 2275 if (!rcu_nocb_poll)
2289 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, 2276 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
2290 "WokeEmpty"); 2277 "WokeEmpty");
2291 flush_signals(current); 2278 WARN_ON(signal_pending(current));
2292 schedule_timeout_interruptible(1); 2279 schedule_timeout_interruptible(1);
2293 2280
2294 /* Rescan in case we were a victim of memory ordering. */ 2281 /* Rescan in case we were a victim of memory ordering. */
@@ -2327,6 +2314,7 @@ wait_again:
2327 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); 2314 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
2328 atomic_long_add(rdp->nocb_gp_count_lazy, 2315 atomic_long_add(rdp->nocb_gp_count_lazy,
2329 &rdp->nocb_follower_count_lazy); 2316 &rdp->nocb_follower_count_lazy);
2317 smp_mb__after_atomic(); /* Store *tail before wakeup. */
2330 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2318 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2331 /* 2319 /*
2332 * List was empty, wake up the follower. 2320 * List was empty, wake up the follower.
@@ -2367,7 +2355,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2367 if (!rcu_nocb_poll) 2355 if (!rcu_nocb_poll)
2368 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2356 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2369 "WokeEmpty"); 2357 "WokeEmpty");
2370 flush_signals(current); 2358 WARN_ON(signal_pending(current));
2371 schedule_timeout_interruptible(1); 2359 schedule_timeout_interruptible(1);
2372 } 2360 }
2373} 2361}
@@ -2428,15 +2416,16 @@ static int rcu_nocb_kthread(void *arg)
2428 list = next; 2416 list = next;
2429 } 2417 }
2430 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2418 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2431 ACCESS_ONCE(rdp->nocb_p_count) -= c; 2419 ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c;
2432 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; 2420 ACCESS_ONCE(rdp->nocb_p_count_lazy) =
2421 rdp->nocb_p_count_lazy - cl;
2433 rdp->n_nocbs_invoked += c; 2422 rdp->n_nocbs_invoked += c;
2434 } 2423 }
2435 return 0; 2424 return 0;
2436} 2425}
2437 2426
2438/* Is a deferred wakeup of rcu_nocb_kthread() required? */ 2427/* Is a deferred wakeup of rcu_nocb_kthread() required? */
2439static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2428static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2440{ 2429{
2441 return ACCESS_ONCE(rdp->nocb_defer_wakeup); 2430 return ACCESS_ONCE(rdp->nocb_defer_wakeup);
2442} 2431}
@@ -2444,11 +2433,79 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2444/* Do a deferred wakeup of rcu_nocb_kthread(). */ 2433/* Do a deferred wakeup of rcu_nocb_kthread(). */
2445static void do_nocb_deferred_wakeup(struct rcu_data *rdp) 2434static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2446{ 2435{
2436 int ndw;
2437
2447 if (!rcu_nocb_need_deferred_wakeup(rdp)) 2438 if (!rcu_nocb_need_deferred_wakeup(rdp))
2448 return; 2439 return;
2449 ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; 2440 ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
2450 wake_nocb_leader(rdp, false); 2441 ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
2451 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); 2442 wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
2443 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
2444}
2445
2446void __init rcu_init_nohz(void)
2447{
2448 int cpu;
2449 bool need_rcu_nocb_mask = true;
2450 struct rcu_state *rsp;
2451
2452#ifdef CONFIG_RCU_NOCB_CPU_NONE
2453 need_rcu_nocb_mask = false;
2454#endif /* #ifdef CONFIG_RCU_NOCB_CPU_NONE */
2455
2456#if defined(CONFIG_NO_HZ_FULL)
2457 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
2458 need_rcu_nocb_mask = true;
2459#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2460
2461 if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
2462 if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
2463 pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
2464 return;
2465 }
2466 have_rcu_nocb_mask = true;
2467 }
2468 if (!have_rcu_nocb_mask)
2469 return;
2470
2471#ifdef CONFIG_RCU_NOCB_CPU_ZERO
2472 pr_info("\tOffload RCU callbacks from CPU 0\n");
2473 cpumask_set_cpu(0, rcu_nocb_mask);
2474#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
2475#ifdef CONFIG_RCU_NOCB_CPU_ALL
2476 pr_info("\tOffload RCU callbacks from all CPUs\n");
2477 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
2478#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
2479#if defined(CONFIG_NO_HZ_FULL)
2480 if (tick_nohz_full_running)
2481 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2482#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2483
2484 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
2485 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
2486 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2487 rcu_nocb_mask);
2488 }
2489 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
2490 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
2491 if (rcu_nocb_poll)
2492 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2493
2494 for_each_rcu_flavor(rsp) {
2495 for_each_cpu(cpu, rcu_nocb_mask) {
2496 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2497
2498 /*
2499 * If there are early callbacks, they will need
2500 * to be moved to the nocb lists.
2501 */
2502 WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
2503 &rdp->nxtlist &&
2504 rdp->nxttail[RCU_NEXT_TAIL] != NULL);
2505 init_nocb_callback_list(rdp);
2506 }
2507 rcu_organize_nocb_kthreads(rsp);
2508 }
2452} 2509}
2453 2510
2454/* Initialize per-rcu_data variables for no-CBs CPUs. */ 2511/* Initialize per-rcu_data variables for no-CBs CPUs. */
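
rcu_init_nohz() marks each offloaded CPU by calling init_nocb_callback_list(), whose visible effect (see the hunk near the end of this file) is to set ->nxttail[RCU_NEXT_TAIL] to NULL; that NULL tail pointer is both what the WARN_ON_ONCE() above checks for early callbacks and what the callback-posting path keys off. Illustratively (simplified, not the literal __call_rcu() code):

    /* Simplified: a NULL RCU_NEXT_TAIL redirects new callbacks to the rcuo kthread. */
    if (rdp->nxttail[RCU_NEXT_TAIL] == NULL) {
            if (__call_rcu_nocb(rdp, head, lazy, flags))
                    return;         /* handed off to the no-CBs machinery */
    }
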
@@ -2459,15 +2516,85 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2459 rdp->nocb_follower_tail = &rdp->nocb_follower_head; 2516 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2460} 2517}
2461 2518
2519/*
2520 * If the specified CPU is a no-CBs CPU that does not already have its
2521 * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are
2522 * brought online out of order, this can require re-organizing the
2523 * leader-follower relationships.
2524 */
2525static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
2526{
2527 struct rcu_data *rdp;
2528 struct rcu_data *rdp_last;
2529 struct rcu_data *rdp_old_leader;
2530 struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);
2531 struct task_struct *t;
2532
2533 /*
2534 * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
2535 * then nothing to do.
2536 */
2537 if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)
2538 return;
2539
2540 /* If we didn't spawn the leader first, reorganize! */
2541 rdp_old_leader = rdp_spawn->nocb_leader;
2542 if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {
2543 rdp_last = NULL;
2544 rdp = rdp_old_leader;
2545 do {
2546 rdp->nocb_leader = rdp_spawn;
2547 if (rdp_last && rdp != rdp_spawn)
2548 rdp_last->nocb_next_follower = rdp;
2549 rdp_last = rdp;
2550 rdp = rdp->nocb_next_follower;
2551 rdp_last->nocb_next_follower = NULL;
2552 } while (rdp);
2553 rdp_spawn->nocb_next_follower = rdp_old_leader;
2554 }
2555
2556 /* Spawn the kthread for this CPU and RCU flavor. */
2557 t = kthread_run(rcu_nocb_kthread, rdp_spawn,
2558 "rcuo%c/%d", rsp->abbr, cpu);
2559 BUG_ON(IS_ERR(t));
2560 ACCESS_ONCE(rdp_spawn->nocb_kthread) = t;
2561}
2562
2563/*
2564 * If the specified CPU is a no-CBs CPU that does not already have its
2565 * rcuo kthreads, spawn them.
2566 */
2567static void rcu_spawn_all_nocb_kthreads(int cpu)
2568{
2569 struct rcu_state *rsp;
2570
2571 if (rcu_scheduler_fully_active)
2572 for_each_rcu_flavor(rsp)
2573 rcu_spawn_one_nocb_kthread(rsp, cpu);
2574}
2575
2576/*
2577 * Once the scheduler is running, spawn rcuo kthreads for all online
2578 * no-CBs CPUs. This assumes that the early_initcall()s happen before
2579 * non-boot CPUs come online -- if this changes, we will need to add
2580 * some mutual exclusion.
2581 */
2582static void __init rcu_spawn_nocb_kthreads(void)
2583{
2584 int cpu;
2585
2586 for_each_online_cpu(cpu)
2587 rcu_spawn_all_nocb_kthreads(cpu);
2588}
2589
2462/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ 2590/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */
2463static int rcu_nocb_leader_stride = -1; 2591static int rcu_nocb_leader_stride = -1;
2464module_param(rcu_nocb_leader_stride, int, 0444); 2592module_param(rcu_nocb_leader_stride, int, 0444);
2465 2593
2466/* 2594/*
2467 * Create a kthread for each RCU flavor for each no-CBs CPU. 2595 * Initialize leader-follower relationships for all no-CBs CPUs.
2468 * Also initialize leader-follower relationships.
2469 */ 2596 */
2470static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2597static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
2471{ 2598{
2472 int cpu; 2599 int cpu;
2473 int ls = rcu_nocb_leader_stride; 2600 int ls = rcu_nocb_leader_stride;
@@ -2475,14 +2602,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2475 struct rcu_data *rdp; 2602 struct rcu_data *rdp;
2476 struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ 2603 struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
2477 struct rcu_data *rdp_prev = NULL; 2604 struct rcu_data *rdp_prev = NULL;
2478 struct task_struct *t;
2479 2605
2480 if (rcu_nocb_mask == NULL) 2606 if (!have_rcu_nocb_mask)
2481 return; 2607 return;
2482#if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL)
2483 if (tick_nohz_full_running)
2484 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2485#endif /* #if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) */
2486 if (ls == -1) { 2608 if (ls == -1) {
2487 ls = int_sqrt(nr_cpu_ids); 2609 ls = int_sqrt(nr_cpu_ids);
2488 rcu_nocb_leader_stride = ls; 2610 rcu_nocb_leader_stride = ls;
@@ -2505,21 +2627,15 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2505 rdp_prev->nocb_next_follower = rdp; 2627 rdp_prev->nocb_next_follower = rdp;
2506 } 2628 }
2507 rdp_prev = rdp; 2629 rdp_prev = rdp;
2508
2509 /* Spawn the kthread for this CPU. */
2510 t = kthread_run(rcu_nocb_kthread, rdp,
2511 "rcuo%c/%d", rsp->abbr, cpu);
2512 BUG_ON(IS_ERR(t));
2513 ACCESS_ONCE(rdp->nocb_kthread) = t;
2514 } 2630 }
2515} 2631}
2516 2632
2517/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ 2633/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
2518static bool init_nocb_callback_list(struct rcu_data *rdp) 2634static bool init_nocb_callback_list(struct rcu_data *rdp)
2519{ 2635{
2520 if (rcu_nocb_mask == NULL || 2636 if (!rcu_is_nocb_cpu(rdp->cpu))
2521 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
2522 return false; 2637 return false;
2638
2523 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 2639 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2524 return true; 2640 return true;
2525} 2641}
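
A worked example of the grouping rcu_organize_nocb_kthreads() produces: with 16 offloaded CPUs and rcu_nocb_leader_stride left at its default of -1, ls = int_sqrt(16) = 4, so CPUs 0, 4, 8 and 12 become rcuo leaders and each leads itself plus the next three no-CBs CPUs, giving roughly sqrt(N) leader-level wakeups. One way to reproduce that grouping arithmetic in a standalone program (illustrative only; the kernel walks rcu_nocb_mask over per-CPU rcu_data structures instead):

    #include <stdio.h>

    int main(void)
    {
            int nr_cpus = 16, ls = 4, next_leader = 0, leader = 0;

            for (int cpu = 0; cpu < nr_cpus; cpu++) {
                    if (cpu >= next_leader) {
                            next_leader = ((cpu + ls) / ls) * ls;   /* start of next group */
                            leader = cpu;
                            printf("CPU %2d: leader\n", cpu);       /* 0, 4, 8, 12 */
                    } else {
                            printf("CPU %2d: follower of %d\n", cpu, leader);
                    }
            }
            return 0;
    }
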
@@ -2541,21 +2657,21 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
2541static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2657static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2542 bool lazy, unsigned long flags) 2658 bool lazy, unsigned long flags)
2543{ 2659{
2544 return 0; 2660 return false;
2545} 2661}
2546 2662
2547static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2663static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2548 struct rcu_data *rdp, 2664 struct rcu_data *rdp,
2549 unsigned long flags) 2665 unsigned long flags)
2550{ 2666{
2551 return 0; 2667 return false;
2552} 2668}
2553 2669
2554static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2670static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2555{ 2671{
2556} 2672}
2557 2673
2558static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2674static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2559{ 2675{
2560 return false; 2676 return false;
2561} 2677}
@@ -2564,7 +2680,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2564{ 2680{
2565} 2681}
2566 2682
2567static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2683static void rcu_spawn_all_nocb_kthreads(int cpu)
2684{
2685}
2686
2687static void __init rcu_spawn_nocb_kthreads(void)
2568{ 2688{
2569} 2689}
2570 2690
@@ -2595,16 +2715,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
2595 2715
2596#ifdef CONFIG_NO_HZ_FULL_SYSIDLE 2716#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
2597 2717
2598/*
2599 * Define RCU flavor that holds sysidle state. This needs to be the
2600 * most active flavor of RCU.
2601 */
2602#ifdef CONFIG_PREEMPT_RCU
2603static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
2604#else /* #ifdef CONFIG_PREEMPT_RCU */
2605static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
2606#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
2607
2608static int full_sysidle_state; /* Current system-idle state. */ 2718static int full_sysidle_state; /* Current system-idle state. */
2609#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */ 2719#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
2610#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */ 2720#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
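
The deleted rcu_sysidle_state pointer duplicated a choice that rcu_state_p already encodes (the most active flavor: preemptible RCU when CONFIG_PREEMPT_RCU, otherwise RCU-sched), so the sysidle code can compare against rcu_state_p directly. For reference, the removed selection amounted to:

    #ifdef CONFIG_PREEMPT_RCU
    static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
    #else /* #ifdef CONFIG_PREEMPT_RCU */
    static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
    #endif /* #else #ifdef CONFIG_PREEMPT_RCU */
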
@@ -2622,6 +2732,10 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
2622{ 2732{
2623 unsigned long j; 2733 unsigned long j;
2624 2734
2735 /* If there are no nohz_full= CPUs, no need to track this. */
2736 if (!tick_nohz_full_enabled())
2737 return;
2738
2625 /* Adjust nesting, check for fully idle. */ 2739 /* Adjust nesting, check for fully idle. */
2626 if (irq) { 2740 if (irq) {
2627 rdtp->dynticks_idle_nesting--; 2741 rdtp->dynticks_idle_nesting--;
@@ -2687,6 +2801,10 @@ void rcu_sysidle_force_exit(void)
2687 */ 2801 */
2688static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) 2802static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
2689{ 2803{
2804 /* If there are no nohz_full= CPUs, no need to track this. */
2805 if (!tick_nohz_full_enabled())
2806 return;
2807
2690 /* Adjust nesting, check for already non-idle. */ 2808 /* Adjust nesting, check for already non-idle. */
2691 if (irq) { 2809 if (irq) {
2692 rdtp->dynticks_idle_nesting++; 2810 rdtp->dynticks_idle_nesting++;
@@ -2741,12 +2859,16 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2741 unsigned long j; 2859 unsigned long j;
2742 struct rcu_dynticks *rdtp = rdp->dynticks; 2860 struct rcu_dynticks *rdtp = rdp->dynticks;
2743 2861
2862 /* If there are no nohz_full= CPUs, don't check system-wide idleness. */
2863 if (!tick_nohz_full_enabled())
2864 return;
2865
2744 /* 2866 /*
2745 * If some other CPU has already reported non-idle, if this is 2867 * If some other CPU has already reported non-idle, if this is
2746 * not the flavor of RCU that tracks sysidle state, or if this 2868 * not the flavor of RCU that tracks sysidle state, or if this
2747 * is an offline or the timekeeping CPU, nothing to do. 2869 * is an offline or the timekeeping CPU, nothing to do.
2748 */ 2870 */
2749 if (!*isidle || rdp->rsp != rcu_sysidle_state || 2871 if (!*isidle || rdp->rsp != rcu_state_p ||
2750 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) 2872 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
2751 return; 2873 return;
2752 if (rcu_gp_in_progress(rdp->rsp)) 2874 if (rcu_gp_in_progress(rdp->rsp))
@@ -2772,7 +2894,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2772 */ 2894 */
2773static bool is_sysidle_rcu_state(struct rcu_state *rsp) 2895static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2774{ 2896{
2775 return rsp == rcu_sysidle_state; 2897 return rsp == rcu_state_p;
2776} 2898}
2777 2899
2778/* 2900/*
@@ -2850,7 +2972,7 @@ static void rcu_sysidle_cancel(void)
2850static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, 2972static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2851 unsigned long maxj, bool gpkt) 2973 unsigned long maxj, bool gpkt)
2852{ 2974{
2853 if (rsp != rcu_sysidle_state) 2975 if (rsp != rcu_state_p)
2854 return; /* Wrong flavor, ignore. */ 2976 return; /* Wrong flavor, ignore. */
2855 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) 2977 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
2856 return; /* Running state machine from timekeeping CPU. */ 2978 return; /* Running state machine from timekeeping CPU. */
@@ -2867,6 +2989,10 @@ static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2867static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, 2989static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2868 unsigned long maxj) 2990 unsigned long maxj)
2869{ 2991{
2992 /* If there are no nohz_full= CPUs, no need to track this. */
2993 if (!tick_nohz_full_enabled())
2994 return;
2995
2870 rcu_sysidle_report(rsp, isidle, maxj, true); 2996 rcu_sysidle_report(rsp, isidle, maxj, true);
2871} 2997}
2872 2998
@@ -2893,7 +3019,8 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
2893 3019
2894/* 3020/*
2895 * Check to see if the system is fully idle, other than the timekeeping CPU. 3021 * Check to see if the system is fully idle, other than the timekeeping CPU.
2896 * The caller must have disabled interrupts. 3022 * The caller must have disabled interrupts. This is not intended to be
3023 * called unless tick_nohz_full_enabled().
2897 */ 3024 */
2898bool rcu_sys_is_idle(void) 3025bool rcu_sys_is_idle(void)
2899{ 3026{
@@ -2919,13 +3046,12 @@ bool rcu_sys_is_idle(void)
2919 3046
2920 /* Scan all the CPUs looking for nonidle CPUs. */ 3047 /* Scan all the CPUs looking for nonidle CPUs. */
2921 for_each_possible_cpu(cpu) { 3048 for_each_possible_cpu(cpu) {
2922 rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu); 3049 rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
2923 rcu_sysidle_check_cpu(rdp, &isidle, &maxj); 3050 rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
2924 if (!isidle) 3051 if (!isidle)
2925 break; 3052 break;
2926 } 3053 }
2927 rcu_sysidle_report(rcu_sysidle_state, 3054 rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
2928 isidle, maxj, false);
2929 oldrss = rss; 3055 oldrss = rss;
2930 rss = ACCESS_ONCE(full_sysidle_state); 3056 rss = ACCESS_ONCE(full_sysidle_state);
2931 } 3057 }
@@ -2952,7 +3078,7 @@ bool rcu_sys_is_idle(void)
2952 * provided by the memory allocator. 3078 * provided by the memory allocator.
2953 */ 3079 */
2954 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL && 3080 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
2955 !rcu_gp_in_progress(rcu_sysidle_state) && 3081 !rcu_gp_in_progress(rcu_state_p) &&
2956 !rsh.inuse && xchg(&rsh.inuse, 1) == 0) 3082 !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
2957 call_rcu(&rsh.rh, rcu_sysidle_cb); 3083 call_rcu(&rsh.rh, rcu_sysidle_cb);
2958 return false; 3084 return false;
@@ -3036,3 +3162,19 @@ static void rcu_bind_gp_kthread(void)
3036 housekeeping_affine(current); 3162 housekeeping_affine(current);
3037#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 3163#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
3038} 3164}
3165
3166/* Record the current task on dyntick-idle entry. */
3167static void rcu_dynticks_task_enter(void)
3168{
3169#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3170 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
3171#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3172}
3173
3174/* Record no current task on dyntick-idle exit. */
3175static void rcu_dynticks_task_exit(void)
3176{
3177#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3178 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
3179#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3180}
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
47#include <linux/hardirq.h> 47#include <linux/hardirq.h>
48#include <linux/delay.h> 48#include <linux/delay.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kthread.h>
51#include <linux/tick.h>
50 52
51#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
52 54
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
91 barrier(); /* critical section before exit code. */ 93 barrier(); /* critical section before exit code. */
92 t->rcu_read_lock_nesting = INT_MIN; 94 t->rcu_read_lock_nesting = INT_MIN;
93 barrier(); /* assign before ->rcu_read_unlock_special load */ 95 barrier(); /* assign before ->rcu_read_unlock_special load */
94 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 96 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
95 rcu_read_unlock_special(t); 97 rcu_read_unlock_special(t);
96 barrier(); /* ->rcu_read_unlock_special load before assign */ 98 barrier(); /* ->rcu_read_unlock_special load before assign */
97 t->rcu_read_lock_nesting = 0; 99 t->rcu_read_lock_nesting = 0;
@@ -137,6 +139,38 @@ int notrace debug_lockdep_rcu_enabled(void)
137EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); 139EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
138 140
139/** 141/**
142 * rcu_read_lock_held() - might we be in RCU read-side critical section?
143 *
144 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
 145 * read-side critical section. In the absence of CONFIG_DEBUG_LOCK_ALLOC,
146 * this assumes we are in an RCU read-side critical section unless it can
147 * prove otherwise. This is useful for debug checks in functions that
148 * require that they be called within an RCU read-side critical section.
149 *
150 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
151 * and while lockdep is disabled.
152 *
153 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
 154 * occur in the same context; for example, it is illegal to invoke
155 * rcu_read_unlock() in process context if the matching rcu_read_lock()
156 * was invoked from within an irq handler.
157 *
158 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
159 * offline from an RCU perspective, so check for those as well.
160 */
161int rcu_read_lock_held(void)
162{
163 if (!debug_lockdep_rcu_enabled())
164 return 1;
165 if (!rcu_is_watching())
166 return 0;
167 if (!rcu_lockdep_current_cpu_online())
168 return 0;
169 return lock_is_held(&rcu_lock_map);
170}
171EXPORT_SYMBOL_GPL(rcu_read_lock_held);
172
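
As an illustration of the debug-check use described above (a sketch, not part of this patch): a lookup helper can assert rcu_read_lock_held() before walking an RCU-protected list, so that lockdep flags callers that forgot rcu_read_lock(). The struct foo, foo_head list, and foo_lookup() names below are hypothetical.

#include <linux/rcupdate.h>
#include <linux/rculist.h>

struct foo {
	int key;
	struct list_head list;
};

static LIST_HEAD(foo_head);	/* entries added elsewhere with list_add_rcu() */

/* Caller must be inside an RCU read-side critical section. */
static struct foo *foo_lookup(int key)
{
	struct foo *p;

	rcu_lockdep_assert(rcu_read_lock_held(),
			   "foo_lookup() needs rcu_read_lock() protection");
	list_for_each_entry_rcu(p, &foo_head, list)
		if (p->key == key)
			return p;
	return NULL;
}

With CONFIG_PROVE_RCU=y, calling foo_lookup() outside an rcu_read_lock()/rcu_read_unlock() pair produces a lockdep splat; without it, the check compiles away.
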
173/**
140 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? 174 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
141 * 175 *
142 * Check for bottom half being disabled, which covers both the 176 * Check for bottom half being disabled, which covers both the
@@ -347,3 +381,312 @@ static int __init check_cpu_stall_init(void)
347early_initcall(check_cpu_stall_init); 381early_initcall(check_cpu_stall_init);
348 382
349#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 383#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
384
385#ifdef CONFIG_TASKS_RCU
386
387/*
388 * Simple variant of RCU whose quiescent states are voluntary context switch,
389 * user-space execution, and idle. As such, grace periods can take one good
390 * long time. There are no read-side primitives similar to rcu_read_lock()
391 * and rcu_read_unlock() because this implementation is intended to get
392 * the system into a safe state for some of the manipulations involved in
393 * tracing and the like. Finally, this implementation does not support
394 * high call_rcu_tasks() rates from multiple CPUs. If this is required,
395 * per-CPU callback lists will be needed.
396 */
397
398/* Global list of callbacks and associated lock. */
399static struct rcu_head *rcu_tasks_cbs_head;
400static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
401static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
402static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
403
404/* Track exiting tasks in order to allow them to be waited for. */
405DEFINE_SRCU(tasks_rcu_exit_srcu);
406
407/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
408static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
409module_param(rcu_task_stall_timeout, int, 0644);
410
411static void rcu_spawn_tasks_kthread(void);
412
413/*
414 * Post an RCU-tasks callback. First call must be from process context
 415 * after the scheduler is fully operational.
416 */
417void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
418{
419 unsigned long flags;
420 bool needwake;
421
422 rhp->next = NULL;
423 rhp->func = func;
424 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
425 needwake = !rcu_tasks_cbs_head;
426 *rcu_tasks_cbs_tail = rhp;
427 rcu_tasks_cbs_tail = &rhp->next;
428 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
429 if (needwake) {
430 rcu_spawn_tasks_kthread();
431 wake_up(&rcu_tasks_cbs_wq);
432 }
433}
434EXPORT_SYMBOL_GPL(call_rcu_tasks);
435
436/**
437 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
438 *
439 * Control will return to the caller some time after a full rcu-tasks
440 * grace period has elapsed, in other words after all currently
 442 * executing rcu-tasks read-side critical sections have completed. These
442 * read-side critical sections are delimited by calls to schedule(),
443 * cond_resched_rcu_qs(), idle execution, userspace execution, calls
444 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
445 *
446 * This is a very specialized primitive, intended only for a few uses in
447 * tracing and other situations requiring manipulation of function
448 * preambles and profiling hooks. The synchronize_rcu_tasks() function
449 * is not (yet) intended for heavy use from multiple CPUs.
450 *
451 * Note that this guarantee implies further memory-ordering guarantees.
452 * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
453 * each CPU is guaranteed to have executed a full memory barrier since the
454 * end of its last RCU-tasks read-side critical section whose beginning
455 * preceded the call to synchronize_rcu_tasks(). In addition, each CPU
456 * having an RCU-tasks read-side critical section that extends beyond
457 * the return from synchronize_rcu_tasks() is guaranteed to have executed
458 * a full memory barrier after the beginning of synchronize_rcu_tasks()
459 * and before the beginning of that RCU-tasks read-side critical section.
460 * Note that these guarantees include CPUs that are offline, idle, or
461 * executing in user mode, as well as CPUs that are executing in the kernel.
462 *
463 * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
464 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
465 * to have executed a full memory barrier during the execution of
466 * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
467 * (but again only if the system has more than one CPU).
468 */
469void synchronize_rcu_tasks(void)
470{
471 /* Complain if the scheduler has not started. */
 472	rcu_lockdep_assert(rcu_scheduler_active,
473 "synchronize_rcu_tasks called too soon");
474
475 /* Wait for the grace period. */
476 wait_rcu_gp(call_rcu_tasks);
477}
478EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
479
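
The synchronous pattern can be sketched along the same lines (all my_* helpers are hypothetical placeholders; only synchronize_rcu_tasks() is the API added here): code that may sleep can unhook the old text, wait out one full RCU-tasks grace period, and then release it directly.

#include <linux/rcupdate.h>

static void my_unhook_old_stub(void)
{
	/* Hypothetical: remove every way for a new task to enter the old stub. */
}

static void my_free_old_stub(void)
{
	/* Hypothetical: release the old stub's memory. */
}

static void my_replace_stub(void)
{
	my_unhook_old_stub();
	synchronize_rcu_tasks();	/* wait for tasks already inside the stub */
	my_free_old_stub();		/* now safe: no task can still be in it */
}
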
480/**
481 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
482 *
483 * Although the current implementation is guaranteed to wait, it is not
484 * obligated to, for example, if there are no pending callbacks.
485 */
486void rcu_barrier_tasks(void)
487{
488 /* There is only one callback queue, so this is easy. ;-) */
489 synchronize_rcu_tasks();
490}
491EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
492
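
One plausible use of rcu_barrier_tasks(), sketched with a hypothetical module: a module that posted callbacks with call_rcu_tasks() must wait for them to be invoked before its callback code and data can go away.

#include <linux/module.h>
#include <linux/rcupdate.h>

static int __init my_module_init(void)
{
	return 0;	/* callbacks would be posted later via call_rcu_tasks() */
}
module_init(my_module_init);

static void __exit my_module_exit(void)
{
	/* Assumes no further call_rcu_tasks() invocations after this point. */
	rcu_barrier_tasks();	/* wait for already-posted callbacks to finish */
}
module_exit(my_module_exit);

MODULE_LICENSE("GPL");
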
493/* See if tasks are still holding out, complain if so. */
494static void check_holdout_task(struct task_struct *t,
495 bool needreport, bool *firstreport)
496{
497 int cpu;
498
499 if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
500 t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
501 !ACCESS_ONCE(t->on_rq) ||
502 (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
503 !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
504 ACCESS_ONCE(t->rcu_tasks_holdout) = false;
505 list_del_init(&t->rcu_tasks_holdout_list);
506 put_task_struct(t);
507 return;
508 }
509 if (!needreport)
510 return;
511 if (*firstreport) {
512 pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
513 *firstreport = false;
514 }
515 cpu = task_cpu(t);
516 pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
517 t, ".I"[is_idle_task(t)],
518 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
519 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
520 t->rcu_tasks_idle_cpu, cpu);
521 sched_show_task(t);
522}
523
524/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
525static int __noreturn rcu_tasks_kthread(void *arg)
526{
527 unsigned long flags;
528 struct task_struct *g, *t;
529 unsigned long lastreport;
530 struct rcu_head *list;
531 struct rcu_head *next;
532 LIST_HEAD(rcu_tasks_holdouts);
533
534 /* FIXME: Add housekeeping affinity. */
535
536 /*
537 * Each pass through the following loop makes one check for
538 * newly arrived callbacks, and, if there are some, waits for
539 * one RCU-tasks grace period and then invokes the callbacks.
540 * This loop is terminated by the system going down. ;-)
541 */
542 for (;;) {
543
544 /* Pick up any new callbacks. */
545 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
546 list = rcu_tasks_cbs_head;
547 rcu_tasks_cbs_head = NULL;
548 rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
549 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
550
551 /* If there were none, wait a bit and start over. */
552 if (!list) {
553 wait_event_interruptible(rcu_tasks_cbs_wq,
554 rcu_tasks_cbs_head);
555 if (!rcu_tasks_cbs_head) {
556 WARN_ON(signal_pending(current));
557 schedule_timeout_interruptible(HZ/10);
558 }
559 continue;
560 }
561
562 /*
563 * Wait for all pre-existing t->on_rq and t->nvcsw
564 * transitions to complete. Invoking synchronize_sched()
565 * suffices because all these transitions occur with
566 * interrupts disabled. Without this synchronize_sched(),
567 * a read-side critical section that started before the
568 * grace period might be incorrectly seen as having started
569 * after the grace period.
570 *
571 * This synchronize_sched() also dispenses with the
572 * need for a memory barrier on the first store to
573 * ->rcu_tasks_holdout, as it forces the store to happen
574 * after the beginning of the grace period.
575 */
576 synchronize_sched();
577
578 /*
579 * There were callbacks, so we need to wait for an
580 * RCU-tasks grace period. Start off by scanning
581 * the task list for tasks that are not already
582 * voluntarily blocked. Mark these tasks and make
583 * a list of them in rcu_tasks_holdouts.
584 */
585 rcu_read_lock();
586 for_each_process_thread(g, t) {
587 if (t != current && ACCESS_ONCE(t->on_rq) &&
588 !is_idle_task(t)) {
589 get_task_struct(t);
590 t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
591 ACCESS_ONCE(t->rcu_tasks_holdout) = true;
592 list_add(&t->rcu_tasks_holdout_list,
593 &rcu_tasks_holdouts);
594 }
595 }
596 rcu_read_unlock();
597
598 /*
599 * Wait for tasks that are in the process of exiting.
600 * This does only part of the job, ensuring that all
601 * tasks that were previously exiting reach the point
602 * where they have disabled preemption, allowing the
603 * later synchronize_sched() to finish the job.
604 */
605 synchronize_srcu(&tasks_rcu_exit_srcu);
606
607 /*
608 * Each pass through the following loop scans the list
609 * of holdout tasks, removing any that are no longer
610 * holdouts. When the list is empty, we are done.
611 */
612 lastreport = jiffies;
613 while (!list_empty(&rcu_tasks_holdouts)) {
614 bool firstreport;
615 bool needreport;
616 int rtst;
617 struct task_struct *t1;
618
619 schedule_timeout_interruptible(HZ);
620 rtst = ACCESS_ONCE(rcu_task_stall_timeout);
621 needreport = rtst > 0 &&
622 time_after(jiffies, lastreport + rtst);
623 if (needreport)
624 lastreport = jiffies;
625 firstreport = true;
626 WARN_ON(signal_pending(current));
627 list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
628 rcu_tasks_holdout_list) {
629 check_holdout_task(t, needreport, &firstreport);
630 cond_resched();
631 }
632 }
633
634 /*
635 * Because ->on_rq and ->nvcsw are not guaranteed
 636 * to have full memory barriers prior to them in the
637 * schedule() path, memory reordering on other CPUs could
638 * cause their RCU-tasks read-side critical sections to
639 * extend past the end of the grace period. However,
640 * because these ->nvcsw updates are carried out with
641 * interrupts disabled, we can use synchronize_sched()
642 * to force the needed ordering on all such CPUs.
643 *
644 * This synchronize_sched() also confines all
645 * ->rcu_tasks_holdout accesses to be within the grace
646 * period, avoiding the need for memory barriers for
647 * ->rcu_tasks_holdout accesses.
648 *
649 * In addition, this synchronize_sched() waits for exiting
650 * tasks to complete their final preempt_disable() region
651 * of execution, cleaning up after the synchronize_srcu()
652 * above.
653 */
654 synchronize_sched();
655
656 /* Invoke the callbacks. */
657 while (list) {
658 next = list->next;
659 local_bh_disable();
660 list->func(list);
661 local_bh_enable();
662 list = next;
663 cond_resched();
664 }
665 schedule_timeout_uninterruptible(HZ/10);
666 }
667}
668
669/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
670static void rcu_spawn_tasks_kthread(void)
671{
672 static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
673 static struct task_struct *rcu_tasks_kthread_ptr;
674 struct task_struct *t;
675
676 if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
677 smp_mb(); /* Ensure caller sees full kthread. */
678 return;
679 }
680 mutex_lock(&rcu_tasks_kthread_mutex);
681 if (rcu_tasks_kthread_ptr) {
682 mutex_unlock(&rcu_tasks_kthread_mutex);
683 return;
684 }
685 t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
686 BUG_ON(IS_ERR(t));
687 smp_mb(); /* Ensure others see full kthread. */
688 ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
689 mutex_unlock(&rcu_tasks_kthread_mutex);
690}
691
692#endif /* #ifdef CONFIG_TASKS_RCU */
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5918d227730f..348ec763b104 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -278,7 +278,7 @@ restart:
278 pending >>= softirq_bit; 278 pending >>= softirq_bit;
279 } 279 }
280 280
281 rcu_bh_qs(smp_processor_id()); 281 rcu_bh_qs();
282 local_irq_disable(); 282 local_irq_disable();
283 283
284 pending = local_softirq_pending(); 284 pending = local_softirq_pending();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 91180987e40e..4aada6d9fe74 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1055,15 +1055,6 @@ static struct ctl_table kern_table[] = {
1055 .child = key_sysctls, 1055 .child = key_sysctls,
1056 }, 1056 },
1057#endif 1057#endif
1058#ifdef CONFIG_RCU_TORTURE_TEST
1059 {
1060 .procname = "rcutorture_runnable",
1061 .data = &rcutorture_runnable,
1062 .maxlen = sizeof(int),
1063 .mode = 0644,
1064 .proc_handler = proc_dointvec,
1065 },
1066#endif
1067#ifdef CONFIG_PERF_EVENTS 1058#ifdef CONFIG_PERF_EVENTS
1068 /* 1059 /*
1069 * User-space scripts rely on the existence of this file 1060 * User-space scripts rely on the existence of this file
diff --git a/kernel/torture.c b/kernel/torture.c
index d600af21f022..dd70993c266c 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup);
211/* 211/*
212 * Print online/offline testing statistics. 212 * Print online/offline testing statistics.
213 */ 213 */
214char *torture_onoff_stats(char *page) 214void torture_onoff_stats(void)
215{ 215{
216#ifdef CONFIG_HOTPLUG_CPU 216#ifdef CONFIG_HOTPLUG_CPU
217 page += sprintf(page, 217 pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
218 "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", 218 n_online_successes, n_online_attempts,
219 n_online_successes, n_online_attempts, 219 n_offline_successes, n_offline_attempts,
220 n_offline_successes, n_offline_attempts, 220 min_online, max_online,
221 min_online, max_online, 221 min_offline, max_offline,
222 min_offline, max_offline, 222 sum_online, sum_offline, HZ);
223 sum_online, sum_offline, HZ);
224#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 223#endif /* #ifdef CONFIG_HOTPLUG_CPU */
225 return page;
226} 224}
227EXPORT_SYMBOL_GPL(torture_onoff_stats); 225EXPORT_SYMBOL_GPL(torture_onoff_stats);
228 226
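
With torture_onoff_stats() now printing directly instead of filling a caller-supplied page buffer, a torture module's statistics routine would emit its own pr_*() output and let torture_onoff_stats() append to the same line via pr_cont(). A sketch with a hypothetical module and counters:

#include <linux/kernel.h>
#include <linux/torture.h>

static unsigned long n_my_reads;	/* hypothetical statistics counters */
static unsigned long n_my_errors;

static void mytorture_print_stats(void)
{
	pr_alert("mytorture: reads: %lu errors: %lu ",
		 n_my_reads, n_my_errors);
	torture_onoff_stats();	/* appends "onoff: ..." via pr_cont() */
	pr_cont("\n");
}
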
@@ -635,8 +633,13 @@ EXPORT_SYMBOL_GPL(torture_init_end);
635 * 633 *
636 * This must be called before the caller starts shutting down its own 634 * This must be called before the caller starts shutting down its own
637 * kthreads. 635 * kthreads.
636 *
637 * Both torture_cleanup_begin() and torture_cleanup_end() must be paired,
 638 * in order to correctly perform the cleanup. They are separate because
 639 * other threads might still need to reference torture_type, which is
 640 * therefore set to NULL only after all other cleanup calls have completed.
638 */ 641 */
639bool torture_cleanup(void) 642bool torture_cleanup_begin(void)
640{ 643{
641 mutex_lock(&fullstop_mutex); 644 mutex_lock(&fullstop_mutex);
642 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { 645 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
@@ -651,12 +654,17 @@ bool torture_cleanup(void)
651 torture_shuffle_cleanup(); 654 torture_shuffle_cleanup();
652 torture_stutter_cleanup(); 655 torture_stutter_cleanup();
653 torture_onoff_cleanup(); 656 torture_onoff_cleanup();
657 return false;
658}
659EXPORT_SYMBOL_GPL(torture_cleanup_begin);
660
661void torture_cleanup_end(void)
662{
654 mutex_lock(&fullstop_mutex); 663 mutex_lock(&fullstop_mutex);
655 torture_type = NULL; 664 torture_type = NULL;
656 mutex_unlock(&fullstop_mutex); 665 mutex_unlock(&fullstop_mutex);
657 return false;
658} 666}
659EXPORT_SYMBOL_GPL(torture_cleanup); 667EXPORT_SYMBOL_GPL(torture_cleanup_end);
660 668
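
The split then gives a torture module a window in which torture_type remains valid while the module stops its own kthreads. A sketch of the expected calling pattern, with mytorture_reader() and reader_task hypothetical:

#include <linux/sched.h>
#include <linux/torture.h>

static struct task_struct *reader_task;
static int mytorture_reader(void *arg);	/* hypothetical reader kthread */

static void mytorture_cleanup(void)
{
	if (torture_cleanup_begin())
		return;		/* an external shutdown is already under way */

	/* This module's kthreads may still reference torture_type here. */
	torture_stop_kthread(mytorture_reader, reader_task);

	torture_cleanup_end();	/* only now may torture_type become NULL */
}
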
661/* 669/*
662 * Is it time for the current torture test to stop? 670 * Is it time for the current torture test to stop?
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 5dbe22aa3efd..09b685daee3d 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2043,9 +2043,10 @@ __acquires(&pool->lock)
2043 * kernels, where a requeueing work item waiting for something to 2043 * kernels, where a requeueing work item waiting for something to
2044 * happen could deadlock with stop_machine as such work item could 2044 * happen could deadlock with stop_machine as such work item could
2045 * indefinitely requeue itself while all other CPUs are trapped in 2045 * indefinitely requeue itself while all other CPUs are trapped in
2046 * stop_machine. 2046 * stop_machine. At the same time, report a quiescent RCU state so
2047 * the same condition doesn't freeze RCU.
2047 */ 2048 */
2048 cond_resched(); 2049 cond_resched_rcu_qs();
2049 2050
2050 spin_lock_irq(&pool->lock); 2051 spin_lock_irq(&pool->lock);
2051 2052
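
The same idiom applies to any kernel loop that can run for a long time without blocking: cond_resched_rcu_qs() both offers to reschedule and reports a quiescent state so that RCU grace periods are not held up. A minimal sketch, with the kthread and its per-iteration work hypothetical:

#include <linux/kthread.h>
#include <linux/rcupdate.h>

static void my_do_one_unit_of_work(void)
{
	/* Hypothetical placeholder for real per-iteration processing. */
}

static int my_worker_fn(void *arg)
{
	while (!kthread_should_stop()) {
		my_do_one_unit_of_work();
		/* Yield if needed and report an RCU quiescent state. */
		cond_resched_rcu_qs();
	}
	return 0;
}
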