author     Linus Torvalds <torvalds@linux-foundation.org>  2018-06-04 18:54:04 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-06-04 18:54:04 -0400
commit     4057adafb395204af4ff93f3669ecb49eb45b3cf (patch)
tree       d6b0abf50ec5cd658fe958f90941c0192486549c
parent     137f5ae4dae85011b13e3a7049414c4060ad94c0 (diff)
parent     52f2b34f46223ca2789320fa10c13f6664c1b628 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:

 - updates to the handling of expedited grace periods

 - updates to reduce lock contention in the rcu_node combining tree

   [ These are in preparation for the consolidation of RCU-bh,
     RCU-preempt, and RCU-sched into a single flavor, which was requested
     by Linus in response to a security flaw whose root cause included
     confusion between the multiple flavors of RCU ]

 - torture-test updates that save their users some time and effort

 - miscellaneous fixes

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  rcu/x86: Provide early rcu_cpu_starting() callback
  torture: Make kvm-find-errors.sh find build warnings
  rcutorture: Abbreviate kvm.sh summary lines
  rcutorture: Print end-of-test state in kvm.sh summary
  rcutorture: Print end-of-test state
  torture: Fold parse-torture.sh into parse-console.sh
  torture: Add a script to edit output from failed runs
  rcu: Update list of rcu_future_grace_period() trace events
  rcu: Drop early GP request check from rcu_gp_kthread()
  rcu: Simplify and inline cpu_needs_another_gp()
  rcu: The rcu_gp_cleanup() function does not need cpu_needs_another_gp()
  rcu: Make rcu_start_this_gp() check for out-of-range requests
  rcu: Add funnel locking to rcu_start_this_gp()
  rcu: Make rcu_start_future_gp() caller select grace period
  rcu: Inline rcu_start_gp_advanced() into rcu_start_future_gp()
  rcu: Clear request other than RCU_GP_FLAG_INIT at GP end
  rcu: Cleanup, don't put ->completed into an int
  rcu: Switch __rcu_process_callbacks() to rcu_accelerate_cbs()
  rcu: Avoid __call_rcu_core() root rcu_node ->lock acquisition
  rcu: Make rcu_migrate_callbacks wake GP kthread when needed
  ...
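For readers unfamiliar with the "funnel locking" introduced by "rcu: Add funnel locking to rcu_start_this_gp()" above: a requester climbs the rcu_node combining tree from its leaf toward the root, taking one node's lock at a time, and stops climbing as soon as it finds a node where the needed grace period has already been recorded, so only the first requester for a given grace period ever contends on the root lock. The following is a standalone userspace sketch of that idea only, not the kernel's code: the pthread mutexes, the invented three-node tree, and the simplified start_this_gp() (which omits the in-progress-grace-period checks of the real rcu_start_this_gp()) are all assumptions made for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy combining tree: nodes[0] is the root, nodes[1] and nodes[2] are leaves. */
struct node {
	pthread_mutex_t lock;
	struct node *parent;
	unsigned long requested;	/* Highest grace-period number requested here. */
};

static struct node nodes[3];

static void init_tree(void)
{
	int i;

	for (i = 0; i < 3; i++) {
		pthread_mutex_init(&nodes[i].lock, NULL);
		nodes[i].requested = 0;
		nodes[i].parent = i ? &nodes[0] : NULL;
	}
}

/*
 * Funnel locking, simplified: the caller holds the leaf's lock.  Climb
 * toward the root, locking one ancestor at a time, recording the request
 * at each level, and bail out early if some earlier caller already
 * recorded a request covering grace period "c".  Returns true if the
 * caller must actually start a new grace period.
 */
static bool start_this_gp(struct node *leaf, unsigned long c)
{
	struct node *np;

	for (np = leaf; np; np = np->parent) {
		if (np != leaf)
			pthread_mutex_lock(&np->lock);
		if (np->requested >= c) {
			/* Someone beat us to it; no need to climb further. */
			if (np != leaf)
				pthread_mutex_unlock(&np->lock);
			return false;
		}
		np->requested = c;
		if (np != leaf)
			pthread_mutex_unlock(&np->lock);
	}
	return true;	/* Reached and updated the root. */
}

int main(void)
{
	init_tree();

	pthread_mutex_lock(&nodes[1].lock);
	printf("first request for GP 1: start GP? %d\n", start_this_gp(&nodes[1], 1));
	pthread_mutex_unlock(&nodes[1].lock);

	pthread_mutex_lock(&nodes[2].lock);
	printf("second request for GP 1: start GP? %d\n", start_this_gp(&nodes[2], 1));
	pthread_mutex_unlock(&nodes[2].lock);
	return 0;
}

Unlike this sketch, the real rcu_start_this_gp() keeps the root rcu_node locked while deciding whether to set RCU_GP_FLAG_INIT, and leaves waking the grace-period kthread to its caller, as the kernel/rcu/tree.c hunks below show.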
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 4
-rw-r--r--  drivers/nvme/host/core.c | 2
-rw-r--r--  include/linux/rcupdate.h | 5
-rw-r--r--  include/linux/rcutiny.h | 1
-rw-r--r--  include/linux/rcutree.h | 2
-rw-r--r--  include/linux/sched.h | 8
-rw-r--r--  include/linux/srcu.h | 36
-rw-r--r--  include/trace/events/rcu.h | 13
-rw-r--r--  kernel/rcu/rcu.h | 12
-rw-r--r--  kernel/rcu/rcu_segcblist.c | 18
-rw-r--r--  kernel/rcu/rcu_segcblist.h | 2
-rw-r--r--  kernel/rcu/rcuperf.c | 2
-rw-r--r--  kernel/rcu/rcutorture.c | 15
-rw-r--r--  kernel/rcu/srcutiny.c | 9
-rw-r--r--  kernel/rcu/srcutree.c | 30
-rw-r--r--  kernel/rcu/tree.c | 364
-rw-r--r--  kernel/rcu/tree.h | 36
-rw-r--r--  kernel/rcu/tree_exp.h | 235
-rw-r--r--  kernel/rcu/tree_plugin.h | 98
-rw-r--r--  kernel/rcu/update.c | 50
-rw-r--r--  kernel/sched/core.c | 14
-rw-r--r--  kernel/softirq.c | 3
-rw-r--r--  kernel/torture.c | 2
-rw-r--r--  kernel/trace/trace_benchmark.c | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 56
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 12
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 1
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-console.sh | 115
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-torture.sh | 105
31 files changed, 631 insertions(+), 629 deletions(-)
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index a27fbfb0efb8..65eb856526b7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,5 @@
1What is RCU? -- "Read, Copy, Update"
2
1Please note that the "What is RCU?" LWN series is an excellent place 3Please note that the "What is RCU?" LWN series is an excellent place
2to start learning about RCU: 4to start learning about RCU:
3 5
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7468de429087..3ea0047beb40 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -46,6 +46,7 @@
46#include <linux/pci.h> 46#include <linux/pci.h>
47#include <linux/smp.h> 47#include <linux/smp.h>
48#include <linux/syscore_ops.h> 48#include <linux/syscore_ops.h>
49#include <linux/rcupdate.h>
49 50
50#include <asm/cpufeature.h> 51#include <asm/cpufeature.h>
51#include <asm/e820/api.h> 52#include <asm/e820/api.h>
@@ -793,6 +794,9 @@ void mtrr_ap_init(void)
793 794
794 if (!use_intel() || mtrr_aps_delayed_init) 795 if (!use_intel() || mtrr_aps_delayed_init)
795 return; 796 return;
797
798 rcu_cpu_starting(smp_processor_id());
799
796 /* 800 /*
797 * Ideally we should hold mtrr_mutex here to avoid mtrr entries 801 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
798 * changed, but this routine will be called in cpu boot time, 802 * changed, but this routine will be called in cpu boot time,
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 04a20da76786..c8b30067b6ae 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -357,7 +357,7 @@ static void nvme_free_ns_head(struct kref *ref)
357 nvme_mpath_remove_disk(head); 357 nvme_mpath_remove_disk(head);
358 ida_simple_remove(&head->subsys->ns_ida, head->instance); 358 ida_simple_remove(&head->subsys->ns_ida, head->instance);
359 list_del_init(&head->entry); 359 list_del_init(&head->entry);
360 cleanup_srcu_struct(&head->srcu); 360 cleanup_srcu_struct_quiesced(&head->srcu);
361 nvme_put_subsystem(head->subsys); 361 nvme_put_subsystem(head->subsys);
362 kfree(head); 362 kfree(head);
363} 363}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 36360d07f25b..e679b175b411 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -108,7 +108,6 @@ void rcu_sched_qs(void);
108void rcu_bh_qs(void); 108void rcu_bh_qs(void);
109void rcu_check_callbacks(int user); 109void rcu_check_callbacks(int user);
110void rcu_report_dead(unsigned int cpu); 110void rcu_report_dead(unsigned int cpu);
111void rcu_cpu_starting(unsigned int cpu);
112void rcutree_migrate_callbacks(int cpu); 111void rcutree_migrate_callbacks(int cpu);
113 112
114#ifdef CONFIG_RCU_STALL_COMMON 113#ifdef CONFIG_RCU_STALL_COMMON
@@ -188,13 +187,13 @@ static inline void exit_tasks_rcu_finish(void) { }
188#endif /* #else #ifdef CONFIG_TASKS_RCU */ 187#endif /* #else #ifdef CONFIG_TASKS_RCU */
189 188
190/** 189/**
191 * cond_resched_rcu_qs - Report potential quiescent states to RCU 190 * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
192 * 191 *
193 * This macro resembles cond_resched(), except that it is defined to 192 * This macro resembles cond_resched(), except that it is defined to
194 * report potential quiescent states to RCU-tasks even if the cond_resched() 193 * report potential quiescent states to RCU-tasks even if the cond_resched()
195 * machinery were to be shut off, as some advocate for PREEMPT kernels. 194 * machinery were to be shut off, as some advocate for PREEMPT kernels.
196 */ 195 */
197#define cond_resched_rcu_qs() \ 196#define cond_resched_tasks_rcu_qs() \
198do { \ 197do { \
199 if (!cond_resched()) \ 198 if (!cond_resched()) \
200 rcu_note_voluntary_context_switch_lite(current); \ 199 rcu_note_voluntary_context_switch_lite(current); \
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ce9beec35e34..7b3c82e8a625 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,5 +132,6 @@ static inline void rcu_all_qs(void) { barrier(); }
132#define rcutree_offline_cpu NULL 132#define rcutree_offline_cpu NULL
133#define rcutree_dead_cpu NULL 133#define rcutree_dead_cpu NULL
134#define rcutree_dying_cpu NULL 134#define rcutree_dying_cpu NULL
135static inline void rcu_cpu_starting(unsigned int cpu) { }
135 136
136#endif /* __LINUX_RCUTINY_H */ 137#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index fd996cdf1833..914655848ef6 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)
74void rcu_barrier(void); 74void rcu_barrier(void);
75void rcu_barrier_bh(void); 75void rcu_barrier_bh(void);
76void rcu_barrier_sched(void); 76void rcu_barrier_sched(void);
77bool rcu_eqs_special_set(int cpu);
77unsigned long get_state_synchronize_rcu(void); 78unsigned long get_state_synchronize_rcu(void);
78void cond_synchronize_rcu(unsigned long oldstate); 79void cond_synchronize_rcu(unsigned long oldstate);
79unsigned long get_state_synchronize_sched(void); 80unsigned long get_state_synchronize_sched(void);
@@ -100,5 +101,6 @@ int rcutree_online_cpu(unsigned int cpu);
100int rcutree_offline_cpu(unsigned int cpu); 101int rcutree_offline_cpu(unsigned int cpu);
101int rcutree_dead_cpu(unsigned int cpu); 102int rcutree_dead_cpu(unsigned int cpu);
102int rcutree_dying_cpu(unsigned int cpu); 103int rcutree_dying_cpu(unsigned int cpu);
104void rcu_cpu_starting(unsigned int cpu);
103 105
104#endif /* __LINUX_RCUTREE_H */ 106#endif /* __LINUX_RCUTREE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca3f3eae8980..5a0c10b45273 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1661,7 +1661,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
1661 * explicit rescheduling in places that are safe. The return 1661 * explicit rescheduling in places that are safe. The return
1662 * value indicates whether a reschedule was done in fact. 1662 * value indicates whether a reschedule was done in fact.
1663 * cond_resched_lock() will drop the spinlock before scheduling, 1663 * cond_resched_lock() will drop the spinlock before scheduling,
1664 * cond_resched_softirq() will enable bhs before scheduling.
1665 */ 1664 */
1666#ifndef CONFIG_PREEMPT 1665#ifndef CONFIG_PREEMPT
1667extern int _cond_resched(void); 1666extern int _cond_resched(void);
@@ -1681,13 +1680,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
1681 __cond_resched_lock(lock); \ 1680 __cond_resched_lock(lock); \
1682}) 1681})
1683 1682
1684extern int __cond_resched_softirq(void);
1685
1686#define cond_resched_softirq() ({ \
1687 ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
1688 __cond_resched_softirq(); \
1689})
1690
1691static inline void cond_resched_rcu(void) 1683static inline void cond_resched_rcu(void)
1692{ 1684{
1693#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) 1685#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 33c1c698df09..91494d7e8e41 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -69,11 +69,45 @@ struct srcu_struct { };
69 69
70void call_srcu(struct srcu_struct *sp, struct rcu_head *head, 70void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
71 void (*func)(struct rcu_head *head)); 71 void (*func)(struct rcu_head *head));
72void cleanup_srcu_struct(struct srcu_struct *sp); 72void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
73int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); 73int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
74void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); 74void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
75void synchronize_srcu(struct srcu_struct *sp); 75void synchronize_srcu(struct srcu_struct *sp);
76 76
77/**
78 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
79 * @sp: structure to clean up.
80 *
81 * Must invoke this after you are finished using a given srcu_struct that
82 * was initialized via init_srcu_struct(), else you leak memory.
83 */
84static inline void cleanup_srcu_struct(struct srcu_struct *sp)
85{
86 _cleanup_srcu_struct(sp, false);
87}
88
89/**
90 * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
91 * @sp: structure to clean up.
92 *
93 * Must invoke this after you are finished using a given srcu_struct that
94 * was initialized via init_srcu_struct(), else you leak memory. Also,
95 * all grace-period processing must have completed.
96 *
97 * "Completed" means that the last synchronize_srcu() and
98 * synchronize_srcu_expedited() calls must have returned before the call
99 * to cleanup_srcu_struct_quiesced(). It also means that the callback
100 * from the last call_srcu() must have been invoked before the call to
101 * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
102 * with this last. Violating these rules will get you a WARN_ON() splat
103 * (with high probability, anyway), and will also cause the srcu_struct
104 * to be leaked.
105 */
106static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
107{
108 _cleanup_srcu_struct(sp, true);
109}
110
77#ifdef CONFIG_DEBUG_LOCK_ALLOC 111#ifdef CONFIG_DEBUG_LOCK_ALLOC
78 112
79/** 113/**
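To make the ordering contract in the new cleanup_srcu_struct_quiesced() kernel-doc concrete, here is a minimal, hypothetical module-style sketch; the demo_srcu, demo_obj, and function names are invented for illustration. It shows the callback queued by call_srcu() being forced to finish via srcu_barrier() before the quiesced cleanup runs. Whether the quiesced variant is actually safe for a given user still depends on all grace-period processing having completed, exactly as the comment above warns.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/srcu.h>

static struct srcu_struct demo_srcu;

struct demo_obj {
	struct rcu_head rh;
	int payload;
};

static void demo_free_cb(struct rcu_head *rh)
{
	kfree(container_of(rh, struct demo_obj, rh));
}

static int __init demo_init(void)
{
	struct demo_obj *p;
	int ret;

	ret = init_srcu_struct(&demo_srcu);
	if (ret)
		return ret;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		/* Nothing queued yet, so the ordinary cleanup suffices here. */
		cleanup_srcu_struct(&demo_srcu);
		return -ENOMEM;
	}
	p->payload = 42;
	call_srcu(&demo_srcu, &p->rh, demo_free_cb);	/* Queue an SRCU callback. */
	return 0;
}

static void __exit demo_exit(void)
{
	/*
	 * srcu_barrier() waits for the callback queued in demo_init() to be
	 * invoked, satisfying the "last call_srcu() callback has run"
	 * requirement before the quiesced cleanup is used.
	 */
	srcu_barrier(&demo_srcu);
	cleanup_srcu_struct_quiesced(&demo_srcu);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");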
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d8c33298c153..5936aac357ab 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,
84); 84);
85 85
86/* 86/*
87 * Tracepoint for future grace-period events, including those for no-callbacks 87 * Tracepoint for future grace-period events. The caller should pull
88 * CPUs. The caller should pull the data from the rcu_node structure, 88 * the data from the rcu_node structure, other than rcuname, which comes
89 * other than rcuname, which comes from the rcu_state structure, and event, 89 * from the rcu_state structure, and event, which is one of the following:
90 * which is one of the following:
91 * 90 *
92 * "Startleaf": Request a nocb grace period based on leaf-node data. 91 * "Startleaf": Request a grace period based on leaf-node data.
92 * "Prestarted": Someone beat us to the request
93 * "Startedleaf": Leaf-node start proved sufficient. 93 * "Startedleaf": Leaf-node start proved sufficient.
94 * "Startedleafroot": Leaf-node start proved sufficient after checking root. 94 * "Startedleafroot": Leaf-node start proved sufficient after checking root.
95 * "Startedroot": Requested a nocb grace period based on root-node data. 95 * "Startedroot": Requested a nocb grace period based on root-node data.
96 * "NoGPkthread": The RCU grace-period kthread has not yet started.
96 * "StartWait": Start waiting for the requested grace period. 97 * "StartWait": Start waiting for the requested grace period.
97 * "ResumeWait": Resume waiting after signal. 98 * "ResumeWait": Resume waiting after signal.
98 * "EndWait": Complete wait. 99 * "EndWait": Complete wait.
99 * "Cleanup": Clean up rcu_node structure after previous GP. 100 * "Cleanup": Clean up rcu_node structure after previous GP.
100 * "CleanupMore": Clean up, and another no-CB GP is needed. 101 * "CleanupMore": Clean up, and another GP is needed.
101 */ 102 */
102TRACE_EVENT(rcu_future_grace_period, 103TRACE_EVENT(rcu_future_grace_period,
103 104
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7a693e31184a..40cea6735c2d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
270 } 270 }
271} 271}
272 272
273/* Returns first leaf rcu_node of the specified RCU flavor. */
274#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
275
276/* Is this rcu_node a leaf? */
277#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
278
273/* 279/*
274 * Do a full breadth-first scan of the rcu_node structures for the 280 * Do a full breadth-first scan of the rcu_node structures for the
275 * specified rcu_state structure. 281 * specified rcu_state structure.
@@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
284 * rcu_node tree with but one rcu_node structure, this loop is a no-op. 290 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
285 */ 291 */
286#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 292#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
287 for ((rnp) = &(rsp)->node[0]; \ 293 for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rnp); (rnp)++)
288 (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
289 294
290/* 295/*
291 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 296 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
294 * It is still a leaf node, even if it is also the root node. 299 * It is still a leaf node, even if it is also the root node.
295 */ 300 */
296#define rcu_for_each_leaf_node(rsp, rnp) \ 301#define rcu_for_each_leaf_node(rsp, rnp) \
297 for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ 302 for ((rnp) = rcu_first_leaf_node(rsp); \
298 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) 303 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
299 304
300/* 305/*
@@ -486,6 +491,7 @@ void rcu_force_quiescent_state(void);
486void rcu_bh_force_quiescent_state(void); 491void rcu_bh_force_quiescent_state(void);
487void rcu_sched_force_quiescent_state(void); 492void rcu_sched_force_quiescent_state(void);
488extern struct workqueue_struct *rcu_gp_wq; 493extern struct workqueue_struct *rcu_gp_wq;
494extern struct workqueue_struct *rcu_par_gp_wq;
489#endif /* #else #ifdef CONFIG_TINY_RCU */ 495#endif /* #else #ifdef CONFIG_TINY_RCU */
490 496
491#ifdef CONFIG_RCU_NOCB_CPU 497#ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 88cba7c2956c..5aff271adf1e 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -404,24 +404,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
404} 404}
405 405
406/* 406/*
407 * Scan the specified rcu_segcblist structure for callbacks that need
408 * a grace period later than the one specified by "seq". We don't look
409 * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
410 * have a grace-period sequence number.
411 */
412bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
413 unsigned long seq)
414{
415 int i;
416
417 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
418 if (rsclp->tails[i - 1] != rsclp->tails[i] &&
419 ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
420 return true;
421 return false;
422}
423
424/*
425 * Merge the source rcu_segcblist structure into the destination 407 * Merge the source rcu_segcblist structure into the destination
426 * rcu_segcblist structure, then initialize the source. Any pending 408 * rcu_segcblist structure, then initialize the source. Any pending
427 * callbacks from the source get to start over. It is best to 409 * callbacks from the source get to start over. It is best to
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 581c12b63544..948470cef385 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
134 struct rcu_cblist *rclp); 134 struct rcu_cblist *rclp);
135void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq); 135void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
136bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); 136bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
137bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
138 unsigned long seq);
139void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp, 137void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
140 struct rcu_segcblist *src_rsclp); 138 struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 777e7a6a0292..e232846516b3 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)
369 */ 369 */
370static void rcu_perf_wait_shutdown(void) 370static void rcu_perf_wait_shutdown(void)
371{ 371{
372 cond_resched_rcu_qs(); 372 cond_resched_tasks_rcu_qs();
373 if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters) 373 if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
374 return; 374 return;
375 while (!torture_must_stop()) 375 while (!torture_must_stop())
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 680c96d8c00f..e628fcfd1bde 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -593,7 +593,12 @@ static void srcu_torture_init(void)
593 593
594static void srcu_torture_cleanup(void) 594static void srcu_torture_cleanup(void)
595{ 595{
596 cleanup_srcu_struct(&srcu_ctld); 596 static DEFINE_TORTURE_RANDOM(rand);
597
598 if (torture_random(&rand) & 0x800)
599 cleanup_srcu_struct(&srcu_ctld);
600 else
601 cleanup_srcu_struct_quiesced(&srcu_ctld);
597 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ 602 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
598} 603}
599 604
@@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;
1609static void 1614static void
1610rcu_torture_cleanup(void) 1615rcu_torture_cleanup(void)
1611{ 1616{
1617 int flags = 0;
1618 unsigned long gpnum = 0;
1619 unsigned long completed = 0;
1612 int i; 1620 int i;
1613 1621
1614 rcutorture_record_test_transition(); 1622 rcutorture_record_test_transition();
@@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)
1639 fakewriter_tasks = NULL; 1647 fakewriter_tasks = NULL;
1640 } 1648 }
1641 1649
1650 rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
1651 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
1652 &flags, &gpnum, &completed);
1653 pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n",
1654 cur_ops->name, gpnum, completed, flags);
1642 torture_stop_kthread(rcu_torture_stats, stats_task); 1655 torture_stop_kthread(rcu_torture_stats, stats_task);
1643 torture_stop_kthread(rcu_torture_fqs, fqs_task); 1656 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1644 for (i = 0; i < ncbflooders; i++) 1657 for (i = 0; i < ncbflooders; i++)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 76ac5f50b2c7..622792abe41a 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
86 * Must invoke this after you are finished using a given srcu_struct that 86 * Must invoke this after you are finished using a given srcu_struct that
87 * was initialized via init_srcu_struct(), else you leak memory. 87 * was initialized via init_srcu_struct(), else you leak memory.
88 */ 88 */
89void cleanup_srcu_struct(struct srcu_struct *sp) 89void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
90{ 90{
91 WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); 91 WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
92 flush_work(&sp->srcu_work); 92 if (quiesced)
93 WARN_ON(work_pending(&sp->srcu_work));
94 else
95 flush_work(&sp->srcu_work);
93 WARN_ON(sp->srcu_gp_running); 96 WARN_ON(sp->srcu_gp_running);
94 WARN_ON(sp->srcu_gp_waiting); 97 WARN_ON(sp->srcu_gp_waiting);
95 WARN_ON(sp->srcu_cb_head); 98 WARN_ON(sp->srcu_cb_head);
96 WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); 99 WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
97} 100}
98EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 101EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
99 102
100/* 103/*
101 * Removes the count for the old reader from the appropriate element of 104 * Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index fb560fca9ef4..b4123d7a2cec 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)
366 return SRCU_INTERVAL; 366 return SRCU_INTERVAL;
367} 367}
368 368
369/** 369/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
370 * cleanup_srcu_struct - deconstruct a sleep-RCU structure 370void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
371 * @sp: structure to clean up.
372 *
373 * Must invoke this after you are finished using a given srcu_struct that
374 * was initialized via init_srcu_struct(), else you leak memory.
375 */
376void cleanup_srcu_struct(struct srcu_struct *sp)
377{ 371{
378 int cpu; 372 int cpu;
379 373
380 if (WARN_ON(!srcu_get_delay(sp))) 374 if (WARN_ON(!srcu_get_delay(sp)))
381 return; /* Leakage unless caller handles error. */ 375 return; /* Just leak it! */
382 if (WARN_ON(srcu_readers_active(sp))) 376 if (WARN_ON(srcu_readers_active(sp)))
383 return; /* Leakage unless caller handles error. */ 377 return; /* Just leak it! */
384 flush_delayed_work(&sp->work); 378 if (quiesced) {
379 if (WARN_ON(delayed_work_pending(&sp->work)))
380 return; /* Just leak it! */
381 } else {
382 flush_delayed_work(&sp->work);
383 }
385 for_each_possible_cpu(cpu) 384 for_each_possible_cpu(cpu)
386 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); 385 if (quiesced) {
386 if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
387 return; /* Just leak it! */
388 } else {
389 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
390 }
387 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || 391 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
388 WARN_ON(srcu_readers_active(sp))) { 392 WARN_ON(srcu_readers_active(sp))) {
389 pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); 393 pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
@@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
392 free_percpu(sp->sda); 396 free_percpu(sp->sda);
393 sp->sda = NULL; 397 sp->sda = NULL;
394} 398}
395EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 399EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
396 400
397/* 401/*
398 * Counts the new reader in the appropriate per-CPU element of the 402 * Counts the new reader in the appropriate per-CPU element of the
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a734692a581..aa7cade1b9f3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);
524static ulong jiffies_till_sched_qs = HZ / 10; 524static ulong jiffies_till_sched_qs = HZ / 10;
525module_param(jiffies_till_sched_qs, ulong, 0444); 525module_param(jiffies_till_sched_qs, ulong, 0444);
526 526
527static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
528 struct rcu_data *rdp);
529static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)); 527static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
530static void force_quiescent_state(struct rcu_state *rsp); 528static void force_quiescent_state(struct rcu_state *rsp);
531static int rcu_pending(void); 529static int rcu_pending(void);
@@ -711,44 +709,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
711} 709}
712 710
713/* 711/*
714 * Is there any need for future grace periods?
715 * Interrupts must be disabled. If the caller does not hold the root
716 * rnp_node structure's ->lock, the results are advisory only.
717 */
718static int rcu_future_needs_gp(struct rcu_state *rsp)
719{
720 struct rcu_node *rnp = rcu_get_root(rsp);
721 int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
722 int *fp = &rnp->need_future_gp[idx];
723
724 lockdep_assert_irqs_disabled();
725 return READ_ONCE(*fp);
726}
727
728/*
729 * Does the current CPU require a not-yet-started grace period?
730 * The caller must have disabled interrupts to prevent races with
731 * normal callback registry.
732 */
733static bool
734cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
735{
736 lockdep_assert_irqs_disabled();
737 if (rcu_gp_in_progress(rsp))
738 return false; /* No, a grace period is already in progress. */
739 if (rcu_future_needs_gp(rsp))
740 return true; /* Yes, a no-CBs CPU needs one. */
741 if (!rcu_segcblist_is_enabled(&rdp->cblist))
742 return false; /* No, this is a no-CBs (or offline) CPU. */
743 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
744 return true; /* Yes, CPU has newly registered callbacks. */
745 if (rcu_segcblist_future_gp_needed(&rdp->cblist,
746 READ_ONCE(rsp->completed)))
747 return true; /* Yes, CBs for future grace period. */
748 return false; /* No grace period needed. */
749}
750
751/*
752 * Enter an RCU extended quiescent state, which can be either the 712 * Enter an RCU extended quiescent state, which can be either the
753 * idle loop or adaptive-tickless usermode execution. 713 * idle loop or adaptive-tickless usermode execution.
754 * 714 *
@@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1234 } 1194 }
1235 1195
1236 /* 1196 /*
1237 * Has this CPU encountered a cond_resched_rcu_qs() since the 1197 * Has this CPU encountered a cond_resched() since the beginning
1238 * beginning of the grace period? For this to be the case, 1198 * of the grace period? For this to be the case, the CPU has to
1239 * the CPU has to have noticed the current grace period. This 1199 * have noticed the current grace period. This might not be the
1240 * might not be the case for nohz_full CPUs looping in the kernel. 1200 * case for nohz_full CPUs looping in the kernel.
1241 */ 1201 */
1242 jtsq = jiffies_till_sched_qs; 1202 jtsq = jiffies_till_sched_qs;
1243 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); 1203 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1642,18 +1602,30 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1642 return rnp->completed + 1; 1602 return rnp->completed + 1;
1643 1603
1644 /* 1604 /*
1605 * If the current rcu_node structure believes that RCU is
1606 * idle, and if the rcu_state structure does not yet reflect
1607 * the start of a new grace period, then the next grace period
1608 * will suffice. The memory barrier is needed to accurately
1609 * sample the rsp->gpnum, and pairs with the second lock
1610 * acquisition in rcu_gp_init(), which is augmented with
1611 * smp_mb__after_unlock_lock() for this purpose.
1612 */
1613 if (rnp->gpnum == rnp->completed) {
1614 smp_mb(); /* See above block comment. */
1615 if (READ_ONCE(rsp->gpnum) == rnp->completed)
1616 return rnp->completed + 1;
1617 }
1618
1619 /*
1645 * Otherwise, wait for a possible partial grace period and 1620 * Otherwise, wait for a possible partial grace period and
1646 * then the subsequent full grace period. 1621 * then the subsequent full grace period.
1647 */ 1622 */
1648 return rnp->completed + 2; 1623 return rnp->completed + 2;
1649} 1624}
1650 1625
1651/* 1626/* Trace-event wrapper function for trace_rcu_future_grace_period. */
1652 * Trace-event helper function for rcu_start_future_gp() and 1627static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1653 * rcu_nocb_wait_gp(). 1628 unsigned long c, const char *s)
1654 */
1655static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1656 unsigned long c, const char *s)
1657{ 1629{
1658 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, 1630 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
1659 rnp->completed, c, rnp->level, 1631 rnp->completed, c, rnp->level,
@@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1661} 1633}
1662 1634
1663/* 1635/*
1664 * Start some future grace period, as needed to handle newly arrived 1636 * Start the specified grace period, as needed to handle newly arrived
1665 * callbacks. The required future grace periods are recorded in each 1637 * callbacks. The required future grace periods are recorded in each
1666 * rcu_node structure's ->need_future_gp field. Returns true if there 1638 * rcu_node structure's ->need_future_gp[] field. Returns true if there
1667 * is reason to awaken the grace-period kthread. 1639 * is reason to awaken the grace-period kthread.
1668 * 1640 *
1669 * The caller must hold the specified rcu_node structure's ->lock. 1641 * The caller must hold the specified rcu_node structure's ->lock, which
1642 * is why the caller is responsible for waking the grace-period kthread.
1670 */ 1643 */
1671static bool __maybe_unused 1644static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1672rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, 1645 unsigned long c)
1673 unsigned long *c_out)
1674{ 1646{
1675 unsigned long c;
1676 bool ret = false; 1647 bool ret = false;
1677 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1648 struct rcu_state *rsp = rdp->rsp;
1678 1649 struct rcu_node *rnp_root;
1679 raw_lockdep_assert_held_rcu_node(rnp);
1680
1681 /*
1682 * Pick up grace-period number for new callbacks. If this
1683 * grace period is already marked as needed, return to the caller.
1684 */
1685 c = rcu_cbs_completed(rdp->rsp, rnp);
1686 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1687 if (rnp->need_future_gp[c & 0x1]) {
1688 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1689 goto out;
1690 }
1691 1650
1692 /* 1651 /*
1693 * If either this rcu_node structure or the root rcu_node structure 1652 * Use funnel locking to either acquire the root rcu_node
1694 * believe that a grace period is in progress, then we must wait 1653 * structure's lock or bail out if the need for this grace period
1695 * for the one following, which is in "c". Because our request 1654 * has already been recorded -- or has already started. If there
1696 * will be noticed at the end of the current grace period, we don't 1655 * is already a grace period in progress in a non-leaf node, no
1697 * need to explicitly start one. We only do the lockless check 1656 * recording is needed because the end of the grace period will
1698 * of rnp_root's fields if the current rcu_node structure thinks 1657 * scan the leaf rcu_node structures. Note that rnp->lock must
1699 * there is no grace period in flight, and because we hold rnp->lock, 1658 * not be released.
1700 * the only possible change is when rnp_root's two fields are
1701 * equal, in which case rnp_root->gpnum might be concurrently
1702 * incremented. But that is OK, as it will just result in our
1703 * doing some extra useless work.
1704 */ 1659 */
1705 if (rnp->gpnum != rnp->completed || 1660 raw_lockdep_assert_held_rcu_node(rnp);
1706 READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) { 1661 trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
1707 rnp->need_future_gp[c & 0x1]++; 1662 for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
1708 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); 1663 if (rnp_root != rnp)
1709 goto out; 1664 raw_spin_lock_rcu_node(rnp_root);
1665 WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
1666 need_future_gp_mask(), c));
1667 if (need_future_gp_element(rnp_root, c) ||
1668 ULONG_CMP_GE(rnp_root->gpnum, c) ||
1669 (rnp != rnp_root &&
1670 rnp_root->gpnum != rnp_root->completed)) {
1671 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
1672 goto unlock_out;
1673 }
1674 need_future_gp_element(rnp_root, c) = true;
1675 if (rnp_root != rnp && rnp_root->parent != NULL)
1676 raw_spin_unlock_rcu_node(rnp_root);
1677 if (!rnp_root->parent)
1678 break; /* At root, and perhaps also leaf. */
1710 } 1679 }
1711 1680
1712 /* 1681 /* If GP already in progress, just leave, otherwise start one. */
1713 * There might be no grace period in progress. If we don't already 1682 if (rnp_root->gpnum != rnp_root->completed) {
1714 * hold it, acquire the root rcu_node structure's lock in order to 1683 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
1715 * start one (if needed).
1716 */
1717 if (rnp != rnp_root)
1718 raw_spin_lock_rcu_node(rnp_root);
1719
1720 /*
1721 * Get a new grace-period number. If there really is no grace
1722 * period in progress, it will be smaller than the one we obtained
1723 * earlier. Adjust callbacks as needed.
1724 */
1725 c = rcu_cbs_completed(rdp->rsp, rnp_root);
1726 if (!rcu_is_nocb_cpu(rdp->cpu))
1727 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1728
1729 /*
1730 * If the needed for the required grace period is already
1731 * recorded, trace and leave.
1732 */
1733 if (rnp_root->need_future_gp[c & 0x1]) {
1734 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
1735 goto unlock_out; 1684 goto unlock_out;
1736 } 1685 }
1737 1686 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
1738 /* Record the need for the future grace period. */ 1687 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
1739 rnp_root->need_future_gp[c & 0x1]++; 1688 if (!rsp->gp_kthread) {
1740 1689 trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
1741 /* If a grace period is not already in progress, start one. */ 1690 goto unlock_out;
1742 if (rnp_root->gpnum != rnp_root->completed) {
1743 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1744 } else {
1745 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1746 ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1747 } 1691 }
1692 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
1693 ret = true; /* Caller must wake GP kthread. */
1748unlock_out: 1694unlock_out:
1749 if (rnp != rnp_root) 1695 if (rnp != rnp_root)
1750 raw_spin_unlock_rcu_node(rnp_root); 1696 raw_spin_unlock_rcu_node(rnp_root);
1751out:
1752 if (c_out != NULL)
1753 *c_out = c;
1754 return ret; 1697 return ret;
1755} 1698}
1756 1699
@@ -1758,16 +1701,16 @@ out:
1758 * Clean up any old requests for the just-ended grace period. Also return 1701 * Clean up any old requests for the just-ended grace period. Also return
1759 * whether any additional grace periods have been requested. 1702 * whether any additional grace periods have been requested.
1760 */ 1703 */
1761static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 1704static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1762{ 1705{
1763 int c = rnp->completed; 1706 unsigned long c = rnp->completed;
1764 int needmore; 1707 bool needmore;
1765 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1708 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1766 1709
1767 rnp->need_future_gp[c & 0x1] = 0; 1710 need_future_gp_element(rnp, c) = false;
1768 needmore = rnp->need_future_gp[(c + 1) & 0x1]; 1711 needmore = need_any_future_gp(rnp);
1769 trace_rcu_future_gp(rnp, rdp, c, 1712 trace_rcu_this_gp(rnp, rdp, c,
1770 needmore ? TPS("CleanupMore") : TPS("Cleanup")); 1713 needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1771 return needmore; 1714 return needmore;
1772} 1715}
1773 1716
@@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1802static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1745static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1803 struct rcu_data *rdp) 1746 struct rcu_data *rdp)
1804{ 1747{
1748 unsigned long c;
1805 bool ret = false; 1749 bool ret = false;
1806 1750
1807 raw_lockdep_assert_held_rcu_node(rnp); 1751 raw_lockdep_assert_held_rcu_node(rnp);
@@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1820 * accelerating callback invocation to an earlier grace-period 1764 * accelerating callback invocation to an earlier grace-period
1821 * number. 1765 * number.
1822 */ 1766 */
1823 if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp))) 1767 c = rcu_cbs_completed(rsp, rnp);
1824 ret = rcu_start_future_gp(rnp, rdp, NULL); 1768 if (rcu_segcblist_accelerate(&rdp->cblist, c))
1769 ret = rcu_start_this_gp(rnp, rdp, c);
1825 1770
1826 /* Trace depending on how much we were able to accelerate. */ 1771 /* Trace depending on how much we were able to accelerate. */
1827 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) 1772 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
2049 rnp->level, rnp->grplo, 1994 rnp->level, rnp->grplo,
2050 rnp->grphi, rnp->qsmask); 1995 rnp->grphi, rnp->qsmask);
2051 raw_spin_unlock_irq_rcu_node(rnp); 1996 raw_spin_unlock_irq_rcu_node(rnp);
2052 cond_resched_rcu_qs(); 1997 cond_resched_tasks_rcu_qs();
2053 WRITE_ONCE(rsp->gp_activity, jiffies); 1998 WRITE_ONCE(rsp->gp_activity, jiffies);
2054 } 1999 }
2055 2000
@@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2108{ 2053{
2109 unsigned long gp_duration; 2054 unsigned long gp_duration;
2110 bool needgp = false; 2055 bool needgp = false;
2111 int nocb = 0;
2112 struct rcu_data *rdp; 2056 struct rcu_data *rdp;
2113 struct rcu_node *rnp = rcu_get_root(rsp); 2057 struct rcu_node *rnp = rcu_get_root(rsp);
2114 struct swait_queue_head *sq; 2058 struct swait_queue_head *sq;
@@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2147 if (rnp == rdp->mynode) 2091 if (rnp == rdp->mynode)
2148 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; 2092 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
2149 /* smp_mb() provided by prior unlock-lock pair. */ 2093 /* smp_mb() provided by prior unlock-lock pair. */
2150 nocb += rcu_future_gp_cleanup(rsp, rnp); 2094 needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
2151 sq = rcu_nocb_gp_get(rnp); 2095 sq = rcu_nocb_gp_get(rnp);
2152 raw_spin_unlock_irq_rcu_node(rnp); 2096 raw_spin_unlock_irq_rcu_node(rnp);
2153 rcu_nocb_gp_cleanup(sq); 2097 rcu_nocb_gp_cleanup(sq);
2154 cond_resched_rcu_qs(); 2098 cond_resched_tasks_rcu_qs();
2155 WRITE_ONCE(rsp->gp_activity, jiffies); 2099 WRITE_ONCE(rsp->gp_activity, jiffies);
2156 rcu_gp_slow(rsp, gp_cleanup_delay); 2100 rcu_gp_slow(rsp, gp_cleanup_delay);
2157 } 2101 }
2158 rnp = rcu_get_root(rsp); 2102 rnp = rcu_get_root(rsp);
2159 raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */ 2103 raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
2160 rcu_nocb_gp_set(rnp, nocb);
2161 2104
2162 /* Declare grace period done. */ 2105 /* Declare grace period done. */
2163 WRITE_ONCE(rsp->completed, rsp->gpnum); 2106 WRITE_ONCE(rsp->completed, rsp->gpnum);
2164 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 2107 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
2165 rsp->gp_state = RCU_GP_IDLE; 2108 rsp->gp_state = RCU_GP_IDLE;
2109 /* Check for GP requests since above loop. */
2166 rdp = this_cpu_ptr(rsp->rda); 2110 rdp = this_cpu_ptr(rsp->rda);
2111 if (need_any_future_gp(rnp)) {
2112 trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
2113 TPS("CleanupMore"));
2114 needgp = true;
2115 }
2167 /* Advance CBs to reduce false positives below. */ 2116 /* Advance CBs to reduce false positives below. */
2168 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; 2117 if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
2169 if (needgp || cpu_needs_another_gp(rsp, rdp)) {
2170 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); 2118 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
2171 trace_rcu_grace_period(rsp->name, 2119 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
2172 READ_ONCE(rsp->gpnum),
2173 TPS("newreq")); 2120 TPS("newreq"));
2174 } 2121 }
2122 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
2175 raw_spin_unlock_irq_rcu_node(rnp); 2123 raw_spin_unlock_irq_rcu_node(rnp);
2176} 2124}
2177 2125
@@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2202 /* Locking provides needed memory barrier. */ 2150 /* Locking provides needed memory barrier. */
2203 if (rcu_gp_init(rsp)) 2151 if (rcu_gp_init(rsp))
2204 break; 2152 break;
2205 cond_resched_rcu_qs(); 2153 cond_resched_tasks_rcu_qs();
2206 WRITE_ONCE(rsp->gp_activity, jiffies); 2154 WRITE_ONCE(rsp->gp_activity, jiffies);
2207 WARN_ON(signal_pending(current)); 2155 WARN_ON(signal_pending(current));
2208 trace_rcu_grace_period(rsp->name, 2156 trace_rcu_grace_period(rsp->name,
@@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2247 trace_rcu_grace_period(rsp->name, 2195 trace_rcu_grace_period(rsp->name,
2248 READ_ONCE(rsp->gpnum), 2196 READ_ONCE(rsp->gpnum),
2249 TPS("fqsend")); 2197 TPS("fqsend"));
2250 cond_resched_rcu_qs(); 2198 cond_resched_tasks_rcu_qs();
2251 WRITE_ONCE(rsp->gp_activity, jiffies); 2199 WRITE_ONCE(rsp->gp_activity, jiffies);
2252 ret = 0; /* Force full wait till next FQS. */ 2200 ret = 0; /* Force full wait till next FQS. */
2253 j = jiffies_till_next_fqs; 2201 j = jiffies_till_next_fqs;
@@ -2260,7 +2208,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2260 } 2208 }
2261 } else { 2209 } else {
2262 /* Deal with stray signal. */ 2210 /* Deal with stray signal. */
2263 cond_resched_rcu_qs(); 2211 cond_resched_tasks_rcu_qs();
2264 WRITE_ONCE(rsp->gp_activity, jiffies); 2212 WRITE_ONCE(rsp->gp_activity, jiffies);
2265 WARN_ON(signal_pending(current)); 2213 WARN_ON(signal_pending(current));
2266 trace_rcu_grace_period(rsp->name, 2214 trace_rcu_grace_period(rsp->name,
@@ -2283,71 +2231,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
2283} 2231}
2284 2232
2285/* 2233/*
2286 * Start a new RCU grace period if warranted, re-initializing the hierarchy
2287 * in preparation for detecting the next grace period. The caller must hold
2288 * the root node's ->lock and hard irqs must be disabled.
2289 *
2290 * Note that it is legal for a dying CPU (which is marked as offline) to
2291 * invoke this function. This can happen when the dying CPU reports its
2292 * quiescent state.
2293 *
2294 * Returns true if the grace-period kthread must be awakened.
2295 */
2296static bool
2297rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
2298 struct rcu_data *rdp)
2299{
2300 raw_lockdep_assert_held_rcu_node(rnp);
2301 if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
2302 /*
2303 * Either we have not yet spawned the grace-period
2304 * task, this CPU does not need another grace period,
2305 * or a grace period is already in progress.
2306 * Either way, don't start a new grace period.
2307 */
2308 return false;
2309 }
2310 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
2311 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
2312 TPS("newreq"));
2313
2314 /*
2315 * We can't do wakeups while holding the rnp->lock, as that
2316 * could cause possible deadlocks with the rq->lock. Defer
2317 * the wakeup to our caller.
2318 */
2319 return true;
2320}
2321
2322/*
2323 * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
2324 * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
2325 * is invoked indirectly from rcu_advance_cbs(), which would result in
2326 * endless recursion -- or would do so if it wasn't for the self-deadlock
2327 * that is encountered beforehand.
2328 *
2329 * Returns true if the grace-period kthread needs to be awakened.
2330 */
2331static bool rcu_start_gp(struct rcu_state *rsp)
2332{
2333 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
2334 struct rcu_node *rnp = rcu_get_root(rsp);
2335 bool ret = false;
2336
2337 /*
2338 * If there is no grace period in progress right now, any
2339 * callbacks we have up to this point will be satisfied by the
2340 * next grace period. Also, advancing the callbacks reduces the
2341 * probability of false positives from cpu_needs_another_gp()
2342 * resulting in pointless grace periods. So, advance callbacks
2343 * then start the grace period!
2344 */
2345 ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
2346 ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
2347 return ret;
2348}
2349
2350/*
2351 * Report a full set of quiescent states to the specified rcu_state data 2234 * Report a full set of quiescent states to the specified rcu_state data
2352 * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period 2235 * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period
2353 * kthread if another grace period is required. Whether we wake 2236 * kthread if another grace period is required. Whether we wake
@@ -2398,7 +2281,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2398 return; 2281 return;
2399 } 2282 }
2400 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ 2283 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
2401 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 && 2284 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
2402 rcu_preempt_blocked_readers_cgp(rnp)); 2285 rcu_preempt_blocked_readers_cgp(rnp));
2403 rnp->qsmask &= ~mask; 2286 rnp->qsmask &= ~mask;
2404 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 2287 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
@@ -2782,7 +2665,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2782 struct rcu_node *rnp; 2665 struct rcu_node *rnp;
2783 2666
2784 rcu_for_each_leaf_node(rsp, rnp) { 2667 rcu_for_each_leaf_node(rsp, rnp) {
2785 cond_resched_rcu_qs(); 2668 cond_resched_tasks_rcu_qs();
2786 mask = 0; 2669 mask = 0;
2787 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2670 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2788 if (rnp->qsmask == 0) { 2671 if (rnp->qsmask == 0) {
@@ -2874,22 +2757,27 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2874 unsigned long flags; 2757 unsigned long flags;
2875 bool needwake; 2758 bool needwake;
2876 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2759 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2760 struct rcu_node *rnp;
2877 2761
2878 WARN_ON_ONCE(!rdp->beenonline); 2762 WARN_ON_ONCE(!rdp->beenonline);
2879 2763
2880 /* Update RCU state based on any recent quiescent states. */ 2764 /* Update RCU state based on any recent quiescent states. */
2881 rcu_check_quiescent_state(rsp, rdp); 2765 rcu_check_quiescent_state(rsp, rdp);
2882 2766
2883 /* Does this CPU require a not-yet-started grace period? */ 2767 /* No grace period and unregistered callbacks? */
2884 local_irq_save(flags); 2768 if (!rcu_gp_in_progress(rsp) &&
2885 if (cpu_needs_another_gp(rsp, rdp)) { 2769 rcu_segcblist_is_enabled(&rdp->cblist)) {
2886 raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */ 2770 local_irq_save(flags);
2887 needwake = rcu_start_gp(rsp); 2771 if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
2888 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); 2772 local_irq_restore(flags);
2889 if (needwake) 2773 } else {
2890 rcu_gp_kthread_wake(rsp); 2774 rnp = rdp->mynode;
2891 } else { 2775 raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
2892 local_irq_restore(flags); 2776 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2777 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2778 if (needwake)
2779 rcu_gp_kthread_wake(rsp);
2780 }
2893 } 2781 }
2894 2782
2895 /* If there are callbacks ready, invoke them. */ 2783 /* If there are callbacks ready, invoke them. */
@@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2973 2861
2974 /* Start a new grace period if one not already started. */ 2862 /* Start a new grace period if one not already started. */
2975 if (!rcu_gp_in_progress(rsp)) { 2863 if (!rcu_gp_in_progress(rsp)) {
2976 struct rcu_node *rnp_root = rcu_get_root(rsp); 2864 struct rcu_node *rnp = rdp->mynode;
2977 2865
2978 raw_spin_lock_rcu_node(rnp_root); 2866 raw_spin_lock_rcu_node(rnp);
2979 needwake = rcu_start_gp(rsp); 2867 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2980 raw_spin_unlock_rcu_node(rnp_root); 2868 raw_spin_unlock_rcu_node(rnp);
2981 if (needwake) 2869 if (needwake)
2982 rcu_gp_kthread_wake(rsp); 2870 rcu_gp_kthread_wake(rsp);
2983 } else { 2871 } else {
@@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3368 return 1; 3256 return 1;
3369 3257
3370 /* Has RCU gone idle with this CPU needing another grace period? */ 3258 /* Has RCU gone idle with this CPU needing another grace period? */
3371 if (cpu_needs_another_gp(rsp, rdp)) 3259 if (!rcu_gp_in_progress(rsp) &&
3260 rcu_segcblist_is_enabled(&rdp->cblist) &&
3261 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
3372 return 1; 3262 return 1;
3373 3263
3374 /* Has another RCU grace period completed? */ 3264 /* Has another RCU grace period completed? */
@@ -3775,6 +3665,8 @@ int rcutree_dead_cpu(unsigned int cpu)
3775 return 0; 3665 return 0;
3776} 3666}
3777 3667
3668static DEFINE_PER_CPU(int, rcu_cpu_started);
3669
3778/* 3670/*
3779 * Mark the specified CPU as being online so that subsequent grace periods 3671 * Mark the specified CPU as being online so that subsequent grace periods
3780 * (both expedited and normal) will wait on it. Note that this means that 3672 * (both expedited and normal) will wait on it. Note that this means that
@@ -3796,6 +3688,11 @@ void rcu_cpu_starting(unsigned int cpu)
3796 struct rcu_node *rnp; 3688 struct rcu_node *rnp;
3797 struct rcu_state *rsp; 3689 struct rcu_state *rsp;
3798 3690
3691 if (per_cpu(rcu_cpu_started, cpu))
3692 return;
3693
3694 per_cpu(rcu_cpu_started, cpu) = 1;
3695
3799 for_each_rcu_flavor(rsp) { 3696 for_each_rcu_flavor(rsp) {
3800 rdp = per_cpu_ptr(rsp->rda, cpu); 3697 rdp = per_cpu_ptr(rsp->rda, cpu);
3801 rnp = rdp->mynode; 3698 rnp = rdp->mynode;
@@ -3852,6 +3749,8 @@ void rcu_report_dead(unsigned int cpu)
3852 preempt_enable(); 3749 preempt_enable();
3853 for_each_rcu_flavor(rsp) 3750 for_each_rcu_flavor(rsp)
3854 rcu_cleanup_dying_idle_cpu(cpu, rsp); 3751 rcu_cleanup_dying_idle_cpu(cpu, rsp);
3752
3753 per_cpu(rcu_cpu_started, cpu) = 0;
3855} 3754}
3856 3755
3857/* Migrate the dead CPU's callbacks to the current CPU. */ 3756/* Migrate the dead CPU's callbacks to the current CPU. */
@@ -3861,6 +3760,7 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
3861 struct rcu_data *my_rdp; 3760 struct rcu_data *my_rdp;
3862 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3761 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3863 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 3762 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
3763 bool needwake;
3864 3764
3865 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist)) 3765 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
3866 return; /* No callbacks to migrate. */ 3766 return; /* No callbacks to migrate. */
@@ -3872,12 +3772,15 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
3872 return; 3772 return;
3873 } 3773 }
3874 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ 3774 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
3875 rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */ 3775 /* Leverage recent GPs and set GP for new callbacks. */
3876 rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */ 3776 needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||
3777 rcu_advance_cbs(rsp, rnp_root, my_rdp);
3877 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); 3778 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
3878 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != 3779 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
3879 !rcu_segcblist_n_cbs(&my_rdp->cblist)); 3780 !rcu_segcblist_n_cbs(&my_rdp->cblist));
3880 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags); 3781 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
3782 if (needwake)
3783 rcu_gp_kthread_wake(rsp);
3881 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || 3784 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
3882 !rcu_segcblist_empty(&rdp->cblist), 3785 !rcu_segcblist_empty(&rdp->cblist),
3883 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", 3786 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
@@ -4056,7 +3959,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
4056 3959
4057 init_swait_queue_head(&rsp->gp_wq); 3960 init_swait_queue_head(&rsp->gp_wq);
4058 init_swait_queue_head(&rsp->expedited_wq); 3961 init_swait_queue_head(&rsp->expedited_wq);
4059 rnp = rsp->level[rcu_num_lvls - 1]; 3962 rnp = rcu_first_leaf_node(rsp);
4060 for_each_possible_cpu(i) { 3963 for_each_possible_cpu(i) {
4061 while (i > rnp->grphi) 3964 while (i > rnp->grphi)
4062 rnp++; 3965 rnp++;
@@ -4168,6 +4071,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
4168} 4071}
4169 4072
4170struct workqueue_struct *rcu_gp_wq; 4073struct workqueue_struct *rcu_gp_wq;
4074struct workqueue_struct *rcu_par_gp_wq;
4171 4075
4172void __init rcu_init(void) 4076void __init rcu_init(void)
4173{ 4077{
@@ -4199,6 +4103,8 @@ void __init rcu_init(void)
4199 /* Create workqueue for expedited GPs and for Tree SRCU. */ 4103 /* Create workqueue for expedited GPs and for Tree SRCU. */
4200 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); 4104 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
4201 WARN_ON(!rcu_gp_wq); 4105 WARN_ON(!rcu_gp_wq);
4106 rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
4107 WARN_ON(!rcu_par_gp_wq);
4202} 4108}
4203 4109
4204#include "tree_exp.h" 4110#include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f491ab4f2e8e..78e051dffc5b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -58,6 +58,14 @@ struct rcu_dynticks {
58#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 58#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
59}; 59};
60 60
61/* Communicate arguments to a workqueue handler. */
62struct rcu_exp_work {
63 smp_call_func_t rew_func;
64 struct rcu_state *rew_rsp;
65 unsigned long rew_s;
66 struct work_struct rew_work;
67};
68
61/* RCU's kthread states for tracing. */ 69/* RCU's kthread states for tracing. */
62#define RCU_KTHREAD_STOPPED 0 70#define RCU_KTHREAD_STOPPED 0
63#define RCU_KTHREAD_RUNNING 1 71#define RCU_KTHREAD_RUNNING 1
@@ -150,15 +158,32 @@ struct rcu_node {
150 struct swait_queue_head nocb_gp_wq[2]; 158 struct swait_queue_head nocb_gp_wq[2];
151 /* Place for rcu_nocb_kthread() to wait GP. */ 159 /* Place for rcu_nocb_kthread() to wait GP. */
152#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 160#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
153 int need_future_gp[2]; 161 u8 need_future_gp[4]; /* Counts of upcoming GP requests. */
154 /* Counts of upcoming no-CB GP requests. */
155 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; 162 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
156 163
157 spinlock_t exp_lock ____cacheline_internodealigned_in_smp; 164 spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
158 unsigned long exp_seq_rq; 165 unsigned long exp_seq_rq;
159 wait_queue_head_t exp_wq[4]; 166 wait_queue_head_t exp_wq[4];
167 struct rcu_exp_work rew;
168 bool exp_need_flush; /* Need to flush workitem? */
160} ____cacheline_internodealigned_in_smp; 169} ____cacheline_internodealigned_in_smp;
161 170
171/* Accessors for ->need_future_gp[] array. */
172#define need_future_gp_mask() \
173 (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
174#define need_future_gp_element(rnp, c) \
175 ((rnp)->need_future_gp[(c) & need_future_gp_mask()])
176#define need_any_future_gp(rnp) \
177({ \
178 int __i; \
179 bool __nonzero = false; \
180 \
181 for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
182 __nonzero = __nonzero || \
183 READ_ONCE((rnp)->need_future_gp[__i]); \
184 __nonzero; \
185})
186
162/* 187/*
163 * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and 188 * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
164 * are indexed relative to this interval rather than the global CPU ID space. 189 * are indexed relative to this interval rather than the global CPU ID space.
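
Note on the accessors added above: ->need_future_gp[] is treated as a small ring of request counters indexed by the low bits of the grace-period number, and need_any_future_gp() reports whether any slot holds an outstanding request. A minimal userspace model of the same indexing; the array size of 4 matches the patch, everything else is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_SLOTS 4			/* must be a power of two */

static uint8_t need_future_gp[NR_SLOTS];

static uint8_t *need_future_gp_element(unsigned long c)
{
	return &need_future_gp[c & (NR_SLOTS - 1)];
}

static bool need_any_future_gp(void)
{
	int i;

	for (i = 0; i < NR_SLOTS; i++)
		if (need_future_gp[i])
			return true;
	return false;
}

int main(void)
{
	unsigned long c = 42;		/* some future grace-period number */

	(*need_future_gp_element(c))++;	/* record a request for GP "c" */
	printf("slot %lu, any request pending: %d\n",
	       c & (NR_SLOTS - 1), need_any_future_gp());
	(*need_future_gp_element(c))--;	/* GP "c" completed */
	printf("any request pending: %d\n", need_any_future_gp());
	return 0;
}
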
@@ -224,10 +249,6 @@ struct rcu_data {
224#ifdef CONFIG_RCU_FAST_NO_HZ 249#ifdef CONFIG_RCU_FAST_NO_HZ
225 struct rcu_head oom_head; 250 struct rcu_head oom_head;
226#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 251#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
227 atomic_long_t exp_workdone0; /* # done by workqueue. */
228 atomic_long_t exp_workdone1; /* # done by others #1. */
229 atomic_long_t exp_workdone2; /* # done by others #2. */
230 atomic_long_t exp_workdone3; /* # done by others #3. */
231 int exp_dynticks_snap; /* Double-check need for IPI. */ 252 int exp_dynticks_snap; /* Double-check need for IPI. */
232 253
233 /* 6) Callback offloading. */ 254 /* 6) Callback offloading. */
@@ -408,7 +429,6 @@ extern struct rcu_state rcu_preempt_state;
408#endif /* #ifdef CONFIG_PREEMPT_RCU */ 429#endif /* #ifdef CONFIG_PREEMPT_RCU */
409 430
410int rcu_dynticks_snap(struct rcu_dynticks *rdtp); 431int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
411bool rcu_eqs_special_set(int cpu);
412 432
413#ifdef CONFIG_RCU_BOOST 433#ifdef CONFIG_RCU_BOOST
414DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 434DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -438,7 +458,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
438static void invoke_rcu_callbacks_kthread(void); 458static void invoke_rcu_callbacks_kthread(void);
439static bool rcu_is_callbacks_kthread(void); 459static bool rcu_is_callbacks_kthread(void);
440#ifdef CONFIG_RCU_BOOST 460#ifdef CONFIG_RCU_BOOST
441static void rcu_preempt_do_callbacks(void);
442static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 461static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
443 struct rcu_node *rnp); 462 struct rcu_node *rnp);
444#endif /* #ifdef CONFIG_RCU_BOOST */ 463#endif /* #ifdef CONFIG_RCU_BOOST */
@@ -454,7 +473,6 @@ static void print_cpu_stall_info_end(void);
454static void zero_cpu_stall_ticks(struct rcu_data *rdp); 473static void zero_cpu_stall_ticks(struct rcu_data *rdp);
455static void increment_cpu_stall_ticks(void); 474static void increment_cpu_stall_ticks(void);
456static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu); 475static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
457static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
458static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); 476static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
459static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); 477static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
460static void rcu_init_one_nocb(struct rcu_node *rnp); 478static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f72eefab8543..d40708e8c5d6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -20,6 +20,8 @@
20 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 20 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 */ 21 */
22 22
23#include <linux/lockdep.h>
24
23/* 25/*
24 * Record the start of an expedited grace period. 26 * Record the start of an expedited grace period.
25 */ 27 */
@@ -154,15 +156,35 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
154 * for the current expedited grace period. Works only for preemptible 156 * for the current expedited grace period. Works only for preemptible
155 * RCU -- other RCU implementations use other means. 157 * RCU -- other RCU implementations use other means.
156 * 158 *
157 * Caller must hold the rcu_state's exp_mutex. 159 * Caller must hold the specified rcu_node structure's ->lock.
158 */ 160 */
159static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) 161static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
160{ 162{
163 raw_lockdep_assert_held_rcu_node(rnp);
164
161 return rnp->exp_tasks == NULL && 165 return rnp->exp_tasks == NULL &&
162 READ_ONCE(rnp->expmask) == 0; 166 READ_ONCE(rnp->expmask) == 0;
163} 167}
164 168
165/* 169/*
170 * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
171 * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
172 * itself.
173 */
174static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
175{
176 unsigned long flags;
177 bool ret;
178
179 raw_spin_lock_irqsave_rcu_node(rnp, flags);
180 ret = sync_rcu_preempt_exp_done(rnp);
181 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
182
183 return ret;
184}
185
186
187/*
166 * Report the exit from RCU read-side critical section for the last task 188 * Report the exit from RCU read-side critical section for the last task
167 * that queued itself during or before the current expedited preemptible-RCU 189 * that queued itself during or before the current expedited preemptible-RCU
168 * grace period. This event is reported either to the rcu_node structure on 190 * grace period. This event is reported either to the rcu_node structure on
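
Note on the hunk above: sync_rcu_preempt_exp_done() now asserts via lockdep that the caller holds the rcu_node ->lock, and the new _unlocked() variant is a thin wrapper that takes and releases that lock itself so wait loops can use the predicate without holding the lock. The same split, modelled with a pthread mutex and placeholder state:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t rnp_lock = PTHREAD_MUTEX_INITIALIZER;
static void *exp_tasks;			/* placeholder for rnp->exp_tasks */
static unsigned long expmask;		/* placeholder for rnp->expmask */

/* Caller must hold rnp_lock (the kernel version asserts this via lockdep). */
static bool exp_done(void)
{
	return exp_tasks == NULL && expmask == 0;
}

/* Variant for callers that do not hold the lock: take it around the check. */
static bool exp_done_unlocked(void)
{
	bool ret;

	pthread_mutex_lock(&rnp_lock);
	ret = exp_done();
	pthread_mutex_unlock(&rnp_lock);
	return ret;
}

int main(void)
{
	printf("expedited GP done: %d\n", exp_done_unlocked());
	return 0;
}
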
@@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
170 * recursively up the tree. (Calm down, calm down, we do the recursion 192 * recursively up the tree. (Calm down, calm down, we do the recursion
171 * iteratively!) 193 * iteratively!)
172 * 194 *
173 * Caller must hold the rcu_state's exp_mutex and the specified rcu_node 195 * Caller must hold the specified rcu_node structure's ->lock.
174 * structure's ->lock.
175 */ 196 */
176static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 197static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
177 bool wake, unsigned long flags) 198 bool wake, unsigned long flags)
@@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
207/* 228/*
208 * Report expedited quiescent state for specified node. This is a 229 * Report expedited quiescent state for specified node. This is a
209 * lock-acquisition wrapper function for __rcu_report_exp_rnp(). 230 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
210 *
211 * Caller must hold the rcu_state's exp_mutex.
212 */ 231 */
213static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, 232static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
214 struct rcu_node *rnp, bool wake) 233 struct rcu_node *rnp, bool wake)
@@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
221 240
222/* 241/*
223 * Report expedited quiescent state for multiple CPUs, all covered by the 242 * Report expedited quiescent state for multiple CPUs, all covered by the
224 * specified leaf rcu_node structure. Caller must hold the rcu_state's 243 * specified leaf rcu_node structure.
225 * exp_mutex.
226 */ 244 */
227static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, 245static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
228 unsigned long mask, bool wake) 246 unsigned long mask, bool wake)
@@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
248} 266}
249 267
250/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ 268/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
251static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, 269static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
252 unsigned long s)
253{ 270{
254 if (rcu_exp_gp_seq_done(rsp, s)) { 271 if (rcu_exp_gp_seq_done(rsp, s)) {
255 trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); 272 trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
256 /* Ensure test happens before caller kfree(). */ 273 /* Ensure test happens before caller kfree(). */
257 smp_mb__before_atomic(); /* ^^^ */ 274 smp_mb__before_atomic(); /* ^^^ */
258 atomic_long_inc(stat);
259 return true; 275 return true;
260 } 276 }
261 return false; 277 return false;
@@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
289 * promoting locality and is not strictly needed for correctness. 305 * promoting locality and is not strictly needed for correctness.
290 */ 306 */
291 for (; rnp != NULL; rnp = rnp->parent) { 307 for (; rnp != NULL; rnp = rnp->parent) {
292 if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) 308 if (sync_exp_work_done(rsp, s))
293 return true; 309 return true;
294 310
295 /* Work not done, either wait here or go up. */ 311 /* Work not done, either wait here or go up. */
@@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
302 rnp->grplo, rnp->grphi, 318 rnp->grplo, rnp->grphi,
303 TPS("wait")); 319 TPS("wait"));
304 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 320 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
305 sync_exp_work_done(rsp, 321 sync_exp_work_done(rsp, s));
306 &rdp->exp_workdone2, s));
307 return true; 322 return true;
308 } 323 }
309 rnp->exp_seq_rq = s; /* Followers can wait on us. */ 324 rnp->exp_seq_rq = s; /* Followers can wait on us. */
@@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
313 } 328 }
314 mutex_lock(&rsp->exp_mutex); 329 mutex_lock(&rsp->exp_mutex);
315fastpath: 330fastpath:
316 if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { 331 if (sync_exp_work_done(rsp, s)) {
317 mutex_unlock(&rsp->exp_mutex); 332 mutex_unlock(&rsp->exp_mutex);
318 return true; 333 return true;
319 } 334 }
@@ -362,93 +377,129 @@ static void sync_sched_exp_online_cleanup(int cpu)
362} 377}
363 378
364/* 379/*
365 * Select the nodes that the upcoming expedited grace period needs 380 * Select the CPUs within the specified rcu_node that the upcoming
366 * to wait for. 381 * expedited grace period needs to wait for.
367 */ 382 */
368static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, 383static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
369 smp_call_func_t func)
370{ 384{
371 int cpu; 385 int cpu;
372 unsigned long flags; 386 unsigned long flags;
387 smp_call_func_t func;
373 unsigned long mask_ofl_test; 388 unsigned long mask_ofl_test;
374 unsigned long mask_ofl_ipi; 389 unsigned long mask_ofl_ipi;
375 int ret; 390 int ret;
376 struct rcu_node *rnp; 391 struct rcu_exp_work *rewp =
377 392 container_of(wp, struct rcu_exp_work, rew_work);
378 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); 393 struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
379 sync_exp_reset_tree(rsp); 394 struct rcu_state *rsp = rewp->rew_rsp;
380 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
381 rcu_for_each_leaf_node(rsp, rnp) {
382 raw_spin_lock_irqsave_rcu_node(rnp, flags);
383 395
384 /* Each pass checks a CPU for identity, offline, and idle. */ 396 func = rewp->rew_func;
385 mask_ofl_test = 0; 397 raw_spin_lock_irqsave_rcu_node(rnp, flags);
386 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
387 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
388 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
389 struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
390 int snap;
391 398
392 if (raw_smp_processor_id() == cpu || 399 /* Each pass checks a CPU for identity, offline, and idle. */
393 !(rnp->qsmaskinitnext & mask)) { 400 mask_ofl_test = 0;
401 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
402 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
403 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
404 struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
405 int snap;
406
407 if (raw_smp_processor_id() == cpu ||
408 !(rnp->qsmaskinitnext & mask)) {
409 mask_ofl_test |= mask;
410 } else {
411 snap = rcu_dynticks_snap(rdtp);
412 if (rcu_dynticks_in_eqs(snap))
394 mask_ofl_test |= mask; 413 mask_ofl_test |= mask;
395 } else { 414 else
396 snap = rcu_dynticks_snap(rdtp); 415 rdp->exp_dynticks_snap = snap;
397 if (rcu_dynticks_in_eqs(snap))
398 mask_ofl_test |= mask;
399 else
400 rdp->exp_dynticks_snap = snap;
401 }
402 } 416 }
403 mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; 417 }
404 418 mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
405 /*
406 * Need to wait for any blocked tasks as well. Note that
407 * additional blocking tasks will also block the expedited
408 * GP until such time as the ->expmask bits are cleared.
409 */
410 if (rcu_preempt_has_tasks(rnp))
411 rnp->exp_tasks = rnp->blkd_tasks.next;
412 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
413 419
414 /* IPI the remaining CPUs for expedited quiescent state. */ 420 /*
415 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { 421 * Need to wait for any blocked tasks as well. Note that
416 unsigned long mask = leaf_node_cpu_bit(rnp, cpu); 422 * additional blocking tasks will also block the expedited GP
417 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 423 * until such time as the ->expmask bits are cleared.
424 */
425 if (rcu_preempt_has_tasks(rnp))
426 rnp->exp_tasks = rnp->blkd_tasks.next;
427 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
418 428
419 if (!(mask_ofl_ipi & mask)) 429 /* IPI the remaining CPUs for expedited quiescent state. */
420 continue; 430 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
431 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
432 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
433
434 if (!(mask_ofl_ipi & mask))
435 continue;
421retry_ipi: 436retry_ipi:
422 if (rcu_dynticks_in_eqs_since(rdp->dynticks, 437 if (rcu_dynticks_in_eqs_since(rdp->dynticks,
423 rdp->exp_dynticks_snap)) { 438 rdp->exp_dynticks_snap)) {
424 mask_ofl_test |= mask; 439 mask_ofl_test |= mask;
425 continue; 440 continue;
426 } 441 }
427 ret = smp_call_function_single(cpu, func, rsp, 0); 442 ret = smp_call_function_single(cpu, func, rsp, 0);
428 if (!ret) { 443 if (!ret) {
429 mask_ofl_ipi &= ~mask; 444 mask_ofl_ipi &= ~mask;
430 continue; 445 continue;
431 } 446 }
432 /* Failed, raced with CPU hotplug operation. */ 447 /* Failed, raced with CPU hotplug operation. */
433 raw_spin_lock_irqsave_rcu_node(rnp, flags); 448 raw_spin_lock_irqsave_rcu_node(rnp, flags);
434 if ((rnp->qsmaskinitnext & mask) && 449 if ((rnp->qsmaskinitnext & mask) &&
435 (rnp->expmask & mask)) { 450 (rnp->expmask & mask)) {
436 /* Online, so delay for a bit and try again. */ 451 /* Online, so delay for a bit and try again. */
437 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
438 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
439 schedule_timeout_uninterruptible(1);
440 goto retry_ipi;
441 }
442 /* CPU really is offline, so we can ignore it. */
443 if (!(rnp->expmask & mask))
444 mask_ofl_ipi &= ~mask;
445 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 452 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
453 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
454 schedule_timeout_uninterruptible(1);
455 goto retry_ipi;
456 }
457 /* CPU really is offline, so we can ignore it. */
458 if (!(rnp->expmask & mask))
459 mask_ofl_ipi &= ~mask;
460 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
461 }
462 /* Report quiescent states for those that went offline. */
463 mask_ofl_test |= mask_ofl_ipi;
464 if (mask_ofl_test)
465 rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
466}
467
468/*
469 * Select the nodes that the upcoming expedited grace period needs
470 * to wait for.
471 */
472static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
473 smp_call_func_t func)
474{
475 struct rcu_node *rnp;
476
477 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
478 sync_exp_reset_tree(rsp);
479 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
480
481 /* Schedule work for each leaf rcu_node structure. */
482 rcu_for_each_leaf_node(rsp, rnp) {
483 rnp->exp_need_flush = false;
484 if (!READ_ONCE(rnp->expmask))
485 continue; /* Avoid early boot non-existent wq. */
486 rnp->rew.rew_func = func;
487 rnp->rew.rew_rsp = rsp;
488 if (!READ_ONCE(rcu_par_gp_wq) ||
489 rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
490 /* No workqueues yet. */
491 sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
492 continue;
446 } 493 }
447 /* Report quiescent states for those that went offline. */ 494 INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
448 mask_ofl_test |= mask_ofl_ipi; 495 queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
449 if (mask_ofl_test) 496 rnp->exp_need_flush = true;
450 rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
451 } 497 }
498
499 /* Wait for workqueue jobs (if any) to complete. */
500 rcu_for_each_leaf_node(rsp, rnp)
501 if (rnp->exp_need_flush)
502 flush_work(&rnp->rew.rew_work);
452} 503}
453 504
454static void synchronize_sched_expedited_wait(struct rcu_state *rsp) 505static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
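
Note on the rewritten sync_rcu_exp_select_cpus() above: instead of scanning every leaf inline, it packages the per-leaf scan into an rcu_exp_work item, queues one item per busy leaf on the rcu_par_gp workqueue created earlier in tree.c (pinned to the leaf's first CPU via queue_work_on()), and then flushes only the items it actually queued. A rough stand-alone model of that fan-out/flush shape, using plain threads in place of workqueue items; all names and masks below are illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_LEAVES 3

struct leaf_work {
	int id;
	unsigned long expmask;	/* nonzero: this leaf has CPUs to scan */
	bool need_flush;	/* did we actually start work for it? */
	pthread_t worker;
};

/* Stand-in for sync_rcu_exp_select_node_cpus() running as a work item. */
static void *select_node_cpus(void *arg)
{
	struct leaf_work *lw = arg;

	printf("leaf %d: scanning CPUs in mask %#lx\n", lw->id, lw->expmask);
	return NULL;
}

int main(void)
{
	struct leaf_work leaves[NR_LEAVES] = {
		{ .id = 0, .expmask = 0x3 },
		{ .id = 1, .expmask = 0x0 },	/* nothing to wait for: skipped */
		{ .id = 2, .expmask = 0xc },
	};
	int i;

	for (i = 0; i < NR_LEAVES; i++) {	/* queue one item per busy leaf */
		leaves[i].need_flush = false;
		if (!leaves[i].expmask)
			continue;
		pthread_create(&leaves[i].worker, NULL, select_node_cpus,
			       &leaves[i]);
		leaves[i].need_flush = true;
	}
	for (i = 0; i < NR_LEAVES; i++)		/* "flush" only what was queued */
		if (leaves[i].need_flush)
			pthread_join(leaves[i].worker, NULL);
	return 0;
}
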
@@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
469 for (;;) { 520 for (;;) {
470 ret = swait_event_timeout( 521 ret = swait_event_timeout(
471 rsp->expedited_wq, 522 rsp->expedited_wq,
472 sync_rcu_preempt_exp_done(rnp_root), 523 sync_rcu_preempt_exp_done_unlocked(rnp_root),
473 jiffies_stall); 524 jiffies_stall);
474 if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root)) 525 if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
475 return; 526 return;
476 WARN_ON(ret < 0); /* workqueues should not be signaled. */ 527 WARN_ON(ret < 0); /* workqueues should not be signaled. */
477 if (rcu_cpu_stall_suppress) 528 if (rcu_cpu_stall_suppress)
@@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
504 rcu_for_each_node_breadth_first(rsp, rnp) { 555 rcu_for_each_node_breadth_first(rsp, rnp) {
505 if (rnp == rnp_root) 556 if (rnp == rnp_root)
506 continue; /* printed unconditionally */ 557 continue; /* printed unconditionally */
507 if (sync_rcu_preempt_exp_done(rnp)) 558 if (sync_rcu_preempt_exp_done_unlocked(rnp))
508 continue; 559 continue;
509 pr_cont(" l=%u:%d-%d:%#lx/%c", 560 pr_cont(" l=%u:%d-%d:%#lx/%c",
510 rnp->level, rnp->grplo, rnp->grphi, 561 rnp->level, rnp->grplo, rnp->grphi,
@@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
560 mutex_unlock(&rsp->exp_wake_mutex); 611 mutex_unlock(&rsp->exp_wake_mutex);
561} 612}
562 613
563/* Let the workqueue handler know what it is supposed to do. */
564struct rcu_exp_work {
565 smp_call_func_t rew_func;
566 struct rcu_state *rew_rsp;
567 unsigned long rew_s;
568 struct work_struct rew_work;
569};
570
571/* 614/*
572 * Common code to drive an expedited grace period forward, used by 615 * Common code to drive an expedited grace period forward, used by
573 * workqueues and mid-boot-time tasks. 616 * workqueues and mid-boot-time tasks.
@@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
633 rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); 676 rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
634 rnp = rcu_get_root(rsp); 677 rnp = rcu_get_root(rsp);
635 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 678 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
636 sync_exp_work_done(rsp, &rdp->exp_workdone0, s)); 679 sync_exp_work_done(rsp, s));
637 smp_mb(); /* Workqueue actions happen before return. */ 680 smp_mb(); /* Workqueue actions happen before return. */
638 681
639 /* Let the next expedited grace period start. */ 682 /* Let the next expedited grace period start. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 84fbee4686d3..7fd12039e512 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
182 182
183 raw_lockdep_assert_held_rcu_node(rnp); 183 raw_lockdep_assert_held_rcu_node(rnp);
184 WARN_ON_ONCE(rdp->mynode != rnp); 184 WARN_ON_ONCE(rdp->mynode != rnp);
185 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); 185 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
186 186
187 /* 187 /*
188 * Decide where to queue the newly blocked task. In theory, 188 * Decide where to queue the newly blocked task. In theory,
@@ -384,6 +384,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
384} 384}
385 385
386/* 386/*
387 * Preemptible RCU implementation for rcu_read_lock().
388 * Just increment ->rcu_read_lock_nesting, shared state will be updated
389 * if we block.
390 */
391void __rcu_read_lock(void)
392{
393 current->rcu_read_lock_nesting++;
394 barrier(); /* critical section after entry code. */
395}
396EXPORT_SYMBOL_GPL(__rcu_read_lock);
397
398/*
399 * Preemptible RCU implementation for rcu_read_unlock().
400 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
401 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
402 * invoke rcu_read_unlock_special() to clean up after a context switch
403 * in an RCU read-side critical section and other special cases.
404 */
405void __rcu_read_unlock(void)
406{
407 struct task_struct *t = current;
408
409 if (t->rcu_read_lock_nesting != 1) {
410 --t->rcu_read_lock_nesting;
411 } else {
412 barrier(); /* critical section before exit code. */
413 t->rcu_read_lock_nesting = INT_MIN;
414 barrier(); /* assign before ->rcu_read_unlock_special load */
415 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
416 rcu_read_unlock_special(t);
417 barrier(); /* ->rcu_read_unlock_special load before assign */
418 t->rcu_read_lock_nesting = 0;
419 }
420#ifdef CONFIG_PROVE_LOCKING
421 {
422 int rrln = READ_ONCE(t->rcu_read_lock_nesting);
423
424 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
425 }
426#endif /* #ifdef CONFIG_PROVE_LOCKING */
427}
428EXPORT_SYMBOL_GPL(__rcu_read_unlock);
429
430/*
387 * Advance a ->blkd_tasks-list pointer to the next entry, instead 431 * Advance a ->blkd_tasks-list pointer to the next entry, instead
388 * returning NULL if at the end of the list. 432 * returning NULL if at the end of the list.
389 */ 433 */
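
Note on the __rcu_read_lock()/__rcu_read_unlock() definitions moved here from update.c (see the matching removal below): both are driven by the per-task ->rcu_read_lock_nesting counter. Lock increments it; unlock either decrements it or, on the outermost unlock, parks it at INT_MIN while any deferred cleanup runs, then drops it to zero. A simplified single-task model of that counter discipline, with no deferred-cleanup logic and placeholder names:

#include <limits.h>
#include <stdio.h>

static int rcu_read_lock_nesting;
static int rcu_read_unlock_special;	/* set if deferred cleanup is pending */

static void model_rcu_read_lock(void)
{
	rcu_read_lock_nesting++;
}

static void model_rcu_read_unlock(void)
{
	if (rcu_read_lock_nesting != 1) {
		--rcu_read_lock_nesting;	/* still nested: just count down */
	} else {
		/*
		 * Outermost unlock: park the counter at INT_MIN so any
		 * interrupting code sees "cleanup in progress", run the
		 * deferred work, then drop to zero.
		 */
		rcu_read_lock_nesting = INT_MIN;
		if (rcu_read_unlock_special)
			printf("running deferred unlock cleanup\n");
		rcu_read_lock_nesting = 0;
	}
}

int main(void)
{
	model_rcu_read_lock();
	model_rcu_read_lock();		/* nested critical section */
	rcu_read_unlock_special = 1;
	model_rcu_read_unlock();	/* inner: nesting 2 -> 1 */
	model_rcu_read_unlock();	/* outermost: cleanup runs */
	printf("final nesting: %d\n", rcu_read_lock_nesting);
	return 0;
}
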
@@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t)
489 rnp = t->rcu_blocked_node; 533 rnp = t->rcu_blocked_node;
490 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 534 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
491 WARN_ON_ONCE(rnp != t->rcu_blocked_node); 535 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
492 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); 536 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
493 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); 537 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
494 empty_exp = sync_rcu_preempt_exp_done(rnp); 538 empty_exp = sync_rcu_preempt_exp_done(rnp);
495 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 539 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void)
685 t->rcu_read_unlock_special.b.need_qs = true; 729 t->rcu_read_unlock_special.b.need_qs = true;
686} 730}
687 731
688#ifdef CONFIG_RCU_BOOST
689
690static void rcu_preempt_do_callbacks(void)
691{
692 rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
693}
694
695#endif /* #ifdef CONFIG_RCU_BOOST */
696
697/** 732/**
698 * call_rcu() - Queue an RCU callback for invocation after a grace period. 733 * call_rcu() - Queue an RCU callback for invocation after a grace period.
699 * @head: structure to be used for queueing the RCU updates. 734 * @head: structure to be used for queueing the RCU updates.
@@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void)
1140{ 1175{
1141 rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); 1176 rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
1142 rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); 1177 rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
1143 rcu_preempt_do_callbacks(); 1178 rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
1144} 1179}
1145 1180
1146static void rcu_cpu_kthread_setup(unsigned int cpu) 1181static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self,
1607 1642
1608 for_each_online_cpu(cpu) { 1643 for_each_online_cpu(cpu) {
1609 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1644 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1610 cond_resched_rcu_qs(); 1645 cond_resched_tasks_rcu_qs();
1611 } 1646 }
1612 1647
1613 /* Unconditionally decrement: no need to wake ourselves up. */ 1648 /* Unconditionally decrement: no need to wake ourselves up. */
@@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
1780 swake_up_all(sq); 1815 swake_up_all(sq);
1781} 1816}
1782 1817
1783/*
1784 * Set the root rcu_node structure's ->need_future_gp field
1785 * based on the sum of those of all rcu_node structures. This does
1786 * double-count the root rcu_node structure's requests, but this
1787 * is necessary to handle the possibility of a rcu_nocb_kthread()
1788 * having awakened during the time that the rcu_node structures
1789 * were being updated for the end of the previous grace period.
1790 */
1791static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
1792{
1793 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
1794}
1795
1796static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) 1818static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
1797{ 1819{
1798 return &rnp->nocb_gp_wq[rnp->completed & 0x1]; 1820 return &rnp->nocb_gp_wq[rnp->completed & 0x1];
@@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1966 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1988 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1967 TPS("WakeOvf")); 1989 TPS("WakeOvf"));
1968 } else { 1990 } else {
1969 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, 1991 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
1970 TPS("WakeOvfIsDeferred")); 1992 TPS("WakeOvfIsDeferred"));
1971 } 1993 }
1972 rdp->qlen_last_fqs_check = LONG_MAX / 2; 1994 rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2048 struct rcu_node *rnp = rdp->mynode; 2070 struct rcu_node *rnp = rdp->mynode;
2049 2071
2050 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2072 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2051 needwake = rcu_start_future_gp(rnp, rdp, &c); 2073 c = rcu_cbs_completed(rdp->rsp, rnp);
2074 needwake = rcu_start_this_gp(rnp, rdp, c);
2052 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2075 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2053 if (needwake) 2076 if (needwake)
2054 rcu_gp_kthread_wake(rdp->rsp); 2077 rcu_gp_kthread_wake(rdp->rsp);
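
Note on the hunk above: with rcu_start_future_gp() gone, rcu_nocb_wait_gp() first asks rcu_cbs_completed() which grace-period number its callbacks need, registers that exact request with rcu_start_this_gp(), and then waits for ->completed to reach it. A toy model of the "compute the needed GP number, then wait for it" step, using a condition variable in place of the nocb wait queue; cbs_completed() and gp_kthread() are stand-ins, not kernel code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gp_ended = PTHREAD_COND_INITIALIZER;
static unsigned long completed;		/* number of grace periods finished */

/* Stand-in for rcu_cbs_completed(): new callbacks need the GP after next. */
static unsigned long cbs_completed(void)
{
	return completed + 2;
}

/* Stand-in for the grace-period kthread finishing one GP at a time. */
static void *gp_kthread(void *arg)
{
	int i;

	(void)arg;
	for (i = 0; i < 3; i++) {
		pthread_mutex_lock(&lock);
		completed++;
		pthread_cond_broadcast(&gp_ended);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t gp;
	unsigned long c;

	pthread_mutex_lock(&lock);
	c = cbs_completed();		/* GP number these callbacks need */
	pthread_mutex_unlock(&lock);

	pthread_create(&gp, NULL, gp_kthread, NULL);

	pthread_mutex_lock(&lock);	/* wait until that GP has completed */
	while (completed < c)
		pthread_cond_wait(&gp_ended, &lock);
	pthread_mutex_unlock(&lock);
	printf("grace period %lu completed\n", c);

	pthread_join(gp, NULL);
	return 0;
}
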
@@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2057 * Wait for the grace period. Do so interruptibly to avoid messing 2080 * Wait for the grace period. Do so interruptibly to avoid messing
2058 * up the load average. 2081 * up the load average.
2059 */ 2082 */
2060 trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait")); 2083 trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
2061 for (;;) { 2084 for (;;) {
2062 swait_event_interruptible( 2085 swait_event_interruptible(
2063 rnp->nocb_gp_wq[c & 0x1], 2086 rnp->nocb_gp_wq[c & 0x1],
@@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2065 if (likely(d)) 2088 if (likely(d))
2066 break; 2089 break;
2067 WARN_ON(signal_pending(current)); 2090 WARN_ON(signal_pending(current));
2068 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); 2091 trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
2069 } 2092 }
2070 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); 2093 trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
2071 smp_mb(); /* Ensure that CB invocation happens after GP end. */ 2094 smp_mb(); /* Ensure that CB invocation happens after GP end. */
2072} 2095}
2073 2096
@@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg)
2236 cl++; 2259 cl++;
2237 c++; 2260 c++;
2238 local_bh_enable(); 2261 local_bh_enable();
2239 cond_resched_rcu_qs(); 2262 cond_resched_tasks_rcu_qs();
2240 list = next; 2263 list = next;
2241 } 2264 }
2242 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2265 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
@@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2292void __init rcu_init_nohz(void) 2315void __init rcu_init_nohz(void)
2293{ 2316{
2294 int cpu; 2317 int cpu;
2295 bool need_rcu_nocb_mask = true; 2318 bool need_rcu_nocb_mask = false;
2296 struct rcu_state *rsp; 2319 struct rcu_state *rsp;
2297 2320
2298#if defined(CONFIG_NO_HZ_FULL) 2321#if defined(CONFIG_NO_HZ_FULL)
@@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void)
2315#endif /* #if defined(CONFIG_NO_HZ_FULL) */ 2338#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2316 2339
2317 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { 2340 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
2318 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); 2341 pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
2319 cpumask_and(rcu_nocb_mask, cpu_possible_mask, 2342 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2320 rcu_nocb_mask); 2343 rcu_nocb_mask);
2321 } 2344 }
@@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
2495{ 2518{
2496} 2519}
2497 2520
2498static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
2499{
2500}
2501
2502static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) 2521static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
2503{ 2522{
2504 return NULL; 2523 return NULL;
@@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
2587} 2606}
2588 2607
2589/* 2608/*
2590 * Bind the grace-period kthread for the sysidle flavor of RCU to the 2609 * Bind the RCU grace-period kthreads to the housekeeping CPU.
2591 * timekeeping CPU.
2592 */ 2610 */
2593static void rcu_bind_gp_kthread(void) 2611static void rcu_bind_gp_kthread(void)
2594{ 2612{
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 68fa19a5e7bd..4c230a60ece4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode);
226 226
227#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ 227#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
228 228
229#ifdef CONFIG_PREEMPT_RCU
230
231/*
232 * Preemptible RCU implementation for rcu_read_lock().
233 * Just increment ->rcu_read_lock_nesting, shared state will be updated
234 * if we block.
235 */
236void __rcu_read_lock(void)
237{
238 current->rcu_read_lock_nesting++;
239 barrier(); /* critical section after entry code. */
240}
241EXPORT_SYMBOL_GPL(__rcu_read_lock);
242
243/*
244 * Preemptible RCU implementation for rcu_read_unlock().
245 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
246 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
247 * invoke rcu_read_unlock_special() to clean up after a context switch
248 * in an RCU read-side critical section and other special cases.
249 */
250void __rcu_read_unlock(void)
251{
252 struct task_struct *t = current;
253
254 if (t->rcu_read_lock_nesting != 1) {
255 --t->rcu_read_lock_nesting;
256 } else {
257 barrier(); /* critical section before exit code. */
258 t->rcu_read_lock_nesting = INT_MIN;
259 barrier(); /* assign before ->rcu_read_unlock_special load */
260 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
261 rcu_read_unlock_special(t);
262 barrier(); /* ->rcu_read_unlock_special load before assign */
263 t->rcu_read_lock_nesting = 0;
264 }
265#ifdef CONFIG_PROVE_LOCKING
266 {
267 int rrln = READ_ONCE(t->rcu_read_lock_nesting);
268
269 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
270 }
271#endif /* #ifdef CONFIG_PROVE_LOCKING */
272}
273EXPORT_SYMBOL_GPL(__rcu_read_unlock);
274
275#endif /* #ifdef CONFIG_PREEMPT_RCU */
276
277#ifdef CONFIG_DEBUG_LOCK_ALLOC 229#ifdef CONFIG_DEBUG_LOCK_ALLOC
278static struct lock_class_key rcu_lock_key; 230static struct lock_class_key rcu_lock_key;
279struct lockdep_map rcu_lock_map = 231struct lockdep_map rcu_lock_map =
@@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);
624 * grace period has elapsed, in other words after all currently 576 * grace period has elapsed, in other words after all currently
625 * executing rcu-tasks read-side critical sections have elapsed. These 577 * executing rcu-tasks read-side critical sections have elapsed. These
626 * read-side critical sections are delimited by calls to schedule(), 578 * read-side critical sections are delimited by calls to schedule(),
627 * cond_resched_rcu_qs(), idle execution, userspace execution, calls 579 * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
628 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched(). 580 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
629 * 581 *
630 * This is a very specialized primitive, intended only for a few uses in 582 * This is a very specialized primitive, intended only for a few uses in
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 211890edf37e..e27034bd954e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5025,20 +5025,6 @@ int __cond_resched_lock(spinlock_t *lock)
5025} 5025}
5026EXPORT_SYMBOL(__cond_resched_lock); 5026EXPORT_SYMBOL(__cond_resched_lock);
5027 5027
5028int __sched __cond_resched_softirq(void)
5029{
5030 BUG_ON(!in_softirq());
5031
5032 if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
5033 local_bh_enable();
5034 preempt_schedule_common();
5035 local_bh_disable();
5036 return 1;
5037 }
5038 return 0;
5039}
5040EXPORT_SYMBOL(__cond_resched_softirq);
5041
5042/** 5028/**
5043 * yield - yield the current processor to other threads. 5029 * yield - yield the current processor to other threads.
5044 * 5030 *
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 177de3640c78..03981f1c39ea 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt)
145} 145}
146 146
147/* 147/*
148 * Special-case - softirqs can safely be enabled in 148 * Special-case - softirqs can safely be enabled by __do_softirq(),
149 * cond_resched_softirq(), or by __do_softirq(),
150 * without processing still-pending softirqs: 149 * without processing still-pending softirqs:
151 */ 150 */
152void _local_bh_enable(void) 151void _local_bh_enable(void)
diff --git a/kernel/torture.c b/kernel/torture.c
index 37b94012a3f8..3de1efbecd6a 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -574,7 +574,7 @@ void stutter_wait(const char *title)
574{ 574{
575 int spt; 575 int spt;
576 576
577 cond_resched_rcu_qs(); 577 cond_resched_tasks_rcu_qs();
578 spt = READ_ONCE(stutter_pause_test); 578 spt = READ_ONCE(stutter_pause_test);
579 for (; spt; spt = READ_ONCE(stutter_pause_test)) { 579 for (; spt; spt = READ_ONCE(stutter_pause_test)) {
580 if (spt == 1) { 580 if (spt == 1) {
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 22fee766081b..80e0b2aca703 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg)
159 * wants to run, schedule in, but if the CPU is idle, 159 * wants to run, schedule in, but if the CPU is idle,
160 * we'll keep burning cycles. 160 * we'll keep burning cycles.
161 * 161 *
162 * Note the _rcu_qs() version of cond_resched() will 162 * Note the tasks_rcu_qs() version of cond_resched() will
163 * notify synchronize_rcu_tasks() that this thread has 163 * notify synchronize_rcu_tasks() that this thread has
164 * passed a quiescent state for rcu_tasks. Otherwise 164 * passed a quiescent state for rcu_tasks. Otherwise
165 * this thread will never voluntarily schedule which would 165 * this thread will never voluntarily schedule which would
166 * block synchronize_rcu_tasks() indefinitely. 166 * block synchronize_rcu_tasks() indefinitely.
167 */ 167 */
168 cond_resched(); 168 cond_resched_tasks_rcu_qs();
169 } 169 }
170 170
171 return 0; 171 return 0;
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000000..98f650c9bf54
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
1#!/bin/sh
2#
3# Invoke a text editor on all console.log files for all runs with diagnostics,
4# that is, on all such files having a console.log.diags counterpart.
5# Note that both console.log.diags and console.log are passed to the
6# editor (currently defaulting to "vi"), allowing the user to get an
7# idea of what to search for in the console.log file.
8#
9# Usage: kvm-find-errors.sh directory
10#
11# The "directory" above should end with the date/time directory, for example,
12# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
13
14rundir="${1}"
15if test -z "$rundir" -o ! -d "$rundir"
16then
17 echo Usage: $0 directory
18fi
19editor=${EDITOR-vi}
20
21# Find builds with errors
22files=
23for i in ${rundir}/*/Make.out
24do
25 if egrep -q "error:|warning:" < $i
26 then
27 egrep "error:|warning:" < $i > $i.diags
28 files="$files $i.diags $i"
29 fi
30done
31if test -n "$files"
32then
33 $editor $files
34else
35 echo No build errors.
36fi
37if grep -q -e "--buildonly" < ${rundir}/log
38then
39 echo Build-only run, no console logs to check.
40fi
41
42# Find console logs with errors
43files=
44for i in ${rundir}/*/console.log
45do
46 if test -r $i.diags
47 then
48 files="$files $i.diags $i"
49 fi
50done
51if test -n "$files"
52then
53 $editor $files
54else
55 echo No errors in console logs.
56fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6d0cba..477ecb1293ab 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@ fi
34 34
35configfile=`echo $i | sed -e 's/^.*\///'` 35configfile=`echo $i | sed -e 's/^.*\///'`
36ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` 36ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
37stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
38 tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
39 awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
40 tr -d '\012\015'`"
37if test -z "$ngps" 41if test -z "$ngps"
38then 42then
39 echo "$configfile -------" 43 echo "$configfile ------- " $stopstate
40else 44else
41 title="$configfile ------- $ngps grace periods" 45 title="$configfile ------- $ngps GPs"
42 dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` 46 dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
43 if test -z "$dur" 47 if test -z "$dur"
44 then 48 then
@@ -46,9 +50,9 @@ else
46 else 50 else
47 ngpsps=`awk -v ngps=$ngps -v dur=$dur ' 51 ngpsps=`awk -v ngps=$ngps -v dur=$dur '
48 BEGIN { print ngps / dur }' < /dev/null` 52 BEGIN { print ngps / dur }' < /dev/null`
49 title="$title ($ngpsps per second)" 53 title="$title ($ngpsps/s)"
50 fi 54 fi
51 echo $title 55 echo $title $stopstate
52 nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` 56 nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
53 if test -z "$nclosecalls" 57 if test -z "$nclosecalls"
54 then 58 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f369dd..c27e97824163 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,10 +48,6 @@ do
48 cat $i/Make.oldconfig.err 48 cat $i/Make.oldconfig.err
49 fi 49 fi
50 parse-build.sh $i/Make.out $configfile 50 parse-build.sh $i/Make.out $configfile
51 if test "$TORTURE_SUITE" != rcuperf
52 then
53 parse-torture.sh $i/console.log $configfile
54 fi
55 parse-console.sh $i/console.log $configfile 51 parse-console.sh $i/console.log $configfile
56 if test -r $i/Warnings 52 if test -r $i/Warnings
57 then 53 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0d7c17..c5b0f94341d9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -267,5 +267,4 @@ then
267 echo Unknown PID, cannot kill qemu command 267 echo Unknown PID, cannot kill qemu command
268fi 268fi
269 269
270parse-torture.sh $resdir/console.log $title
271parse-console.sh $resdir/console.log $title 270parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d50ae0e..17293436f551 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,146 @@
24# 24#
25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
26 26
27T=${TMPDIR-/tmp}/parse-console.sh.$$
27file="$1" 28file="$1"
28title="$2" 29title="$2"
29 30
31trap 'rm -f $T.seq $T.diags' 0
32
30. functions.sh 33. functions.sh
31 34
35# Check for presence and readability of console output file
36if test -f "$file" -a -r "$file"
37then
38 :
39else
40 echo $title unreadable console output file: $file
41 exit 1
42fi
32if grep -Pq '\x00' < $file 43if grep -Pq '\x00' < $file
33then 44then
34 print_warning Console output contains nul bytes, old qemu still running? 45 print_warning Console output contains nul bytes, old qemu still running?
35fi 46fi
36egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags 47cat /dev/null > $file.diags
37if test -s $1.diags 48
49# Check for proper termination, except that rcuperf runs don't indicate this.
50if test "$TORTURE_SUITE" != rcuperf
38then 51then
39 print_warning Assertion failure in $file $title 52 # check for abject failure
40 # cat $1.diags 53
54 if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
55 then
56 nerrs=`grep --binary-files=text '!!!' $file |
57 tail -1 |
58 awk '
59 {
60 for (i=NF-8;i<=NF;i++)
61 sum+=$i;
62 }
63 END { print sum }'`
64 print_bug $title FAILURE, $nerrs instances
65 exit
66 fi
67
68 grep --binary-files=text 'torture:.*ver:' $file |
69 egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
70 sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
71 awk '
72 BEGIN {
73 ver = 0;
74 badseq = 0;
75 }
76
77 {
78 if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
79 badseqno1 = ver;
80 badseqno2 = $5;
81 badseqnr = NR;
82 badseq = 1;
83 }
84 ver = $5
85 }
86
87 END {
88 if (badseq) {
89 if (badseqno1 == badseqno2 && badseqno2 == ver)
90 print "GP HANG at " ver " torture stat " badseqnr;
91 else
92 print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
93 }
94 }' > $T.seq
95
96 if grep -q SUCCESS $file
97 then
98 if test -s $T.seq
99 then
100 print_warning $title `cat $T.seq`
101 echo " " $file
102 exit 2
103 fi
104 else
105 if grep -q "_HOTPLUG:" $file
106 then
107 print_warning HOTPLUG FAILURES $title `cat $T.seq`
108 echo " " $file
109 exit 3
110 fi
111 echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
112 if test -s $T.seq
113 then
114 print_warning $title `cat $T.seq`
115 fi
116 exit 2
117 fi
118fi | tee -a $file.diags
119
120egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
121grep -v 'ODEBUG: ' |
122grep -v 'Warning: unable to open an initial console' > $T.diags
123if test -s $T.diags
124then
125 print_warning "Assertion failure in $file $title"
126 # cat $T.diags
41 summary="" 127 summary=""
42 n_badness=`grep -c Badness $1` 128 n_badness=`grep -c Badness $file`
43 if test "$n_badness" -ne 0 129 if test "$n_badness" -ne 0
44 then 130 then
45 summary="$summary Badness: $n_badness" 131 summary="$summary Badness: $n_badness"
46 fi 132 fi
47 n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` 133 n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
48 if test "$n_warn" -ne 0 134 if test "$n_warn" -ne 0
49 then 135 then
50 summary="$summary Warnings: $n_warn" 136 summary="$summary Warnings: $n_warn"
51 fi 137 fi
52 n_bugs=`egrep -c 'BUG|Oops:' $1` 138 n_bugs=`egrep -c 'BUG|Oops:' $file`
53 if test "$n_bugs" -ne 0 139 if test "$n_bugs" -ne 0
54 then 140 then
55 summary="$summary Bugs: $n_bugs" 141 summary="$summary Bugs: $n_bugs"
56 fi 142 fi
57 n_calltrace=`grep -c 'Call Trace:' $1` 143 n_calltrace=`grep -c 'Call Trace:' $file`
58 if test "$n_calltrace" -ne 0 144 if test "$n_calltrace" -ne 0
59 then 145 then
60 summary="$summary Call Traces: $n_calltrace" 146 summary="$summary Call Traces: $n_calltrace"
61 fi 147 fi
62 n_lockdep=`grep -c =========== $1` 148 n_lockdep=`grep -c =========== $file`
63 if test "$n_badness" -ne 0 149 if test "$n_badness" -ne 0
64 then 150 then
65 summary="$summary lockdep: $n_badness" 151 summary="$summary lockdep: $n_badness"
66 fi 152 fi
67 n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` 153 n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
68 if test "$n_stalls" -ne 0 154 if test "$n_stalls" -ne 0
69 then 155 then
70 summary="$summary Stalls: $n_stalls" 156 summary="$summary Stalls: $n_stalls"
71 fi 157 fi
72 n_starves=`grep -c 'rcu_.*kthread starved for' $1` 158 n_starves=`grep -c 'rcu_.*kthread starved for' $file`
73 if test "$n_starves" -ne 0 159 if test "$n_starves" -ne 0
74 then 160 then
75 summary="$summary Starves: $n_starves" 161 summary="$summary Starves: $n_starves"
76 fi 162 fi
77 print_warning Summary: $summary 163 print_warning Summary: $summary
78else 164 cat $T.diags >> $file.diags
79 rm $1.diags 165fi
166if ! test -s $file.diags
167then
168 rm -f $file.diags
80fi 169fi
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50cfeb4..000000000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
1#!/bin/bash
2#
3# Check the console output from a torture run for goodness.
4# The "file" is a pathname on the local system, and "title" is
5# a text string for error-message purposes.
6#
7# The file must contain torture output, but can be interspersed
8# with other dmesg text, as in console-log output.
9#
10# Usage: parse-torture.sh file title
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, you can access it online at
24# http://www.gnu.org/licenses/gpl-2.0.html.
25#
26# Copyright (C) IBM Corporation, 2011
27#
28# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
29
30T=${TMPDIR-/tmp}/parse-torture.sh.$$
31file="$1"
32title="$2"
33
34trap 'rm -f $T.seq' 0
35
36. functions.sh
37
38# check for presence of torture output file.
39
40if test -f "$file" -a -r "$file"
41then
42 :
43else
44 echo $title unreadable torture output file: $file
45 exit 1
46fi
47
48# check for abject failure
49
50if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
51then
52 nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
53 print_bug $title FAILURE, $nerrs instances
54 echo " " $url
55 exit
56fi
57
58grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
59awk '
60BEGIN {
61 ver = 0;
62 badseq = 0;
63 }
64
65 {
66 if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
67 badseqno1 = ver;
68 badseqno2 = $5;
69 badseqnr = NR;
70 badseq = 1;
71 }
72 ver = $5
73 }
74
75END {
76 if (badseq) {
77 if (badseqno1 == badseqno2 && badseqno2 == ver)
78 print "GP HANG at " ver " torture stat " badseqnr;
79 else
80 print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
81 }
82 }' > $T.seq
83
84if grep -q SUCCESS $file
85then
86 if test -s $T.seq
87 then
88 print_warning $title $title `cat $T.seq`
89 echo " " $file
90 exit 2
91 fi
92else
93 if grep -q "_HOTPLUG:" $file
94 then
95 print_warning HOTPLUG FAILURES $title `cat $T.seq`
96 echo " " $file
97 exit 3
98 fi
99 echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
100 if test -s $T.seq
101 then
102 print_warning $title `cat $T.seq`
103 fi
104 exit 2
105fi