author     Linus Torvalds <torvalds@linux-foundation.org>  2018-06-04 18:54:04 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-06-04 18:54:04 -0400
commit     4057adafb395204af4ff93f3669ecb49eb45b3cf (patch)
tree       d6b0abf50ec5cd658fe958f90941c0192486549c
parent     137f5ae4dae85011b13e3a7049414c4060ad94c0 (diff)
parent     52f2b34f46223ca2789320fa10c13f6664c1b628 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:

 - updates to the handling of expedited grace periods

 - updates to reduce lock contention in the rcu_node combining tree

   [ These are in preparation for the consolidation of RCU-bh,
     RCU-preempt, and RCU-sched into a single flavor, which was requested
     by Linus in response to a security flaw whose root cause included
     confusion between the multiple flavors of RCU ]

 - torture-test updates that save their users some time and effort

 - miscellaneous fixes

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  rcu/x86: Provide early rcu_cpu_starting() callback
  torture: Make kvm-find-errors.sh find build warnings
  rcutorture: Abbreviate kvm.sh summary lines
  rcutorture: Print end-of-test state in kvm.sh summary
  rcutorture: Print end-of-test state
  torture: Fold parse-torture.sh into parse-console.sh
  torture: Add a script to edit output from failed runs
  rcu: Update list of rcu_future_grace_period() trace events
  rcu: Drop early GP request check from rcu_gp_kthread()
  rcu: Simplify and inline cpu_needs_another_gp()
  rcu: The rcu_gp_cleanup() function does not need cpu_needs_another_gp()
  rcu: Make rcu_start_this_gp() check for out-of-range requests
  rcu: Add funnel locking to rcu_start_this_gp()
  rcu: Make rcu_start_future_gp() caller select grace period
  rcu: Inline rcu_start_gp_advanced() into rcu_start_future_gp()
  rcu: Clear request other than RCU_GP_FLAG_INIT at GP end
  rcu: Cleanup, don't put ->completed into an int
  rcu: Switch __rcu_process_callbacks() to rcu_accelerate_cbs()
  rcu: Avoid __call_rcu_core() root rcu_node ->lock acquisition
  rcu: Make rcu_migrate_callbacks wake GP kthread when needed
  ...
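For readers unfamiliar with the "funnel locking" introduced by "rcu: Add funnel locking to rcu_start_this_gp()" above: a requester climbs the rcu_node combining tree from its leaf toward the root, taking one node's lock at a time, and stops climbing as soon as it finds a node where the needed grace period has already been recorded, so only the first requester for a given grace period ever contends on the root lock. The following is a standalone userspace sketch of that idea only, not the kernel's code: the pthread mutexes, the invented three-node tree, and the simplified start_this_gp() (which omits the in-progress-grace-period checks of the real rcu_start_this_gp()) are all assumptions made for illustration.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy combining tree: nodes[0] is the root, nodes[1] and nodes[2] are leaves. */
struct node {
	pthread_mutex_t lock;
	struct node *parent;
	unsigned long requested;	/* Highest grace-period number requested here. */
};

static struct node nodes[3];

static void init_tree(void)
{
	int i;

	for (i = 0; i < 3; i++) {
		pthread_mutex_init(&nodes[i].lock, NULL);
		nodes[i].requested = 0;
		nodes[i].parent = i ? &nodes[0] : NULL;
	}
}

/*
 * Funnel locking, simplified: the caller holds the leaf's lock.  Climb
 * toward the root, locking one ancestor at a time, recording the request
 * at each level, and bail out early if some earlier caller already
 * recorded a request covering grace period "c".  Returns true if the
 * caller must actually start a new grace period.
 */
static bool start_this_gp(struct node *leaf, unsigned long c)
{
	struct node *np;

	for (np = leaf; np; np = np->parent) {
		if (np != leaf)
			pthread_mutex_lock(&np->lock);
		if (np->requested >= c) {
			/* Someone beat us to it; no need to climb further. */
			if (np != leaf)
				pthread_mutex_unlock(&np->lock);
			return false;
		}
		np->requested = c;
		if (np != leaf)
			pthread_mutex_unlock(&np->lock);
	}
	return true;	/* Reached and updated the root. */
}

int main(void)
{
	init_tree();

	pthread_mutex_lock(&nodes[1].lock);
	printf("first request for GP 1: start GP? %d\n", start_this_gp(&nodes[1], 1));
	pthread_mutex_unlock(&nodes[1].lock);

	pthread_mutex_lock(&nodes[2].lock);
	printf("second request for GP 1: start GP? %d\n", start_this_gp(&nodes[2], 1));
	pthread_mutex_unlock(&nodes[2].lock);
	return 0;
}

Unlike this sketch, the real rcu_start_this_gp() keeps the root rcu_node locked while deciding whether to set RCU_GP_FLAG_INIT, and leaves waking the grace-period kthread to its caller, as the kernel/rcu/tree.c hunks below show.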
-rw-r--r--  Documentation/RCU/whatisRCU.txt | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 4
-rw-r--r--  drivers/nvme/host/core.c | 2
-rw-r--r--  include/linux/rcupdate.h | 5
-rw-r--r--  include/linux/rcutiny.h | 1
-rw-r--r--  include/linux/rcutree.h | 2
-rw-r--r--  include/linux/sched.h | 8
-rw-r--r--  include/linux/srcu.h | 36
-rw-r--r--  include/trace/events/rcu.h | 13
-rw-r--r--  kernel/rcu/rcu.h | 12
-rw-r--r--  kernel/rcu/rcu_segcblist.c | 18
-rw-r--r--  kernel/rcu/rcu_segcblist.h | 2
-rw-r--r--  kernel/rcu/rcuperf.c | 2
-rw-r--r--  kernel/rcu/rcutorture.c | 15
-rw-r--r--  kernel/rcu/srcutiny.c | 9
-rw-r--r--  kernel/rcu/srcutree.c | 30
-rw-r--r--  kernel/rcu/tree.c | 364
-rw-r--r--  kernel/rcu/tree.h | 36
-rw-r--r--  kernel/rcu/tree_exp.h | 235
-rw-r--r--  kernel/rcu/tree_plugin.h | 98
-rw-r--r--  kernel/rcu/update.c | 50
-rw-r--r--  kernel/sched/core.c | 14
-rw-r--r--  kernel/softirq.c | 3
-rw-r--r--  kernel/torture.c | 2
-rw-r--r--  kernel/trace/trace_benchmark.c | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh | 56
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 12
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 1
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-console.sh | 115
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-torture.sh | 105
31 files changed, 631 insertions(+), 629 deletions(-)
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index a27fbfb0efb8..65eb856526b7 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,5 @@
1What is RCU? -- "Read, Copy, Update"
2
1Please note that the "What is RCU?" LWN series is an excellent place 3Please note that the "What is RCU?" LWN series is an excellent place
2to start learning about RCU: 4to start learning about RCU:
3 5
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 7468de429087..3ea0047beb40 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -46,6 +46,7 @@
46#include <linux/pci.h> 46#include <linux/pci.h>
47#include <linux/smp.h> 47#include <linux/smp.h>
48#include <linux/syscore_ops.h> 48#include <linux/syscore_ops.h>
49#include <linux/rcupdate.h>
49 50
50#include <asm/cpufeature.h> 51#include <asm/cpufeature.h>
51#include <asm/e820/api.h> 52#include <asm/e820/api.h>
@@ -793,6 +794,9 @@ void mtrr_ap_init(void)
793 794
794 if (!use_intel() || mtrr_aps_delayed_init) 795 if (!use_intel() || mtrr_aps_delayed_init)
795 return; 796 return;
797
798 rcu_cpu_starting(smp_processor_id());
799
796 /* 800 /*
797 * Ideally we should hold mtrr_mutex here to avoid mtrr entries 801 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
798 * changed, but this routine will be called in cpu boot time, 802 * changed, but this routine will be called in cpu boot time,
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 04a20da76786..c8b30067b6ae 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -357,7 +357,7 @@ static void nvme_free_ns_head(struct kref *ref)
357 nvme_mpath_remove_disk(head); 357 nvme_mpath_remove_disk(head);
358 ida_simple_remove(&head->subsys->ns_ida, head->instance); 358 ida_simple_remove(&head->subsys->ns_ida, head->instance);
359 list_del_init(&head->entry); 359 list_del_init(&head->entry);
360 cleanup_srcu_struct(&head->srcu); 360 cleanup_srcu_struct_quiesced(&head->srcu);
361 nvme_put_subsystem(head->subsys); 361 nvme_put_subsystem(head->subsys);
362 kfree(head); 362 kfree(head);
363} 363}
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 36360d07f25b..e679b175b411 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -108,7 +108,6 @@ void rcu_sched_qs(void);
108void rcu_bh_qs(void); 108void rcu_bh_qs(void);
109void rcu_check_callbacks(int user); 109void rcu_check_callbacks(int user);
110void rcu_report_dead(unsigned int cpu); 110void rcu_report_dead(unsigned int cpu);
111void rcu_cpu_starting(unsigned int cpu);
112void rcutree_migrate_callbacks(int cpu); 111void rcutree_migrate_callbacks(int cpu);
113 112
114#ifdef CONFIG_RCU_STALL_COMMON 113#ifdef CONFIG_RCU_STALL_COMMON
@@ -188,13 +187,13 @@ static inline void exit_tasks_rcu_finish(void) { }
188#endif /* #else #ifdef CONFIG_TASKS_RCU */ 187#endif /* #else #ifdef CONFIG_TASKS_RCU */
189 188
190/** 189/**
191 * cond_resched_rcu_qs - Report potential quiescent states to RCU 190 * cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
192 * 191 *
193 * This macro resembles cond_resched(), except that it is defined to 192 * This macro resembles cond_resched(), except that it is defined to
194 * report potential quiescent states to RCU-tasks even if the cond_resched() 193 * report potential quiescent states to RCU-tasks even if the cond_resched()
195 * machinery were to be shut off, as some advocate for PREEMPT kernels. 194 * machinery were to be shut off, as some advocate for PREEMPT kernels.
196 */ 195 */
197#define cond_resched_rcu_qs() \ 196#define cond_resched_tasks_rcu_qs() \
198do { \ 197do { \
199 if (!cond_resched()) \ 198 if (!cond_resched()) \
200 rcu_note_voluntary_context_switch_lite(current); \ 199 rcu_note_voluntary_context_switch_lite(current); \
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ce9beec35e34..7b3c82e8a625 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -132,5 +132,6 @@ static inline void rcu_all_qs(void) { barrier(); }
132#define rcutree_offline_cpu NULL 132#define rcutree_offline_cpu NULL
133#define rcutree_dead_cpu NULL 133#define rcutree_dead_cpu NULL
134#define rcutree_dying_cpu NULL 134#define rcutree_dying_cpu NULL
135static inline void rcu_cpu_starting(unsigned int cpu) { }
135 136
136#endif /* __LINUX_RCUTINY_H */ 137#endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index fd996cdf1833..914655848ef6 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)
74void rcu_barrier(void); 74void rcu_barrier(void);
75void rcu_barrier_bh(void); 75void rcu_barrier_bh(void);
76void rcu_barrier_sched(void); 76void rcu_barrier_sched(void);
77bool rcu_eqs_special_set(int cpu);
77unsigned long get_state_synchronize_rcu(void); 78unsigned long get_state_synchronize_rcu(void);
78void cond_synchronize_rcu(unsigned long oldstate); 79void cond_synchronize_rcu(unsigned long oldstate);
79unsigned long get_state_synchronize_sched(void); 80unsigned long get_state_synchronize_sched(void);
@@ -100,5 +101,6 @@ int rcutree_online_cpu(unsigned int cpu);
100int rcutree_offline_cpu(unsigned int cpu); 101int rcutree_offline_cpu(unsigned int cpu);
101int rcutree_dead_cpu(unsigned int cpu); 102int rcutree_dead_cpu(unsigned int cpu);
102int rcutree_dying_cpu(unsigned int cpu); 103int rcutree_dying_cpu(unsigned int cpu);
104void rcu_cpu_starting(unsigned int cpu);
103 105
104#endif /* __LINUX_RCUTREE_H */ 106#endif /* __LINUX_RCUTREE_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca3f3eae8980..5a0c10b45273 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1661,7 +1661,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
1661 * explicit rescheduling in places that are safe. The return 1661 * explicit rescheduling in places that are safe. The return
1662 * value indicates whether a reschedule was done in fact. 1662 * value indicates whether a reschedule was done in fact.
1663 * cond_resched_lock() will drop the spinlock before scheduling, 1663 * cond_resched_lock() will drop the spinlock before scheduling,
1664 * cond_resched_softirq() will enable bhs before scheduling.
1665 */ 1664 */
1666#ifndef CONFIG_PREEMPT 1665#ifndef CONFIG_PREEMPT
1667extern int _cond_resched(void); 1666extern int _cond_resched(void);
@@ -1681,13 +1680,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
1681 __cond_resched_lock(lock); \ 1680 __cond_resched_lock(lock); \
1682}) 1681})
1683 1682
1684extern int __cond_resched_softirq(void);
1685
1686#define cond_resched_softirq() ({ \
1687 ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
1688 __cond_resched_softirq(); \
1689})
1690
1691static inline void cond_resched_rcu(void) 1683static inline void cond_resched_rcu(void)
1692{ 1684{
1693#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) 1685#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 33c1c698df09..91494d7e8e41 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -69,11 +69,45 @@ struct srcu_struct { };
69 69
70void call_srcu(struct srcu_struct *sp, struct rcu_head *head, 70void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
71 void (*func)(struct rcu_head *head)); 71 void (*func)(struct rcu_head *head));
72void cleanup_srcu_struct(struct srcu_struct *sp); 72void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
73int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); 73int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
74void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); 74void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
75void synchronize_srcu(struct srcu_struct *sp); 75void synchronize_srcu(struct srcu_struct *sp);
76 76
77/**
78 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
79 * @sp: structure to clean up.
80 *
81 * Must invoke this after you are finished using a given srcu_struct that
82 * was initialized via init_srcu_struct(), else you leak memory.
83 */
84static inline void cleanup_srcu_struct(struct srcu_struct *sp)
85{
86 _cleanup_srcu_struct(sp, false);
87}
88
89/**
90 * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
91 * @sp: structure to clean up.
92 *
93 * Must invoke this after you are finished using a given srcu_struct that
94 * was initialized via init_srcu_struct(), else you leak memory. Also,
95 * all grace-period processing must have completed.
96 *
97 * "Completed" means that the last synchronize_srcu() and
98 * synchronize_srcu_expedited() calls must have returned before the call
99 * to cleanup_srcu_struct_quiesced(). It also means that the callback
100 * from the last call_srcu() must have been invoked before the call to
101 * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
102 * with this last. Violating these rules will get you a WARN_ON() splat
103 * (with high probability, anyway), and will also cause the srcu_struct
104 * to be leaked.
105 */
106static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
107{
108 _cleanup_srcu_struct(sp, true);
109}
110
77#ifdef CONFIG_DEBUG_LOCK_ALLOC 111#ifdef CONFIG_DEBUG_LOCK_ALLOC
78 112
79/** 113/**
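To make the ordering contract in the new cleanup_srcu_struct_quiesced() kernel-doc concrete, here is a minimal, hypothetical module-style sketch; the demo_srcu, demo_obj, and function names are invented for illustration. It shows the callback queued by call_srcu() being forced to finish via srcu_barrier() before the quiesced cleanup runs. Whether the quiesced variant is actually safe for a given user still depends on all grace-period processing having completed, exactly as the comment above warns.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/srcu.h>

static struct srcu_struct demo_srcu;

struct demo_obj {
	struct rcu_head rh;
	int payload;
};

static void demo_free_cb(struct rcu_head *rh)
{
	kfree(container_of(rh, struct demo_obj, rh));
}

static int __init demo_init(void)
{
	struct demo_obj *p;
	int ret;

	ret = init_srcu_struct(&demo_srcu);
	if (ret)
		return ret;

	p = kmalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		/* Nothing queued yet, so the ordinary cleanup suffices here. */
		cleanup_srcu_struct(&demo_srcu);
		return -ENOMEM;
	}
	p->payload = 42;
	call_srcu(&demo_srcu, &p->rh, demo_free_cb);	/* Queue an SRCU callback. */
	return 0;
}

static void __exit demo_exit(void)
{
	/*
	 * srcu_barrier() waits for the callback queued in demo_init() to be
	 * invoked, satisfying the "last call_srcu() callback has run"
	 * requirement before the quiesced cleanup is used.
	 */
	srcu_barrier(&demo_srcu);
	cleanup_srcu_struct_quiesced(&demo_srcu);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");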
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index d8c33298c153..5936aac357ab 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,
84); 84);
85 85
86/* 86/*
87 * Tracepoint for future grace-period events, including those for no-callbacks 87 * Tracepoint for future grace-period events. The caller should pull
88 * CPUs. The caller should pull the data from the rcu_node structure, 88 * the data from the rcu_node structure, other than rcuname, which comes
89 * other than rcuname, which comes from the rcu_state structure, and event, 89 * from the rcu_state structure, and event, which is one of the following:
90 * which is one of the following:
91 * 90 *
92 * "Startleaf": Request a nocb grace period based on leaf-node data. 91 * "Startleaf": Request a grace period based on leaf-node data.
92 * "Prestarted": Someone beat us to the request
93 * "Startedleaf": Leaf-node start proved sufficient. 93 * "Startedleaf": Leaf-node start proved sufficient.
94 * "Startedleafroot": Leaf-node start proved sufficient after checking root. 94 * "Startedleafroot": Leaf-node start proved sufficient after checking root.
95 * "Startedroot": Requested a nocb grace period based on root-node data. 95 * "Startedroot": Requested a nocb grace period based on root-node data.
96 * "NoGPkthread": The RCU grace-period kthread has not yet started.
96 * "StartWait": Start waiting for the requested grace period. 97 * "StartWait": Start waiting for the requested grace period.
97 * "ResumeWait": Resume waiting after signal. 98 * "ResumeWait": Resume waiting after signal.
98 * "EndWait": Complete wait. 99 * "EndWait": Complete wait.
99 * "Cleanup": Clean up rcu_node structure after previous GP. 100 * "Cleanup": Clean up rcu_node structure after previous GP.
100 * "CleanupMore": Clean up, and another no-CB GP is needed. 101 * "CleanupMore": Clean up, and another GP is needed.
101 */ 102 */
102TRACE_EVENT(rcu_future_grace_period, 103TRACE_EVENT(rcu_future_grace_period,
103 104
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 7a693e31184a..40cea6735c2d 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
270 } 270 }
271} 271}
272 272
273/* Returns first leaf rcu_node of the specified RCU flavor. */
274#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
275
276/* Is this rcu_node a leaf? */
277#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
278
273/* 279/*
274 * Do a full breadth-first scan of the rcu_node structures for the 280 * Do a full breadth-first scan of the rcu_node structures for the
275 * specified rcu_state structure. 281 * specified rcu_state structure.
@@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
284 * rcu_node tree with but one rcu_node structure, this loop is a no-op. 290 * rcu_node tree with but one rcu_node structure, this loop is a no-op.
285 */ 291 */
286#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 292#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
287 for ((rnp) = &(rsp)->node[0]; \ 293 for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rnp); (rnp)++)
288 (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
289 294
290/* 295/*
291 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state 296 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
@@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
294 * It is still a leaf node, even if it is also the root node. 299 * It is still a leaf node, even if it is also the root node.
295 */ 300 */
296#define rcu_for_each_leaf_node(rsp, rnp) \ 301#define rcu_for_each_leaf_node(rsp, rnp) \
297 for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ 302 for ((rnp) = rcu_first_leaf_node(rsp); \
298 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) 303 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
299 304
300/* 305/*
@@ -486,6 +491,7 @@ void rcu_force_quiescent_state(void);
486void rcu_bh_force_quiescent_state(void); 491void rcu_bh_force_quiescent_state(void);
487void rcu_sched_force_quiescent_state(void); 492void rcu_sched_force_quiescent_state(void);
488extern struct workqueue_struct *rcu_gp_wq; 493extern struct workqueue_struct *rcu_gp_wq;
494extern struct workqueue_struct *rcu_par_gp_wq;
489#endif /* #else #ifdef CONFIG_TINY_RCU */ 495#endif /* #else #ifdef CONFIG_TINY_RCU */
490 496
491#ifdef CONFIG_RCU_NOCB_CPU 497#ifdef CONFIG_RCU_NOCB_CPU
diff --git a/kernel/rcu/rcu_segcblist.c b/kernel/rcu/rcu_segcblist.c
index 88cba7c2956c..5aff271adf1e 100644
--- a/kernel/rcu/rcu_segcblist.c
+++ b/kernel/rcu/rcu_segcblist.c
@@ -404,24 +404,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
404} 404}
405 405
406/* 406/*
407 * Scan the specified rcu_segcblist structure for callbacks that need
408 * a grace period later than the one specified by "seq". We don't look
409 * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
410 * have a grace-period sequence number.
411 */
412bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
413 unsigned long seq)
414{
415 int i;
416
417 for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
418 if (rsclp->tails[i - 1] != rsclp->tails[i] &&
419 ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
420 return true;
421 return false;
422}
423
424/*
425 * Merge the source rcu_segcblist structure into the destination 407 * Merge the source rcu_segcblist structure into the destination
426 * rcu_segcblist structure, then initialize the source. Any pending 408 * rcu_segcblist structure, then initialize the source. Any pending
427 * callbacks from the source get to start over. It is best to 409 * callbacks from the source get to start over. It is best to
diff --git a/kernel/rcu/rcu_segcblist.h b/kernel/rcu/rcu_segcblist.h
index 581c12b63544..948470cef385 100644
--- a/kernel/rcu/rcu_segcblist.h
+++ b/kernel/rcu/rcu_segcblist.h
@@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
134 struct rcu_cblist *rclp); 134 struct rcu_cblist *rclp);
135void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq); 135void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
136bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq); 136bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
137bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
138 unsigned long seq);
139void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp, 137void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
140 struct rcu_segcblist *src_rsclp); 138 struct rcu_segcblist *src_rsclp);
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 777e7a6a0292..e232846516b3 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)
369 */ 369 */
370static void rcu_perf_wait_shutdown(void) 370static void rcu_perf_wait_shutdown(void)
371{ 371{
372 cond_resched_rcu_qs(); 372 cond_resched_tasks_rcu_qs();
373 if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters) 373 if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
374 return; 374 return;
375 while (!torture_must_stop()) 375 while (!torture_must_stop())
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 680c96d8c00f..e628fcfd1bde 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -593,7 +593,12 @@ static void srcu_torture_init(void)
593 593
594static void srcu_torture_cleanup(void) 594static void srcu_torture_cleanup(void)
595{ 595{
596 cleanup_srcu_struct(&srcu_ctld); 596 static DEFINE_TORTURE_RANDOM(rand);
597
598 if (torture_random(&rand) & 0x800)
599 cleanup_srcu_struct(&srcu_ctld);
600 else
601 cleanup_srcu_struct_quiesced(&srcu_ctld);
597 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ 602 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
598} 603}
599 604
@@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;
1609static void 1614static void
1610rcu_torture_cleanup(void) 1615rcu_torture_cleanup(void)
1611{ 1616{
1617 int flags = 0;
1618 unsigned long gpnum = 0;
1619 unsigned long completed = 0;
1612 int i; 1620 int i;
1613 1621
1614 rcutorture_record_test_transition(); 1622 rcutorture_record_test_transition();
@@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)
1639 fakewriter_tasks = NULL; 1647 fakewriter_tasks = NULL;
1640 } 1648 }
1641 1649
1650 rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
1651 srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
1652 &flags, &gpnum, &completed);
1653 pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n",
1654 cur_ops->name, gpnum, completed, flags);
1642 torture_stop_kthread(rcu_torture_stats, stats_task); 1655 torture_stop_kthread(rcu_torture_stats, stats_task);
1643 torture_stop_kthread(rcu_torture_fqs, fqs_task); 1656 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1644 for (i = 0; i < ncbflooders; i++) 1657 for (i = 0; i < ncbflooders; i++)
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 76ac5f50b2c7..622792abe41a 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
86 * Must invoke this after you are finished using a given srcu_struct that 86 * Must invoke this after you are finished using a given srcu_struct that
87 * was initialized via init_srcu_struct(), else you leak memory. 87 * was initialized via init_srcu_struct(), else you leak memory.
88 */ 88 */
89void cleanup_srcu_struct(struct srcu_struct *sp) 89void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
90{ 90{
91 WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); 91 WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
92 flush_work(&sp->srcu_work); 92 if (quiesced)
93 WARN_ON(work_pending(&sp->srcu_work));
94 else
95 flush_work(&sp->srcu_work);
93 WARN_ON(sp->srcu_gp_running); 96 WARN_ON(sp->srcu_gp_running);
94 WARN_ON(sp->srcu_gp_waiting); 97 WARN_ON(sp->srcu_gp_waiting);
95 WARN_ON(sp->srcu_cb_head); 98 WARN_ON(sp->srcu_cb_head);
96 WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); 99 WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
97} 100}
98EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 101EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
99 102
100/* 103/*
101 * Removes the count for the old reader from the appropriate element of 104 * Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index fb560fca9ef4..b4123d7a2cec 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)
366 return SRCU_INTERVAL; 366 return SRCU_INTERVAL;
367} 367}
368 368
369/** 369/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
370 * cleanup_srcu_struct - deconstruct a sleep-RCU structure 370void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
371 * @sp: structure to clean up.
372 *
373 * Must invoke this after you are finished using a given srcu_struct that
374 * was initialized via init_srcu_struct(), else you leak memory.
375 */
376void cleanup_srcu_struct(struct srcu_struct *sp)
377{ 371{
378 int cpu; 372 int cpu;
379 373
380 if (WARN_ON(!srcu_get_delay(sp))) 374 if (WARN_ON(!srcu_get_delay(sp)))
381 return; /* Leakage unless caller handles error. */ 375 return; /* Just leak it! */
382 if (WARN_ON(srcu_readers_active(sp))) 376 if (WARN_ON(srcu_readers_active(sp)))
383 return; /* Leakage unless caller handles error. */ 377 return; /* Just leak it! */
384 flush_delayed_work(&sp->work); 378 if (quiesced) {
379 if (WARN_ON(delayed_work_pending(&sp->work)))
380 return; /* Just leak it! */
381 } else {
382 flush_delayed_work(&sp->work);
383 }
385 for_each_possible_cpu(cpu) 384 for_each_possible_cpu(cpu)
386 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); 385 if (quiesced) {
386 if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
387 return; /* Just leak it! */
388 } else {
389 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
390 }
387 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || 391 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
388 WARN_ON(srcu_readers_active(sp))) { 392 WARN_ON(srcu_readers_active(sp))) {
389 pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); 393 pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
@@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
392 free_percpu(sp->sda); 396 free_percpu(sp->sda);
393 sp->sda = NULL; 397 sp->sda = NULL;
394} 398}
395EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 399EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
396 400
397/* 401/*
398 * Counts the new reader in the appropriate per-CPU element of the 402 * Counts the new reader in the appropriate per-CPU element of the
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 2a734692a581..aa7cade1b9f3 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -524,8 +524,6 @@ module_param(rcu_kick_kthreads, bool, 0644);
524static ulong jiffies_till_sched_qs = HZ / 10; 524static ulong jiffies_till_sched_qs = HZ / 10;
525module_param(jiffies_till_sched_qs, ulong, 0444); 525module_param(jiffies_till_sched_qs, ulong, 0444);
526 526
527static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
528 struct rcu_data *rdp);
529static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)); 527static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
530static void force_quiescent_state(struct rcu_state *rsp); 528static void force_quiescent_state(struct rcu_state *rsp);
531static int rcu_pending(void); 529static int rcu_pending(void);
@@ -711,44 +709,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
711} 709}
712 710
713/* 711/*
714 * Is there any need for future grace periods?
715 * Interrupts must be disabled. If the caller does not hold the root
716 * rnp_node structure's ->lock, the results are advisory only.
717 */
718static int rcu_future_needs_gp(struct rcu_state *rsp)
719{
720 struct rcu_node *rnp = rcu_get_root(rsp);
721 int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
722 int *fp = &rnp->need_future_gp[idx];
723
724 lockdep_assert_irqs_disabled();
725 return READ_ONCE(*fp);
726}
727
728/*
729 * Does the current CPU require a not-yet-started grace period?
730 * The caller must have disabled interrupts to prevent races with
731 * normal callback registry.
732 */
733static bool
734cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
735{
736 lockdep_assert_irqs_disabled();
737 if (rcu_gp_in_progress(rsp))
738 return false; /* No, a grace period is already in progress. */
739 if (rcu_future_needs_gp(rsp))
740 return true; /* Yes, a no-CBs CPU needs one. */
741 if (!rcu_segcblist_is_enabled(&rdp->cblist))
742 return false; /* No, this is a no-CBs (or offline) CPU. */
743 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
744 return true; /* Yes, CPU has newly registered callbacks. */
745 if (rcu_segcblist_future_gp_needed(&rdp->cblist,
746 READ_ONCE(rsp->completed)))
747 return true; /* Yes, CBs for future grace period. */
748 return false; /* No grace period needed. */
749}
750
751/*
752 * Enter an RCU extended quiescent state, which can be either the 712 * Enter an RCU extended quiescent state, which can be either the
753 * idle loop or adaptive-tickless usermode execution. 713 * idle loop or adaptive-tickless usermode execution.
754 * 714 *
@@ -1234,10 +1194,10 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1234 } 1194 }
1235 1195
1236 /* 1196 /*
1237 * Has this CPU encountered a cond_resched_rcu_qs() since the 1197 * Has this CPU encountered a cond_resched() since the beginning
1238 * beginning of the grace period? For this to be the case, 1198 * of the grace period? For this to be the case, the CPU has to
1239 * the CPU has to have noticed the current grace period. This 1199 * have noticed the current grace period. This might not be the
1240 * might not be the case for nohz_full CPUs looping in the kernel. 1200 * case for nohz_full CPUs looping in the kernel.
1241 */ 1201 */
1242 jtsq = jiffies_till_sched_qs; 1202 jtsq = jiffies_till_sched_qs;
1243 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu); 1203 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
@@ -1642,18 +1602,30 @@ static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1642 return rnp->completed + 1; 1602 return rnp->completed + 1;
1643 1603
1644 /* 1604 /*
1605 * If the current rcu_node structure believes that RCU is
1606 * idle, and if the rcu_state structure does not yet reflect
1607 * the start of a new grace period, then the next grace period
1608 * will suffice. The memory barrier is needed to accurately
1609 * sample the rsp->gpnum, and pairs with the second lock
1610 * acquisition in rcu_gp_init(), which is augmented with
1611 * smp_mb__after_unlock_lock() for this purpose.
1612 */
1613 if (rnp->gpnum == rnp->completed) {
1614 smp_mb(); /* See above block comment. */
1615 if (READ_ONCE(rsp->gpnum) == rnp->completed)
1616 return rnp->completed + 1;
1617 }
1618
1619 /*
1645 * Otherwise, wait for a possible partial grace period and 1620 * Otherwise, wait for a possible partial grace period and
1646 * then the subsequent full grace period. 1621 * then the subsequent full grace period.
1647 */ 1622 */
1648 return rnp->completed + 2; 1623 return rnp->completed + 2;
1649} 1624}
1650 1625
1651/* 1626/* Trace-event wrapper function for trace_rcu_future_grace_period. */
1652 * Trace-event helper function for rcu_start_future_gp() and 1627static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1653 * rcu_nocb_wait_gp(). 1628 unsigned long c, const char *s)
1654 */
1655static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1656 unsigned long c, const char *s)
1657{ 1629{
1658 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum, 1630 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gpnum,
1659 rnp->completed, c, rnp->level, 1631 rnp->completed, c, rnp->level,
@@ -1661,96 +1633,67 @@ static void trace_rcu_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1661} 1633}
1662 1634
1663/* 1635/*
1664 * Start some future grace period, as needed to handle newly arrived 1636 * Start the specified grace period, as needed to handle newly arrived
1665 * callbacks. The required future grace periods are recorded in each 1637 * callbacks. The required future grace periods are recorded in each
1666 * rcu_node structure's ->need_future_gp field. Returns true if there 1638 * rcu_node structure's ->need_future_gp[] field. Returns true if there
1667 * is reason to awaken the grace-period kthread. 1639 * is reason to awaken the grace-period kthread.
1668 * 1640 *
1669 * The caller must hold the specified rcu_node structure's ->lock. 1641 * The caller must hold the specified rcu_node structure's ->lock, which
1642 * is why the caller is responsible for waking the grace-period kthread.
1670 */ 1643 */
1671static bool __maybe_unused 1644static bool rcu_start_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1672rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp, 1645 unsigned long c)
1673 unsigned long *c_out)
1674{ 1646{
1675 unsigned long c;
1676 bool ret = false; 1647 bool ret = false;
1677 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1648 struct rcu_state *rsp = rdp->rsp;
1678 1649 struct rcu_node *rnp_root;
1679 raw_lockdep_assert_held_rcu_node(rnp);
1680
1681 /*
1682 * Pick up grace-period number for new callbacks. If this
1683 * grace period is already marked as needed, return to the caller.
1684 */
1685 c = rcu_cbs_completed(rdp->rsp, rnp);
1686 trace_rcu_future_gp(rnp, rdp, c, TPS("Startleaf"));
1687 if (rnp->need_future_gp[c & 0x1]) {
1688 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartleaf"));
1689 goto out;
1690 }
1691 1650
1692 /* 1651 /*
1693 * If either this rcu_node structure or the root rcu_node structure 1652 * Use funnel locking to either acquire the root rcu_node
1694 * believe that a grace period is in progress, then we must wait 1653 * structure's lock or bail out if the need for this grace period
1695 * for the one following, which is in "c". Because our request 1654 * has already been recorded -- or has already started. If there
1696 * will be noticed at the end of the current grace period, we don't 1655 * is already a grace period in progress in a non-leaf node, no
1697 * need to explicitly start one. We only do the lockless check 1656 * recording is needed because the end of the grace period will
1698 * of rnp_root's fields if the current rcu_node structure thinks 1657 * scan the leaf rcu_node structures. Note that rnp->lock must
1699 * there is no grace period in flight, and because we hold rnp->lock, 1658 * not be released.
1700 * the only possible change is when rnp_root's two fields are
1701 * equal, in which case rnp_root->gpnum might be concurrently
1702 * incremented. But that is OK, as it will just result in our
1703 * doing some extra useless work.
1704 */ 1659 */
1705 if (rnp->gpnum != rnp->completed || 1660 raw_lockdep_assert_held_rcu_node(rnp);
1706 READ_ONCE(rnp_root->gpnum) != READ_ONCE(rnp_root->completed)) { 1661 trace_rcu_this_gp(rnp, rdp, c, TPS("Startleaf"));
1707 rnp->need_future_gp[c & 0x1]++; 1662 for (rnp_root = rnp; 1; rnp_root = rnp_root->parent) {
1708 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleaf")); 1663 if (rnp_root != rnp)
1709 goto out; 1664 raw_spin_lock_rcu_node(rnp_root);
1665 WARN_ON_ONCE(ULONG_CMP_LT(rnp_root->gpnum +
1666 need_future_gp_mask(), c));
1667 if (need_future_gp_element(rnp_root, c) ||
1668 ULONG_CMP_GE(rnp_root->gpnum, c) ||
1669 (rnp != rnp_root &&
1670 rnp_root->gpnum != rnp_root->completed)) {
1671 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Prestarted"));
1672 goto unlock_out;
1673 }
1674 need_future_gp_element(rnp_root, c) = true;
1675 if (rnp_root != rnp && rnp_root->parent != NULL)
1676 raw_spin_unlock_rcu_node(rnp_root);
1677 if (!rnp_root->parent)
1678 break; /* At root, and perhaps also leaf. */
1710 } 1679 }
1711 1680
1712 /* 1681 /* If GP already in progress, just leave, otherwise start one. */
1713 * There might be no grace period in progress. If we don't already 1682 if (rnp_root->gpnum != rnp_root->completed) {
1714 * hold it, acquire the root rcu_node structure's lock in order to 1683 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedleafroot"));
1715 * start one (if needed).
1716 */
1717 if (rnp != rnp_root)
1718 raw_spin_lock_rcu_node(rnp_root);
1719
1720 /*
1721 * Get a new grace-period number. If there really is no grace
1722 * period in progress, it will be smaller than the one we obtained
1723 * earlier. Adjust callbacks as needed.
1724 */
1725 c = rcu_cbs_completed(rdp->rsp, rnp_root);
1726 if (!rcu_is_nocb_cpu(rdp->cpu))
1727 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1728
1729 /*
1730 * If the needed for the required grace period is already
1731 * recorded, trace and leave.
1732 */
1733 if (rnp_root->need_future_gp[c & 0x1]) {
1734 trace_rcu_future_gp(rnp, rdp, c, TPS("Prestartedroot"));
1735 goto unlock_out; 1684 goto unlock_out;
1736 } 1685 }
1737 1686 trace_rcu_this_gp(rnp_root, rdp, c, TPS("Startedroot"));
1738 /* Record the need for the future grace period. */ 1687 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT);
1739 rnp_root->need_future_gp[c & 0x1]++; 1688 if (!rsp->gp_kthread) {
1740 1689 trace_rcu_this_gp(rnp_root, rdp, c, TPS("NoGPkthread"));
1741 /* If a grace period is not already in progress, start one. */ 1690 goto unlock_out;
1742 if (rnp_root->gpnum != rnp_root->completed) {
1743 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedleafroot"));
1744 } else {
1745 trace_rcu_future_gp(rnp, rdp, c, TPS("Startedroot"));
1746 ret = rcu_start_gp_advanced(rdp->rsp, rnp_root, rdp);
1747 } 1691 }
1692 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum), TPS("newreq"));
1693 ret = true; /* Caller must wake GP kthread. */
1748unlock_out: 1694unlock_out:
1749 if (rnp != rnp_root) 1695 if (rnp != rnp_root)
1750 raw_spin_unlock_rcu_node(rnp_root); 1696 raw_spin_unlock_rcu_node(rnp_root);
1751out:
1752 if (c_out != NULL)
1753 *c_out = c;
1754 return ret; 1697 return ret;
1755} 1698}
1756 1699
@@ -1758,16 +1701,16 @@ out:
1758 * Clean up any old requests for the just-ended grace period. Also return 1701 * Clean up any old requests for the just-ended grace period. Also return
1759 * whether any additional grace periods have been requested. 1702 * whether any additional grace periods have been requested.
1760 */ 1703 */
1761static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 1704static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1762{ 1705{
1763 int c = rnp->completed; 1706 unsigned long c = rnp->completed;
1764 int needmore; 1707 bool needmore;
1765 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1708 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
1766 1709
1767 rnp->need_future_gp[c & 0x1] = 0; 1710 need_future_gp_element(rnp, c) = false;
1768 needmore = rnp->need_future_gp[(c + 1) & 0x1]; 1711 needmore = need_any_future_gp(rnp);
1769 trace_rcu_future_gp(rnp, rdp, c, 1712 trace_rcu_this_gp(rnp, rdp, c,
1770 needmore ? TPS("CleanupMore") : TPS("Cleanup")); 1713 needmore ? TPS("CleanupMore") : TPS("Cleanup"));
1771 return needmore; 1714 return needmore;
1772} 1715}
1773 1716
@@ -1802,6 +1745,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1802static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1745static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1803 struct rcu_data *rdp) 1746 struct rcu_data *rdp)
1804{ 1747{
1748 unsigned long c;
1805 bool ret = false; 1749 bool ret = false;
1806 1750
1807 raw_lockdep_assert_held_rcu_node(rnp); 1751 raw_lockdep_assert_held_rcu_node(rnp);
@@ -1820,8 +1764,9 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1820 * accelerating callback invocation to an earlier grace-period 1764 * accelerating callback invocation to an earlier grace-period
1821 * number. 1765 * number.
1822 */ 1766 */
1823 if (rcu_segcblist_accelerate(&rdp->cblist, rcu_cbs_completed(rsp, rnp))) 1767 c = rcu_cbs_completed(rsp, rnp);
1824 ret = rcu_start_future_gp(rnp, rdp, NULL); 1768 if (rcu_segcblist_accelerate(&rdp->cblist, c))
1769 ret = rcu_start_this_gp(rnp, rdp, c);
1825 1770
1826 /* Trace depending on how much we were able to accelerate. */ 1771 /* Trace depending on how much we were able to accelerate. */
1827 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) 1772 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
@@ -2049,7 +1994,7 @@ static bool rcu_gp_init(struct rcu_state *rsp)
2049 rnp->level, rnp->grplo, 1994 rnp->level, rnp->grplo,
2050 rnp->grphi, rnp->qsmask); 1995 rnp->grphi, rnp->qsmask);
2051 raw_spin_unlock_irq_rcu_node(rnp); 1996 raw_spin_unlock_irq_rcu_node(rnp);
2052 cond_resched_rcu_qs(); 1997 cond_resched_tasks_rcu_qs();
2053 WRITE_ONCE(rsp->gp_activity, jiffies); 1998 WRITE_ONCE(rsp->gp_activity, jiffies);
2054 } 1999 }
2055 2000
@@ -2108,7 +2053,6 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2108{ 2053{
2109 unsigned long gp_duration; 2054 unsigned long gp_duration;
2110 bool needgp = false; 2055 bool needgp = false;
2111 int nocb = 0;
2112 struct rcu_data *rdp; 2056 struct rcu_data *rdp;
2113 struct rcu_node *rnp = rcu_get_root(rsp); 2057 struct rcu_node *rnp = rcu_get_root(rsp);
2114 struct swait_queue_head *sq; 2058 struct swait_queue_head *sq;
@@ -2147,31 +2091,35 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2147 if (rnp == rdp->mynode) 2091 if (rnp == rdp->mynode)
2148 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; 2092 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp;
2149 /* smp_mb() provided by prior unlock-lock pair. */ 2093 /* smp_mb() provided by prior unlock-lock pair. */
2150 nocb += rcu_future_gp_cleanup(rsp, rnp); 2094 needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp;
2151 sq = rcu_nocb_gp_get(rnp); 2095 sq = rcu_nocb_gp_get(rnp);
2152 raw_spin_unlock_irq_rcu_node(rnp); 2096 raw_spin_unlock_irq_rcu_node(rnp);
2153 rcu_nocb_gp_cleanup(sq); 2097 rcu_nocb_gp_cleanup(sq);
2154 cond_resched_rcu_qs(); 2098 cond_resched_tasks_rcu_qs();
2155 WRITE_ONCE(rsp->gp_activity, jiffies); 2099 WRITE_ONCE(rsp->gp_activity, jiffies);
2156 rcu_gp_slow(rsp, gp_cleanup_delay); 2100 rcu_gp_slow(rsp, gp_cleanup_delay);
2157 } 2101 }
2158 rnp = rcu_get_root(rsp); 2102 rnp = rcu_get_root(rsp);
2159 raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */ 2103 raw_spin_lock_irq_rcu_node(rnp); /* Order GP before ->completed update. */
2160 rcu_nocb_gp_set(rnp, nocb);
2161 2104
2162 /* Declare grace period done. */ 2105 /* Declare grace period done. */
2163 WRITE_ONCE(rsp->completed, rsp->gpnum); 2106 WRITE_ONCE(rsp->completed, rsp->gpnum);
2164 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end")); 2107 trace_rcu_grace_period(rsp->name, rsp->completed, TPS("end"));
2165 rsp->gp_state = RCU_GP_IDLE; 2108 rsp->gp_state = RCU_GP_IDLE;
2109 /* Check for GP requests since above loop. */
2166 rdp = this_cpu_ptr(rsp->rda); 2110 rdp = this_cpu_ptr(rsp->rda);
2111 if (need_any_future_gp(rnp)) {
2112 trace_rcu_this_gp(rnp, rdp, rsp->completed - 1,
2113 TPS("CleanupMore"));
2114 needgp = true;
2115 }
2167 /* Advance CBs to reduce false positives below. */ 2116 /* Advance CBs to reduce false positives below. */
2168 needgp = rcu_advance_cbs(rsp, rnp, rdp) || needgp; 2117 if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) {
2169 if (needgp || cpu_needs_another_gp(rsp, rdp)) {
2170 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); 2118 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
2171 trace_rcu_grace_period(rsp->name, 2119 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
2172 READ_ONCE(rsp->gpnum),
2173 TPS("newreq")); 2120 TPS("newreq"));
2174 } 2121 }
2122 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT);
2175 raw_spin_unlock_irq_rcu_node(rnp); 2123 raw_spin_unlock_irq_rcu_node(rnp);
2176} 2124}
2177 2125
@@ -2202,7 +2150,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2202 /* Locking provides needed memory barrier. */ 2150 /* Locking provides needed memory barrier. */
2203 if (rcu_gp_init(rsp)) 2151 if (rcu_gp_init(rsp))
2204 break; 2152 break;
2205 cond_resched_rcu_qs(); 2153 cond_resched_tasks_rcu_qs();
2206 WRITE_ONCE(rsp->gp_activity, jiffies); 2154 WRITE_ONCE(rsp->gp_activity, jiffies);
2207 WARN_ON(signal_pending(current)); 2155 WARN_ON(signal_pending(current));
2208 trace_rcu_grace_period(rsp->name, 2156 trace_rcu_grace_period(rsp->name,
@@ -2247,7 +2195,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2247 trace_rcu_grace_period(rsp->name, 2195 trace_rcu_grace_period(rsp->name,
2248 READ_ONCE(rsp->gpnum), 2196 READ_ONCE(rsp->gpnum),
2249 TPS("fqsend")); 2197 TPS("fqsend"));
2250 cond_resched_rcu_qs(); 2198 cond_resched_tasks_rcu_qs();
2251 WRITE_ONCE(rsp->gp_activity, jiffies); 2199 WRITE_ONCE(rsp->gp_activity, jiffies);
2252 ret = 0; /* Force full wait till next FQS. */ 2200 ret = 0; /* Force full wait till next FQS. */
2253 j = jiffies_till_next_fqs; 2201 j = jiffies_till_next_fqs;
@@ -2260,7 +2208,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
2260 } 2208 }
2261 } else { 2209 } else {
2262 /* Deal with stray signal. */ 2210 /* Deal with stray signal. */
2263 cond_resched_rcu_qs(); 2211 cond_resched_tasks_rcu_qs();
2264 WRITE_ONCE(rsp->gp_activity, jiffies); 2212 WRITE_ONCE(rsp->gp_activity, jiffies);
2265 WARN_ON(signal_pending(current)); 2213 WARN_ON(signal_pending(current));
2266 trace_rcu_grace_period(rsp->name, 2214 trace_rcu_grace_period(rsp->name,
@@ -2283,71 +2231,6 @@ static int __noreturn rcu_gp_kthread(void *arg)
2283} 2231}
2284 2232
2285/* 2233/*
2286 * Start a new RCU grace period if warranted, re-initializing the hierarchy
2287 * in preparation for detecting the next grace period. The caller must hold
2288 * the root node's ->lock and hard irqs must be disabled.
2289 *
2290 * Note that it is legal for a dying CPU (which is marked as offline) to
2291 * invoke this function. This can happen when the dying CPU reports its
2292 * quiescent state.
2293 *
2294 * Returns true if the grace-period kthread must be awakened.
2295 */
2296static bool
2297rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
2298 struct rcu_data *rdp)
2299{
2300 raw_lockdep_assert_held_rcu_node(rnp);
2301 if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
2302 /*
2303 * Either we have not yet spawned the grace-period
2304 * task, this CPU does not need another grace period,
2305 * or a grace period is already in progress.
2306 * Either way, don't start a new grace period.
2307 */
2308 return false;
2309 }
2310 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT);
2311 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gpnum),
2312 TPS("newreq"));
2313
2314 /*
2315 * We can't do wakeups while holding the rnp->lock, as that
2316 * could cause possible deadlocks with the rq->lock. Defer
2317 * the wakeup to our caller.
2318 */
2319 return true;
2320}
2321
2322/*
2323 * Similar to rcu_start_gp_advanced(), but also advance the calling CPU's
2324 * callbacks. Note that rcu_start_gp_advanced() cannot do this because it
2325 * is invoked indirectly from rcu_advance_cbs(), which would result in
2326 * endless recursion -- or would do so if it wasn't for the self-deadlock
2327 * that is encountered beforehand.
2328 *
2329 * Returns true if the grace-period kthread needs to be awakened.
2330 */
2331static bool rcu_start_gp(struct rcu_state *rsp)
2332{
2333 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
2334 struct rcu_node *rnp = rcu_get_root(rsp);
2335 bool ret = false;
2336
2337 /*
2338 * If there is no grace period in progress right now, any
2339 * callbacks we have up to this point will be satisfied by the
2340 * next grace period. Also, advancing the callbacks reduces the
2341 * probability of false positives from cpu_needs_another_gp()
2342 * resulting in pointless grace periods. So, advance callbacks
2343 * then start the grace period!
2344 */
2345 ret = rcu_advance_cbs(rsp, rnp, rdp) || ret;
2346 ret = rcu_start_gp_advanced(rsp, rnp, rdp) || ret;
2347 return ret;
2348}
2349
2350/*
2351 * Report a full set of quiescent states to the specified rcu_state data 2234 * Report a full set of quiescent states to the specified rcu_state data
2352 * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period 2235 * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period
2353 * kthread if another grace period is required. Whether we wake 2236 * kthread if another grace period is required. Whether we wake
@@ -2398,7 +2281,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2398 return; 2281 return;
2399 } 2282 }
2400 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */ 2283 WARN_ON_ONCE(oldmask); /* Any child must be all zeroed! */
2401 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1 && 2284 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
2402 rcu_preempt_blocked_readers_cgp(rnp)); 2285 rcu_preempt_blocked_readers_cgp(rnp));
2403 rnp->qsmask &= ~mask; 2286 rnp->qsmask &= ~mask;
2404 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum, 2287 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
@@ -2782,7 +2665,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2782 struct rcu_node *rnp; 2665 struct rcu_node *rnp;
2783 2666
2784 rcu_for_each_leaf_node(rsp, rnp) { 2667 rcu_for_each_leaf_node(rsp, rnp) {
2785 cond_resched_rcu_qs(); 2668 cond_resched_tasks_rcu_qs();
2786 mask = 0; 2669 mask = 0;
2787 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2670 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2788 if (rnp->qsmask == 0) { 2671 if (rnp->qsmask == 0) {
@@ -2874,22 +2757,27 @@ __rcu_process_callbacks(struct rcu_state *rsp)
2874 unsigned long flags; 2757 unsigned long flags;
2875 bool needwake; 2758 bool needwake;
2876 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2759 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2760 struct rcu_node *rnp;
2877 2761
2878 WARN_ON_ONCE(!rdp->beenonline); 2762 WARN_ON_ONCE(!rdp->beenonline);
2879 2763
2880 /* Update RCU state based on any recent quiescent states. */ 2764 /* Update RCU state based on any recent quiescent states. */
2881 rcu_check_quiescent_state(rsp, rdp); 2765 rcu_check_quiescent_state(rsp, rdp);
2882 2766
2883 /* Does this CPU require a not-yet-started grace period? */ 2767 /* No grace period and unregistered callbacks? */
2884 local_irq_save(flags); 2768 if (!rcu_gp_in_progress(rsp) &&
2885 if (cpu_needs_another_gp(rsp, rdp)) { 2769 rcu_segcblist_is_enabled(&rdp->cblist)) {
2886 raw_spin_lock_rcu_node(rcu_get_root(rsp)); /* irqs disabled. */ 2770 local_irq_save(flags);
2887 needwake = rcu_start_gp(rsp); 2771 if (rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) {
2888 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); 2772 local_irq_restore(flags);
2889 if (needwake) 2773 } else {
2890 rcu_gp_kthread_wake(rsp); 2774 rnp = rdp->mynode;
2891 } else { 2775 raw_spin_lock_rcu_node(rnp); /* irqs disabled. */
2892 local_irq_restore(flags); 2776 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2777 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2778 if (needwake)
2779 rcu_gp_kthread_wake(rsp);
2780 }
2893 } 2781 }
2894 2782
2895 /* If there are callbacks ready, invoke them. */ 2783 /* If there are callbacks ready, invoke them. */
@@ -2973,11 +2861,11 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2973 2861
2974 /* Start a new grace period if one not already started. */ 2862 /* Start a new grace period if one not already started. */
2975 if (!rcu_gp_in_progress(rsp)) { 2863 if (!rcu_gp_in_progress(rsp)) {
2976 struct rcu_node *rnp_root = rcu_get_root(rsp); 2864 struct rcu_node *rnp = rdp->mynode;
2977 2865
2978 raw_spin_lock_rcu_node(rnp_root); 2866 raw_spin_lock_rcu_node(rnp);
2979 needwake = rcu_start_gp(rsp); 2867 needwake = rcu_accelerate_cbs(rsp, rnp, rdp);
2980 raw_spin_unlock_rcu_node(rnp_root); 2868 raw_spin_unlock_rcu_node(rnp);
2981 if (needwake) 2869 if (needwake)
2982 rcu_gp_kthread_wake(rsp); 2870 rcu_gp_kthread_wake(rsp);
2983 } else { 2871 } else {
@@ -3368,7 +3256,9 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3368 return 1; 3256 return 1;
3369 3257
3370 /* Has RCU gone idle with this CPU needing another grace period? */ 3258 /* Has RCU gone idle with this CPU needing another grace period? */
3371 if (cpu_needs_another_gp(rsp, rdp)) 3259 if (!rcu_gp_in_progress(rsp) &&
3260 rcu_segcblist_is_enabled(&rdp->cblist) &&
3261 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
3372 return 1; 3262 return 1;
3373 3263
3374 /* Has another RCU grace period completed? */ 3264 /* Has another RCU grace period completed? */
@@ -3775,6 +3665,8 @@ int rcutree_dead_cpu(unsigned int cpu)
3775 return 0; 3665 return 0;
3776} 3666}
3777 3667
3668static DEFINE_PER_CPU(int, rcu_cpu_started);
3669
3778/* 3670/*
3779 * Mark the specified CPU as being online so that subsequent grace periods 3671 * Mark the specified CPU as being online so that subsequent grace periods
3780 * (both expedited and normal) will wait on it. Note that this means that 3672 * (both expedited and normal) will wait on it. Note that this means that
@@ -3796,6 +3688,11 @@ void rcu_cpu_starting(unsigned int cpu)
3796 struct rcu_node *rnp; 3688 struct rcu_node *rnp;
3797 struct rcu_state *rsp; 3689 struct rcu_state *rsp;
3798 3690
3691 if (per_cpu(rcu_cpu_started, cpu))
3692 return;
3693
3694 per_cpu(rcu_cpu_started, cpu) = 1;
3695
3799 for_each_rcu_flavor(rsp) { 3696 for_each_rcu_flavor(rsp) {
3800 rdp = per_cpu_ptr(rsp->rda, cpu); 3697 rdp = per_cpu_ptr(rsp->rda, cpu);
3801 rnp = rdp->mynode; 3698 rnp = rdp->mynode;
@@ -3852,6 +3749,8 @@ void rcu_report_dead(unsigned int cpu)
3852 preempt_enable(); 3749 preempt_enable();
3853 for_each_rcu_flavor(rsp) 3750 for_each_rcu_flavor(rsp)
3854 rcu_cleanup_dying_idle_cpu(cpu, rsp); 3751 rcu_cleanup_dying_idle_cpu(cpu, rsp);
3752
3753 per_cpu(rcu_cpu_started, cpu) = 0;
3855} 3754}
3856 3755
3857/* Migrate the dead CPU's callbacks to the current CPU. */ 3756/* Migrate the dead CPU's callbacks to the current CPU. */
@@ -3861,6 +3760,7 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
3861 struct rcu_data *my_rdp; 3760 struct rcu_data *my_rdp;
3862 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3761 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
3863 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 3762 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
3763 bool needwake;
3864 3764
3865 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist)) 3765 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
3866 return; /* No callbacks to migrate. */ 3766 return; /* No callbacks to migrate. */
@@ -3872,12 +3772,15 @@ static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp)
3872 return; 3772 return;
3873 } 3773 }
3874 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ 3774 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
3875 rcu_advance_cbs(rsp, rnp_root, rdp); /* Leverage recent GPs. */ 3775 /* Leverage recent GPs and set GP for new callbacks. */
3876 rcu_advance_cbs(rsp, rnp_root, my_rdp); /* Assign GP to pending CBs. */ 3776 needwake = rcu_advance_cbs(rsp, rnp_root, rdp) ||
3777 rcu_advance_cbs(rsp, rnp_root, my_rdp);
3877 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); 3778 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
3878 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != 3779 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
3879 !rcu_segcblist_n_cbs(&my_rdp->cblist)); 3780 !rcu_segcblist_n_cbs(&my_rdp->cblist));
3880 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags); 3781 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
3782 if (needwake)
3783 rcu_gp_kthread_wake(rsp);
3881 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || 3784 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
3882 !rcu_segcblist_empty(&rdp->cblist), 3785 !rcu_segcblist_empty(&rdp->cblist),
3883 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", 3786 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
@@ -4056,7 +3959,7 @@ static void __init rcu_init_one(struct rcu_state *rsp)
4056 3959
4057 init_swait_queue_head(&rsp->gp_wq); 3960 init_swait_queue_head(&rsp->gp_wq);
4058 init_swait_queue_head(&rsp->expedited_wq); 3961 init_swait_queue_head(&rsp->expedited_wq);
4059 rnp = rsp->level[rcu_num_lvls - 1]; 3962 rnp = rcu_first_leaf_node(rsp);
4060 for_each_possible_cpu(i) { 3963 for_each_possible_cpu(i) {
4061 while (i > rnp->grphi) 3964 while (i > rnp->grphi)
4062 rnp++; 3965 rnp++;
@@ -4168,6 +4071,7 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp)
4168} 4071}
4169 4072
4170struct workqueue_struct *rcu_gp_wq; 4073struct workqueue_struct *rcu_gp_wq;
4074struct workqueue_struct *rcu_par_gp_wq;
4171 4075
4172void __init rcu_init(void) 4076void __init rcu_init(void)
4173{ 4077{
@@ -4199,6 +4103,8 @@ void __init rcu_init(void)
4199 /* Create workqueue for expedited GPs and for Tree SRCU. */ 4103 /* Create workqueue for expedited GPs and for Tree SRCU. */
4200 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); 4104 rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0);
4201 WARN_ON(!rcu_gp_wq); 4105 WARN_ON(!rcu_gp_wq);
4106 rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
4107 WARN_ON(!rcu_par_gp_wq);
4202} 4108}
4203 4109
4204#include "tree_exp.h" 4110#include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index f491ab4f2e8e..78e051dffc5b 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -58,6 +58,14 @@ struct rcu_dynticks {
58#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 58#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
59}; 59};
60 60
61/* Communicate arguments to a workqueue handler. */
62struct rcu_exp_work {
63 smp_call_func_t rew_func;
64 struct rcu_state *rew_rsp;
65 unsigned long rew_s;
66 struct work_struct rew_work;
67};
68
61/* RCU's kthread states for tracing. */ 69/* RCU's kthread states for tracing. */
62#define RCU_KTHREAD_STOPPED 0 70#define RCU_KTHREAD_STOPPED 0
63#define RCU_KTHREAD_RUNNING 1 71#define RCU_KTHREAD_RUNNING 1
@@ -150,15 +158,32 @@ struct rcu_node {
150 struct swait_queue_head nocb_gp_wq[2]; 158 struct swait_queue_head nocb_gp_wq[2];
151 /* Place for rcu_nocb_kthread() to wait GP. */ 159 /* Place for rcu_nocb_kthread() to wait GP. */
152#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 160#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
153 int need_future_gp[2]; 161 u8 need_future_gp[4]; /* Counts of upcoming GP requests. */
154 /* Counts of upcoming no-CB GP requests. */
155 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp; 162 raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
156 163
157 spinlock_t exp_lock ____cacheline_internodealigned_in_smp; 164 spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
158 unsigned long exp_seq_rq; 165 unsigned long exp_seq_rq;
159 wait_queue_head_t exp_wq[4]; 166 wait_queue_head_t exp_wq[4];
167 struct rcu_exp_work rew;
168 bool exp_need_flush; /* Need to flush workitem? */
160} ____cacheline_internodealigned_in_smp; 169} ____cacheline_internodealigned_in_smp;
161 170
171/* Accessors for ->need_future_gp[] array. */
172#define need_future_gp_mask() \
173 (ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
174#define need_future_gp_element(rnp, c) \
175 ((rnp)->need_future_gp[(c) & need_future_gp_mask()])
176#define need_any_future_gp(rnp) \
177({ \
178 int __i; \
179 bool __nonzero = false; \
180 \
181 for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
182 __nonzero = __nonzero || \
183 READ_ONCE((rnp)->need_future_gp[__i]); \
184 __nonzero; \
185})
186
162/* 187/*
163 * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and 188 * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
164 * are indexed relative to this interval rather than the global CPU ID space. 189 * are indexed relative to this interval rather than the global CPU ID space.
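
Note on the accessors added above: ->need_future_gp[] is treated as a small ring of request counters indexed by the low bits of the grace-period number, and need_any_future_gp() reports whether any slot holds an outstanding request. A minimal userspace model of the same indexing; the array size of 4 matches the patch, everything else is illustrative:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_SLOTS 4			/* must be a power of two */

static uint8_t need_future_gp[NR_SLOTS];

static uint8_t *need_future_gp_element(unsigned long c)
{
	return &need_future_gp[c & (NR_SLOTS - 1)];
}

static bool need_any_future_gp(void)
{
	int i;

	for (i = 0; i < NR_SLOTS; i++)
		if (need_future_gp[i])
			return true;
	return false;
}

int main(void)
{
	unsigned long c = 42;		/* some future grace-period number */

	(*need_future_gp_element(c))++;	/* record a request for GP "c" */
	printf("slot %lu, any request pending: %d\n",
	       c & (NR_SLOTS - 1), need_any_future_gp());
	(*need_future_gp_element(c))--;	/* GP "c" completed */
	printf("any request pending: %d\n", need_any_future_gp());
	return 0;
}
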
@@ -224,10 +249,6 @@ struct rcu_data {
224#ifdef CONFIG_RCU_FAST_NO_HZ 249#ifdef CONFIG_RCU_FAST_NO_HZ
225 struct rcu_head oom_head; 250 struct rcu_head oom_head;
226#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 251#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
227 atomic_long_t exp_workdone0; /* # done by workqueue. */
228 atomic_long_t exp_workdone1; /* # done by others #1. */
229 atomic_long_t exp_workdone2; /* # done by others #2. */
230 atomic_long_t exp_workdone3; /* # done by others #3. */
231 int exp_dynticks_snap; /* Double-check need for IPI. */ 252 int exp_dynticks_snap; /* Double-check need for IPI. */
232 253
233 /* 6) Callback offloading. */ 254 /* 6) Callback offloading. */
@@ -408,7 +429,6 @@ extern struct rcu_state rcu_preempt_state;
408#endif /* #ifdef CONFIG_PREEMPT_RCU */ 429#endif /* #ifdef CONFIG_PREEMPT_RCU */
409 430
410int rcu_dynticks_snap(struct rcu_dynticks *rdtp); 431int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
411bool rcu_eqs_special_set(int cpu);
412 432
413#ifdef CONFIG_RCU_BOOST 433#ifdef CONFIG_RCU_BOOST
414DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 434DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -438,7 +458,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
438static void invoke_rcu_callbacks_kthread(void); 458static void invoke_rcu_callbacks_kthread(void);
439static bool rcu_is_callbacks_kthread(void); 459static bool rcu_is_callbacks_kthread(void);
440#ifdef CONFIG_RCU_BOOST 460#ifdef CONFIG_RCU_BOOST
441static void rcu_preempt_do_callbacks(void);
442static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 461static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
443 struct rcu_node *rnp); 462 struct rcu_node *rnp);
444#endif /* #ifdef CONFIG_RCU_BOOST */ 463#endif /* #ifdef CONFIG_RCU_BOOST */
@@ -454,7 +473,6 @@ static void print_cpu_stall_info_end(void);
454static void zero_cpu_stall_ticks(struct rcu_data *rdp); 473static void zero_cpu_stall_ticks(struct rcu_data *rdp);
455static void increment_cpu_stall_ticks(void); 474static void increment_cpu_stall_ticks(void);
456static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu); 475static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
457static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
458static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); 476static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
459static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); 477static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
460static void rcu_init_one_nocb(struct rcu_node *rnp); 478static void rcu_init_one_nocb(struct rcu_node *rnp);
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index f72eefab8543..d40708e8c5d6 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -20,6 +20,8 @@
20 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 20 * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 */ 21 */
22 22
23#include <linux/lockdep.h>
24
23/* 25/*
24 * Record the start of an expedited grace period. 26 * Record the start of an expedited grace period.
25 */ 27 */
@@ -154,15 +156,35 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
154 * for the current expedited grace period. Works only for preemptible 156 * for the current expedited grace period. Works only for preemptible
155 * RCU -- other RCU implementations use other means. 157 * RCU -- other RCU implementations use other means.
156 * 158 *
157 * Caller must hold the rcu_state's exp_mutex. 159 * Caller must hold the specified rcu_node structure's ->lock.
158 */ 160 */
159static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp) 161static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
160{ 162{
163 raw_lockdep_assert_held_rcu_node(rnp);
164
161 return rnp->exp_tasks == NULL && 165 return rnp->exp_tasks == NULL &&
162 READ_ONCE(rnp->expmask) == 0; 166 READ_ONCE(rnp->expmask) == 0;
163} 167}
164 168
165/* 169/*
170 * Like sync_rcu_preempt_exp_done(), but this function assumes the caller
171 * doesn't hold the rcu_node's ->lock, and will acquire and release the lock
172 * itself.
173 */
174static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
175{
176 unsigned long flags;
177 bool ret;
178
179 raw_spin_lock_irqsave_rcu_node(rnp, flags);
180 ret = sync_rcu_preempt_exp_done(rnp);
181 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
182
183 return ret;
184}
185
186
187/*
166 * Report the exit from RCU read-side critical section for the last task 188 * Report the exit from RCU read-side critical section for the last task
167 * that queued itself during or before the current expedited preemptible-RCU 189 * that queued itself during or before the current expedited preemptible-RCU
168 * grace period. This event is reported either to the rcu_node structure on 190 * grace period. This event is reported either to the rcu_node structure on
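
Note on the hunk above: sync_rcu_preempt_exp_done() now asserts via lockdep that the caller holds the rcu_node ->lock, and the new _unlocked() variant is a thin wrapper that takes and releases that lock itself so wait loops can use the predicate without holding the lock. The same split, modelled with a pthread mutex and placeholder state:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t rnp_lock = PTHREAD_MUTEX_INITIALIZER;
static void *exp_tasks;			/* placeholder for rnp->exp_tasks */
static unsigned long expmask;		/* placeholder for rnp->expmask */

/* Caller must hold rnp_lock (the kernel version asserts this via lockdep). */
static bool exp_done(void)
{
	return exp_tasks == NULL && expmask == 0;
}

/* Variant for callers that do not hold the lock: take it around the check. */
static bool exp_done_unlocked(void)
{
	bool ret;

	pthread_mutex_lock(&rnp_lock);
	ret = exp_done();
	pthread_mutex_unlock(&rnp_lock);
	return ret;
}

int main(void)
{
	printf("expedited GP done: %d\n", exp_done_unlocked());
	return 0;
}
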
@@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
170 * recursively up the tree. (Calm down, calm down, we do the recursion 192 * recursively up the tree. (Calm down, calm down, we do the recursion
171 * iteratively!) 193 * iteratively!)
172 * 194 *
173 * Caller must hold the rcu_state's exp_mutex and the specified rcu_node 195 * Caller must hold the specified rcu_node structure's ->lock.
174 * structure's ->lock.
175 */ 196 */
176static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 197static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
177 bool wake, unsigned long flags) 198 bool wake, unsigned long flags)
@@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
207/* 228/*
208 * Report expedited quiescent state for specified node. This is a 229 * Report expedited quiescent state for specified node. This is a
209 * lock-acquisition wrapper function for __rcu_report_exp_rnp(). 230 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
210 *
211 * Caller must hold the rcu_state's exp_mutex.
212 */ 231 */
213static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, 232static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
214 struct rcu_node *rnp, bool wake) 233 struct rcu_node *rnp, bool wake)
@@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
221 240
222/* 241/*
223 * Report expedited quiescent state for multiple CPUs, all covered by the 242 * Report expedited quiescent state for multiple CPUs, all covered by the
224 * specified leaf rcu_node structure. Caller must hold the rcu_state's 243 * specified leaf rcu_node structure.
225 * exp_mutex.
226 */ 244 */
227static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, 245static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
228 unsigned long mask, bool wake) 246 unsigned long mask, bool wake)
@@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
248} 266}
249 267
250/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ 268/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
251static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat, 269static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
252 unsigned long s)
253{ 270{
254 if (rcu_exp_gp_seq_done(rsp, s)) { 271 if (rcu_exp_gp_seq_done(rsp, s)) {
255 trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); 272 trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
256 /* Ensure test happens before caller kfree(). */ 273 /* Ensure test happens before caller kfree(). */
257 smp_mb__before_atomic(); /* ^^^ */ 274 smp_mb__before_atomic(); /* ^^^ */
258 atomic_long_inc(stat);
259 return true; 275 return true;
260 } 276 }
261 return false; 277 return false;
@@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
289 * promoting locality and is not strictly needed for correctness. 305 * promoting locality and is not strictly needed for correctness.
290 */ 306 */
291 for (; rnp != NULL; rnp = rnp->parent) { 307 for (; rnp != NULL; rnp = rnp->parent) {
292 if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s)) 308 if (sync_exp_work_done(rsp, s))
293 return true; 309 return true;
294 310
295 /* Work not done, either wait here or go up. */ 311 /* Work not done, either wait here or go up. */
@@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
302 rnp->grplo, rnp->grphi, 318 rnp->grplo, rnp->grphi,
303 TPS("wait")); 319 TPS("wait"));
304 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 320 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
305 sync_exp_work_done(rsp, 321 sync_exp_work_done(rsp, s));
306 &rdp->exp_workdone2, s));
307 return true; 322 return true;
308 } 323 }
309 rnp->exp_seq_rq = s; /* Followers can wait on us. */ 324 rnp->exp_seq_rq = s; /* Followers can wait on us. */
@@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
313 } 328 }
314 mutex_lock(&rsp->exp_mutex); 329 mutex_lock(&rsp->exp_mutex);
315fastpath: 330fastpath:
316 if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) { 331 if (sync_exp_work_done(rsp, s)) {
317 mutex_unlock(&rsp->exp_mutex); 332 mutex_unlock(&rsp->exp_mutex);
318 return true; 333 return true;
319 } 334 }
@@ -362,93 +377,129 @@ static void sync_sched_exp_online_cleanup(int cpu)
362} 377}
363 378
364/* 379/*
365 * Select the nodes that the upcoming expedited grace period needs 380 * Select the CPUs within the specified rcu_node that the upcoming
366 * to wait for. 381 * expedited grace period needs to wait for.
367 */ 382 */
368static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, 383static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
369 smp_call_func_t func)
370{ 384{
371 int cpu; 385 int cpu;
372 unsigned long flags; 386 unsigned long flags;
387 smp_call_func_t func;
373 unsigned long mask_ofl_test; 388 unsigned long mask_ofl_test;
374 unsigned long mask_ofl_ipi; 389 unsigned long mask_ofl_ipi;
375 int ret; 390 int ret;
376 struct rcu_node *rnp; 391 struct rcu_exp_work *rewp =
377 392 container_of(wp, struct rcu_exp_work, rew_work);
378 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); 393 struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
379 sync_exp_reset_tree(rsp); 394 struct rcu_state *rsp = rewp->rew_rsp;
380 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
381 rcu_for_each_leaf_node(rsp, rnp) {
382 raw_spin_lock_irqsave_rcu_node(rnp, flags);
383 395
384 /* Each pass checks a CPU for identity, offline, and idle. */ 396 func = rewp->rew_func;
385 mask_ofl_test = 0; 397 raw_spin_lock_irqsave_rcu_node(rnp, flags);
386 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
387 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
388 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
389 struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
390 int snap;
391 398
392 if (raw_smp_processor_id() == cpu || 399 /* Each pass checks a CPU for identity, offline, and idle. */
393 !(rnp->qsmaskinitnext & mask)) { 400 mask_ofl_test = 0;
401 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
402 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
403 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
404 struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
405 int snap;
406
407 if (raw_smp_processor_id() == cpu ||
408 !(rnp->qsmaskinitnext & mask)) {
409 mask_ofl_test |= mask;
410 } else {
411 snap = rcu_dynticks_snap(rdtp);
412 if (rcu_dynticks_in_eqs(snap))
394 mask_ofl_test |= mask; 413 mask_ofl_test |= mask;
395 } else { 414 else
396 snap = rcu_dynticks_snap(rdtp); 415 rdp->exp_dynticks_snap = snap;
397 if (rcu_dynticks_in_eqs(snap))
398 mask_ofl_test |= mask;
399 else
400 rdp->exp_dynticks_snap = snap;
401 }
402 } 416 }
403 mask_ofl_ipi = rnp->expmask & ~mask_ofl_test; 417 }
404 418 mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
405 /*
406 * Need to wait for any blocked tasks as well. Note that
407 * additional blocking tasks will also block the expedited
408 * GP until such time as the ->expmask bits are cleared.
409 */
410 if (rcu_preempt_has_tasks(rnp))
411 rnp->exp_tasks = rnp->blkd_tasks.next;
412 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
413 419
414 /* IPI the remaining CPUs for expedited quiescent state. */ 420 /*
415 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { 421 * Need to wait for any blocked tasks as well. Note that
416 unsigned long mask = leaf_node_cpu_bit(rnp, cpu); 422 * additional blocking tasks will also block the expedited GP
417 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 423 * until such time as the ->expmask bits are cleared.
424 */
425 if (rcu_preempt_has_tasks(rnp))
426 rnp->exp_tasks = rnp->blkd_tasks.next;
427 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
418 428
419 if (!(mask_ofl_ipi & mask)) 429 /* IPI the remaining CPUs for expedited quiescent state. */
420 continue; 430 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
431 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
432 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
433
434 if (!(mask_ofl_ipi & mask))
435 continue;
421retry_ipi: 436retry_ipi:
422 if (rcu_dynticks_in_eqs_since(rdp->dynticks, 437 if (rcu_dynticks_in_eqs_since(rdp->dynticks,
423 rdp->exp_dynticks_snap)) { 438 rdp->exp_dynticks_snap)) {
424 mask_ofl_test |= mask; 439 mask_ofl_test |= mask;
425 continue; 440 continue;
426 } 441 }
427 ret = smp_call_function_single(cpu, func, rsp, 0); 442 ret = smp_call_function_single(cpu, func, rsp, 0);
428 if (!ret) { 443 if (!ret) {
429 mask_ofl_ipi &= ~mask; 444 mask_ofl_ipi &= ~mask;
430 continue; 445 continue;
431 } 446 }
432 /* Failed, raced with CPU hotplug operation. */ 447 /* Failed, raced with CPU hotplug operation. */
433 raw_spin_lock_irqsave_rcu_node(rnp, flags); 448 raw_spin_lock_irqsave_rcu_node(rnp, flags);
434 if ((rnp->qsmaskinitnext & mask) && 449 if ((rnp->qsmaskinitnext & mask) &&
435 (rnp->expmask & mask)) { 450 (rnp->expmask & mask)) {
436 /* Online, so delay for a bit and try again. */ 451 /* Online, so delay for a bit and try again. */
437 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
438 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
439 schedule_timeout_uninterruptible(1);
440 goto retry_ipi;
441 }
442 /* CPU really is offline, so we can ignore it. */
443 if (!(rnp->expmask & mask))
444 mask_ofl_ipi &= ~mask;
445 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 452 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
453 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
454 schedule_timeout_uninterruptible(1);
455 goto retry_ipi;
456 }
457 /* CPU really is offline, so we can ignore it. */
458 if (!(rnp->expmask & mask))
459 mask_ofl_ipi &= ~mask;
460 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
461 }
462 /* Report quiescent states for those that went offline. */
463 mask_ofl_test |= mask_ofl_ipi;
464 if (mask_ofl_test)
465 rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
466}
467
468/*
469 * Select the nodes that the upcoming expedited grace period needs
470 * to wait for.
471 */
472static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
473 smp_call_func_t func)
474{
475 struct rcu_node *rnp;
476
477 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
478 sync_exp_reset_tree(rsp);
479 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
480
481 /* Schedule work for each leaf rcu_node structure. */
482 rcu_for_each_leaf_node(rsp, rnp) {
483 rnp->exp_need_flush = false;
484 if (!READ_ONCE(rnp->expmask))
485 continue; /* Avoid early boot non-existent wq. */
486 rnp->rew.rew_func = func;
487 rnp->rew.rew_rsp = rsp;
488 if (!READ_ONCE(rcu_par_gp_wq) ||
489 rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
490 /* No workqueues yet. */
491 sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
492 continue;
446 } 493 }
447 /* Report quiescent states for those that went offline. */ 494 INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
448 mask_ofl_test |= mask_ofl_ipi; 495 queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
449 if (mask_ofl_test) 496 rnp->exp_need_flush = true;
450 rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
451 } 497 }
498
499 /* Wait for workqueue jobs (if any) to complete. */
500 rcu_for_each_leaf_node(rsp, rnp)
501 if (rnp->exp_need_flush)
502 flush_work(&rnp->rew.rew_work);
452} 503}
453 504
454static void synchronize_sched_expedited_wait(struct rcu_state *rsp) 505static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
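
Note on the rewritten sync_rcu_exp_select_cpus() above: instead of scanning every leaf inline, it packages the per-leaf scan into an rcu_exp_work item, queues one item per busy leaf on the rcu_par_gp workqueue created earlier in tree.c (pinned to the leaf's first CPU via queue_work_on()), and then flushes only the items it actually queued. A rough stand-alone model of that fan-out/flush shape, using plain threads in place of workqueue items; all names and masks below are illustrative:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define NR_LEAVES 3

struct leaf_work {
	int id;
	unsigned long expmask;	/* nonzero: this leaf has CPUs to scan */
	bool need_flush;	/* did we actually start work for it? */
	pthread_t worker;
};

/* Stand-in for sync_rcu_exp_select_node_cpus() running as a work item. */
static void *select_node_cpus(void *arg)
{
	struct leaf_work *lw = arg;

	printf("leaf %d: scanning CPUs in mask %#lx\n", lw->id, lw->expmask);
	return NULL;
}

int main(void)
{
	struct leaf_work leaves[NR_LEAVES] = {
		{ .id = 0, .expmask = 0x3 },
		{ .id = 1, .expmask = 0x0 },	/* nothing to wait for: skipped */
		{ .id = 2, .expmask = 0xc },
	};
	int i;

	for (i = 0; i < NR_LEAVES; i++) {	/* queue one item per busy leaf */
		leaves[i].need_flush = false;
		if (!leaves[i].expmask)
			continue;
		pthread_create(&leaves[i].worker, NULL, select_node_cpus,
			       &leaves[i]);
		leaves[i].need_flush = true;
	}
	for (i = 0; i < NR_LEAVES; i++)		/* "flush" only what was queued */
		if (leaves[i].need_flush)
			pthread_join(leaves[i].worker, NULL);
	return 0;
}
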
@@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
469 for (;;) { 520 for (;;) {
470 ret = swait_event_timeout( 521 ret = swait_event_timeout(
471 rsp->expedited_wq, 522 rsp->expedited_wq,
472 sync_rcu_preempt_exp_done(rnp_root), 523 sync_rcu_preempt_exp_done_unlocked(rnp_root),
473 jiffies_stall); 524 jiffies_stall);
474 if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root)) 525 if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
475 return; 526 return;
476 WARN_ON(ret < 0); /* workqueues should not be signaled. */ 527 WARN_ON(ret < 0); /* workqueues should not be signaled. */
477 if (rcu_cpu_stall_suppress) 528 if (rcu_cpu_stall_suppress)
@@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
504 rcu_for_each_node_breadth_first(rsp, rnp) { 555 rcu_for_each_node_breadth_first(rsp, rnp) {
505 if (rnp == rnp_root) 556 if (rnp == rnp_root)
506 continue; /* printed unconditionally */ 557 continue; /* printed unconditionally */
507 if (sync_rcu_preempt_exp_done(rnp)) 558 if (sync_rcu_preempt_exp_done_unlocked(rnp))
508 continue; 559 continue;
509 pr_cont(" l=%u:%d-%d:%#lx/%c", 560 pr_cont(" l=%u:%d-%d:%#lx/%c",
510 rnp->level, rnp->grplo, rnp->grphi, 561 rnp->level, rnp->grplo, rnp->grphi,
@@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
560 mutex_unlock(&rsp->exp_wake_mutex); 611 mutex_unlock(&rsp->exp_wake_mutex);
561} 612}
562 613
563/* Let the workqueue handler know what it is supposed to do. */
564struct rcu_exp_work {
565 smp_call_func_t rew_func;
566 struct rcu_state *rew_rsp;
567 unsigned long rew_s;
568 struct work_struct rew_work;
569};
570
571/* 614/*
572 * Common code to drive an expedited grace period forward, used by 615 * Common code to drive an expedited grace period forward, used by
573 * workqueues and mid-boot-time tasks. 616 * workqueues and mid-boot-time tasks.
@@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
633 rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); 676 rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
634 rnp = rcu_get_root(rsp); 677 rnp = rcu_get_root(rsp);
635 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 678 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
636 sync_exp_work_done(rsp, &rdp->exp_workdone0, s)); 679 sync_exp_work_done(rsp, s));
637 smp_mb(); /* Workqueue actions happen before return. */ 680 smp_mb(); /* Workqueue actions happen before return. */
638 681
639 /* Let the next expedited grace period start. */ 682 /* Let the next expedited grace period start. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 84fbee4686d3..7fd12039e512 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
182 182
183 raw_lockdep_assert_held_rcu_node(rnp); 183 raw_lockdep_assert_held_rcu_node(rnp);
184 WARN_ON_ONCE(rdp->mynode != rnp); 184 WARN_ON_ONCE(rdp->mynode != rnp);
185 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); 185 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
186 186
187 /* 187 /*
188 * Decide where to queue the newly blocked task. In theory, 188 * Decide where to queue the newly blocked task. In theory,
@@ -384,6 +384,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
384} 384}
385 385
386/* 386/*
387 * Preemptible RCU implementation for rcu_read_lock().
388 * Just increment ->rcu_read_lock_nesting, shared state will be updated
389 * if we block.
390 */
391void __rcu_read_lock(void)
392{
393 current->rcu_read_lock_nesting++;
394 barrier(); /* critical section after entry code. */
395}
396EXPORT_SYMBOL_GPL(__rcu_read_lock);
397
398/*
399 * Preemptible RCU implementation for rcu_read_unlock().
400 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
401 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
402 * invoke rcu_read_unlock_special() to clean up after a context switch
403 * in an RCU read-side critical section and other special cases.
404 */
405void __rcu_read_unlock(void)
406{
407 struct task_struct *t = current;
408
409 if (t->rcu_read_lock_nesting != 1) {
410 --t->rcu_read_lock_nesting;
411 } else {
412 barrier(); /* critical section before exit code. */
413 t->rcu_read_lock_nesting = INT_MIN;
414 barrier(); /* assign before ->rcu_read_unlock_special load */
415 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
416 rcu_read_unlock_special(t);
417 barrier(); /* ->rcu_read_unlock_special load before assign */
418 t->rcu_read_lock_nesting = 0;
419 }
420#ifdef CONFIG_PROVE_LOCKING
421 {
422 int rrln = READ_ONCE(t->rcu_read_lock_nesting);
423
424 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
425 }
426#endif /* #ifdef CONFIG_PROVE_LOCKING */
427}
428EXPORT_SYMBOL_GPL(__rcu_read_unlock);
429
430/*
387 * Advance a ->blkd_tasks-list pointer to the next entry, instead 431 * Advance a ->blkd_tasks-list pointer to the next entry, instead
388 * returning NULL if at the end of the list. 432 * returning NULL if at the end of the list.
389 */ 433 */
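
Note on the __rcu_read_lock()/__rcu_read_unlock() definitions moved here from update.c (see the matching removal below): both are driven by the per-task ->rcu_read_lock_nesting counter. Lock increments it; unlock either decrements it or, on the outermost unlock, parks it at INT_MIN while any deferred cleanup runs, then drops it to zero. A simplified single-task model of that counter discipline, with no deferred-cleanup logic and placeholder names:

#include <limits.h>
#include <stdio.h>

static int rcu_read_lock_nesting;
static int rcu_read_unlock_special;	/* set if deferred cleanup is pending */

static void model_rcu_read_lock(void)
{
	rcu_read_lock_nesting++;
}

static void model_rcu_read_unlock(void)
{
	if (rcu_read_lock_nesting != 1) {
		--rcu_read_lock_nesting;	/* still nested: just count down */
	} else {
		/*
		 * Outermost unlock: park the counter at INT_MIN so any
		 * interrupting code sees "cleanup in progress", run the
		 * deferred work, then drop to zero.
		 */
		rcu_read_lock_nesting = INT_MIN;
		if (rcu_read_unlock_special)
			printf("running deferred unlock cleanup\n");
		rcu_read_lock_nesting = 0;
	}
}

int main(void)
{
	model_rcu_read_lock();
	model_rcu_read_lock();		/* nested critical section */
	rcu_read_unlock_special = 1;
	model_rcu_read_unlock();	/* inner: nesting 2 -> 1 */
	model_rcu_read_unlock();	/* outermost: cleanup runs */
	printf("final nesting: %d\n", rcu_read_lock_nesting);
	return 0;
}
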
@@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t)
489 rnp = t->rcu_blocked_node; 533 rnp = t->rcu_blocked_node;
490 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 534 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
491 WARN_ON_ONCE(rnp != t->rcu_blocked_node); 535 WARN_ON_ONCE(rnp != t->rcu_blocked_node);
492 WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1); 536 WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
493 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp); 537 empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
494 empty_exp = sync_rcu_preempt_exp_done(rnp); 538 empty_exp = sync_rcu_preempt_exp_done(rnp);
495 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 539 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
@@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void)
685 t->rcu_read_unlock_special.b.need_qs = true; 729 t->rcu_read_unlock_special.b.need_qs = true;
686} 730}
687 731
688#ifdef CONFIG_RCU_BOOST
689
690static void rcu_preempt_do_callbacks(void)
691{
692 rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
693}
694
695#endif /* #ifdef CONFIG_RCU_BOOST */
696
697/** 732/**
698 * call_rcu() - Queue an RCU callback for invocation after a grace period. 733 * call_rcu() - Queue an RCU callback for invocation after a grace period.
699 * @head: structure to be used for queueing the RCU updates. 734 * @head: structure to be used for queueing the RCU updates.
@@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void)
1140{ 1175{
1141 rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); 1176 rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
1142 rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data)); 1177 rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
1143 rcu_preempt_do_callbacks(); 1178 rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
1144} 1179}
1145 1180
1146static void rcu_cpu_kthread_setup(unsigned int cpu) 1181static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self,
1607 1642
1608 for_each_online_cpu(cpu) { 1643 for_each_online_cpu(cpu) {
1609 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1644 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1610 cond_resched_rcu_qs(); 1645 cond_resched_tasks_rcu_qs();
1611 } 1646 }
1612 1647
1613 /* Unconditionally decrement: no need to wake ourselves up. */ 1648 /* Unconditionally decrement: no need to wake ourselves up. */
@@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
1780 swake_up_all(sq); 1815 swake_up_all(sq);
1781} 1816}
1782 1817
1783/*
1784 * Set the root rcu_node structure's ->need_future_gp field
1785 * based on the sum of those of all rcu_node structures. This does
1786 * double-count the root rcu_node structure's requests, but this
1787 * is necessary to handle the possibility of a rcu_nocb_kthread()
1788 * having awakened during the time that the rcu_node structures
1789 * were being updated for the end of the previous grace period.
1790 */
1791static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
1792{
1793 rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
1794}
1795
1796static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) 1818static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
1797{ 1819{
1798 return &rnp->nocb_gp_wq[rnp->completed & 0x1]; 1820 return &rnp->nocb_gp_wq[rnp->completed & 0x1];
@@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1966 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1988 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1967 TPS("WakeOvf")); 1989 TPS("WakeOvf"));
1968 } else { 1990 } else {
1969 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, 1991 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
1970 TPS("WakeOvfIsDeferred")); 1992 TPS("WakeOvfIsDeferred"));
1971 } 1993 }
1972 rdp->qlen_last_fqs_check = LONG_MAX / 2; 1994 rdp->qlen_last_fqs_check = LONG_MAX / 2;
@@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2048 struct rcu_node *rnp = rdp->mynode; 2070 struct rcu_node *rnp = rdp->mynode;
2049 2071
2050 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2072 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2051 needwake = rcu_start_future_gp(rnp, rdp, &c); 2073 c = rcu_cbs_completed(rdp->rsp, rnp);
2074 needwake = rcu_start_this_gp(rnp, rdp, c);
2052 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2075 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2053 if (needwake) 2076 if (needwake)
2054 rcu_gp_kthread_wake(rdp->rsp); 2077 rcu_gp_kthread_wake(rdp->rsp);
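
Note on the hunk above: with rcu_start_future_gp() gone, rcu_nocb_wait_gp() first asks rcu_cbs_completed() which grace-period number its callbacks need, registers that exact request with rcu_start_this_gp(), and then waits for ->completed to reach it. A toy model of the "compute the needed GP number, then wait for it" step, using a condition variable in place of the nocb wait queue; cbs_completed() and gp_kthread() are stand-ins, not kernel code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t gp_ended = PTHREAD_COND_INITIALIZER;
static unsigned long completed;		/* number of grace periods finished */

/* Stand-in for rcu_cbs_completed(): new callbacks need the GP after next. */
static unsigned long cbs_completed(void)
{
	return completed + 2;
}

/* Stand-in for the grace-period kthread finishing one GP at a time. */
static void *gp_kthread(void *arg)
{
	int i;

	(void)arg;
	for (i = 0; i < 3; i++) {
		pthread_mutex_lock(&lock);
		completed++;
		pthread_cond_broadcast(&gp_ended);
		pthread_mutex_unlock(&lock);
	}
	return NULL;
}

int main(void)
{
	pthread_t gp;
	unsigned long c;

	pthread_mutex_lock(&lock);
	c = cbs_completed();		/* GP number these callbacks need */
	pthread_mutex_unlock(&lock);

	pthread_create(&gp, NULL, gp_kthread, NULL);

	pthread_mutex_lock(&lock);	/* wait until that GP has completed */
	while (completed < c)
		pthread_cond_wait(&gp_ended, &lock);
	pthread_mutex_unlock(&lock);
	printf("grace period %lu completed\n", c);

	pthread_join(gp, NULL);
	return 0;
}
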
@@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2057 * Wait for the grace period. Do so interruptibly to avoid messing 2080 * Wait for the grace period. Do so interruptibly to avoid messing
2058 * up the load average. 2081 * up the load average.
2059 */ 2082 */
2060 trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait")); 2083 trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
2061 for (;;) { 2084 for (;;) {
2062 swait_event_interruptible( 2085 swait_event_interruptible(
2063 rnp->nocb_gp_wq[c & 0x1], 2086 rnp->nocb_gp_wq[c & 0x1],
@@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2065 if (likely(d)) 2088 if (likely(d))
2066 break; 2089 break;
2067 WARN_ON(signal_pending(current)); 2090 WARN_ON(signal_pending(current));
2068 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); 2091 trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
2069 } 2092 }
2070 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); 2093 trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
2071 smp_mb(); /* Ensure that CB invocation happens after GP end. */ 2094 smp_mb(); /* Ensure that CB invocation happens after GP end. */
2072} 2095}
2073 2096
@@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg)
2236 cl++; 2259 cl++;
2237 c++; 2260 c++;
2238 local_bh_enable(); 2261 local_bh_enable();
2239 cond_resched_rcu_qs(); 2262 cond_resched_tasks_rcu_qs();
2240 list = next; 2263 list = next;
2241 } 2264 }
2242 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2265 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
@@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2292void __init rcu_init_nohz(void) 2315void __init rcu_init_nohz(void)
2293{ 2316{
2294 int cpu; 2317 int cpu;
2295 bool need_rcu_nocb_mask = true; 2318 bool need_rcu_nocb_mask = false;
2296 struct rcu_state *rsp; 2319 struct rcu_state *rsp;
2297 2320
2298#if defined(CONFIG_NO_HZ_FULL) 2321#if defined(CONFIG_NO_HZ_FULL)
@@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void)
2315#endif /* #if defined(CONFIG_NO_HZ_FULL) */ 2338#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2316 2339
2317 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) { 2340 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
2318 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n"); 2341 pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
2319 cpumask_and(rcu_nocb_mask, cpu_possible_mask, 2342 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2320 rcu_nocb_mask); 2343 rcu_nocb_mask);
2321 } 2344 }
@@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
2495{ 2518{
2496} 2519}
2497 2520
2498static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
2499{
2500}
2501
2502static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp) 2521static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
2503{ 2522{
2504 return NULL; 2523 return NULL;
@@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
2587} 2606}
2588 2607
2589/* 2608/*
2590 * Bind the grace-period kthread for the sysidle flavor of RCU to the 2609 * Bind the RCU grace-period kthreads to the housekeeping CPU.
2591 * timekeeping CPU.
2592 */ 2610 */
2593static void rcu_bind_gp_kthread(void) 2611static void rcu_bind_gp_kthread(void)
2594{ 2612{
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 68fa19a5e7bd..4c230a60ece4 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode);
226 226
227#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ 227#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
228 228
229#ifdef CONFIG_PREEMPT_RCU
230
231/*
232 * Preemptible RCU implementation for rcu_read_lock().
233 * Just increment ->rcu_read_lock_nesting, shared state will be updated
234 * if we block.
235 */
236void __rcu_read_lock(void)
237{
238 current->rcu_read_lock_nesting++;
239 barrier(); /* critical section after entry code. */
240}
241EXPORT_SYMBOL_GPL(__rcu_read_lock);
242
243/*
244 * Preemptible RCU implementation for rcu_read_unlock().
245 * Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
246 * rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
247 * invoke rcu_read_unlock_special() to clean up after a context switch
248 * in an RCU read-side critical section and other special cases.
249 */
250void __rcu_read_unlock(void)
251{
252 struct task_struct *t = current;
253
254 if (t->rcu_read_lock_nesting != 1) {
255 --t->rcu_read_lock_nesting;
256 } else {
257 barrier(); /* critical section before exit code. */
258 t->rcu_read_lock_nesting = INT_MIN;
259 barrier(); /* assign before ->rcu_read_unlock_special load */
260 if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
261 rcu_read_unlock_special(t);
262 barrier(); /* ->rcu_read_unlock_special load before assign */
263 t->rcu_read_lock_nesting = 0;
264 }
265#ifdef CONFIG_PROVE_LOCKING
266 {
267 int rrln = READ_ONCE(t->rcu_read_lock_nesting);
268
269 WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
270 }
271#endif /* #ifdef CONFIG_PROVE_LOCKING */
272}
273EXPORT_SYMBOL_GPL(__rcu_read_unlock);
274
275#endif /* #ifdef CONFIG_PREEMPT_RCU */
276
277#ifdef CONFIG_DEBUG_LOCK_ALLOC 229#ifdef CONFIG_DEBUG_LOCK_ALLOC
278static struct lock_class_key rcu_lock_key; 230static struct lock_class_key rcu_lock_key;
279struct lockdep_map rcu_lock_map = 231struct lockdep_map rcu_lock_map =
@@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);
624 * grace period has elapsed, in other words after all currently 576 * grace period has elapsed, in other words after all currently
625 * executing rcu-tasks read-side critical sections have elapsed. These 577 * executing rcu-tasks read-side critical sections have elapsed. These
626 * read-side critical sections are delimited by calls to schedule(), 578 * read-side critical sections are delimited by calls to schedule(),
627 * cond_resched_rcu_qs(), idle execution, userspace execution, calls 579 * cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
628 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched(). 580 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
629 * 581 *
630 * This is a very specialized primitive, intended only for a few uses in 582 * This is a very specialized primitive, intended only for a few uses in
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 211890edf37e..e27034bd954e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5025,20 +5025,6 @@ int __cond_resched_lock(spinlock_t *lock)
5025} 5025}
5026EXPORT_SYMBOL(__cond_resched_lock); 5026EXPORT_SYMBOL(__cond_resched_lock);
5027 5027
5028int __sched __cond_resched_softirq(void)
5029{
5030 BUG_ON(!in_softirq());
5031
5032 if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
5033 local_bh_enable();
5034 preempt_schedule_common();
5035 local_bh_disable();
5036 return 1;
5037 }
5038 return 0;
5039}
5040EXPORT_SYMBOL(__cond_resched_softirq);
5041
5042/** 5028/**
5043 * yield - yield the current processor to other threads. 5029 * yield - yield the current processor to other threads.
5044 * 5030 *
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 177de3640c78..03981f1c39ea 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt)
145} 145}
146 146
147/* 147/*
148 * Special-case - softirqs can safely be enabled in 148 * Special-case - softirqs can safely be enabled by __do_softirq(),
149 * cond_resched_softirq(), or by __do_softirq(),
150 * without processing still-pending softirqs: 149 * without processing still-pending softirqs:
151 */ 150 */
152void _local_bh_enable(void) 151void _local_bh_enable(void)
diff --git a/kernel/torture.c b/kernel/torture.c
index 37b94012a3f8..3de1efbecd6a 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -574,7 +574,7 @@ void stutter_wait(const char *title)
574{ 574{
575 int spt; 575 int spt;
576 576
577 cond_resched_rcu_qs(); 577 cond_resched_tasks_rcu_qs();
578 spt = READ_ONCE(stutter_pause_test); 578 spt = READ_ONCE(stutter_pause_test);
579 for (; spt; spt = READ_ONCE(stutter_pause_test)) { 579 for (; spt; spt = READ_ONCE(stutter_pause_test)) {
580 if (spt == 1) { 580 if (spt == 1) {
diff --git a/kernel/trace/trace_benchmark.c b/kernel/trace/trace_benchmark.c
index 22fee766081b..80e0b2aca703 100644
--- a/kernel/trace/trace_benchmark.c
+++ b/kernel/trace/trace_benchmark.c
@@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg)
159 * wants to run, schedule in, but if the CPU is idle, 159 * wants to run, schedule in, but if the CPU is idle,
160 * we'll keep burning cycles. 160 * we'll keep burning cycles.
161 * 161 *
162 * Note the _rcu_qs() version of cond_resched() will 162 * Note the tasks_rcu_qs() version of cond_resched() will
163 * notify synchronize_rcu_tasks() that this thread has 163 * notify synchronize_rcu_tasks() that this thread has
164 * passed a quiescent state for rcu_tasks. Otherwise 164 * passed a quiescent state for rcu_tasks. Otherwise
165 * this thread will never voluntarily schedule which would 165 * this thread will never voluntarily schedule which would
166 * block synchronize_rcu_tasks() indefinitely. 166 * block synchronize_rcu_tasks() indefinitely.
167 */ 167 */
168 cond_resched(); 168 cond_resched_tasks_rcu_qs();
169 } 169 }
170 170
171 return 0; 171 return 0;
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000000..98f650c9bf54
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
1#!/bin/sh
2#
3# Invoke a text editor on all console.log files for all runs with diagnostics,
4# that is, on all such files having a console.log.diags counterpart.
5# Note that both console.log.diags and console.log are passed to the
6# editor (currently defaulting to "vi"), allowing the user to get an
7# idea of what to search for in the console.log file.
8#
9# Usage: kvm-find-errors.sh directory
10#
11# The "directory" above should end with the date/time directory, for example,
12# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
13
14rundir="${1}"
15if test -z "$rundir" -o ! -d "$rundir"
16then
17 echo Usage: $0 directory
18fi
19editor=${EDITOR-vi}
20
21# Find builds with errors
22files=
23for i in ${rundir}/*/Make.out
24do
25 if egrep -q "error:|warning:" < $i
26 then
27 egrep "error:|warning:" < $i > $i.diags
28 files="$files $i.diags $i"
29 fi
30done
31if test -n "$files"
32then
33 $editor $files
34else
35 echo No build errors.
36fi
37if grep -q -e "--buildonly" < ${rundir}/log
38then
39 echo Build-only run, no console logs to check.
40fi
41
42# Find console logs with errors
43files=
44for i in ${rundir}/*/console.log
45do
46 if test -r $i.diags
47 then
48 files="$files $i.diags $i"
49 fi
50done
51if test -n "$files"
52then
53 $editor $files
54else
55 echo No errors in console logs.
56fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index c2e1bb6d0cba..477ecb1293ab 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -34,11 +34,15 @@ fi
34 34
35configfile=`echo $i | sed -e 's/^.*\///'` 35configfile=`echo $i | sed -e 's/^.*\///'`
36ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` 36ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
37stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
38 tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
39 awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
40 tr -d '\012\015'`"
37if test -z "$ngps" 41if test -z "$ngps"
38then 42then
39 echo "$configfile -------" 43 echo "$configfile ------- " $stopstate
40else 44else
41 title="$configfile ------- $ngps grace periods" 45 title="$configfile ------- $ngps GPs"
42 dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` 46 dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
43 if test -z "$dur" 47 if test -z "$dur"
44 then 48 then
@@ -46,9 +50,9 @@ else
46 else 50 else
47 ngpsps=`awk -v ngps=$ngps -v dur=$dur ' 51 ngpsps=`awk -v ngps=$ngps -v dur=$dur '
48 BEGIN { print ngps / dur }' < /dev/null` 52 BEGIN { print ngps / dur }' < /dev/null`
49 title="$title ($ngpsps per second)" 53 title="$title ($ngpsps/s)"
50 fi 54 fi
51 echo $title 55 echo $title $stopstate
52 nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` 56 nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
53 if test -z "$nclosecalls" 57 if test -z "$nclosecalls"
54 then 58 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index f7e988f369dd..c27e97824163 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -48,10 +48,6 @@ do
48 cat $i/Make.oldconfig.err 48 cat $i/Make.oldconfig.err
49 fi 49 fi
50 parse-build.sh $i/Make.out $configfile 50 parse-build.sh $i/Make.out $configfile
51 if test "$TORTURE_SUITE" != rcuperf
52 then
53 parse-torture.sh $i/console.log $configfile
54 fi
55 parse-console.sh $i/console.log $configfile 51 parse-console.sh $i/console.log $configfile
56 if test -r $i/Warnings 52 if test -r $i/Warnings
57 then 53 then
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 5f8fbb0d7c17..c5b0f94341d9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -267,5 +267,4 @@ then
267 echo Unknown PID, cannot kill qemu command 267 echo Unknown PID, cannot kill qemu command
268fi 268fi
269 269
270parse-torture.sh $resdir/console.log $title
271parse-console.sh $resdir/console.log $title 270parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 08aa7d50ae0e..17293436f551 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -24,57 +24,146 @@
24# 24#
25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 25# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
26 26
27T=${TMPDIR-/tmp}/parse-console.sh.$$
27file="$1" 28file="$1"
28title="$2" 29title="$2"
29 30
31trap 'rm -f $T.seq $T.diags' 0
32
30. functions.sh 33. functions.sh
31 34
35# Check for presence and readability of console output file
36if test -f "$file" -a -r "$file"
37then
38 :
39else
40 echo $title unreadable console output file: $file
41 exit 1
42fi
32if grep -Pq '\x00' < $file 43if grep -Pq '\x00' < $file
33then 44then
34 print_warning Console output contains nul bytes, old qemu still running? 45 print_warning Console output contains nul bytes, old qemu still running?
35fi 46fi
36egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags 47cat /dev/null > $file.diags
37if test -s $1.diags 48
49# Check for proper termination, except that rcuperf runs don't indicate this.
50if test "$TORTURE_SUITE" != rcuperf
38then 51then
39 print_warning Assertion failure in $file $title 52 # check for abject failure
40 # cat $1.diags 53
54 if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
55 then
56 nerrs=`grep --binary-files=text '!!!' $file |
57 tail -1 |
58 awk '
59 {
60 for (i=NF-8;i<=NF;i++)
61 sum+=$i;
62 }
63 END { print sum }'`
64 print_bug $title FAILURE, $nerrs instances
65 exit
66 fi
67
68 grep --binary-files=text 'torture:.*ver:' $file |
69 egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
70 sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
71 awk '
72 BEGIN {
73 ver = 0;
74 badseq = 0;
75 }
76
77 {
78 if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
79 badseqno1 = ver;
80 badseqno2 = $5;
81 badseqnr = NR;
82 badseq = 1;
83 }
84 ver = $5
85 }
86
87 END {
88 if (badseq) {
89 if (badseqno1 == badseqno2 && badseqno2 == ver)
90 print "GP HANG at " ver " torture stat " badseqnr;
91 else
92 print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
93 }
94 }' > $T.seq
95
96 if grep -q SUCCESS $file
97 then
98 if test -s $T.seq
99 then
100 print_warning $title `cat $T.seq`
101 echo " " $file
102 exit 2
103 fi
104 else
105 if grep -q "_HOTPLUG:" $file
106 then
107 print_warning HOTPLUG FAILURES $title `cat $T.seq`
108 echo " " $file
109 exit 3
110 fi
111 echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
112 if test -s $T.seq
113 then
114 print_warning $title `cat $T.seq`
115 fi
116 exit 2
117 fi
118fi | tee -a $file.diags
119
120egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
121grep -v 'ODEBUG: ' |
122grep -v 'Warning: unable to open an initial console' > $T.diags
123if test -s $T.diags
124then
125 print_warning "Assertion failure in $file $title"
126 # cat $T.diags
41 summary="" 127 summary=""
42 n_badness=`grep -c Badness $1` 128 n_badness=`grep -c Badness $file`
43 if test "$n_badness" -ne 0 129 if test "$n_badness" -ne 0
44 then 130 then
45 summary="$summary Badness: $n_badness" 131 summary="$summary Badness: $n_badness"
46 fi 132 fi
47 n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` 133 n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
48 if test "$n_warn" -ne 0 134 if test "$n_warn" -ne 0
49 then 135 then
50 summary="$summary Warnings: $n_warn" 136 summary="$summary Warnings: $n_warn"
51 fi 137 fi
52 n_bugs=`egrep -c 'BUG|Oops:' $1` 138 n_bugs=`egrep -c 'BUG|Oops:' $file`
53 if test "$n_bugs" -ne 0 139 if test "$n_bugs" -ne 0
54 then 140 then
55 summary="$summary Bugs: $n_bugs" 141 summary="$summary Bugs: $n_bugs"
56 fi 142 fi
57 n_calltrace=`grep -c 'Call Trace:' $1` 143 n_calltrace=`grep -c 'Call Trace:' $file`
58 if test "$n_calltrace" -ne 0 144 if test "$n_calltrace" -ne 0
59 then 145 then
60 summary="$summary Call Traces: $n_calltrace" 146 summary="$summary Call Traces: $n_calltrace"
61 fi 147 fi
62 n_lockdep=`grep -c =========== $1` 148 n_lockdep=`grep -c =========== $file`
63 if test "$n_badness" -ne 0 149 if test "$n_badness" -ne 0
64 then 150 then
65 summary="$summary lockdep: $n_badness" 151 summary="$summary lockdep: $n_badness"
66 fi 152 fi
67 n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` 153 n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
68 if test "$n_stalls" -ne 0 154 if test "$n_stalls" -ne 0
69 then 155 then
70 summary="$summary Stalls: $n_stalls" 156 summary="$summary Stalls: $n_stalls"
71 fi 157 fi
72 n_starves=`grep -c 'rcu_.*kthread starved for' $1` 158 n_starves=`grep -c 'rcu_.*kthread starved for' $file`
73 if test "$n_starves" -ne 0 159 if test "$n_starves" -ne 0
74 then 160 then
75 summary="$summary Starves: $n_starves" 161 summary="$summary Starves: $n_starves"
76 fi 162 fi
77 print_warning Summary: $summary 163 print_warning Summary: $summary
78else 164 cat $T.diags >> $file.diags
79 rm $1.diags 165fi
166if ! test -s $file.diags
167then
168 rm -f $file.diags
80fi 169fi
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
deleted file mode 100755
index 5987e50cfeb4..000000000000
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ /dev/null
@@ -1,105 +0,0 @@
1#!/bin/bash
2#
3# Check the console output from a torture run for goodness.
4# The "file" is a pathname on the local system, and "title" is
5# a text string for error-message purposes.
6#
7# The file must contain torture output, but can be interspersed
8# with other dmesg text, as in console-log output.
9#
10# Usage: parse-torture.sh file title
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, you can access it online at
24# http://www.gnu.org/licenses/gpl-2.0.html.
25#
26# Copyright (C) IBM Corporation, 2011
27#
28# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
29
30T=${TMPDIR-/tmp}/parse-torture.sh.$$
31file="$1"
32title="$2"
33
34trap 'rm -f $T.seq' 0
35
36. functions.sh
37
38# check for presence of torture output file.
39
40if test -f "$file" -a -r "$file"
41then
42 :
43else
44 echo $title unreadable torture output file: $file
45 exit 1
46fi
47
48# check for abject failure
49
50if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
51then
52 nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
53 print_bug $title FAILURE, $nerrs instances
54 echo " " $url
55 exit
56fi
57
58grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
59awk '
60BEGIN {
61 ver = 0;
62 badseq = 0;
63 }
64
65 {
66 if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
67 badseqno1 = ver;
68 badseqno2 = $5;
69 badseqnr = NR;
70 badseq = 1;
71 }
72 ver = $5
73 }
74
75END {
76 if (badseq) {
77 if (badseqno1 == badseqno2 && badseqno2 == ver)
78 print "GP HANG at " ver " torture stat " badseqnr;
79 else
80 print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
81 }
82 }' > $T.seq
83
84if grep -q SUCCESS $file
85then
86 if test -s $T.seq
87 then
88 print_warning $title $title `cat $T.seq`
89 echo " " $file
90 exit 2
91 fi
92else
93 if grep -q "_HOTPLUG:" $file
94 then
95 print_warning HOTPLUG FAILURES $title `cat $T.seq`
96 echo " " $file
97 exit 3
98 fi
99 echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
100 if test -s $T.seq
101 then
102 print_warning $title `cat $T.seq`
103 fi
104 exit 2
105fi