author     Linus Torvalds <torvalds@linux-foundation.org>  2018-10-23 07:31:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-10-23 07:31:17 -0400
commit     cee1352f792646ae87e65f8bfb0ae91ff3d2cb95 (patch)
tree       d44817238a45a095c4737bb905e2157e695d43c4 /kernel
parent     e2b623fbe6a34bce1332584212ae101ebc2508f5 (diff)
parent     d0346559a7c358a5328e1fa43135acc548c0f224 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RCU updates from Ingo Molnar:
 "The biggest change in this cycle is the conclusion of the big
  'simplify RCU to two primary flavors' consolidation work - i.e.
  there's a single RCU flavor for any kernel variant (PREEMPT and
  !PREEMPT):

   - Consolidate the RCU-bh, RCU-preempt, and RCU-sched flavors into a
     single flavor similar to RCU-sched in !PREEMPT kernels and into a
     single flavor similar to RCU-preempt (but also waiting on
     preempt-disabled sequences of code) in PREEMPT kernels.

     This branch also includes a refactoring of
     rcu_{nmi,irq}_{enter,exit}() from Byungchul Park.

   - Now that there is only one RCU flavor in any given running kernel,
     the many "rsp" pointers are no longer required, and this cleanup
     series removes them.

   - This branch carries out additional cleanups made possible by the
     RCU flavor consolidation, including inlining now-trivial
     functions, updating comments and definitions, and removing
     now-unneeded rcutorture scenarios.

   - Now that there is only one flavor of RCU in any running kernel,
     there is also only one rcu_data structure per CPU. This means
     that the rcu_dynticks structure can be merged into the rcu_data
     structure, a task taken on by this branch.

     This branch also contains a -rt-related fix from Mike Galbraith.

  There were also other updates:

   - Documentation updates, including some good-eye catches from Joel
     Fernandes.

   - SRCU updates, most notably changes enabling call_srcu() to be
     invoked very early in the boot sequence.

   - Torture-test updates, including some preliminary work towards
     making rcutorture better able to find problems that result in
     insufficient grace-period forward progress.

   - Initial changes to RCU to better promote forward progress of
     grace periods, including fixing a bug found by Marius Hillenbrand
     and David Woodhouse, with the fix suggested by Peter Zijlstra"

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (140 commits)
  srcu: Make early-boot call_srcu() reuse workqueue lists
  rcutorture: Test early boot call_srcu()
  srcu: Make call_srcu() available during very early boot
  rcu: Convert rcu_state.ofl_lock to raw_spinlock_t
  rcu: Remove obsolete ->dynticks_fqs and ->cond_resched_completed
  rcu: Switch ->dynticks to rcu_data structure, remove rcu_dynticks
  rcu: Switch dyntick nesting counters to rcu_data structure
  rcu: Switch urgent quiescent-state requests to rcu_data structure
  rcu: Switch lazy counts to rcu_data structure
  rcu: Switch last accelerate/advance to rcu_data structure
  rcu: Switch ->tick_nohz_enabled_snap to rcu_data structure
  rcu: Merge rcu_dynticks structure into rcu_data structure
  rcu: Remove unused rcu_dynticks_snap() from Tiny RCU
  rcu: Convert "1UL << x" to "BIT(x)"
  rcu: Avoid resched_cpu() when rescheduling the current CPU
  rcu: More aggressively enlist scheduler aid for nohz_full CPUs
  rcu: Compute jiffies_till_sched_qs from other kernel parameters
  rcu: Provide functions for determining if call_rcu() has been invoked
  rcu: Eliminate ->rcu_qs_ctr from the rcu_dynticks structure
  rcu: Motivate Tiny RCU forward progress
  ...
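As background for the flavor consolidation described above, here is a minimal sketch (not taken from this merge; struct foo and its helpers are hypothetical) of what the change means for update-side code: the single remaining grace period also waits for softirq-disabled and preempt-disabled regions, so callers that previously used call_rcu_bh() or call_rcu_sched() can use plain call_rcu():

  #include <linux/kernel.h>
  #include <linux/rculist.h>
  #include <linux/rcupdate.h>
  #include <linux/slab.h>

  /* Hypothetical example structure, used only for illustration. */
  struct foo {
          struct list_head list;
          struct rcu_head rh;
          int data;
  };

  static void foo_reclaim_cb(struct rcu_head *rhp)
  {
          kfree(container_of(rhp, struct foo, rh));
  }

  /* Remove an element and defer its freeing past a grace period. */
  static void foo_remove(struct foo *fp)
  {
          list_del_rcu(&fp->list);
          /*
           * Before the consolidation this might have been call_rcu_bh()
           * or call_rcu_sched(); the single flavor now covers those cases.
           */
          call_rcu(&fp->rh, foo_reclaim_cb);
  }

Read-side code is likewise unchanged: rcu_read_lock()/rcu_read_unlock() readers, as well as bh-disabled and preempt-disabled regions, are all waited on by this one grace period.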
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcu/Kconfig        |   14
-rw-r--r--  kernel/rcu/rcu.h          |   67
-rw-r--r--  kernel/rcu/rcuperf.c      |   66
-rw-r--r--  kernel/rcu/rcutorture.c   |  397
-rw-r--r--  kernel/rcu/srcutiny.c     |   29
-rw-r--r--  kernel/rcu/srcutree.c     |   31
-rw-r--r--  kernel/rcu/tiny.c         |  154
-rw-r--r--  kernel/rcu/tree.c         | 2213
-rw-r--r--  kernel/rcu/tree.h         |  132
-rw-r--r--  kernel/rcu/tree_exp.h     |  426
-rw-r--r--  kernel/rcu/tree_plugin.h  |  790
-rw-r--r--  kernel/rcu/update.c       |   70
-rw-r--r--  kernel/softirq.c          |    3
-rw-r--r--  kernel/torture.c          |    3
14 files changed, 2008 insertions, 2387 deletions
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 9210379c0353..939a2056c87a 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -196,7 +196,7 @@ config RCU_BOOST
196 This option boosts the priority of preempted RCU readers that 196 This option boosts the priority of preempted RCU readers that
197 block the current preemptible RCU grace period for too long. 197 block the current preemptible RCU grace period for too long.
198 This option also prevents heavy loads from blocking RCU 198 This option also prevents heavy loads from blocking RCU
199 callback invocation for all flavors of RCU. 199 callback invocation.
200 200
201 Say Y here if you are working with real-time apps or heavy loads 201 Say Y here if you are working with real-time apps or heavy loads
202 Say N here if you are unsure. 202 Say N here if you are unsure.
@@ -225,12 +225,12 @@ config RCU_NOCB_CPU
225 callback invocation to energy-efficient CPUs in battery-powered 225 callback invocation to energy-efficient CPUs in battery-powered
226 asymmetric multiprocessors. 226 asymmetric multiprocessors.
227 227
228 This option offloads callback invocation from the set of 228 This option offloads callback invocation from the set of CPUs
229 CPUs specified at boot time by the rcu_nocbs parameter. 229 specified at boot time by the rcu_nocbs parameter. For each
230 For each such CPU, a kthread ("rcuox/N") will be created to 230 such CPU, a kthread ("rcuox/N") will be created to invoke
231 invoke callbacks, where the "N" is the CPU being offloaded, 231 callbacks, where the "N" is the CPU being offloaded, and where
232 and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and 232 the "p" for RCU-preempt (PREEMPT kernels) and "s" for RCU-sched
233 "s" for RCU-sched. Nothing prevents this kthread from running 233 (!PREEMPT kernels). Nothing prevents this kthread from running
234 on the specified CPUs, but (1) the kthreads may be preempted 234 on the specified CPUs, but (1) the kthreads may be preempted
235 between each callback, and (2) affinity or cgroups can be used 235 between each callback, and (2) affinity or cgroups can be used
236 to force the kthreads to run on whatever set of CPUs is desired. 236 to force the kthreads to run on whatever set of CPUs is desired.
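As a usage note for the help text above (not part of the patch): callback offloading is selected with the rcu_nocbs= boot parameter, which takes a CPU list, and with the flavors consolidated each offloaded CPU gets a single rcuo kthread rather than one per flavor. For example, a kernel command line might include something like:

  rcu_nocbs=1-7

so that RCU callbacks for CPUs 1-7 are invoked by rcuo kthreads rather than in softirq context on those CPUs.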
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 4d04683c31b2..2866166863f0 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -176,8 +176,9 @@ static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
176 176
177/* 177/*
178 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally 178 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
179 * by call_rcu() and rcu callback execution, and are therefore not part of the 179 * by call_rcu() and rcu callback execution, and are therefore not part
180 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors. 180 * of the RCU API. These are in rcupdate.h because they are used by all
181 * RCU implementations.
181 */ 182 */
182 183
183#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 184#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -223,6 +224,7 @@ void kfree(const void *);
223 */ 224 */
224static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) 225static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
225{ 226{
227 rcu_callback_t f;
226 unsigned long offset = (unsigned long)head->func; 228 unsigned long offset = (unsigned long)head->func;
227 229
228 rcu_lock_acquire(&rcu_callback_map); 230 rcu_lock_acquire(&rcu_callback_map);
@@ -233,7 +235,9 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head)
233 return true; 235 return true;
234 } else { 236 } else {
235 RCU_TRACE(trace_rcu_invoke_callback(rn, head);) 237 RCU_TRACE(trace_rcu_invoke_callback(rn, head);)
236 head->func(head); 238 f = head->func;
239 WRITE_ONCE(head->func, (rcu_callback_t)0L);
240 f(head);
237 rcu_lock_release(&rcu_callback_map); 241 rcu_lock_release(&rcu_callback_map);
238 return false; 242 return false;
239 } 243 }
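The hunk above makes __rcu_reclaim() snapshot ->func and zero it before invoking the callback, which appears to support the shortlog entry "rcu: Provide functions for determining if call_rcu() has been invoked": while a callback is queued, ->func remains non-NULL. A sketch of that invariant, with hypothetical helper names (these are not necessarily the helpers the series adds):

  #include <linux/compiler.h>
  #include <linux/types.h>

  /*
   * Illustration only: shows the invariant created by the hunk above,
   * namely that ->func stays set from call_rcu() until just before the
   * callback runs, at which point it is written to zero.
   */
  static inline void example_rcu_head_mark_unqueued(struct rcu_head *rhp)
  {
          WRITE_ONCE(rhp->func, (rcu_callback_t)0L);
  }

  static inline bool example_rcu_head_queued(struct rcu_head *rhp,
                                             rcu_callback_t f)
  {
          /* True if call_rcu(rhp, f) was posted and f has not yet run. */
          return READ_ONCE(rhp->func) == f;
  }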
@@ -328,40 +332,35 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
328 } 332 }
329} 333}
330 334
331/* Returns first leaf rcu_node of the specified RCU flavor. */ 335/* Returns a pointer to the first leaf rcu_node structure. */
332#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1]) 336#define rcu_first_leaf_node() (rcu_state.level[rcu_num_lvls - 1])
333 337
334/* Is this rcu_node a leaf? */ 338/* Is this rcu_node a leaf? */
335#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1) 339#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
336 340
337/* Is this rcu_node the last leaf? */ 341/* Is this rcu_node the last leaf? */
338#define rcu_is_last_leaf_node(rsp, rnp) ((rnp) == &(rsp)->node[rcu_num_nodes - 1]) 342#define rcu_is_last_leaf_node(rnp) ((rnp) == &rcu_state.node[rcu_num_nodes - 1])
339 343
340/* 344/*
341 * Do a full breadth-first scan of the rcu_node structures for the 345 * Do a full breadth-first scan of the {s,}rcu_node structures for the
342 * specified rcu_state structure. 346 * specified state structure (for SRCU) or the only rcu_state structure
347 * (for RCU).
343 */ 348 */
344#define rcu_for_each_node_breadth_first(rsp, rnp) \ 349#define srcu_for_each_node_breadth_first(sp, rnp) \
345 for ((rnp) = &(rsp)->node[0]; \ 350 for ((rnp) = &(sp)->node[0]; \
346 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) 351 (rnp) < &(sp)->node[rcu_num_nodes]; (rnp)++)
352#define rcu_for_each_node_breadth_first(rnp) \
353 srcu_for_each_node_breadth_first(&rcu_state, rnp)
347 354
348/* 355/*
349 * Do a breadth-first scan of the non-leaf rcu_node structures for the 356 * Scan the leaves of the rcu_node hierarchy for the rcu_state structure.
350 * specified rcu_state structure. Note that if there is a singleton 357 * Note that if there is a singleton rcu_node tree with but one rcu_node
351 * rcu_node tree with but one rcu_node structure, this loop is a no-op. 358 * structure, this loop -will- visit the rcu_node structure. It is still
359 * a leaf node, even if it is also the root node.
352 */ 360 */
353#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ 361#define rcu_for_each_leaf_node(rnp) \
354 for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rsp, rnp); (rnp)++) 362 for ((rnp) = rcu_first_leaf_node(); \
355 363 (rnp) < &rcu_state.node[rcu_num_nodes]; (rnp)++)
356/*
357 * Scan the leaves of the rcu_node hierarchy for the specified rcu_state
358 * structure. Note that if there is a singleton rcu_node tree with but
359 * one rcu_node structure, this loop -will- visit the rcu_node structure.
360 * It is still a leaf node, even if it is also the root node.
361 */
362#define rcu_for_each_leaf_node(rsp, rnp) \
363 for ((rnp) = rcu_first_leaf_node(rsp); \
364 (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
365 364
366/* 365/*
367 * Iterate over all possible CPUs in a leaf RCU node. 366 * Iterate over all possible CPUs in a leaf RCU node.
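Since the traversal macros above have lost their rsp argument, a short usage sketch may help; this is illustrative only, with a hypothetical caller and loop body, and assumes kernel/rcu-internal context where struct rcu_node and these macros are visible:

  /* Illustration only: walk the leaf rcu_node structures of the single rcu_state. */
  static void example_scan_leaf_nodes(void)
  {
          struct rcu_node *rnp;

          rcu_for_each_leaf_node(rnp) {
                  /* Per-leaf work goes here, e.g. examining rnp->qsmask. */
          }
  }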
@@ -435,6 +434,12 @@ do { \
435 434
436#endif /* #if defined(SRCU) || !defined(TINY_RCU) */ 435#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
437 436
437#ifdef CONFIG_SRCU
438void srcu_init(void);
439#else /* #ifdef CONFIG_SRCU */
440static inline void srcu_init(void) { }
441#endif /* #else #ifdef CONFIG_SRCU */
442
438#ifdef CONFIG_TINY_RCU 443#ifdef CONFIG_TINY_RCU
439/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ 444/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
440static inline bool rcu_gp_is_normal(void) { return true; } 445static inline bool rcu_gp_is_normal(void) { return true; }
@@ -515,29 +520,19 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
515 520
516#ifdef CONFIG_TINY_RCU 521#ifdef CONFIG_TINY_RCU
517static inline unsigned long rcu_get_gp_seq(void) { return 0; } 522static inline unsigned long rcu_get_gp_seq(void) { return 0; }
518static inline unsigned long rcu_bh_get_gp_seq(void) { return 0; }
519static inline unsigned long rcu_sched_get_gp_seq(void) { return 0; }
520static inline unsigned long rcu_exp_batches_completed(void) { return 0; } 523static inline unsigned long rcu_exp_batches_completed(void) { return 0; }
521static inline unsigned long rcu_exp_batches_completed_sched(void) { return 0; }
522static inline unsigned long 524static inline unsigned long
523srcu_batches_completed(struct srcu_struct *sp) { return 0; } 525srcu_batches_completed(struct srcu_struct *sp) { return 0; }
524static inline void rcu_force_quiescent_state(void) { } 526static inline void rcu_force_quiescent_state(void) { }
525static inline void rcu_bh_force_quiescent_state(void) { }
526static inline void rcu_sched_force_quiescent_state(void) { }
527static inline void show_rcu_gp_kthreads(void) { } 527static inline void show_rcu_gp_kthreads(void) { }
528static inline int rcu_get_gp_kthreads_prio(void) { return 0; } 528static inline int rcu_get_gp_kthreads_prio(void) { return 0; }
529#else /* #ifdef CONFIG_TINY_RCU */ 529#else /* #ifdef CONFIG_TINY_RCU */
530unsigned long rcu_get_gp_seq(void); 530unsigned long rcu_get_gp_seq(void);
531unsigned long rcu_bh_get_gp_seq(void);
532unsigned long rcu_sched_get_gp_seq(void);
533unsigned long rcu_exp_batches_completed(void); 531unsigned long rcu_exp_batches_completed(void);
534unsigned long rcu_exp_batches_completed_sched(void);
535unsigned long srcu_batches_completed(struct srcu_struct *sp); 532unsigned long srcu_batches_completed(struct srcu_struct *sp);
536void show_rcu_gp_kthreads(void); 533void show_rcu_gp_kthreads(void);
537int rcu_get_gp_kthreads_prio(void); 534int rcu_get_gp_kthreads_prio(void);
538void rcu_force_quiescent_state(void); 535void rcu_force_quiescent_state(void);
539void rcu_bh_force_quiescent_state(void);
540void rcu_sched_force_quiescent_state(void);
541extern struct workqueue_struct *rcu_gp_wq; 536extern struct workqueue_struct *rcu_gp_wq;
542extern struct workqueue_struct *rcu_par_gp_wq; 537extern struct workqueue_struct *rcu_par_gp_wq;
543#endif /* #else #ifdef CONFIG_TINY_RCU */ 538#endif /* #else #ifdef CONFIG_TINY_RCU */
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index 34244523550e..b459da70b4fc 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -190,36 +190,6 @@ static struct rcu_perf_ops rcu_ops = {
190}; 190};
191 191
192/* 192/*
193 * Definitions for rcu_bh perf testing.
194 */
195
196static int rcu_bh_perf_read_lock(void) __acquires(RCU_BH)
197{
198 rcu_read_lock_bh();
199 return 0;
200}
201
202static void rcu_bh_perf_read_unlock(int idx) __releases(RCU_BH)
203{
204 rcu_read_unlock_bh();
205}
206
207static struct rcu_perf_ops rcu_bh_ops = {
208 .ptype = RCU_BH_FLAVOR,
209 .init = rcu_sync_perf_init,
210 .readlock = rcu_bh_perf_read_lock,
211 .readunlock = rcu_bh_perf_read_unlock,
212 .get_gp_seq = rcu_bh_get_gp_seq,
213 .gp_diff = rcu_seq_diff,
214 .exp_completed = rcu_exp_batches_completed_sched,
215 .async = call_rcu_bh,
216 .gp_barrier = rcu_barrier_bh,
217 .sync = synchronize_rcu_bh,
218 .exp_sync = synchronize_rcu_bh_expedited,
219 .name = "rcu_bh"
220};
221
222/*
223 * Definitions for srcu perf testing. 193 * Definitions for srcu perf testing.
224 */ 194 */
225 195
@@ -306,36 +276,6 @@ static struct rcu_perf_ops srcud_ops = {
306}; 276};
307 277
308/* 278/*
309 * Definitions for sched perf testing.
310 */
311
312static int sched_perf_read_lock(void)
313{
314 preempt_disable();
315 return 0;
316}
317
318static void sched_perf_read_unlock(int idx)
319{
320 preempt_enable();
321}
322
323static struct rcu_perf_ops sched_ops = {
324 .ptype = RCU_SCHED_FLAVOR,
325 .init = rcu_sync_perf_init,
326 .readlock = sched_perf_read_lock,
327 .readunlock = sched_perf_read_unlock,
328 .get_gp_seq = rcu_sched_get_gp_seq,
329 .gp_diff = rcu_seq_diff,
330 .exp_completed = rcu_exp_batches_completed_sched,
331 .async = call_rcu_sched,
332 .gp_barrier = rcu_barrier_sched,
333 .sync = synchronize_sched,
334 .exp_sync = synchronize_sched_expedited,
335 .name = "sched"
336};
337
338/*
339 * Definitions for RCU-tasks perf testing. 279 * Definitions for RCU-tasks perf testing.
340 */ 280 */
341 281
@@ -611,7 +551,7 @@ rcu_perf_cleanup(void)
611 kfree(writer_n_durations); 551 kfree(writer_n_durations);
612 } 552 }
613 553
614 /* Do flavor-specific cleanup operations. */ 554 /* Do torture-type-specific cleanup operations. */
615 if (cur_ops->cleanup != NULL) 555 if (cur_ops->cleanup != NULL)
616 cur_ops->cleanup(); 556 cur_ops->cleanup();
617 557
@@ -661,8 +601,7 @@ rcu_perf_init(void)
661 long i; 601 long i;
662 int firsterr = 0; 602 int firsterr = 0;
663 static struct rcu_perf_ops *perf_ops[] = { 603 static struct rcu_perf_ops *perf_ops[] = {
664 &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops, 604 &rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
665 &tasks_ops,
666 }; 605 };
667 606
668 if (!torture_init_begin(perf_type, verbose)) 607 if (!torture_init_begin(perf_type, verbose))
@@ -680,6 +619,7 @@ rcu_perf_init(void)
680 for (i = 0; i < ARRAY_SIZE(perf_ops); i++) 619 for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
681 pr_cont(" %s", perf_ops[i]->name); 620 pr_cont(" %s", perf_ops[i]->name);
682 pr_cont("\n"); 621 pr_cont("\n");
622 WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST));
683 firsterr = -EINVAL; 623 firsterr = -EINVAL;
684 goto unwind; 624 goto unwind;
685 } 625 }
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index c596c6f1e457..210c77460365 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -66,15 +66,19 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@jos
66/* Bits for ->extendables field, extendables param, and related definitions. */ 66/* Bits for ->extendables field, extendables param, and related definitions. */
67#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */ 67#define RCUTORTURE_RDR_SHIFT 8 /* Put SRCU index in upper bits. */
68#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1) 68#define RCUTORTURE_RDR_MASK ((1 << RCUTORTURE_RDR_SHIFT) - 1)
69#define RCUTORTURE_RDR_BH 0x1 /* Extend readers by disabling bh. */ 69#define RCUTORTURE_RDR_BH 0x01 /* Extend readers by disabling bh. */
70#define RCUTORTURE_RDR_IRQ 0x2 /* ... disabling interrupts. */ 70#define RCUTORTURE_RDR_IRQ 0x02 /* ... disabling interrupts. */
71#define RCUTORTURE_RDR_PREEMPT 0x4 /* ... disabling preemption. */ 71#define RCUTORTURE_RDR_PREEMPT 0x04 /* ... disabling preemption. */
72#define RCUTORTURE_RDR_RCU 0x8 /* ... entering another RCU reader. */ 72#define RCUTORTURE_RDR_RBH 0x08 /* ... rcu_read_lock_bh(). */
73#define RCUTORTURE_RDR_NBITS 4 /* Number of bits defined above. */ 73#define RCUTORTURE_RDR_SCHED 0x10 /* ... rcu_read_lock_sched(). */
74#define RCUTORTURE_MAX_EXTEND (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | \ 74#define RCUTORTURE_RDR_RCU 0x20 /* ... entering another RCU reader. */
75 RCUTORTURE_RDR_PREEMPT) 75#define RCUTORTURE_RDR_NBITS 6 /* Number of bits defined above. */
76#define RCUTORTURE_MAX_EXTEND \
77 (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ | RCUTORTURE_RDR_PREEMPT | \
78 RCUTORTURE_RDR_RBH | RCUTORTURE_RDR_SCHED)
76#define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */ 79#define RCUTORTURE_RDR_MAX_LOOPS 0x7 /* Maximum reader extensions. */
77 /* Must be power of two minus one. */ 80 /* Must be power of two minus one. */
81#define RCUTORTURE_RDR_MAX_SEGS (RCUTORTURE_RDR_MAX_LOOPS + 3)
78 82
79torture_param(int, cbflood_inter_holdoff, HZ, 83torture_param(int, cbflood_inter_holdoff, HZ,
80 "Holdoff between floods (jiffies)"); 84 "Holdoff between floods (jiffies)");
@@ -89,6 +93,12 @@ torture_param(int, fqs_duration, 0,
89 "Duration of fqs bursts (us), 0 to disable"); 93 "Duration of fqs bursts (us), 0 to disable");
90torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 94torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
91torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)"); 95torture_param(int, fqs_stutter, 3, "Wait time between fqs bursts (s)");
96torture_param(bool, fwd_progress, 1, "Test grace-period forward progress");
97torture_param(int, fwd_progress_div, 4, "Fraction of CPU stall to wait");
98torture_param(int, fwd_progress_holdoff, 60,
99 "Time between forward-progress tests (s)");
100torture_param(bool, fwd_progress_need_resched, 1,
101 "Hide cond_resched() behind need_resched()");
92torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives"); 102torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
93torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); 103torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
94torture_param(bool, gp_normal, false, 104torture_param(bool, gp_normal, false,
@@ -125,7 +135,7 @@ torture_param(int, verbose, 1,
125 135
126static char *torture_type = "rcu"; 136static char *torture_type = "rcu";
127module_param(torture_type, charp, 0444); 137module_param(torture_type, charp, 0444);
128MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); 138MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, srcu, ...)");
129 139
130static int nrealreaders; 140static int nrealreaders;
131static int ncbflooders; 141static int ncbflooders;
@@ -137,6 +147,7 @@ static struct task_struct **cbflood_task;
137static struct task_struct *fqs_task; 147static struct task_struct *fqs_task;
138static struct task_struct *boost_tasks[NR_CPUS]; 148static struct task_struct *boost_tasks[NR_CPUS];
139static struct task_struct *stall_task; 149static struct task_struct *stall_task;
150static struct task_struct *fwd_prog_task;
140static struct task_struct **barrier_cbs_tasks; 151static struct task_struct **barrier_cbs_tasks;
141static struct task_struct *barrier_task; 152static struct task_struct *barrier_task;
142 153
@@ -197,6 +208,18 @@ static const char * const rcu_torture_writer_state_names[] = {
197 "RTWS_STOPPING", 208 "RTWS_STOPPING",
198}; 209};
199 210
211/* Record reader segment types and duration for first failing read. */
212struct rt_read_seg {
213 int rt_readstate;
214 unsigned long rt_delay_jiffies;
215 unsigned long rt_delay_ms;
216 unsigned long rt_delay_us;
217 bool rt_preempted;
218};
219static int err_segs_recorded;
220static struct rt_read_seg err_segs[RCUTORTURE_RDR_MAX_SEGS];
221static int rt_read_nsegs;
222
200static const char *rcu_torture_writer_state_getname(void) 223static const char *rcu_torture_writer_state_getname(void)
201{ 224{
202 unsigned int i = READ_ONCE(rcu_torture_writer_state); 225 unsigned int i = READ_ONCE(rcu_torture_writer_state);
@@ -278,7 +301,8 @@ struct rcu_torture_ops {
278 void (*init)(void); 301 void (*init)(void);
279 void (*cleanup)(void); 302 void (*cleanup)(void);
280 int (*readlock)(void); 303 int (*readlock)(void);
281 void (*read_delay)(struct torture_random_state *rrsp); 304 void (*read_delay)(struct torture_random_state *rrsp,
305 struct rt_read_seg *rtrsp);
282 void (*readunlock)(int idx); 306 void (*readunlock)(int idx);
283 unsigned long (*get_gp_seq)(void); 307 unsigned long (*get_gp_seq)(void);
284 unsigned long (*gp_diff)(unsigned long new, unsigned long old); 308 unsigned long (*gp_diff)(unsigned long new, unsigned long old);
@@ -291,6 +315,7 @@ struct rcu_torture_ops {
291 void (*cb_barrier)(void); 315 void (*cb_barrier)(void);
292 void (*fqs)(void); 316 void (*fqs)(void);
293 void (*stats)(void); 317 void (*stats)(void);
318 int (*stall_dur)(void);
294 int irq_capable; 319 int irq_capable;
295 int can_boost; 320 int can_boost;
296 int extendables; 321 int extendables;
@@ -310,12 +335,13 @@ static int rcu_torture_read_lock(void) __acquires(RCU)
310 return 0; 335 return 0;
311} 336}
312 337
313static void rcu_read_delay(struct torture_random_state *rrsp) 338static void
339rcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
314{ 340{
315 unsigned long started; 341 unsigned long started;
316 unsigned long completed; 342 unsigned long completed;
317 const unsigned long shortdelay_us = 200; 343 const unsigned long shortdelay_us = 200;
318 const unsigned long longdelay_ms = 50; 344 unsigned long longdelay_ms = 300;
319 unsigned long long ts; 345 unsigned long long ts;
320 346
321 /* We want a short delay sometimes to make a reader delay the grace 347 /* We want a short delay sometimes to make a reader delay the grace
@@ -325,16 +351,23 @@ static void rcu_read_delay(struct torture_random_state *rrsp)
325 if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) { 351 if (!(torture_random(rrsp) % (nrealreaders * 2000 * longdelay_ms))) {
326 started = cur_ops->get_gp_seq(); 352 started = cur_ops->get_gp_seq();
327 ts = rcu_trace_clock_local(); 353 ts = rcu_trace_clock_local();
354 if (preempt_count() & (SOFTIRQ_MASK | HARDIRQ_MASK))
355 longdelay_ms = 5; /* Avoid triggering BH limits. */
328 mdelay(longdelay_ms); 356 mdelay(longdelay_ms);
357 rtrsp->rt_delay_ms = longdelay_ms;
329 completed = cur_ops->get_gp_seq(); 358 completed = cur_ops->get_gp_seq();
330 do_trace_rcu_torture_read(cur_ops->name, NULL, ts, 359 do_trace_rcu_torture_read(cur_ops->name, NULL, ts,
331 started, completed); 360 started, completed);
332 } 361 }
333 if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) 362 if (!(torture_random(rrsp) % (nrealreaders * 2 * shortdelay_us))) {
334 udelay(shortdelay_us); 363 udelay(shortdelay_us);
364 rtrsp->rt_delay_us = shortdelay_us;
365 }
335 if (!preempt_count() && 366 if (!preempt_count() &&
336 !(torture_random(rrsp) % (nrealreaders * 500))) 367 !(torture_random(rrsp) % (nrealreaders * 500))) {
337 torture_preempt_schedule(); /* QS only if preemptible. */ 368 torture_preempt_schedule(); /* QS only if preemptible. */
369 rtrsp->rt_preempted = true;
370 }
338} 371}
339 372
340static void rcu_torture_read_unlock(int idx) __releases(RCU) 373static void rcu_torture_read_unlock(int idx) __releases(RCU)
@@ -429,53 +462,14 @@ static struct rcu_torture_ops rcu_ops = {
429 .cb_barrier = rcu_barrier, 462 .cb_barrier = rcu_barrier,
430 .fqs = rcu_force_quiescent_state, 463 .fqs = rcu_force_quiescent_state,
431 .stats = NULL, 464 .stats = NULL,
465 .stall_dur = rcu_jiffies_till_stall_check,
432 .irq_capable = 1, 466 .irq_capable = 1,
433 .can_boost = rcu_can_boost(), 467 .can_boost = rcu_can_boost(),
468 .extendables = RCUTORTURE_MAX_EXTEND,
434 .name = "rcu" 469 .name = "rcu"
435}; 470};
436 471
437/* 472/*
438 * Definitions for rcu_bh torture testing.
439 */
440
441static int rcu_bh_torture_read_lock(void) __acquires(RCU_BH)
442{
443 rcu_read_lock_bh();
444 return 0;
445}
446
447static void rcu_bh_torture_read_unlock(int idx) __releases(RCU_BH)
448{
449 rcu_read_unlock_bh();
450}
451
452static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
453{
454 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
455}
456
457static struct rcu_torture_ops rcu_bh_ops = {
458 .ttype = RCU_BH_FLAVOR,
459 .init = rcu_sync_torture_init,
460 .readlock = rcu_bh_torture_read_lock,
461 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
462 .readunlock = rcu_bh_torture_read_unlock,
463 .get_gp_seq = rcu_bh_get_gp_seq,
464 .gp_diff = rcu_seq_diff,
465 .deferred_free = rcu_bh_torture_deferred_free,
466 .sync = synchronize_rcu_bh,
467 .exp_sync = synchronize_rcu_bh_expedited,
468 .call = call_rcu_bh,
469 .cb_barrier = rcu_barrier_bh,
470 .fqs = rcu_bh_force_quiescent_state,
471 .stats = NULL,
472 .irq_capable = 1,
473 .extendables = (RCUTORTURE_RDR_BH | RCUTORTURE_RDR_IRQ),
474 .ext_irq_conflict = RCUTORTURE_RDR_RCU,
475 .name = "rcu_bh"
476};
477
478/*
479 * Don't even think about trying any of these in real life!!! 473 * Don't even think about trying any of these in real life!!!
480 * The names includes "busted", and they really means it! 474 * The names includes "busted", and they really means it!
481 * The only purpose of these functions is to provide a buggy RCU 475 * The only purpose of these functions is to provide a buggy RCU
@@ -531,7 +525,8 @@ static int srcu_torture_read_lock(void) __acquires(srcu_ctlp)
531 return srcu_read_lock(srcu_ctlp); 525 return srcu_read_lock(srcu_ctlp);
532} 526}
533 527
534static void srcu_read_delay(struct torture_random_state *rrsp) 528static void
529srcu_read_delay(struct torture_random_state *rrsp, struct rt_read_seg *rtrsp)
535{ 530{
536 long delay; 531 long delay;
537 const long uspertick = 1000000 / HZ; 532 const long uspertick = 1000000 / HZ;
@@ -541,10 +536,12 @@ static void srcu_read_delay(struct torture_random_state *rrsp)
541 536
542 delay = torture_random(rrsp) % 537 delay = torture_random(rrsp) %
543 (nrealreaders * 2 * longdelay * uspertick); 538 (nrealreaders * 2 * longdelay * uspertick);
544 if (!delay && in_task()) 539 if (!delay && in_task()) {
545 schedule_timeout_interruptible(longdelay); 540 schedule_timeout_interruptible(longdelay);
546 else 541 rtrsp->rt_delay_jiffies = longdelay;
547 rcu_read_delay(rrsp); 542 } else {
543 rcu_read_delay(rrsp, rtrsp);
544 }
548} 545}
549 546
550static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp) 547static void srcu_torture_read_unlock(int idx) __releases(srcu_ctlp)
@@ -663,48 +660,6 @@ static struct rcu_torture_ops busted_srcud_ops = {
663}; 660};
664 661
665/* 662/*
666 * Definitions for sched torture testing.
667 */
668
669static int sched_torture_read_lock(void)
670{
671 preempt_disable();
672 return 0;
673}
674
675static void sched_torture_read_unlock(int idx)
676{
677 preempt_enable();
678}
679
680static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
681{
682 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
683}
684
685static struct rcu_torture_ops sched_ops = {
686 .ttype = RCU_SCHED_FLAVOR,
687 .init = rcu_sync_torture_init,
688 .readlock = sched_torture_read_lock,
689 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
690 .readunlock = sched_torture_read_unlock,
691 .get_gp_seq = rcu_sched_get_gp_seq,
692 .gp_diff = rcu_seq_diff,
693 .deferred_free = rcu_sched_torture_deferred_free,
694 .sync = synchronize_sched,
695 .exp_sync = synchronize_sched_expedited,
696 .get_state = get_state_synchronize_sched,
697 .cond_sync = cond_synchronize_sched,
698 .call = call_rcu_sched,
699 .cb_barrier = rcu_barrier_sched,
700 .fqs = rcu_sched_force_quiescent_state,
701 .stats = NULL,
702 .irq_capable = 1,
703 .extendables = RCUTORTURE_MAX_EXTEND,
704 .name = "sched"
705};
706
707/*
708 * Definitions for RCU-tasks torture testing. 663 * Definitions for RCU-tasks torture testing.
709 */ 664 */
710 665
@@ -1116,7 +1071,8 @@ rcu_torture_writer(void *arg)
1116 break; 1071 break;
1117 } 1072 }
1118 } 1073 }
1119 rcu_torture_current_version++; 1074 WRITE_ONCE(rcu_torture_current_version,
1075 rcu_torture_current_version + 1);
1120 /* Cycle through nesting levels of rcu_expedite_gp() calls. */ 1076 /* Cycle through nesting levels of rcu_expedite_gp() calls. */
1121 if (can_expedite && 1077 if (can_expedite &&
1122 !(torture_random(&rand) & 0xff & (!!expediting - 1))) { 1078 !(torture_random(&rand) & 0xff & (!!expediting - 1))) {
@@ -1132,7 +1088,10 @@ rcu_torture_writer(void *arg)
1132 !rcu_gp_is_normal(); 1088 !rcu_gp_is_normal();
1133 } 1089 }
1134 rcu_torture_writer_state = RTWS_STUTTER; 1090 rcu_torture_writer_state = RTWS_STUTTER;
1135 stutter_wait("rcu_torture_writer"); 1091 if (stutter_wait("rcu_torture_writer"))
1092 for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++)
1093 if (list_empty(&rcu_tortures[i].rtort_free))
1094 WARN_ON_ONCE(1);
1136 } while (!torture_must_stop()); 1095 } while (!torture_must_stop());
1137 /* Reset expediting back to unexpedited. */ 1096 /* Reset expediting back to unexpedited. */
1138 if (expediting > 0) 1097 if (expediting > 0)
@@ -1199,7 +1158,8 @@ static void rcu_torture_timer_cb(struct rcu_head *rhp)
1199 * change, do a ->read_delay(). 1158 * change, do a ->read_delay().
1200 */ 1159 */
1201static void rcutorture_one_extend(int *readstate, int newstate, 1160static void rcutorture_one_extend(int *readstate, int newstate,
1202 struct torture_random_state *trsp) 1161 struct torture_random_state *trsp,
1162 struct rt_read_seg *rtrsp)
1203{ 1163{
1204 int idxnew = -1; 1164 int idxnew = -1;
1205 int idxold = *readstate; 1165 int idxold = *readstate;
@@ -1208,6 +1168,7 @@ static void rcutorture_one_extend(int *readstate, int newstate,
1208 1168
1209 WARN_ON_ONCE(idxold < 0); 1169 WARN_ON_ONCE(idxold < 0);
1210 WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1); 1170 WARN_ON_ONCE((idxold >> RCUTORTURE_RDR_SHIFT) > 1);
1171 rtrsp->rt_readstate = newstate;
1211 1172
1212 /* First, put new protection in place to avoid critical-section gap. */ 1173 /* First, put new protection in place to avoid critical-section gap. */
1213 if (statesnew & RCUTORTURE_RDR_BH) 1174 if (statesnew & RCUTORTURE_RDR_BH)
@@ -1216,6 +1177,10 @@ static void rcutorture_one_extend(int *readstate, int newstate,
1216 local_irq_disable(); 1177 local_irq_disable();
1217 if (statesnew & RCUTORTURE_RDR_PREEMPT) 1178 if (statesnew & RCUTORTURE_RDR_PREEMPT)
1218 preempt_disable(); 1179 preempt_disable();
1180 if (statesnew & RCUTORTURE_RDR_RBH)
1181 rcu_read_lock_bh();
1182 if (statesnew & RCUTORTURE_RDR_SCHED)
1183 rcu_read_lock_sched();
1219 if (statesnew & RCUTORTURE_RDR_RCU) 1184 if (statesnew & RCUTORTURE_RDR_RCU)
1220 idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT; 1185 idxnew = cur_ops->readlock() << RCUTORTURE_RDR_SHIFT;
1221 1186
@@ -1226,12 +1191,16 @@ static void rcutorture_one_extend(int *readstate, int newstate,
1226 local_bh_enable(); 1191 local_bh_enable();
1227 if (statesold & RCUTORTURE_RDR_PREEMPT) 1192 if (statesold & RCUTORTURE_RDR_PREEMPT)
1228 preempt_enable(); 1193 preempt_enable();
1194 if (statesold & RCUTORTURE_RDR_RBH)
1195 rcu_read_unlock_bh();
1196 if (statesold & RCUTORTURE_RDR_SCHED)
1197 rcu_read_unlock_sched();
1229 if (statesold & RCUTORTURE_RDR_RCU) 1198 if (statesold & RCUTORTURE_RDR_RCU)
1230 cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT); 1199 cur_ops->readunlock(idxold >> RCUTORTURE_RDR_SHIFT);
1231 1200
1232 /* Delay if neither beginning nor end and there was a change. */ 1201 /* Delay if neither beginning nor end and there was a change. */
1233 if ((statesnew || statesold) && *readstate && newstate) 1202 if ((statesnew || statesold) && *readstate && newstate)
1234 cur_ops->read_delay(trsp); 1203 cur_ops->read_delay(trsp, rtrsp);
1235 1204
1236 /* Update the reader state. */ 1205 /* Update the reader state. */
1237 if (idxnew == -1) 1206 if (idxnew == -1)
@@ -1260,18 +1229,19 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
1260{ 1229{
1261 int mask = rcutorture_extend_mask_max(); 1230 int mask = rcutorture_extend_mask_max();
1262 unsigned long randmask1 = torture_random(trsp) >> 8; 1231 unsigned long randmask1 = torture_random(trsp) >> 8;
1263 unsigned long randmask2 = randmask1 >> 1; 1232 unsigned long randmask2 = randmask1 >> 3;
1264 1233
1265 WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT); 1234 WARN_ON_ONCE(mask >> RCUTORTURE_RDR_SHIFT);
1266 /* Half the time lots of bits, half the time only one bit. */ 1235 /* Most of the time lots of bits, half the time only one bit. */
1267 if (randmask1 & 0x1) 1236 if (!(randmask1 & 0x7))
1268 mask = mask & randmask2; 1237 mask = mask & randmask2;
1269 else 1238 else
1270 mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS)); 1239 mask = mask & (1 << (randmask2 % RCUTORTURE_RDR_NBITS));
1240 /* Can't enable bh w/irq disabled. */
1271 if ((mask & RCUTORTURE_RDR_IRQ) && 1241 if ((mask & RCUTORTURE_RDR_IRQ) &&
1272 !(mask & RCUTORTURE_RDR_BH) && 1242 ((!(mask & RCUTORTURE_RDR_BH) && (oldmask & RCUTORTURE_RDR_BH)) ||
1273 (oldmask & RCUTORTURE_RDR_BH)) 1243 (!(mask & RCUTORTURE_RDR_RBH) && (oldmask & RCUTORTURE_RDR_RBH))))
1274 mask |= RCUTORTURE_RDR_BH; /* Can't enable bh w/irq disabled. */ 1244 mask |= RCUTORTURE_RDR_BH | RCUTORTURE_RDR_RBH;
1275 if ((mask & RCUTORTURE_RDR_IRQ) && 1245 if ((mask & RCUTORTURE_RDR_IRQ) &&
1276 !(mask & cur_ops->ext_irq_conflict) && 1246 !(mask & cur_ops->ext_irq_conflict) &&
1277 (oldmask & cur_ops->ext_irq_conflict)) 1247 (oldmask & cur_ops->ext_irq_conflict))
@@ -1283,20 +1253,25 @@ rcutorture_extend_mask(int oldmask, struct torture_random_state *trsp)
1283 * Do a randomly selected number of extensions of an existing RCU read-side 1253 * Do a randomly selected number of extensions of an existing RCU read-side
1284 * critical section. 1254 * critical section.
1285 */ 1255 */
1286static void rcutorture_loop_extend(int *readstate, 1256static struct rt_read_seg *
1287 struct torture_random_state *trsp) 1257rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
1258 struct rt_read_seg *rtrsp)
1288{ 1259{
1289 int i; 1260 int i;
1261 int j;
1290 int mask = rcutorture_extend_mask_max(); 1262 int mask = rcutorture_extend_mask_max();
1291 1263
1292 WARN_ON_ONCE(!*readstate); /* -Existing- RCU read-side critsect! */ 1264 WARN_ON_ONCE(!*readstate); /* -Existing- RCU read-side critsect! */
1293 if (!((mask - 1) & mask)) 1265 if (!((mask - 1) & mask))
1294 return; /* Current RCU flavor not extendable. */ 1266 return rtrsp; /* Current RCU reader not extendable. */
1295 i = (torture_random(trsp) >> 3) & RCUTORTURE_RDR_MAX_LOOPS; 1267 /* Bias towards larger numbers of loops. */
1296 while (i--) { 1268 i = (torture_random(trsp) >> 3);
1269 i = ((i | (i >> 3)) & RCUTORTURE_RDR_MAX_LOOPS) + 1;
1270 for (j = 0; j < i; j++) {
1297 mask = rcutorture_extend_mask(*readstate, trsp); 1271 mask = rcutorture_extend_mask(*readstate, trsp);
1298 rcutorture_one_extend(readstate, mask, trsp); 1272 rcutorture_one_extend(readstate, mask, trsp, &rtrsp[j]);
1299 } 1273 }
1274 return &rtrsp[j];
1300} 1275}
1301 1276
1302/* 1277/*
@@ -1306,16 +1281,20 @@ static void rcutorture_loop_extend(int *readstate,
1306 */ 1281 */
1307static bool rcu_torture_one_read(struct torture_random_state *trsp) 1282static bool rcu_torture_one_read(struct torture_random_state *trsp)
1308{ 1283{
1284 int i;
1309 unsigned long started; 1285 unsigned long started;
1310 unsigned long completed; 1286 unsigned long completed;
1311 int newstate; 1287 int newstate;
1312 struct rcu_torture *p; 1288 struct rcu_torture *p;
1313 int pipe_count; 1289 int pipe_count;
1314 int readstate = 0; 1290 int readstate = 0;
1291 struct rt_read_seg rtseg[RCUTORTURE_RDR_MAX_SEGS] = { { 0 } };
1292 struct rt_read_seg *rtrsp = &rtseg[0];
1293 struct rt_read_seg *rtrsp1;
1315 unsigned long long ts; 1294 unsigned long long ts;
1316 1295
1317 newstate = rcutorture_extend_mask(readstate, trsp); 1296 newstate = rcutorture_extend_mask(readstate, trsp);
1318 rcutorture_one_extend(&readstate, newstate, trsp); 1297 rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++);
1319 started = cur_ops->get_gp_seq(); 1298 started = cur_ops->get_gp_seq();
1320 ts = rcu_trace_clock_local(); 1299 ts = rcu_trace_clock_local();
1321 p = rcu_dereference_check(rcu_torture_current, 1300 p = rcu_dereference_check(rcu_torture_current,
@@ -1325,12 +1304,12 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
1325 torturing_tasks()); 1304 torturing_tasks());
1326 if (p == NULL) { 1305 if (p == NULL) {
1327 /* Wait for rcu_torture_writer to get underway */ 1306 /* Wait for rcu_torture_writer to get underway */
1328 rcutorture_one_extend(&readstate, 0, trsp); 1307 rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
1329 return false; 1308 return false;
1330 } 1309 }
1331 if (p->rtort_mbtest == 0) 1310 if (p->rtort_mbtest == 0)
1332 atomic_inc(&n_rcu_torture_mberror); 1311 atomic_inc(&n_rcu_torture_mberror);
1333 rcutorture_loop_extend(&readstate, trsp); 1312 rtrsp = rcutorture_loop_extend(&readstate, trsp, rtrsp);
1334 preempt_disable(); 1313 preempt_disable();
1335 pipe_count = p->rtort_pipe_count; 1314 pipe_count = p->rtort_pipe_count;
1336 if (pipe_count > RCU_TORTURE_PIPE_LEN) { 1315 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
@@ -1351,8 +1330,17 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp)
1351 } 1330 }
1352 __this_cpu_inc(rcu_torture_batch[completed]); 1331 __this_cpu_inc(rcu_torture_batch[completed]);
1353 preempt_enable(); 1332 preempt_enable();
1354 rcutorture_one_extend(&readstate, 0, trsp); 1333 rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
1355 WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK); 1334 WARN_ON_ONCE(readstate & RCUTORTURE_RDR_MASK);
1335
1336 /* If error or close call, record the sequence of reader protections. */
1337 if ((pipe_count > 1 || completed > 1) && !xchg(&err_segs_recorded, 1)) {
1338 i = 0;
1339 for (rtrsp1 = &rtseg[0]; rtrsp1 < rtrsp; rtrsp1++)
1340 err_segs[i++] = *rtrsp1;
1341 rt_read_nsegs = i;
1342 }
1343
1356 return true; 1344 return true;
1357} 1345}
1358 1346
@@ -1387,6 +1375,9 @@ static void rcu_torture_timer(struct timer_list *unused)
1387static int 1375static int
1388rcu_torture_reader(void *arg) 1376rcu_torture_reader(void *arg)
1389{ 1377{
1378 unsigned long lastsleep = jiffies;
1379 long myid = (long)arg;
1380 int mynumonline = myid;
1390 DEFINE_TORTURE_RANDOM(rand); 1381 DEFINE_TORTURE_RANDOM(rand);
1391 struct timer_list t; 1382 struct timer_list t;
1392 1383
@@ -1402,6 +1393,12 @@ rcu_torture_reader(void *arg)
1402 } 1393 }
1403 if (!rcu_torture_one_read(&rand)) 1394 if (!rcu_torture_one_read(&rand))
1404 schedule_timeout_interruptible(HZ); 1395 schedule_timeout_interruptible(HZ);
1396 if (time_after(jiffies, lastsleep)) {
1397 schedule_timeout_interruptible(1);
1398 lastsleep = jiffies + 10;
1399 }
1400 while (num_online_cpus() < mynumonline && !torture_must_stop())
1401 schedule_timeout_interruptible(HZ / 5);
1405 stutter_wait("rcu_torture_reader"); 1402 stutter_wait("rcu_torture_reader");
1406 } while (!torture_must_stop()); 1403 } while (!torture_must_stop());
1407 if (irqreader && cur_ops->irq_capable) { 1404 if (irqreader && cur_ops->irq_capable) {
@@ -1655,6 +1652,121 @@ static int __init rcu_torture_stall_init(void)
1655 return torture_create_kthread(rcu_torture_stall, NULL, stall_task); 1652 return torture_create_kthread(rcu_torture_stall, NULL, stall_task);
1656} 1653}
1657 1654
1655/* State structure for forward-progress self-propagating RCU callback. */
1656struct fwd_cb_state {
1657 struct rcu_head rh;
1658 int stop;
1659};
1660
1661/*
1662 * Forward-progress self-propagating RCU callback function. Because
1663 * callbacks run from softirq, this function is an implicit RCU read-side
1664 * critical section.
1665 */
1666static void rcu_torture_fwd_prog_cb(struct rcu_head *rhp)
1667{
1668 struct fwd_cb_state *fcsp = container_of(rhp, struct fwd_cb_state, rh);
1669
1670 if (READ_ONCE(fcsp->stop)) {
1671 WRITE_ONCE(fcsp->stop, 2);
1672 return;
1673 }
1674 cur_ops->call(&fcsp->rh, rcu_torture_fwd_prog_cb);
1675}
1676
1677/* Carry out grace-period forward-progress testing. */
1678static int rcu_torture_fwd_prog(void *args)
1679{
1680 unsigned long cver;
1681 unsigned long dur;
1682 struct fwd_cb_state fcs;
1683 unsigned long gps;
1684 int idx;
1685 int sd;
1686 int sd4;
1687 bool selfpropcb = false;
1688 unsigned long stopat;
1689 int tested = 0;
1690 int tested_tries = 0;
1691 static DEFINE_TORTURE_RANDOM(trs);
1692
1693 VERBOSE_TOROUT_STRING("rcu_torture_fwd_progress task started");
1694 if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST))
1695 set_user_nice(current, MAX_NICE);
1696 if (cur_ops->call && cur_ops->sync && cur_ops->cb_barrier) {
1697 init_rcu_head_on_stack(&fcs.rh);
1698 selfpropcb = true;
1699 }
1700 do {
1701 schedule_timeout_interruptible(fwd_progress_holdoff * HZ);
1702 if (selfpropcb) {
1703 WRITE_ONCE(fcs.stop, 0);
1704 cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb);
1705 }
1706 cver = READ_ONCE(rcu_torture_current_version);
1707 gps = cur_ops->get_gp_seq();
1708 sd = cur_ops->stall_dur() + 1;
1709 sd4 = (sd + fwd_progress_div - 1) / fwd_progress_div;
1710 dur = sd4 + torture_random(&trs) % (sd - sd4);
1711 stopat = jiffies + dur;
1712 while (time_before(jiffies, stopat) && !torture_must_stop()) {
1713 idx = cur_ops->readlock();
1714 udelay(10);
1715 cur_ops->readunlock(idx);
1716 if (!fwd_progress_need_resched || need_resched())
1717 cond_resched();
1718 }
1719 tested_tries++;
1720 if (!time_before(jiffies, stopat) && !torture_must_stop()) {
1721 tested++;
1722 cver = READ_ONCE(rcu_torture_current_version) - cver;
1723 gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps);
1724 WARN_ON(!cver && gps < 2);
1725 pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps);
1726 }
1727 if (selfpropcb) {
1728 WRITE_ONCE(fcs.stop, 1);
1729 cur_ops->sync(); /* Wait for running CB to complete. */
1730 cur_ops->cb_barrier(); /* Wait for queued callbacks. */
1731 }
1732 /* Avoid slow periods, better to test when busy. */
1733 stutter_wait("rcu_torture_fwd_prog");
1734 } while (!torture_must_stop());
1735 if (selfpropcb) {
1736 WARN_ON(READ_ONCE(fcs.stop) != 2);
1737 destroy_rcu_head_on_stack(&fcs.rh);
1738 }
1739 /* Short runs might not contain a valid forward-progress attempt. */
1740 WARN_ON(!tested && tested_tries >= 5);
1741 pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries);
1742 torture_kthread_stopping("rcu_torture_fwd_prog");
1743 return 0;
1744}
1745
1746/* If forward-progress checking is requested and feasible, spawn the thread. */
1747static int __init rcu_torture_fwd_prog_init(void)
1748{
1749 if (!fwd_progress)
1750 return 0; /* Not requested, so don't do it. */
1751 if (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0) {
1752 VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test");
1753 return 0;
1754 }
1755 if (stall_cpu > 0) {
1756 VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, conflicts with CPU-stall testing");
1757 if (IS_MODULE(CONFIG_RCU_TORTURE_TESTS))
1758 return -EINVAL; /* In module, can fail back to user. */
1759 WARN_ON(1); /* Make sure rcutorture notices conflict. */
1760 return 0;
1761 }
1762 if (fwd_progress_holdoff <= 0)
1763 fwd_progress_holdoff = 1;
1764 if (fwd_progress_div <= 0)
1765 fwd_progress_div = 4;
1766 return torture_create_kthread(rcu_torture_fwd_prog,
1767 NULL, fwd_prog_task);
1768}
1769
1658/* Callback function for RCU barrier testing. */ 1770/* Callback function for RCU barrier testing. */
1659static void rcu_torture_barrier_cbf(struct rcu_head *rcu) 1771static void rcu_torture_barrier_cbf(struct rcu_head *rcu)
1660{ 1772{
@@ -1817,6 +1929,7 @@ static enum cpuhp_state rcutor_hp;
1817static void 1929static void
1818rcu_torture_cleanup(void) 1930rcu_torture_cleanup(void)
1819{ 1931{
1932 int firsttime;
1820 int flags = 0; 1933 int flags = 0;
1821 unsigned long gp_seq = 0; 1934 unsigned long gp_seq = 0;
1822 int i; 1935 int i;
@@ -1828,6 +1941,7 @@ rcu_torture_cleanup(void)
1828 } 1941 }
1829 1942
1830 rcu_torture_barrier_cleanup(); 1943 rcu_torture_barrier_cleanup();
1944 torture_stop_kthread(rcu_torture_fwd_prog, fwd_prog_task);
1831 torture_stop_kthread(rcu_torture_stall, stall_task); 1945 torture_stop_kthread(rcu_torture_stall, stall_task);
1832 torture_stop_kthread(rcu_torture_writer, writer_task); 1946 torture_stop_kthread(rcu_torture_writer, writer_task);
1833 1947
@@ -1860,7 +1974,7 @@ rcu_torture_cleanup(void)
1860 cpuhp_remove_state(rcutor_hp); 1974 cpuhp_remove_state(rcutor_hp);
1861 1975
1862 /* 1976 /*
1863 * Wait for all RCU callbacks to fire, then do flavor-specific 1977 * Wait for all RCU callbacks to fire, then do torture-type-specific
1864 * cleanup operations. 1978 * cleanup operations.
1865 */ 1979 */
1866 if (cur_ops->cb_barrier != NULL) 1980 if (cur_ops->cb_barrier != NULL)
@@ -1870,6 +1984,33 @@ rcu_torture_cleanup(void)
1870 1984
1871 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 1985 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
1872 1986
1987 if (err_segs_recorded) {
1988 pr_alert("Failure/close-call rcutorture reader segments:\n");
1989 if (rt_read_nsegs == 0)
1990 pr_alert("\t: No segments recorded!!!\n");
1991 firsttime = 1;
1992 for (i = 0; i < rt_read_nsegs; i++) {
1993 pr_alert("\t%d: %#x ", i, err_segs[i].rt_readstate);
1994 if (err_segs[i].rt_delay_jiffies != 0) {
1995 pr_cont("%s%ldjiffies", firsttime ? "" : "+",
1996 err_segs[i].rt_delay_jiffies);
1997 firsttime = 0;
1998 }
1999 if (err_segs[i].rt_delay_ms != 0) {
2000 pr_cont("%s%ldms", firsttime ? "" : "+",
2001 err_segs[i].rt_delay_ms);
2002 firsttime = 0;
2003 }
2004 if (err_segs[i].rt_delay_us != 0) {
2005 pr_cont("%s%ldus", firsttime ? "" : "+",
2006 err_segs[i].rt_delay_us);
2007 firsttime = 0;
2008 }
2009 pr_cont("%s\n",
2010 err_segs[i].rt_preempted ? "preempted" : "");
2011
2012 }
2013 }
1873 if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error) 2014 if (atomic_read(&n_rcu_torture_error) || n_rcu_torture_barrier_error)
1874 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE"); 2015 rcu_torture_print_module_parms(cur_ops, "End of test: FAILURE");
1875 else if (torture_onoff_failures()) 2016 else if (torture_onoff_failures())
@@ -1939,12 +2080,12 @@ static void rcu_test_debug_objects(void)
1939static int __init 2080static int __init
1940rcu_torture_init(void) 2081rcu_torture_init(void)
1941{ 2082{
1942 int i; 2083 long i;
1943 int cpu; 2084 int cpu;
1944 int firsterr = 0; 2085 int firsterr = 0;
1945 static struct rcu_torture_ops *torture_ops[] = { 2086 static struct rcu_torture_ops *torture_ops[] = {
1946 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops, 2087 &rcu_ops, &rcu_busted_ops, &srcu_ops, &srcud_ops,
1947 &busted_srcud_ops, &sched_ops, &tasks_ops, 2088 &busted_srcud_ops, &tasks_ops,
1948 }; 2089 };
1949 2090
1950 if (!torture_init_begin(torture_type, verbose)) 2091 if (!torture_init_begin(torture_type, verbose))
@@ -1963,6 +2104,7 @@ rcu_torture_init(void)
1963 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) 2104 for (i = 0; i < ARRAY_SIZE(torture_ops); i++)
1964 pr_cont(" %s", torture_ops[i]->name); 2105 pr_cont(" %s", torture_ops[i]->name);
1965 pr_cont("\n"); 2106 pr_cont("\n");
2107 WARN_ON(!IS_MODULE(CONFIG_RCU_TORTURE_TEST));
1966 firsterr = -EINVAL; 2108 firsterr = -EINVAL;
1967 goto unwind; 2109 goto unwind;
1968 } 2110 }
@@ -2013,6 +2155,8 @@ rcu_torture_init(void)
2013 per_cpu(rcu_torture_batch, cpu)[i] = 0; 2155 per_cpu(rcu_torture_batch, cpu)[i] = 0;
2014 } 2156 }
2015 } 2157 }
2158 err_segs_recorded = 0;
2159 rt_read_nsegs = 0;
2016 2160
2017 /* Start up the kthreads. */ 2161 /* Start up the kthreads. */
2018 2162
@@ -2044,7 +2188,7 @@ rcu_torture_init(void)
2044 goto unwind; 2188 goto unwind;
2045 } 2189 }
2046 for (i = 0; i < nrealreaders; i++) { 2190 for (i = 0; i < nrealreaders; i++) {
2047 firsterr = torture_create_kthread(rcu_torture_reader, NULL, 2191 firsterr = torture_create_kthread(rcu_torture_reader, (void *)i,
2048 reader_tasks[i]); 2192 reader_tasks[i]);
2049 if (firsterr) 2193 if (firsterr)
2050 goto unwind; 2194 goto unwind;
@@ -2100,6 +2244,9 @@ rcu_torture_init(void)
2100 firsterr = rcu_torture_stall_init(); 2244 firsterr = rcu_torture_stall_init();
2101 if (firsterr) 2245 if (firsterr)
2102 goto unwind; 2246 goto unwind;
2247 firsterr = rcu_torture_fwd_prog_init();
2248 if (firsterr)
2249 goto unwind;
2103 firsterr = rcu_torture_barrier_init(); 2250 firsterr = rcu_torture_barrier_init();
2104 if (firsterr) 2251 if (firsterr)
2105 goto unwind; 2252 goto unwind;
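As an aid to reading the new failure/close-call segment report added above (the "%#x" rt_readstate values printed by rcu_torture_cleanup()), here is a small illustrative decoder; the bit values are copied from the RCUTORTURE_RDR_* definitions earlier in this diff, but the decoder itself is hypothetical, standalone userspace code:

  #include <stdio.h>

  /* Bit values copied from the RCUTORTURE_RDR_* definitions in this patch. */
  #define RCUTORTURE_RDR_BH       0x01    /* local_bh_disable(). */
  #define RCUTORTURE_RDR_IRQ      0x02    /* local_irq_disable(). */
  #define RCUTORTURE_RDR_PREEMPT  0x04    /* preempt_disable(). */
  #define RCUTORTURE_RDR_RBH      0x08    /* rcu_read_lock_bh(). */
  #define RCUTORTURE_RDR_SCHED    0x10    /* rcu_read_lock_sched(). */
  #define RCUTORTURE_RDR_RCU      0x20    /* cur_ops->readlock(). */

  /* Print the reader protections encoded in one rt_readstate value. */
  static void decode_readstate(int rs)
  {
          printf("%#x:%s%s%s%s%s%s\n", rs,
                 (rs & RCUTORTURE_RDR_BH) ? " bh" : "",
                 (rs & RCUTORTURE_RDR_IRQ) ? " irq" : "",
                 (rs & RCUTORTURE_RDR_PREEMPT) ? " preempt" : "",
                 (rs & RCUTORTURE_RDR_RBH) ? " rcu_bh" : "",
                 (rs & RCUTORTURE_RDR_SCHED) ? " rcu_sched" : "",
                 (rs & RCUTORTURE_RDR_RCU) ? " rcu" : "");
  }

  int main(void)
  {
          decode_readstate(0x21); /* Example: rcu_read_lock() + bh disabled. */
          return 0;
  }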
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 04fc2ed71af8..b46e6683f8c9 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -34,6 +34,8 @@
34#include "rcu.h" 34#include "rcu.h"
35 35
36int rcu_scheduler_active __read_mostly; 36int rcu_scheduler_active __read_mostly;
37static LIST_HEAD(srcu_boot_list);
38static bool srcu_init_done;
37 39
38static int init_srcu_struct_fields(struct srcu_struct *sp) 40static int init_srcu_struct_fields(struct srcu_struct *sp)
39{ 41{
@@ -46,6 +48,7 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
46 sp->srcu_gp_waiting = false; 48 sp->srcu_gp_waiting = false;
47 sp->srcu_idx = 0; 49 sp->srcu_idx = 0;
48 INIT_WORK(&sp->srcu_work, srcu_drive_gp); 50 INIT_WORK(&sp->srcu_work, srcu_drive_gp);
51 INIT_LIST_HEAD(&sp->srcu_work.entry);
49 return 0; 52 return 0;
50} 53}
51 54
@@ -179,8 +182,12 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
179 *sp->srcu_cb_tail = rhp; 182 *sp->srcu_cb_tail = rhp;
180 sp->srcu_cb_tail = &rhp->next; 183 sp->srcu_cb_tail = &rhp->next;
181 local_irq_restore(flags); 184 local_irq_restore(flags);
182 if (!READ_ONCE(sp->srcu_gp_running)) 185 if (!READ_ONCE(sp->srcu_gp_running)) {
183 schedule_work(&sp->srcu_work); 186 if (likely(srcu_init_done))
187 schedule_work(&sp->srcu_work);
188 else if (list_empty(&sp->srcu_work.entry))
189 list_add(&sp->srcu_work.entry, &srcu_boot_list);
190 }
184} 191}
185EXPORT_SYMBOL_GPL(call_srcu); 192EXPORT_SYMBOL_GPL(call_srcu);
186 193
@@ -204,3 +211,21 @@ void __init rcu_scheduler_starting(void)
204{ 211{
205 rcu_scheduler_active = RCU_SCHEDULER_RUNNING; 212 rcu_scheduler_active = RCU_SCHEDULER_RUNNING;
206} 213}
214
215/*
216 * Queue work for srcu_struct structures with early boot callbacks.
217 * The work won't actually execute until the workqueue initialization
218 * phase that takes place after the scheduler starts.
219 */
220void __init srcu_init(void)
221{
222 struct srcu_struct *sp;
223
224 srcu_init_done = true;
225 while (!list_empty(&srcu_boot_list)) {
226 sp = list_first_entry(&srcu_boot_list,
227 struct srcu_struct, srcu_work.entry);
228 list_del_init(&sp->srcu_work.entry);
229 schedule_work(&sp->srcu_work);
230 }
231}
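The srcu_init() hook above lets Tiny SRCU accept call_srcu() before workqueues exist, parking the work on srcu_boot_list until boot has progressed far enough. A brief sketch of the now-permitted caller pattern (the srcu_struct, rcu_head, and functions below are hypothetical):

  #include <linux/init.h>
  #include <linux/printk.h>
  #include <linux/srcu.h>

  DEFINE_STATIC_SRCU(example_srcu);
  static struct rcu_head example_rh;

  static void example_early_cb(struct rcu_head *rhp)
  {
          pr_info("early call_srcu() callback finally ran\n");
  }

  /* May now be invoked very early in boot, before workqueues exist. */
  void __init example_early_boot_user(void)
  {
          call_srcu(&example_srcu, &example_rh, example_early_cb);
  }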
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 6c9866a854b1..a8846ed7f352 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -51,6 +51,10 @@ module_param(exp_holdoff, ulong, 0444);
51static ulong counter_wrap_check = (ULONG_MAX >> 2); 51static ulong counter_wrap_check = (ULONG_MAX >> 2);
52module_param(counter_wrap_check, ulong, 0444); 52module_param(counter_wrap_check, ulong, 0444);
53 53
54/* Early-boot callback-management, so early that no lock is required! */
55static LIST_HEAD(srcu_boot_list);
56static bool __read_mostly srcu_init_done;
57
54static void srcu_invoke_callbacks(struct work_struct *work); 58static void srcu_invoke_callbacks(struct work_struct *work);
55static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); 59static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
56static void process_srcu(struct work_struct *work); 60static void process_srcu(struct work_struct *work);
@@ -105,7 +109,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
105 rcu_init_levelspread(levelspread, num_rcu_lvl); 109 rcu_init_levelspread(levelspread, num_rcu_lvl);
106 110
107 /* Each pass through this loop initializes one srcu_node structure. */ 111 /* Each pass through this loop initializes one srcu_node structure. */
108 rcu_for_each_node_breadth_first(sp, snp) { 112 srcu_for_each_node_breadth_first(sp, snp) {
109 spin_lock_init(&ACCESS_PRIVATE(snp, lock)); 113 spin_lock_init(&ACCESS_PRIVATE(snp, lock));
110 WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != 114 WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
111 ARRAY_SIZE(snp->srcu_data_have_cbs)); 115 ARRAY_SIZE(snp->srcu_data_have_cbs));
@@ -235,7 +239,6 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
235{ 239{
236 unsigned long flags; 240 unsigned long flags;
237 241
238 WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
239 /* The smp_load_acquire() pairs with the smp_store_release(). */ 242 /* The smp_load_acquire() pairs with the smp_store_release(). */
240 if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ 243 if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
241 return; /* Already initialized. */ 244 return; /* Already initialized. */
@@ -561,7 +564,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
561 564
562 /* Initiate callback invocation as needed. */ 565 /* Initiate callback invocation as needed. */
563 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); 566 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
564 rcu_for_each_node_breadth_first(sp, snp) { 567 srcu_for_each_node_breadth_first(sp, snp) {
565 spin_lock_irq_rcu_node(snp); 568 spin_lock_irq_rcu_node(snp);
566 cbs = false; 569 cbs = false;
567 last_lvl = snp >= sp->level[rcu_num_lvls - 1]; 570 last_lvl = snp >= sp->level[rcu_num_lvls - 1];
@@ -701,7 +704,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
701 rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { 704 rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
702 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); 705 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
703 srcu_gp_start(sp); 706 srcu_gp_start(sp);
704 queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp)); 707 if (likely(srcu_init_done))
708 queue_delayed_work(rcu_gp_wq, &sp->work,
709 srcu_get_delay(sp));
710 else if (list_empty(&sp->work.work.entry))
711 list_add(&sp->work.work.entry, &srcu_boot_list);
705 } 712 }
706 spin_unlock_irqrestore_rcu_node(sp, flags); 713 spin_unlock_irqrestore_rcu_node(sp, flags);
707} 714}
@@ -980,7 +987,7 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
980 * There are memory-ordering constraints implied by synchronize_srcu(). 987 * There are memory-ordering constraints implied by synchronize_srcu().
981 * On systems with more than one CPU, when synchronize_srcu() returns, 988 * On systems with more than one CPU, when synchronize_srcu() returns,
982 * each CPU is guaranteed to have executed a full memory barrier since 989 * each CPU is guaranteed to have executed a full memory barrier since
983 * the end of its last corresponding SRCU-sched read-side critical section 990 * the end of its last corresponding SRCU read-side critical section
984 * whose beginning preceded the call to synchronize_srcu(). In addition, 991 * whose beginning preceded the call to synchronize_srcu(). In addition,
985 * each CPU having an SRCU read-side critical section that extends beyond 992 * each CPU having an SRCU read-side critical section that extends beyond
986 * the return from synchronize_srcu() is guaranteed to have executed a 993 * the return from synchronize_srcu() is guaranteed to have executed a
@@ -1308,3 +1315,17 @@ static int __init srcu_bootup_announce(void)
1308 return 0; 1315 return 0;
1309} 1316}
1310early_initcall(srcu_bootup_announce); 1317early_initcall(srcu_bootup_announce);
1318
1319void __init srcu_init(void)
1320{
1321 struct srcu_struct *sp;
1322
1323 srcu_init_done = true;
1324 while (!list_empty(&srcu_boot_list)) {
1325 sp = list_first_entry(&srcu_boot_list, struct srcu_struct,
1326 work.work.entry);
1327 check_init_srcu_struct(sp);
1328 list_del_init(&sp->work.work.entry);
1329 queue_work(rcu_gp_wq, &sp->work.work);
1330 }
1331}
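
The srcu_gp_end() hunk above picks a callback bucket with rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs), relying on the grace-period sequence number carrying both a counter and a small state field. A compact sketch of that encoding follows; the constants (two state bits, two buckets) are assumptions for illustration, not the kernel's definitions.

/*
 * A counter in the upper bits plus a state field in the low-order bits,
 * so one word says both "which grace period" and "is one in progress".
 */
#include <stdio.h>

#define SEQ_STATE_BITS  2
#define SEQ_STATE_MASK  ((1UL << SEQ_STATE_BITS) - 1)

static unsigned long seq_ctr(unsigned long s)   { return s >> SEQ_STATE_BITS; }
static unsigned long seq_state(unsigned long s) { return s & SEQ_STATE_MASK; }

/* Begin a grace period: bump the state field out of idle (0). */
static void seq_start(unsigned long *sp) { *sp += 1; }

/* End a grace period: advance to the next counter value with state 0. */
static void seq_end(unsigned long *sp)
{
        *sp = (seq_ctr(*sp) + 1) << SEQ_STATE_BITS;
}

int main(void)
{
        unsigned long gp_seq = 0;
        int i;

        for (i = 0; i < 3; i++) {
                seq_start(&gp_seq);
                printf("GP %lu in progress (state %lu)\n",
                       seq_ctr(gp_seq), seq_state(gp_seq));
                seq_end(&gp_seq);
                printf("GP done, next counter %lu, bucket %lu\n",
                       seq_ctr(gp_seq), seq_ctr(gp_seq) % 2);
        }
        return 0;
}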
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index befc9321a89c..5f5963ba313e 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -46,69 +46,27 @@ struct rcu_ctrlblk {
46}; 46};
47 47
48/* Definition for rcupdate control block. */ 48/* Definition for rcupdate control block. */
49static struct rcu_ctrlblk rcu_sched_ctrlblk = { 49static struct rcu_ctrlblk rcu_ctrlblk = {
50 .donetail = &rcu_sched_ctrlblk.rcucblist, 50 .donetail = &rcu_ctrlblk.rcucblist,
51 .curtail = &rcu_sched_ctrlblk.rcucblist, 51 .curtail = &rcu_ctrlblk.rcucblist,
52}; 52};
53 53
54static struct rcu_ctrlblk rcu_bh_ctrlblk = { 54void rcu_barrier(void)
55 .donetail = &rcu_bh_ctrlblk.rcucblist,
56 .curtail = &rcu_bh_ctrlblk.rcucblist,
57};
58
59void rcu_barrier_bh(void)
60{
61 wait_rcu_gp(call_rcu_bh);
62}
63EXPORT_SYMBOL(rcu_barrier_bh);
64
65void rcu_barrier_sched(void)
66{
67 wait_rcu_gp(call_rcu_sched);
68}
69EXPORT_SYMBOL(rcu_barrier_sched);
70
71/*
72 * Helper function for rcu_sched_qs() and rcu_bh_qs().
73 * Also irqs are disabled to avoid confusion due to interrupt handlers
74 * invoking call_rcu().
75 */
76static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
77{
78 if (rcp->donetail != rcp->curtail) {
79 rcp->donetail = rcp->curtail;
80 return 1;
81 }
82
83 return 0;
84}
85
86/*
87 * Record an rcu quiescent state. And an rcu_bh quiescent state while we
88 * are at it, given that any rcu quiescent state is also an rcu_bh
89 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
90 */
91void rcu_sched_qs(void)
92{ 55{
93 unsigned long flags; 56 wait_rcu_gp(call_rcu);
94
95 local_irq_save(flags);
96 if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
97 rcu_qsctr_help(&rcu_bh_ctrlblk))
98 raise_softirq(RCU_SOFTIRQ);
99 local_irq_restore(flags);
100} 57}
58EXPORT_SYMBOL(rcu_barrier);
101 59
102/* 60/* Record an rcu quiescent state. */
103 * Record an rcu_bh quiescent state. 61void rcu_qs(void)
104 */
105void rcu_bh_qs(void)
106{ 62{
107 unsigned long flags; 63 unsigned long flags;
108 64
109 local_irq_save(flags); 65 local_irq_save(flags);
110 if (rcu_qsctr_help(&rcu_bh_ctrlblk)) 66 if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
67 rcu_ctrlblk.donetail = rcu_ctrlblk.curtail;
111 raise_softirq(RCU_SOFTIRQ); 68 raise_softirq(RCU_SOFTIRQ);
69 }
112 local_irq_restore(flags); 70 local_irq_restore(flags);
113} 71}
114 72
@@ -120,34 +78,33 @@ void rcu_bh_qs(void)
120 */ 78 */
121void rcu_check_callbacks(int user) 79void rcu_check_callbacks(int user)
122{ 80{
123 if (user) 81 if (user) {
124 rcu_sched_qs(); 82 rcu_qs();
125 if (user || !in_softirq()) 83 } else if (rcu_ctrlblk.donetail != rcu_ctrlblk.curtail) {
126 rcu_bh_qs(); 84 set_tsk_need_resched(current);
85 set_preempt_need_resched();
86 }
127} 87}
128 88
129/* 89/* Invoke the RCU callbacks whose grace period has elapsed. */
130 * Invoke the RCU callbacks on the specified rcu_ctrlkblk structure 90static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
131 * whose grace period has elapsed.
132 */
133static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
134{ 91{
135 struct rcu_head *next, *list; 92 struct rcu_head *next, *list;
136 unsigned long flags; 93 unsigned long flags;
137 94
138 /* Move the ready-to-invoke callbacks to a local list. */ 95 /* Move the ready-to-invoke callbacks to a local list. */
139 local_irq_save(flags); 96 local_irq_save(flags);
140 if (rcp->donetail == &rcp->rcucblist) { 97 if (rcu_ctrlblk.donetail == &rcu_ctrlblk.rcucblist) {
141 /* No callbacks ready, so just leave. */ 98 /* No callbacks ready, so just leave. */
142 local_irq_restore(flags); 99 local_irq_restore(flags);
143 return; 100 return;
144 } 101 }
145 list = rcp->rcucblist; 102 list = rcu_ctrlblk.rcucblist;
146 rcp->rcucblist = *rcp->donetail; 103 rcu_ctrlblk.rcucblist = *rcu_ctrlblk.donetail;
147 *rcp->donetail = NULL; 104 *rcu_ctrlblk.donetail = NULL;
148 if (rcp->curtail == rcp->donetail) 105 if (rcu_ctrlblk.curtail == rcu_ctrlblk.donetail)
149 rcp->curtail = &rcp->rcucblist; 106 rcu_ctrlblk.curtail = &rcu_ctrlblk.rcucblist;
150 rcp->donetail = &rcp->rcucblist; 107 rcu_ctrlblk.donetail = &rcu_ctrlblk.rcucblist;
151 local_irq_restore(flags); 108 local_irq_restore(flags);
152 109
153 /* Invoke the callbacks on the local list. */ 110 /* Invoke the callbacks on the local list. */
@@ -162,37 +119,31 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
162 } 119 }
163} 120}
164 121
165static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
166{
167 __rcu_process_callbacks(&rcu_sched_ctrlblk);
168 __rcu_process_callbacks(&rcu_bh_ctrlblk);
169}
170
171/* 122/*
172 * Wait for a grace period to elapse. But it is illegal to invoke 123 * Wait for a grace period to elapse. But it is illegal to invoke
173 * synchronize_sched() from within an RCU read-side critical section. 124 * synchronize_rcu() from within an RCU read-side critical section.
174 * Therefore, any legal call to synchronize_sched() is a quiescent 125 * Therefore, any legal call to synchronize_rcu() is a quiescent
175 * state, and so on a UP system, synchronize_sched() need do nothing. 126 * state, and so on a UP system, synchronize_rcu() need do nothing.
176 * Ditto for synchronize_rcu_bh(). (But Lai Jiangshan points out the 127 * (But Lai Jiangshan points out the benefits of doing might_sleep()
177 * benefits of doing might_sleep() to reduce latency.) 128 * to reduce latency.)
178 * 129 *
179 * Cool, huh? (Due to Josh Triplett.) 130 * Cool, huh? (Due to Josh Triplett.)
180 */ 131 */
181void synchronize_sched(void) 132void synchronize_rcu(void)
182{ 133{
183 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || 134 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
184 lock_is_held(&rcu_lock_map) || 135 lock_is_held(&rcu_lock_map) ||
185 lock_is_held(&rcu_sched_lock_map), 136 lock_is_held(&rcu_sched_lock_map),
186 "Illegal synchronize_sched() in RCU read-side critical section"); 137 "Illegal synchronize_rcu() in RCU read-side critical section");
187} 138}
188EXPORT_SYMBOL_GPL(synchronize_sched); 139EXPORT_SYMBOL_GPL(synchronize_rcu);
189 140
190/* 141/*
191 * Helper function for call_rcu() and call_rcu_bh(). 142 * Post an RCU callback to be invoked after the end of an RCU grace
143 * period. But since we have but one CPU, that would be after any
144 * quiescent state.
192 */ 145 */
193static void __call_rcu(struct rcu_head *head, 146void call_rcu(struct rcu_head *head, rcu_callback_t func)
194 rcu_callback_t func,
195 struct rcu_ctrlblk *rcp)
196{ 147{
197 unsigned long flags; 148 unsigned long flags;
198 149
@@ -201,39 +152,20 @@ static void __call_rcu(struct rcu_head *head,
201 head->next = NULL; 152 head->next = NULL;
202 153
203 local_irq_save(flags); 154 local_irq_save(flags);
204 *rcp->curtail = head; 155 *rcu_ctrlblk.curtail = head;
205 rcp->curtail = &head->next; 156 rcu_ctrlblk.curtail = &head->next;
206 local_irq_restore(flags); 157 local_irq_restore(flags);
207 158
208 if (unlikely(is_idle_task(current))) { 159 if (unlikely(is_idle_task(current))) {
209 /* force scheduling for rcu_sched_qs() */ 160 /* force scheduling for rcu_qs() */
210 resched_cpu(0); 161 resched_cpu(0);
211 } 162 }
212} 163}
213 164EXPORT_SYMBOL_GPL(call_rcu);
214/*
215 * Post an RCU callback to be invoked after the end of an RCU-sched grace
216 * period. But since we have but one CPU, that would be after any
217 * quiescent state.
218 */
219void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
220{
221 __call_rcu(head, func, &rcu_sched_ctrlblk);
222}
223EXPORT_SYMBOL_GPL(call_rcu_sched);
224
225/*
226 * Post an RCU bottom-half callback to be invoked after any subsequent
227 * quiescent state.
228 */
229void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
230{
231 __call_rcu(head, func, &rcu_bh_ctrlblk);
232}
233EXPORT_SYMBOL_GPL(call_rcu_bh);
234 165
235void __init rcu_init(void) 166void __init rcu_init(void)
236{ 167{
237 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 168 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
238 rcu_early_boot_tests(); 169 rcu_early_boot_tests();
170 srcu_init();
239} 171}
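
Tiny RCU's rcu_qs() and rcu_process_callbacks() above manage one callback list segmented by two tail pointers: everything up to donetail is ready to invoke, everything up to curtail has been posted. The user-space sketch below mirrors that bookkeeping without the kernel's interrupt masking or softirq plumbing, so it is an illustration of the data structure rather than kernel code.

/*
 * Single list with two pointer-to-pointer tails: a quiescent state
 * promotes the whole current list to "done"; processing splices the
 * done segment off and invokes it.
 */
#include <stdio.h>
#include <stdlib.h>

struct cb {
        struct cb *next;
        void (*func)(struct cb *);
};

static struct cb *cblist;               /* head of the whole list */
static struct cb **donetail = &cblist;  /* end of the ready-to-invoke segment */
static struct cb **curtail = &cblist;   /* end of the whole list */

static void enqueue(struct cb *cb)
{
        cb->next = NULL;
        *curtail = cb;
        curtail = &cb->next;
}

/* A quiescent state promotes everything currently queued to "done". */
static void note_qs(void)
{
        donetail = curtail;
}

/* Splice off the done segment and invoke it, like rcu_process_callbacks(). */
static void process_callbacks(void)
{
        struct cb *list, *next;

        if (donetail == &cblist)
                return;                 /* nothing ready */
        list = cblist;
        cblist = *donetail;
        *donetail = NULL;
        if (curtail == donetail)
                curtail = &cblist;
        donetail = &cblist;
        while (list) {
                next = list->next;
                list->func(list);
                list = next;
        }
}

static void show(struct cb *cb)
{
        printf("invoked callback %p\n", (void *)cb);
        free(cb);
}

int main(void)
{
        struct cb *a = malloc(sizeof(*a)), *b = malloc(sizeof(*b));

        a->func = show;
        b->func = show;
        enqueue(a);
        note_qs();              /* a becomes ready; b (below) does not */
        enqueue(b);
        process_callbacks();    /* invokes a only */
        note_qs();
        process_callbacks();    /* invokes b */
        return 0;
}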
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 0b760c1369f7..121f833acd04 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -61,6 +61,7 @@
61#include <linux/trace_events.h> 61#include <linux/trace_events.h>
62#include <linux/suspend.h> 62#include <linux/suspend.h>
63#include <linux/ftrace.h> 63#include <linux/ftrace.h>
64#include <linux/tick.h>
64 65
65#include "tree.h" 66#include "tree.h"
66#include "rcu.h" 67#include "rcu.h"
@@ -73,45 +74,31 @@
73/* Data structures. */ 74/* Data structures. */
74 75
75/* 76/*
76 * In order to export the rcu_state name to the tracing tools, it 77 * Steal a bit from the bottom of ->dynticks for idle entry/exit
77 * needs to be added in the __tracepoint_string section. 78 * control. Initially this is for TLB flushing.
78 * This requires defining a separate variable tp_<sname>_varname
79 * that points to the string being used, and this will allow
80 * the tracing userspace tools to be able to decipher the string
81 * address to the matching string.
82 */ 79 */
83#ifdef CONFIG_TRACING 80#define RCU_DYNTICK_CTRL_MASK 0x1
84# define DEFINE_RCU_TPS(sname) \ 81#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
85static char sname##_varname[] = #sname; \ 82#ifndef rcu_eqs_special_exit
86static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; 83#define rcu_eqs_special_exit() do { } while (0)
87# define RCU_STATE_NAME(sname) sname##_varname
88#else
89# define DEFINE_RCU_TPS(sname)
90# define RCU_STATE_NAME(sname) __stringify(sname)
91#endif 84#endif
92 85
93#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ 86static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
94DEFINE_RCU_TPS(sname) \ 87 .dynticks_nesting = 1,
95static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ 88 .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
96struct rcu_state sname##_state = { \ 89 .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
97 .level = { &sname##_state.node[0] }, \ 90};
98 .rda = &sname##_data, \ 91struct rcu_state rcu_state = {
99 .call = cr, \ 92 .level = { &rcu_state.node[0] },
100 .gp_state = RCU_GP_IDLE, \ 93 .gp_state = RCU_GP_IDLE,
101 .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT, \ 94 .gp_seq = (0UL - 300UL) << RCU_SEQ_CTR_SHIFT,
102 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 95 .barrier_mutex = __MUTEX_INITIALIZER(rcu_state.barrier_mutex),
103 .name = RCU_STATE_NAME(sname), \ 96 .name = RCU_NAME,
104 .abbr = sabbr, \ 97 .abbr = RCU_ABBR,
105 .exp_mutex = __MUTEX_INITIALIZER(sname##_state.exp_mutex), \ 98 .exp_mutex = __MUTEX_INITIALIZER(rcu_state.exp_mutex),
106 .exp_wake_mutex = __MUTEX_INITIALIZER(sname##_state.exp_wake_mutex), \ 99 .exp_wake_mutex = __MUTEX_INITIALIZER(rcu_state.exp_wake_mutex),
107 .ofl_lock = __SPIN_LOCK_UNLOCKED(sname##_state.ofl_lock), \ 100 .ofl_lock = __RAW_SPIN_LOCK_UNLOCKED(rcu_state.ofl_lock),
108} 101};
109
110RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
111RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
112
113static struct rcu_state *const rcu_state_p;
114LIST_HEAD(rcu_struct_flavors);
115 102
116/* Dump rcu_node combining tree at boot to verify correct setup. */ 103/* Dump rcu_node combining tree at boot to verify correct setup. */
117static bool dump_tree; 104static bool dump_tree;
@@ -158,16 +145,14 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active);
158 */ 145 */
159static int rcu_scheduler_fully_active __read_mostly; 146static int rcu_scheduler_fully_active __read_mostly;
160 147
161static void 148static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
162rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 149 unsigned long gps, unsigned long flags);
163 struct rcu_node *rnp, unsigned long gps, unsigned long flags);
164static void rcu_init_new_rnp(struct rcu_node *rnp_leaf); 150static void rcu_init_new_rnp(struct rcu_node *rnp_leaf);
165static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf); 151static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf);
166static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); 152static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
167static void invoke_rcu_core(void); 153static void invoke_rcu_core(void);
168static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 154static void invoke_rcu_callbacks(struct rcu_data *rdp);
169static void rcu_report_exp_rdp(struct rcu_state *rsp, 155static void rcu_report_exp_rdp(struct rcu_data *rdp);
170 struct rcu_data *rdp, bool wake);
171static void sync_sched_exp_online_cleanup(int cpu); 156static void sync_sched_exp_online_cleanup(int cpu);
172 157
173/* rcuc/rcub kthread realtime priority */ 158/* rcuc/rcub kthread realtime priority */
@@ -183,7 +168,7 @@ module_param(gp_init_delay, int, 0444);
183static int gp_cleanup_delay; 168static int gp_cleanup_delay;
184module_param(gp_cleanup_delay, int, 0444); 169module_param(gp_cleanup_delay, int, 0444);
185 170
186/* Retreive RCU kthreads priority for rcutorture */ 171/* Retrieve RCU kthreads priority for rcutorture */
187int rcu_get_gp_kthreads_prio(void) 172int rcu_get_gp_kthreads_prio(void)
188{ 173{
189 return kthread_prio; 174 return kthread_prio;
@@ -217,67 +202,24 @@ unsigned long rcu_rnp_online_cpus(struct rcu_node *rnp)
217 * permit this function to be invoked without holding the root rcu_node 202 * permit this function to be invoked without holding the root rcu_node
218 * structure's ->lock, but of course results can be subject to change. 203 * structure's ->lock, but of course results can be subject to change.
219 */ 204 */
220static int rcu_gp_in_progress(struct rcu_state *rsp) 205static int rcu_gp_in_progress(void)
221{ 206{
222 return rcu_seq_state(rcu_seq_current(&rsp->gp_seq)); 207 return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq));
223}
224
225/*
226 * Note a quiescent state. Because we do not need to know
227 * how many quiescent states passed, just if there was at least
228 * one since the start of the grace period, this just sets a flag.
229 * The caller must have disabled preemption.
230 */
231void rcu_sched_qs(void)
232{
233 RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
234 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
235 return;
236 trace_rcu_grace_period(TPS("rcu_sched"),
237 __this_cpu_read(rcu_sched_data.gp_seq),
238 TPS("cpuqs"));
239 __this_cpu_write(rcu_sched_data.cpu_no_qs.b.norm, false);
240 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
241 return;
242 __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, false);
243 rcu_report_exp_rdp(&rcu_sched_state,
244 this_cpu_ptr(&rcu_sched_data), true);
245} 208}
246 209
247void rcu_bh_qs(void) 210void rcu_softirq_qs(void)
248{ 211{
249 RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!"); 212 rcu_qs();
250 if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { 213 rcu_preempt_deferred_qs(current);
251 trace_rcu_grace_period(TPS("rcu_bh"),
252 __this_cpu_read(rcu_bh_data.gp_seq),
253 TPS("cpuqs"));
254 __this_cpu_write(rcu_bh_data.cpu_no_qs.b.norm, false);
255 }
256} 214}
257 215
258/* 216/*
259 * Steal a bit from the bottom of ->dynticks for idle entry/exit
260 * control. Initially this is for TLB flushing.
261 */
262#define RCU_DYNTICK_CTRL_MASK 0x1
263#define RCU_DYNTICK_CTRL_CTR (RCU_DYNTICK_CTRL_MASK + 1)
264#ifndef rcu_eqs_special_exit
265#define rcu_eqs_special_exit() do { } while (0)
266#endif
267
268static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
269 .dynticks_nesting = 1,
270 .dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
271 .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
272};
273
274/*
275 * Record entry into an extended quiescent state. This is only to be 217 * Record entry into an extended quiescent state. This is only to be
276 * called when not already in an extended quiescent state. 218 * called when not already in an extended quiescent state.
277 */ 219 */
278static void rcu_dynticks_eqs_enter(void) 220static void rcu_dynticks_eqs_enter(void)
279{ 221{
280 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 222 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
281 int seq; 223 int seq;
282 224
283 /* 225 /*
@@ -285,7 +227,7 @@ static void rcu_dynticks_eqs_enter(void)
285 * critical sections, and we also must force ordering with the 227 * critical sections, and we also must force ordering with the
286 * next idle sojourn. 228 * next idle sojourn.
287 */ 229 */
288 seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); 230 seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
289 /* Better be in an extended quiescent state! */ 231 /* Better be in an extended quiescent state! */
290 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 232 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
291 (seq & RCU_DYNTICK_CTRL_CTR)); 233 (seq & RCU_DYNTICK_CTRL_CTR));
@@ -300,7 +242,7 @@ static void rcu_dynticks_eqs_enter(void)
300 */ 242 */
301static void rcu_dynticks_eqs_exit(void) 243static void rcu_dynticks_eqs_exit(void)
302{ 244{
303 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 245 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
304 int seq; 246 int seq;
305 247
306 /* 248 /*
@@ -308,11 +250,11 @@ static void rcu_dynticks_eqs_exit(void)
308 * and we also must force ordering with the next RCU read-side 250 * and we also must force ordering with the next RCU read-side
309 * critical section. 251 * critical section.
310 */ 252 */
311 seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); 253 seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
312 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 254 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
313 !(seq & RCU_DYNTICK_CTRL_CTR)); 255 !(seq & RCU_DYNTICK_CTRL_CTR));
314 if (seq & RCU_DYNTICK_CTRL_MASK) { 256 if (seq & RCU_DYNTICK_CTRL_MASK) {
315 atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks); 257 atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdp->dynticks);
316 smp_mb__after_atomic(); /* _exit after clearing mask. */ 258 smp_mb__after_atomic(); /* _exit after clearing mask. */
317 /* Prefer duplicate flushes to losing a flush. */ 259 /* Prefer duplicate flushes to losing a flush. */
318 rcu_eqs_special_exit(); 260 rcu_eqs_special_exit();
@@ -331,11 +273,11 @@ static void rcu_dynticks_eqs_exit(void)
331 */ 273 */
332static void rcu_dynticks_eqs_online(void) 274static void rcu_dynticks_eqs_online(void)
333{ 275{
334 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 276 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
335 277
336 if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR) 278 if (atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR)
337 return; 279 return;
338 atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); 280 atomic_add(RCU_DYNTICK_CTRL_CTR, &rdp->dynticks);
339} 281}
340 282
341/* 283/*
@@ -345,18 +287,18 @@ static void rcu_dynticks_eqs_online(void)
345 */ 287 */
346bool rcu_dynticks_curr_cpu_in_eqs(void) 288bool rcu_dynticks_curr_cpu_in_eqs(void)
347{ 289{
348 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 290 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
349 291
350 return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR); 292 return !(atomic_read(&rdp->dynticks) & RCU_DYNTICK_CTRL_CTR);
351} 293}
352 294
353/* 295/*
354 * Snapshot the ->dynticks counter with full ordering so as to allow 296 * Snapshot the ->dynticks counter with full ordering so as to allow
355 * stable comparison of this counter with past and future snapshots. 297 * stable comparison of this counter with past and future snapshots.
356 */ 298 */
357int rcu_dynticks_snap(struct rcu_dynticks *rdtp) 299int rcu_dynticks_snap(struct rcu_data *rdp)
358{ 300{
359 int snap = atomic_add_return(0, &rdtp->dynticks); 301 int snap = atomic_add_return(0, &rdp->dynticks);
360 302
361 return snap & ~RCU_DYNTICK_CTRL_MASK; 303 return snap & ~RCU_DYNTICK_CTRL_MASK;
362} 304}
@@ -371,13 +313,13 @@ static bool rcu_dynticks_in_eqs(int snap)
371} 313}
372 314
373/* 315/*
374 * Return true if the CPU corresponding to the specified rcu_dynticks 316 * Return true if the CPU corresponding to the specified rcu_data
375 * structure has spent some time in an extended quiescent state since 317 * structure has spent some time in an extended quiescent state since
376 * rcu_dynticks_snap() returned the specified snapshot. 318 * rcu_dynticks_snap() returned the specified snapshot.
377 */ 319 */
378static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap) 320static bool rcu_dynticks_in_eqs_since(struct rcu_data *rdp, int snap)
379{ 321{
380 return snap != rcu_dynticks_snap(rdtp); 322 return snap != rcu_dynticks_snap(rdp);
381} 323}
382 324
383/* 325/*
@@ -391,14 +333,14 @@ bool rcu_eqs_special_set(int cpu)
391{ 333{
392 int old; 334 int old;
393 int new; 335 int new;
394 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 336 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
395 337
396 do { 338 do {
397 old = atomic_read(&rdtp->dynticks); 339 old = atomic_read(&rdp->dynticks);
398 if (old & RCU_DYNTICK_CTRL_CTR) 340 if (old & RCU_DYNTICK_CTRL_CTR)
399 return false; 341 return false;
400 new = old | RCU_DYNTICK_CTRL_MASK; 342 new = old | RCU_DYNTICK_CTRL_MASK;
401 } while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old); 343 } while (atomic_cmpxchg(&rdp->dynticks, old, new) != old);
402 return true; 344 return true;
403} 345}
404 346
@@ -413,82 +355,30 @@ bool rcu_eqs_special_set(int cpu)
413 * 355 *
414 * The caller must have disabled interrupts and must not be idle. 356 * The caller must have disabled interrupts and must not be idle.
415 */ 357 */
416static void rcu_momentary_dyntick_idle(void) 358static void __maybe_unused rcu_momentary_dyntick_idle(void)
417{ 359{
418 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
419 int special; 360 int special;
420 361
421 raw_cpu_write(rcu_dynticks.rcu_need_heavy_qs, false); 362 raw_cpu_write(rcu_data.rcu_need_heavy_qs, false);
422 special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks); 363 special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
364 &this_cpu_ptr(&rcu_data)->dynticks);
423 /* It is illegal to call this from idle state. */ 365 /* It is illegal to call this from idle state. */
424 WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR)); 366 WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
367 rcu_preempt_deferred_qs(current);
425} 368}
426 369
427/* 370/**
428 * Note a context switch. This is a quiescent state for RCU-sched, 371 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
429 * and requires special handling for preemptible RCU.
430 * The caller must have disabled interrupts.
431 */
432void rcu_note_context_switch(bool preempt)
433{
434 barrier(); /* Avoid RCU read-side critical sections leaking down. */
435 trace_rcu_utilization(TPS("Start context switch"));
436 rcu_sched_qs();
437 rcu_preempt_note_context_switch(preempt);
438 /* Load rcu_urgent_qs before other flags. */
439 if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
440 goto out;
441 this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
442 if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs)))
443 rcu_momentary_dyntick_idle();
444 this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
445 if (!preempt)
446 rcu_tasks_qs(current);
447out:
448 trace_rcu_utilization(TPS("End context switch"));
449 barrier(); /* Avoid RCU read-side critical sections leaking up. */
450}
451EXPORT_SYMBOL_GPL(rcu_note_context_switch);
452
453/*
454 * Register a quiescent state for all RCU flavors. If there is an
455 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
456 * dyntick-idle quiescent state visible to other CPUs (but only for those
457 * RCU flavors in desperate need of a quiescent state, which will normally
458 * be none of them). Either way, do a lightweight quiescent state for
459 * all RCU flavors.
460 *
461 * The barrier() calls are redundant in the common case when this is
462 * called externally, but just in case this is called from within this
463 * file.
464 * 372 *
373 * If the current CPU is idle or running at a first-level (not nested)
374 * interrupt from idle, return true. The caller must have at least
375 * disabled preemption.
465 */ 376 */
466void rcu_all_qs(void) 377static int rcu_is_cpu_rrupt_from_idle(void)
467{ 378{
468 unsigned long flags; 379 return __this_cpu_read(rcu_data.dynticks_nesting) <= 0 &&
469 380 __this_cpu_read(rcu_data.dynticks_nmi_nesting) <= 1;
470 if (!raw_cpu_read(rcu_dynticks.rcu_urgent_qs))
471 return;
472 preempt_disable();
473 /* Load rcu_urgent_qs before other flags. */
474 if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) {
475 preempt_enable();
476 return;
477 }
478 this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
479 barrier(); /* Avoid RCU read-side critical sections leaking down. */
480 if (unlikely(raw_cpu_read(rcu_dynticks.rcu_need_heavy_qs))) {
481 local_irq_save(flags);
482 rcu_momentary_dyntick_idle();
483 local_irq_restore(flags);
484 }
485 if (unlikely(raw_cpu_read(rcu_sched_data.cpu_no_qs.b.exp)))
486 rcu_sched_qs();
487 this_cpu_inc(rcu_dynticks.rcu_qs_ctr);
488 barrier(); /* Avoid RCU read-side critical sections leaking up. */
489 preempt_enable();
490} 381}
491EXPORT_SYMBOL_GPL(rcu_all_qs);
492 382
493#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */ 383#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */
494static long blimit = DEFAULT_RCU_BLIMIT; 384static long blimit = DEFAULT_RCU_BLIMIT;
@@ -505,13 +395,47 @@ static ulong jiffies_till_first_fqs = ULONG_MAX;
505static ulong jiffies_till_next_fqs = ULONG_MAX; 395static ulong jiffies_till_next_fqs = ULONG_MAX;
506static bool rcu_kick_kthreads; 396static bool rcu_kick_kthreads;
507 397
398/*
399 * How long the grace period must be before we start recruiting
400 * quiescent-state help from rcu_note_context_switch().
401 */
402static ulong jiffies_till_sched_qs = ULONG_MAX;
403module_param(jiffies_till_sched_qs, ulong, 0444);
404static ulong jiffies_to_sched_qs; /* Adjusted version of above if not default */
405module_param(jiffies_to_sched_qs, ulong, 0444); /* Display only! */
406
407/*
408 * Make sure that we give the grace-period kthread time to detect any
409 * idle CPUs before taking active measures to force quiescent states.
410 * However, don't go below 100 milliseconds, adjusted upwards for really
411 * large systems.
412 */
413static void adjust_jiffies_till_sched_qs(void)
414{
415 unsigned long j;
416
417 /* If jiffies_till_sched_qs was specified, respect the request. */
418 if (jiffies_till_sched_qs != ULONG_MAX) {
419 WRITE_ONCE(jiffies_to_sched_qs, jiffies_till_sched_qs);
420 return;
421 }
422 j = READ_ONCE(jiffies_till_first_fqs) +
423 2 * READ_ONCE(jiffies_till_next_fqs);
424 if (j < HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV)
425 j = HZ / 10 + nr_cpu_ids / RCU_JIFFIES_FQS_DIV;
426 pr_info("RCU calculated value of scheduler-enlistment delay is %ld jiffies.\n", j);
427 WRITE_ONCE(jiffies_to_sched_qs, j);
428}
429
508static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp) 430static int param_set_first_fqs_jiffies(const char *val, const struct kernel_param *kp)
509{ 431{
510 ulong j; 432 ulong j;
511 int ret = kstrtoul(val, 0, &j); 433 int ret = kstrtoul(val, 0, &j);
512 434
513 if (!ret) 435 if (!ret) {
514 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j); 436 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : j);
437 adjust_jiffies_till_sched_qs();
438 }
515 return ret; 439 return ret;
516} 440}
517 441
@@ -520,8 +444,10 @@ static int param_set_next_fqs_jiffies(const char *val, const struct kernel_param
520 ulong j; 444 ulong j;
521 int ret = kstrtoul(val, 0, &j); 445 int ret = kstrtoul(val, 0, &j);
522 446
523 if (!ret) 447 if (!ret) {
524 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1)); 448 WRITE_ONCE(*(ulong *)kp->arg, (j > HZ) ? HZ : (j ?: 1));
449 adjust_jiffies_till_sched_qs();
450 }
525 return ret; 451 return ret;
526} 452}
527 453
@@ -539,15 +465,8 @@ module_param_cb(jiffies_till_first_fqs, &first_fqs_jiffies_ops, &jiffies_till_fi
539module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644); 465module_param_cb(jiffies_till_next_fqs, &next_fqs_jiffies_ops, &jiffies_till_next_fqs, 0644);
540module_param(rcu_kick_kthreads, bool, 0644); 466module_param(rcu_kick_kthreads, bool, 0644);
541 467
542/* 468static void force_qs_rnp(int (*f)(struct rcu_data *rdp));
543 * How long the grace period must be before we start recruiting 469static void force_quiescent_state(void);
544 * quiescent-state help from rcu_note_context_switch().
545 */
546static ulong jiffies_till_sched_qs = HZ / 10;
547module_param(jiffies_till_sched_qs, ulong, 0444);
548
549static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
550static void force_quiescent_state(struct rcu_state *rsp);
551static int rcu_pending(void); 470static int rcu_pending(void);
552 471
553/* 472/*
@@ -555,29 +474,11 @@ static int rcu_pending(void);
555 */ 474 */
556unsigned long rcu_get_gp_seq(void) 475unsigned long rcu_get_gp_seq(void)
557{ 476{
558 return READ_ONCE(rcu_state_p->gp_seq); 477 return READ_ONCE(rcu_state.gp_seq);
559} 478}
560EXPORT_SYMBOL_GPL(rcu_get_gp_seq); 479EXPORT_SYMBOL_GPL(rcu_get_gp_seq);
561 480
562/* 481/*
563 * Return the number of RCU-sched GPs completed thus far for debug & stats.
564 */
565unsigned long rcu_sched_get_gp_seq(void)
566{
567 return READ_ONCE(rcu_sched_state.gp_seq);
568}
569EXPORT_SYMBOL_GPL(rcu_sched_get_gp_seq);
570
571/*
572 * Return the number of RCU-bh GPs completed thus far for debug & stats.
573 */
574unsigned long rcu_bh_get_gp_seq(void)
575{
576 return READ_ONCE(rcu_bh_state.gp_seq);
577}
578EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);
579
580/*
581 * Return the number of RCU expedited batches completed thus far for 482 * Return the number of RCU expedited batches completed thus far for
582 * debug & stats. Odd numbers mean that a batch is in progress, even 483 * debug & stats. Odd numbers mean that a batch is in progress, even
583 * numbers mean idle. The value returned will thus be roughly double 484 * numbers mean idle. The value returned will thus be roughly double
@@ -585,48 +486,20 @@ EXPORT_SYMBOL_GPL(rcu_bh_get_gp_seq);
585 */ 486 */
586unsigned long rcu_exp_batches_completed(void) 487unsigned long rcu_exp_batches_completed(void)
587{ 488{
588 return rcu_state_p->expedited_sequence; 489 return rcu_state.expedited_sequence;
589} 490}
590EXPORT_SYMBOL_GPL(rcu_exp_batches_completed); 491EXPORT_SYMBOL_GPL(rcu_exp_batches_completed);
591 492
592/* 493/*
593 * Return the number of RCU-sched expedited batches completed thus far
594 * for debug & stats. Similar to rcu_exp_batches_completed().
595 */
596unsigned long rcu_exp_batches_completed_sched(void)
597{
598 return rcu_sched_state.expedited_sequence;
599}
600EXPORT_SYMBOL_GPL(rcu_exp_batches_completed_sched);
601
602/*
603 * Force a quiescent state. 494 * Force a quiescent state.
604 */ 495 */
605void rcu_force_quiescent_state(void) 496void rcu_force_quiescent_state(void)
606{ 497{
607 force_quiescent_state(rcu_state_p); 498 force_quiescent_state();
608} 499}
609EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); 500EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
610 501
611/* 502/*
612 * Force a quiescent state for RCU BH.
613 */
614void rcu_bh_force_quiescent_state(void)
615{
616 force_quiescent_state(&rcu_bh_state);
617}
618EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
619
620/*
621 * Force a quiescent state for RCU-sched.
622 */
623void rcu_sched_force_quiescent_state(void)
624{
625 force_quiescent_state(&rcu_sched_state);
626}
627EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
628
629/*
630 * Show the state of the grace-period kthreads. 503 * Show the state of the grace-period kthreads.
631 */ 504 */
632void show_rcu_gp_kthreads(void) 505void show_rcu_gp_kthreads(void)
@@ -634,31 +507,28 @@ void show_rcu_gp_kthreads(void)
634 int cpu; 507 int cpu;
635 struct rcu_data *rdp; 508 struct rcu_data *rdp;
636 struct rcu_node *rnp; 509 struct rcu_node *rnp;
637 struct rcu_state *rsp;
638 510
639 for_each_rcu_flavor(rsp) { 511 pr_info("%s: wait state: %d ->state: %#lx\n", rcu_state.name,
640 pr_info("%s: wait state: %d ->state: %#lx\n", 512 rcu_state.gp_state, rcu_state.gp_kthread->state);
641 rsp->name, rsp->gp_state, rsp->gp_kthread->state); 513 rcu_for_each_node_breadth_first(rnp) {
642 rcu_for_each_node_breadth_first(rsp, rnp) { 514 if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed))
643 if (ULONG_CMP_GE(rsp->gp_seq, rnp->gp_seq_needed)) 515 continue;
644 continue; 516 pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n",
645 pr_info("\trcu_node %d:%d ->gp_seq %lu ->gp_seq_needed %lu\n", 517 rnp->grplo, rnp->grphi, rnp->gp_seq,
646 rnp->grplo, rnp->grphi, rnp->gp_seq, 518 rnp->gp_seq_needed);
647 rnp->gp_seq_needed); 519 if (!rcu_is_leaf_node(rnp))
648 if (!rcu_is_leaf_node(rnp)) 520 continue;
521 for_each_leaf_node_possible_cpu(rnp, cpu) {
522 rdp = per_cpu_ptr(&rcu_data, cpu);
523 if (rdp->gpwrap ||
524 ULONG_CMP_GE(rcu_state.gp_seq,
525 rdp->gp_seq_needed))
649 continue; 526 continue;
650 for_each_leaf_node_possible_cpu(rnp, cpu) { 527 pr_info("\tcpu %d ->gp_seq_needed %lu\n",
651 rdp = per_cpu_ptr(rsp->rda, cpu); 528 cpu, rdp->gp_seq_needed);
652 if (rdp->gpwrap ||
653 ULONG_CMP_GE(rsp->gp_seq,
654 rdp->gp_seq_needed))
655 continue;
656 pr_info("\tcpu %d ->gp_seq_needed %lu\n",
657 cpu, rdp->gp_seq_needed);
658 }
659 } 529 }
660 /* sched_show_task(rsp->gp_kthread); */
661 } 530 }
531 /* sched_show_task(rcu_state.gp_kthread); */
662} 532}
663EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads); 533EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
664 534
@@ -668,34 +538,25 @@ EXPORT_SYMBOL_GPL(show_rcu_gp_kthreads);
668void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, 538void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
669 unsigned long *gp_seq) 539 unsigned long *gp_seq)
670{ 540{
671 struct rcu_state *rsp = NULL;
672
673 switch (test_type) { 541 switch (test_type) {
674 case RCU_FLAVOR: 542 case RCU_FLAVOR:
675 rsp = rcu_state_p;
676 break;
677 case RCU_BH_FLAVOR: 543 case RCU_BH_FLAVOR:
678 rsp = &rcu_bh_state;
679 break;
680 case RCU_SCHED_FLAVOR: 544 case RCU_SCHED_FLAVOR:
681 rsp = &rcu_sched_state; 545 *flags = READ_ONCE(rcu_state.gp_flags);
546 *gp_seq = rcu_seq_current(&rcu_state.gp_seq);
682 break; 547 break;
683 default: 548 default:
684 break; 549 break;
685 } 550 }
686 if (rsp == NULL)
687 return;
688 *flags = READ_ONCE(rsp->gp_flags);
689 *gp_seq = rcu_seq_current(&rsp->gp_seq);
690} 551}
691EXPORT_SYMBOL_GPL(rcutorture_get_gp_data); 552EXPORT_SYMBOL_GPL(rcutorture_get_gp_data);
692 553
693/* 554/*
694 * Return the root node of the specified rcu_state structure. 555 * Return the root node of the rcu_state structure.
695 */ 556 */
696static struct rcu_node *rcu_get_root(struct rcu_state *rsp) 557static struct rcu_node *rcu_get_root(void)
697{ 558{
698 return &rsp->node[0]; 559 return &rcu_state.node[0];
699} 560}
700 561
701/* 562/*
@@ -708,28 +569,25 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
708 */ 569 */
709static void rcu_eqs_enter(bool user) 570static void rcu_eqs_enter(bool user)
710{ 571{
711 struct rcu_state *rsp; 572 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
712 struct rcu_data *rdp;
713 struct rcu_dynticks *rdtp;
714 573
715 rdtp = this_cpu_ptr(&rcu_dynticks); 574 WARN_ON_ONCE(rdp->dynticks_nmi_nesting != DYNTICK_IRQ_NONIDLE);
716 WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); 575 WRITE_ONCE(rdp->dynticks_nmi_nesting, 0);
717 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 576 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
718 rdtp->dynticks_nesting == 0); 577 rdp->dynticks_nesting == 0);
719 if (rdtp->dynticks_nesting != 1) { 578 if (rdp->dynticks_nesting != 1) {
720 rdtp->dynticks_nesting--; 579 rdp->dynticks_nesting--;
721 return; 580 return;
722 } 581 }
723 582
724 lockdep_assert_irqs_disabled(); 583 lockdep_assert_irqs_disabled();
725 trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0, rdtp->dynticks); 584 trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, rdp->dynticks);
726 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); 585 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
727 for_each_rcu_flavor(rsp) { 586 rdp = this_cpu_ptr(&rcu_data);
728 rdp = this_cpu_ptr(rsp->rda); 587 do_nocb_deferred_wakeup(rdp);
729 do_nocb_deferred_wakeup(rdp);
730 }
731 rcu_prepare_for_idle(); 588 rcu_prepare_for_idle();
732 WRITE_ONCE(rdtp->dynticks_nesting, 0); /* Avoid irq-access tearing. */ 589 rcu_preempt_deferred_qs(current);
590 WRITE_ONCE(rdp->dynticks_nesting, 0); /* Avoid irq-access tearing. */
733 rcu_dynticks_eqs_enter(); 591 rcu_dynticks_eqs_enter();
734 rcu_dynticks_task_enter(); 592 rcu_dynticks_task_enter();
735} 593}
@@ -770,44 +628,61 @@ void rcu_user_enter(void)
770} 628}
771#endif /* CONFIG_NO_HZ_FULL */ 629#endif /* CONFIG_NO_HZ_FULL */
772 630
773/** 631/*
774 * rcu_nmi_exit - inform RCU of exit from NMI context
775 *
776 * If we are returning from the outermost NMI handler that interrupted an 632 * If we are returning from the outermost NMI handler that interrupted an
777 * RCU-idle period, update rdtp->dynticks and rdtp->dynticks_nmi_nesting 633 * RCU-idle period, update rdp->dynticks and rdp->dynticks_nmi_nesting
778 * to let the RCU grace-period handling know that the CPU is back to 634 * to let the RCU grace-period handling know that the CPU is back to
779 * being RCU-idle. 635 * being RCU-idle.
780 * 636 *
781 * If you add or remove a call to rcu_nmi_exit(), be sure to test 637 * If you add or remove a call to rcu_nmi_exit_common(), be sure to test
782 * with CONFIG_RCU_EQS_DEBUG=y. 638 * with CONFIG_RCU_EQS_DEBUG=y.
783 */ 639 */
784void rcu_nmi_exit(void) 640static __always_inline void rcu_nmi_exit_common(bool irq)
785{ 641{
786 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 642 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
787 643
788 /* 644 /*
789 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks. 645 * Check for ->dynticks_nmi_nesting underflow and bad ->dynticks.
790 * (We are exiting an NMI handler, so RCU better be paying attention 646 * (We are exiting an NMI handler, so RCU better be paying attention
791 * to us!) 647 * to us!)
792 */ 648 */
793 WARN_ON_ONCE(rdtp->dynticks_nmi_nesting <= 0); 649 WARN_ON_ONCE(rdp->dynticks_nmi_nesting <= 0);
794 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs()); 650 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs());
795 651
796 /* 652 /*
797 * If the nesting level is not 1, the CPU wasn't RCU-idle, so 653 * If the nesting level is not 1, the CPU wasn't RCU-idle, so
798 * leave it in non-RCU-idle state. 654 * leave it in non-RCU-idle state.
799 */ 655 */
800 if (rdtp->dynticks_nmi_nesting != 1) { 656 if (rdp->dynticks_nmi_nesting != 1) {
801 trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nmi_nesting, rdtp->dynticks_nmi_nesting - 2, rdtp->dynticks); 657 trace_rcu_dyntick(TPS("--="), rdp->dynticks_nmi_nesting, rdp->dynticks_nmi_nesting - 2, rdp->dynticks);
802 WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* No store tearing. */ 658 WRITE_ONCE(rdp->dynticks_nmi_nesting, /* No store tearing. */
803 rdtp->dynticks_nmi_nesting - 2); 659 rdp->dynticks_nmi_nesting - 2);
804 return; 660 return;
805 } 661 }
806 662
807 /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */ 663 /* This NMI interrupted an RCU-idle CPU, restore RCU-idleness. */
808 trace_rcu_dyntick(TPS("Startirq"), rdtp->dynticks_nmi_nesting, 0, rdtp->dynticks); 664 trace_rcu_dyntick(TPS("Startirq"), rdp->dynticks_nmi_nesting, 0, rdp->dynticks);
809 WRITE_ONCE(rdtp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */ 665 WRITE_ONCE(rdp->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
666
667 if (irq)
668 rcu_prepare_for_idle();
669
810 rcu_dynticks_eqs_enter(); 670 rcu_dynticks_eqs_enter();
671
672 if (irq)
673 rcu_dynticks_task_enter();
674}
675
676/**
677 * rcu_nmi_exit - inform RCU of exit from NMI context
678 * @irq: Is this call from rcu_irq_exit?
679 *
680 * If you add or remove a call to rcu_nmi_exit(), be sure to test
681 * with CONFIG_RCU_EQS_DEBUG=y.
682 */
683void rcu_nmi_exit(void)
684{
685 rcu_nmi_exit_common(false);
811} 686}
812 687
813/** 688/**
@@ -831,14 +706,8 @@ void rcu_nmi_exit(void)
831 */ 706 */
832void rcu_irq_exit(void) 707void rcu_irq_exit(void)
833{ 708{
834 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
835
836 lockdep_assert_irqs_disabled(); 709 lockdep_assert_irqs_disabled();
837 if (rdtp->dynticks_nmi_nesting == 1) 710 rcu_nmi_exit_common(true);
838 rcu_prepare_for_idle();
839 rcu_nmi_exit();
840 if (rdtp->dynticks_nmi_nesting == 0)
841 rcu_dynticks_task_enter();
842} 711}
843 712
844/* 713/*
@@ -866,24 +735,25 @@ void rcu_irq_exit_irqson(void)
866 */ 735 */
867static void rcu_eqs_exit(bool user) 736static void rcu_eqs_exit(bool user)
868{ 737{
869 struct rcu_dynticks *rdtp; 738 struct rcu_data *rdp;
870 long oldval; 739 long oldval;
871 740
872 lockdep_assert_irqs_disabled(); 741 lockdep_assert_irqs_disabled();
873 rdtp = this_cpu_ptr(&rcu_dynticks); 742 rdp = this_cpu_ptr(&rcu_data);
874 oldval = rdtp->dynticks_nesting; 743 oldval = rdp->dynticks_nesting;
875 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); 744 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
876 if (oldval) { 745 if (oldval) {
877 rdtp->dynticks_nesting++; 746 rdp->dynticks_nesting++;
878 return; 747 return;
879 } 748 }
880 rcu_dynticks_task_exit(); 749 rcu_dynticks_task_exit();
881 rcu_dynticks_eqs_exit(); 750 rcu_dynticks_eqs_exit();
882 rcu_cleanup_after_idle(); 751 rcu_cleanup_after_idle();
883 trace_rcu_dyntick(TPS("End"), rdtp->dynticks_nesting, 1, rdtp->dynticks); 752 trace_rcu_dyntick(TPS("End"), rdp->dynticks_nesting, 1, rdp->dynticks);
884 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); 753 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
885 WRITE_ONCE(rdtp->dynticks_nesting, 1); 754 WRITE_ONCE(rdp->dynticks_nesting, 1);
886 WRITE_ONCE(rdtp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE); 755 WARN_ON_ONCE(rdp->dynticks_nmi_nesting);
756 WRITE_ONCE(rdp->dynticks_nmi_nesting, DYNTICK_IRQ_NONIDLE);
887} 757}
888 758
889/** 759/**
@@ -921,24 +791,25 @@ void rcu_user_exit(void)
921#endif /* CONFIG_NO_HZ_FULL */ 791#endif /* CONFIG_NO_HZ_FULL */
922 792
923/** 793/**
924 * rcu_nmi_enter - inform RCU of entry to NMI context 794 * rcu_nmi_enter_common - inform RCU of entry to NMI context
795 * @irq: Is this call from rcu_irq_enter?
925 * 796 *
926 * If the CPU was idle from RCU's viewpoint, update rdtp->dynticks and 797 * If the CPU was idle from RCU's viewpoint, update rdp->dynticks and
927 * rdtp->dynticks_nmi_nesting to let the RCU grace-period handling know 798 * rdp->dynticks_nmi_nesting to let the RCU grace-period handling know
928 * that the CPU is active. This implementation permits nested NMIs, as 799 * that the CPU is active. This implementation permits nested NMIs, as
929 * long as the nesting level does not overflow an int. (You will probably 800 * long as the nesting level does not overflow an int. (You will probably
930 * run out of stack space first.) 801 * run out of stack space first.)
931 * 802 *
932 * If you add or remove a call to rcu_nmi_enter(), be sure to test 803 * If you add or remove a call to rcu_nmi_enter_common(), be sure to test
933 * with CONFIG_RCU_EQS_DEBUG=y. 804 * with CONFIG_RCU_EQS_DEBUG=y.
934 */ 805 */
935void rcu_nmi_enter(void) 806static __always_inline void rcu_nmi_enter_common(bool irq)
936{ 807{
937 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 808 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
938 long incby = 2; 809 long incby = 2;
939 810
940 /* Complain about underflow. */ 811 /* Complain about underflow. */
941 WARN_ON_ONCE(rdtp->dynticks_nmi_nesting < 0); 812 WARN_ON_ONCE(rdp->dynticks_nmi_nesting < 0);
942 813
943 /* 814 /*
944 * If idle from RCU viewpoint, atomically increment ->dynticks 815 * If idle from RCU viewpoint, atomically increment ->dynticks
@@ -949,18 +820,34 @@ void rcu_nmi_enter(void)
949 * period (observation due to Andy Lutomirski). 820 * period (observation due to Andy Lutomirski).
950 */ 821 */
951 if (rcu_dynticks_curr_cpu_in_eqs()) { 822 if (rcu_dynticks_curr_cpu_in_eqs()) {
823
824 if (irq)
825 rcu_dynticks_task_exit();
826
952 rcu_dynticks_eqs_exit(); 827 rcu_dynticks_eqs_exit();
828
829 if (irq)
830 rcu_cleanup_after_idle();
831
953 incby = 1; 832 incby = 1;
954 } 833 }
955 trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="), 834 trace_rcu_dyntick(incby == 1 ? TPS("Endirq") : TPS("++="),
956 rdtp->dynticks_nmi_nesting, 835 rdp->dynticks_nmi_nesting,
957 rdtp->dynticks_nmi_nesting + incby, rdtp->dynticks); 836 rdp->dynticks_nmi_nesting + incby, rdp->dynticks);
958 WRITE_ONCE(rdtp->dynticks_nmi_nesting, /* Prevent store tearing. */ 837 WRITE_ONCE(rdp->dynticks_nmi_nesting, /* Prevent store tearing. */
959 rdtp->dynticks_nmi_nesting + incby); 838 rdp->dynticks_nmi_nesting + incby);
960 barrier(); 839 barrier();
961} 840}
962 841
963/** 842/**
843 * rcu_nmi_enter - inform RCU of entry to NMI context
844 */
845void rcu_nmi_enter(void)
846{
847 rcu_nmi_enter_common(false);
848}
849
850/**
964 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle 851 * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
965 * 852 *
966 * Enter an interrupt handler, which might possibly result in exiting 853 * Enter an interrupt handler, which might possibly result in exiting
@@ -984,14 +871,8 @@ void rcu_nmi_enter(void)
984 */ 871 */
985void rcu_irq_enter(void) 872void rcu_irq_enter(void)
986{ 873{
987 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
988
989 lockdep_assert_irqs_disabled(); 874 lockdep_assert_irqs_disabled();
990 if (rdtp->dynticks_nmi_nesting == 0) 875 rcu_nmi_enter_common(true);
991 rcu_dynticks_task_exit();
992 rcu_nmi_enter();
993 if (rdtp->dynticks_nmi_nesting == 1)
994 rcu_cleanup_after_idle();
995} 876}
996 877
997/* 878/*
@@ -1043,7 +924,7 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
1043 cpu = task_cpu(t); 924 cpu = task_cpu(t);
1044 if (!task_curr(t)) 925 if (!task_curr(t))
1045 return; /* This task is not running on that CPU. */ 926 return; /* This task is not running on that CPU. */
1046 smp_store_release(per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, cpu), true); 927 smp_store_release(per_cpu_ptr(&rcu_data.rcu_urgent_qs, cpu), true);
1047} 928}
1048 929
1049#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) 930#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
@@ -1054,11 +935,7 @@ void rcu_request_urgent_qs_task(struct task_struct *t)
1054 * Disable preemption to avoid false positives that could otherwise 935 * Disable preemption to avoid false positives that could otherwise
1055 * happen due to the current CPU number being sampled, this task being 936 * happen due to the current CPU number being sampled, this task being
1056 * preempted, its old CPU being taken offline, resuming on some other CPU, 937 * preempted, its old CPU being taken offline, resuming on some other CPU,
1057 * then determining that its old CPU is now offline. Because there are 938 * then determining that its old CPU is now offline.
1058 * multiple flavors of RCU, and because this function can be called in the
1059 * midst of updating the flavors while a given CPU coming online or going
1060 * offline, it is necessary to check all flavors. If any of the flavors
1061 * believe that given CPU is online, it is considered to be online.
1062 * 939 *
1063 * Disable checking if in an NMI handler because we cannot safely 940 * Disable checking if in an NMI handler because we cannot safely
1064 * report errors from NMI handlers anyway. In addition, it is OK to use 941 * report errors from NMI handlers anyway. In addition, it is OK to use
@@ -1069,39 +946,22 @@ bool rcu_lockdep_current_cpu_online(void)
1069{ 946{
1070 struct rcu_data *rdp; 947 struct rcu_data *rdp;
1071 struct rcu_node *rnp; 948 struct rcu_node *rnp;
1072 struct rcu_state *rsp; 949 bool ret = false;
1073 950
1074 if (in_nmi() || !rcu_scheduler_fully_active) 951 if (in_nmi() || !rcu_scheduler_fully_active)
1075 return true; 952 return true;
1076 preempt_disable(); 953 preempt_disable();
1077 for_each_rcu_flavor(rsp) { 954 rdp = this_cpu_ptr(&rcu_data);
1078 rdp = this_cpu_ptr(rsp->rda); 955 rnp = rdp->mynode;
1079 rnp = rdp->mynode; 956 if (rdp->grpmask & rcu_rnp_online_cpus(rnp))
1080 if (rdp->grpmask & rcu_rnp_online_cpus(rnp)) { 957 ret = true;
1081 preempt_enable();
1082 return true;
1083 }
1084 }
1085 preempt_enable(); 958 preempt_enable();
1086 return false; 959 return ret;
1087} 960}
1088EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online); 961EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
1089 962
1090#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */ 963#endif /* #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU) */
1091 964
1092/**
1093 * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
1094 *
1095 * If the current CPU is idle or running at a first-level (not nested)
1096 * interrupt from idle, return true. The caller must have at least
1097 * disabled preemption.
1098 */
1099static int rcu_is_cpu_rrupt_from_idle(void)
1100{
1101 return __this_cpu_read(rcu_dynticks.dynticks_nesting) <= 0 &&
1102 __this_cpu_read(rcu_dynticks.dynticks_nmi_nesting) <= 1;
1103}
1104
1105/* 965/*
1106 * We are reporting a quiescent state on behalf of some other CPU, so 966 * We are reporting a quiescent state on behalf of some other CPU, so
1107 * it is our responsibility to check for and handle potential overflow 967 * it is our responsibility to check for and handle potential overflow
@@ -1126,9 +986,9 @@ static void rcu_gpnum_ovf(struct rcu_node *rnp, struct rcu_data *rdp)
1126 */ 986 */
1127static int dyntick_save_progress_counter(struct rcu_data *rdp) 987static int dyntick_save_progress_counter(struct rcu_data *rdp)
1128{ 988{
1129 rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); 989 rdp->dynticks_snap = rcu_dynticks_snap(rdp);
1130 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { 990 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
1131 trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti")); 991 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1132 rcu_gpnum_ovf(rdp->mynode, rdp); 992 rcu_gpnum_ovf(rdp->mynode, rdp);
1133 return 1; 993 return 1;
1134 } 994 }
@@ -1177,35 +1037,15 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1177 * read-side critical section that started before the beginning 1037 * read-side critical section that started before the beginning
1178 * of the current RCU grace period. 1038 * of the current RCU grace period.
1179 */ 1039 */
1180 if (rcu_dynticks_in_eqs_since(rdp->dynticks, rdp->dynticks_snap)) { 1040 if (rcu_dynticks_in_eqs_since(rdp, rdp->dynticks_snap)) {
1181 trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("dti")); 1041 trace_rcu_fqs(rcu_state.name, rdp->gp_seq, rdp->cpu, TPS("dti"));
1182 rdp->dynticks_fqs++;
1183 rcu_gpnum_ovf(rnp, rdp);
1184 return 1;
1185 }
1186
1187 /*
1188 * Has this CPU encountered a cond_resched() since the beginning
1189 * of the grace period? For this to be the case, the CPU has to
1190 * have noticed the current grace period. This might not be the
1191 * case for nohz_full CPUs looping in the kernel.
1192 */
1193 jtsq = jiffies_till_sched_qs;
1194 ruqp = per_cpu_ptr(&rcu_dynticks.rcu_urgent_qs, rdp->cpu);
1195 if (time_after(jiffies, rdp->rsp->gp_start + jtsq) &&
1196 READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_dynticks.rcu_qs_ctr, rdp->cpu) &&
1197 rcu_seq_current(&rdp->gp_seq) == rnp->gp_seq && !rdp->gpwrap) {
1198 trace_rcu_fqs(rdp->rsp->name, rdp->gp_seq, rdp->cpu, TPS("rqc"));
1199 rcu_gpnum_ovf(rnp, rdp); 1042 rcu_gpnum_ovf(rnp, rdp);
1200 return 1; 1043 return 1;
1201 } else if (time_after(jiffies, rdp->rsp->gp_start + jtsq)) {
1202 /* Load rcu_qs_ctr before store to rcu_urgent_qs. */
1203 smp_store_release(ruqp, true);
1204 } 1044 }
1205 1045
1206 /* If waiting too long on an offline CPU, complain. */ 1046 /* If waiting too long on an offline CPU, complain. */
1207 if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) && 1047 if (!(rdp->grpmask & rcu_rnp_online_cpus(rnp)) &&
1208 time_after(jiffies, rdp->rsp->gp_start + HZ)) { 1048 time_after(jiffies, rcu_state.gp_start + HZ)) {
1209 bool onl; 1049 bool onl;
1210 struct rcu_node *rnp1; 1050 struct rcu_node *rnp1;
1211 1051
@@ -1226,39 +1066,56 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1226 1066
1227 /* 1067 /*
1228 * A CPU running for an extended time within the kernel can 1068 * A CPU running for an extended time within the kernel can
1229 * delay RCU grace periods. When the CPU is in NO_HZ_FULL mode, 1069 * delay RCU grace periods: (1) At age jiffies_to_sched_qs,
1230 * even context-switching back and forth between a pair of 1070 * set .rcu_urgent_qs, (2) At age 2*jiffies_to_sched_qs, set
1231 * in-kernel CPU-bound tasks cannot advance grace periods. 1071 * both .rcu_need_heavy_qs and .rcu_urgent_qs. Note that the
1232 * So if the grace period is old enough, make the CPU pay attention. 1072 * unsynchronized assignments to the per-CPU rcu_need_heavy_qs
1233 * Note that the unsynchronized assignments to the per-CPU 1073 * variable are safe because the assignments are repeated if this
1234 * rcu_need_heavy_qs variable are safe. Yes, setting of 1074 * CPU failed to pass through a quiescent state. This code
1235 * bits can be lost, but they will be set again on the next 1075 * also checks .jiffies_resched in case jiffies_to_sched_qs
1236 * force-quiescent-state pass. So lost bit sets do not result 1076 * is set way high.
1237 * in incorrect behavior, merely in a grace period lasting
1238 * a few jiffies longer than it might otherwise. Because
1239 * there are at most four threads involved, and because the
1240 * updates are only once every few jiffies, the probability of
1241 * lossage (and thus of slight grace-period extension) is
1242 * quite low.
1243 */ 1077 */
1244 rnhqp = &per_cpu(rcu_dynticks.rcu_need_heavy_qs, rdp->cpu); 1078 jtsq = READ_ONCE(jiffies_to_sched_qs);
1079 ruqp = per_cpu_ptr(&rcu_data.rcu_urgent_qs, rdp->cpu);
1080 rnhqp = &per_cpu(rcu_data.rcu_need_heavy_qs, rdp->cpu);
1245 if (!READ_ONCE(*rnhqp) && 1081 if (!READ_ONCE(*rnhqp) &&
1246 (time_after(jiffies, rdp->rsp->gp_start + jtsq) || 1082 (time_after(jiffies, rcu_state.gp_start + jtsq * 2) ||
1247 time_after(jiffies, rdp->rsp->jiffies_resched))) { 1083 time_after(jiffies, rcu_state.jiffies_resched))) {
1248 WRITE_ONCE(*rnhqp, true); 1084 WRITE_ONCE(*rnhqp, true);
1249 /* Store rcu_need_heavy_qs before rcu_urgent_qs. */ 1085 /* Store rcu_need_heavy_qs before rcu_urgent_qs. */
1250 smp_store_release(ruqp, true); 1086 smp_store_release(ruqp, true);
1251 rdp->rsp->jiffies_resched += jtsq; /* Re-enable beating. */ 1087 } else if (time_after(jiffies, rcu_state.gp_start + jtsq)) {
1088 WRITE_ONCE(*ruqp, true);
1252 } 1089 }
1253 1090
1254 /* 1091 /*
1255 * If more than halfway to RCU CPU stall-warning time, do a 1092 * NO_HZ_FULL CPUs can run in-kernel without rcu_check_callbacks!
1256 * resched_cpu() to try to loosen things up a bit. Also check to 1093 * The above code handles this, but only for straight cond_resched().
1257 * see if the CPU is getting hammered with interrupts, but only 1094 * And some in-kernel loops check need_resched() before calling
1258 * once per grace period, just to keep the IPIs down to a dull roar. 1095 * cond_resched(), which defeats the above code for CPUs that are
1096 * running in-kernel with scheduling-clock interrupts disabled.
1097 * So hit them over the head with the resched_cpu() hammer!
1259 */ 1098 */
1260 if (jiffies - rdp->rsp->gp_start > rcu_jiffies_till_stall_check() / 2) { 1099 if (tick_nohz_full_cpu(rdp->cpu) &&
1100 time_after(jiffies,
1101 READ_ONCE(rdp->last_fqs_resched) + jtsq * 3)) {
1261 resched_cpu(rdp->cpu); 1102 resched_cpu(rdp->cpu);
1103 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1104 }
1105
1106 /*
1107 * If more than halfway to RCU CPU stall-warning time, invoke
1108 * resched_cpu() more frequently to try to loosen things up a bit.
1109 * Also check to see if the CPU is getting hammered with interrupts,
1110 * but only once per grace period, just to keep the IPIs down to
1111 * a dull roar.
1112 */
1113 if (time_after(jiffies, rcu_state.jiffies_resched)) {
1114 if (time_after(jiffies,
1115 READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
1116 resched_cpu(rdp->cpu);
1117 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1118 }
1262 if (IS_ENABLED(CONFIG_IRQ_WORK) && 1119 if (IS_ENABLED(CONFIG_IRQ_WORK) &&
1263 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && 1120 !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
1264 (rnp->ffmask & rdp->grpmask)) { 1121 (rnp->ffmask & rdp->grpmask)) {
@@ -1272,17 +1129,17 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1272 return 0; 1129 return 0;
1273} 1130}
1274 1131
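Taken together, the reworked rcu_implicit_dynticks_qs() code above implements an escalation ladder keyed off jiffies_to_sched_qs: politely request a quiescent state at one period, demand a heavy one at two periods (or past ->jiffies_resched), and IPI nohz_full CPUs with resched_cpu() past three periods. The following standalone sketch only makes those thresholds explicit; the struct, field, and helper names are illustrative, the last_fqs_resched rate limiting is elided, and the wraparound-safe comparison is reimplemented locally rather than taken from the kernel.

#include <stdbool.h>
#include <stdio.h>

/* Wraparound-safe "a is after b", same idiom as the kernel's time_after(). */
#define time_after(a, b)	((long)((b) - (a)) < 0)

struct cpu_hints {			/* stand-ins for per-CPU rcu_data fields */
	bool rcu_urgent_qs;		/* ~ .rcu_urgent_qs */
	bool rcu_need_heavy_qs;		/* ~ .rcu_need_heavy_qs */
	bool resched_sent;		/* ~ "resched_cpu() was invoked" */
};

/* Decide which hints to raise for a CPU "now" jiffies into the grace period. */
static void escalate(struct cpu_hints *h, unsigned long gp_start,
		     unsigned long now, unsigned long jtsq,
		     unsigned long jiffies_resched, bool nohz_full)
{
	if (time_after(now, gp_start + jtsq * 2) ||
	    time_after(now, jiffies_resched)) {
		h->rcu_need_heavy_qs = true;	/* rung 2: force a heavy QS */
		h->rcu_urgent_qs = true;
	} else if (time_after(now, gp_start + jtsq)) {
		h->rcu_urgent_qs = true;	/* rung 1: polite request */
	}
	/* rung 3: nohz_full CPUs may never take the tick, so past 3*jtsq
	 * (or once past jiffies_resched) hit them with an IPI. */
	if ((nohz_full && time_after(now, gp_start + jtsq * 3)) ||
	    time_after(now, jiffies_resched))
		h->resched_sent = true;
}

int main(void)
{
	struct cpu_hints h = { 0 };
	unsigned long gp_start = 1000, jtsq = 100;

	escalate(&h, gp_start, gp_start + 150, jtsq, gp_start + 1050, true);
	printf("urgent=%d heavy=%d resched=%d\n",
	       h.rcu_urgent_qs, h.rcu_need_heavy_qs, h.resched_sent);
	return 0;
}
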
1275static void record_gp_stall_check_time(struct rcu_state *rsp) 1132static void record_gp_stall_check_time(void)
1276{ 1133{
1277 unsigned long j = jiffies; 1134 unsigned long j = jiffies;
1278 unsigned long j1; 1135 unsigned long j1;
1279 1136
1280 rsp->gp_start = j; 1137 rcu_state.gp_start = j;
1281 j1 = rcu_jiffies_till_stall_check(); 1138 j1 = rcu_jiffies_till_stall_check();
1282 /* Record ->gp_start before ->jiffies_stall. */ 1139 /* Record ->gp_start before ->jiffies_stall. */
1283 smp_store_release(&rsp->jiffies_stall, j + j1); /* ^^^ */ 1140 smp_store_release(&rcu_state.jiffies_stall, j + j1); /* ^^^ */
1284 rsp->jiffies_resched = j + j1 / 2; 1141 rcu_state.jiffies_resched = j + j1 / 2;
1285 rsp->n_force_qs_gpstart = READ_ONCE(rsp->n_force_qs); 1142 rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs);
1286} 1143}
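record_gp_stall_check_time() pins down three timestamps per grace period: ->gp_start, ->jiffies_stall at gp_start plus rcu_jiffies_till_stall_check(), and ->jiffies_resched at the halfway point. A minimal numeric sketch, assuming HZ of 1000 and the default 21-second stall timeout (both assumptions, not taken from this patch):

#include <stdio.h>

int main(void)
{
	const unsigned long HZ = 1000;			/* assumed */
	unsigned long j = 5000;				/* "jiffies" at GP start */
	unsigned long j1 = 21 * HZ;			/* ~ rcu_jiffies_till_stall_check() */
	unsigned long gp_start = j;
	unsigned long jiffies_stall = j + j1;		/* stall warning fires here */
	unsigned long jiffies_resched = j + j1 / 2;	/* resched_cpu() nagging starts here */

	printf("gp_start=%lu stall=%lu resched=%lu\n",
	       gp_start, jiffies_stall, jiffies_resched);
	return 0;
}
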
1287 1144
1288/* 1145/*
@@ -1298,25 +1155,23 @@ static const char *gp_state_getname(short gs)
1298/* 1155/*
1299 * Complain about starvation of grace-period kthread. 1156 * Complain about starvation of grace-period kthread.
1300 */ 1157 */
1301static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp) 1158static void rcu_check_gp_kthread_starvation(void)
1302{ 1159{
1303 unsigned long gpa; 1160 struct task_struct *gpk = rcu_state.gp_kthread;
1304 unsigned long j; 1161 unsigned long j;
1305 1162
1306 j = jiffies; 1163 j = jiffies - READ_ONCE(rcu_state.gp_activity);
1307 gpa = READ_ONCE(rsp->gp_activity); 1164 if (j > 2 * HZ) {
1308 if (j - gpa > 2 * HZ) {
1309 pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n", 1165 pr_err("%s kthread starved for %ld jiffies! g%ld f%#x %s(%d) ->state=%#lx ->cpu=%d\n",
1310 rsp->name, j - gpa, 1166 rcu_state.name, j,
1311 (long)rcu_seq_current(&rsp->gp_seq), 1167 (long)rcu_seq_current(&rcu_state.gp_seq),
1312 rsp->gp_flags, 1168 rcu_state.gp_flags,
1313 gp_state_getname(rsp->gp_state), rsp->gp_state, 1169 gp_state_getname(rcu_state.gp_state), rcu_state.gp_state,
1314 rsp->gp_kthread ? rsp->gp_kthread->state : ~0, 1170 gpk ? gpk->state : ~0, gpk ? task_cpu(gpk) : -1);
1315 rsp->gp_kthread ? task_cpu(rsp->gp_kthread) : -1); 1171 if (gpk) {
1316 if (rsp->gp_kthread) {
1317 pr_err("RCU grace-period kthread stack dump:\n"); 1172 pr_err("RCU grace-period kthread stack dump:\n");
1318 sched_show_task(rsp->gp_kthread); 1173 sched_show_task(gpk);
1319 wake_up_process(rsp->gp_kthread); 1174 wake_up_process(gpk);
1320 } 1175 }
1321 } 1176 }
1322} 1177}
@@ -1327,13 +1182,13 @@ static void rcu_check_gp_kthread_starvation(struct rcu_state *rsp)
1327 * that don't support NMI-based stack dumps. The NMI-triggered stack 1182 * that don't support NMI-based stack dumps. The NMI-triggered stack
1328 * traces are more accurate because they are printed by the target CPU. 1183 * traces are more accurate because they are printed by the target CPU.
1329 */ 1184 */
1330static void rcu_dump_cpu_stacks(struct rcu_state *rsp) 1185static void rcu_dump_cpu_stacks(void)
1331{ 1186{
1332 int cpu; 1187 int cpu;
1333 unsigned long flags; 1188 unsigned long flags;
1334 struct rcu_node *rnp; 1189 struct rcu_node *rnp;
1335 1190
1336 rcu_for_each_leaf_node(rsp, rnp) { 1191 rcu_for_each_leaf_node(rnp) {
1337 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1192 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1338 for_each_leaf_node_possible_cpu(rnp, cpu) 1193 for_each_leaf_node_possible_cpu(rnp, cpu)
1339 if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) 1194 if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
@@ -1347,19 +1202,20 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
1347 * If too much time has passed in the current grace period, and if 1202 * If too much time has passed in the current grace period, and if
1348 * so configured, go kick the relevant kthreads. 1203 * so configured, go kick the relevant kthreads.
1349 */ 1204 */
1350static void rcu_stall_kick_kthreads(struct rcu_state *rsp) 1205static void rcu_stall_kick_kthreads(void)
1351{ 1206{
1352 unsigned long j; 1207 unsigned long j;
1353 1208
1354 if (!rcu_kick_kthreads) 1209 if (!rcu_kick_kthreads)
1355 return; 1210 return;
1356 j = READ_ONCE(rsp->jiffies_kick_kthreads); 1211 j = READ_ONCE(rcu_state.jiffies_kick_kthreads);
1357 if (time_after(jiffies, j) && rsp->gp_kthread && 1212 if (time_after(jiffies, j) && rcu_state.gp_kthread &&
1358 (rcu_gp_in_progress(rsp) || READ_ONCE(rsp->gp_flags))) { 1213 (rcu_gp_in_progress() || READ_ONCE(rcu_state.gp_flags))) {
1359 WARN_ONCE(1, "Kicking %s grace-period kthread\n", rsp->name); 1214 WARN_ONCE(1, "Kicking %s grace-period kthread\n",
1215 rcu_state.name);
1360 rcu_ftrace_dump(DUMP_ALL); 1216 rcu_ftrace_dump(DUMP_ALL);
1361 wake_up_process(rsp->gp_kthread); 1217 wake_up_process(rcu_state.gp_kthread);
1362 WRITE_ONCE(rsp->jiffies_kick_kthreads, j + HZ); 1218 WRITE_ONCE(rcu_state.jiffies_kick_kthreads, j + HZ);
1363 } 1219 }
1364} 1220}
1365 1221
@@ -1369,18 +1225,18 @@ static void panic_on_rcu_stall(void)
1369 panic("RCU Stall\n"); 1225 panic("RCU Stall\n");
1370} 1226}
1371 1227
1372static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq) 1228static void print_other_cpu_stall(unsigned long gp_seq)
1373{ 1229{
1374 int cpu; 1230 int cpu;
1375 unsigned long flags; 1231 unsigned long flags;
1376 unsigned long gpa; 1232 unsigned long gpa;
1377 unsigned long j; 1233 unsigned long j;
1378 int ndetected = 0; 1234 int ndetected = 0;
1379 struct rcu_node *rnp = rcu_get_root(rsp); 1235 struct rcu_node *rnp = rcu_get_root();
1380 long totqlen = 0; 1236 long totqlen = 0;
1381 1237
1382 /* Kick and suppress, if so configured. */ 1238 /* Kick and suppress, if so configured. */
1383 rcu_stall_kick_kthreads(rsp); 1239 rcu_stall_kick_kthreads();
1384 if (rcu_cpu_stall_suppress) 1240 if (rcu_cpu_stall_suppress)
1385 return; 1241 return;
1386 1242
@@ -1389,15 +1245,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq)
1389 * See Documentation/RCU/stallwarn.txt for info on how to debug 1245 * See Documentation/RCU/stallwarn.txt for info on how to debug
1390 * RCU CPU stall warnings. 1246 * RCU CPU stall warnings.
1391 */ 1247 */
1392 pr_err("INFO: %s detected stalls on CPUs/tasks:", rsp->name); 1248 pr_err("INFO: %s detected stalls on CPUs/tasks:", rcu_state.name);
1393 print_cpu_stall_info_begin(); 1249 print_cpu_stall_info_begin();
1394 rcu_for_each_leaf_node(rsp, rnp) { 1250 rcu_for_each_leaf_node(rnp) {
1395 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1251 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1396 ndetected += rcu_print_task_stall(rnp); 1252 ndetected += rcu_print_task_stall(rnp);
1397 if (rnp->qsmask != 0) { 1253 if (rnp->qsmask != 0) {
1398 for_each_leaf_node_possible_cpu(rnp, cpu) 1254 for_each_leaf_node_possible_cpu(rnp, cpu)
1399 if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { 1255 if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
1400 print_cpu_stall_info(rsp, cpu); 1256 print_cpu_stall_info(cpu);
1401 ndetected++; 1257 ndetected++;
1402 } 1258 }
1403 } 1259 }
@@ -1406,52 +1262,52 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gp_seq)
1406 1262
1407 print_cpu_stall_info_end(); 1263 print_cpu_stall_info_end();
1408 for_each_possible_cpu(cpu) 1264 for_each_possible_cpu(cpu)
1409 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, 1265 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(&rcu_data,
1410 cpu)->cblist); 1266 cpu)->cblist);
1411 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", 1267 pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n",
1412 smp_processor_id(), (long)(jiffies - rsp->gp_start), 1268 smp_processor_id(), (long)(jiffies - rcu_state.gp_start),
1413 (long)rcu_seq_current(&rsp->gp_seq), totqlen); 1269 (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
1414 if (ndetected) { 1270 if (ndetected) {
1415 rcu_dump_cpu_stacks(rsp); 1271 rcu_dump_cpu_stacks();
1416 1272
1417 /* Complain about tasks blocking the grace period. */ 1273 /* Complain about tasks blocking the grace period. */
1418 rcu_print_detail_task_stall(rsp); 1274 rcu_print_detail_task_stall();
1419 } else { 1275 } else {
1420 if (rcu_seq_current(&rsp->gp_seq) != gp_seq) { 1276 if (rcu_seq_current(&rcu_state.gp_seq) != gp_seq) {
1421 pr_err("INFO: Stall ended before state dump start\n"); 1277 pr_err("INFO: Stall ended before state dump start\n");
1422 } else { 1278 } else {
1423 j = jiffies; 1279 j = jiffies;
1424 gpa = READ_ONCE(rsp->gp_activity); 1280 gpa = READ_ONCE(rcu_state.gp_activity);
1425 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n", 1281 pr_err("All QSes seen, last %s kthread activity %ld (%ld-%ld), jiffies_till_next_fqs=%ld, root ->qsmask %#lx\n",
1426 rsp->name, j - gpa, j, gpa, 1282 rcu_state.name, j - gpa, j, gpa,
1427 jiffies_till_next_fqs, 1283 READ_ONCE(jiffies_till_next_fqs),
1428 rcu_get_root(rsp)->qsmask); 1284 rcu_get_root()->qsmask);
1429 /* In this case, the current CPU might be at fault. */ 1285 /* In this case, the current CPU might be at fault. */
1430 sched_show_task(current); 1286 sched_show_task(current);
1431 } 1287 }
1432 } 1288 }
1433 /* Rewrite if needed in case of slow consoles. */ 1289 /* Rewrite if needed in case of slow consoles. */
1434 if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) 1290 if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
1435 WRITE_ONCE(rsp->jiffies_stall, 1291 WRITE_ONCE(rcu_state.jiffies_stall,
1436 jiffies + 3 * rcu_jiffies_till_stall_check() + 3); 1292 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1437 1293
1438 rcu_check_gp_kthread_starvation(rsp); 1294 rcu_check_gp_kthread_starvation();
1439 1295
1440 panic_on_rcu_stall(); 1296 panic_on_rcu_stall();
1441 1297
1442 force_quiescent_state(rsp); /* Kick them all. */ 1298 force_quiescent_state(); /* Kick them all. */
1443} 1299}
1444 1300
1445static void print_cpu_stall(struct rcu_state *rsp) 1301static void print_cpu_stall(void)
1446{ 1302{
1447 int cpu; 1303 int cpu;
1448 unsigned long flags; 1304 unsigned long flags;
1449 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1305 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1450 struct rcu_node *rnp = rcu_get_root(rsp); 1306 struct rcu_node *rnp = rcu_get_root();
1451 long totqlen = 0; 1307 long totqlen = 0;
1452 1308
1453 /* Kick and suppress, if so configured. */ 1309 /* Kick and suppress, if so configured. */
1454 rcu_stall_kick_kthreads(rsp); 1310 rcu_stall_kick_kthreads();
1455 if (rcu_cpu_stall_suppress) 1311 if (rcu_cpu_stall_suppress)
1456 return; 1312 return;
1457 1313
@@ -1460,27 +1316,27 @@ static void print_cpu_stall(struct rcu_state *rsp)
1460 * See Documentation/RCU/stallwarn.txt for info on how to debug 1316 * See Documentation/RCU/stallwarn.txt for info on how to debug
1461 * RCU CPU stall warnings. 1317 * RCU CPU stall warnings.
1462 */ 1318 */
1463 pr_err("INFO: %s self-detected stall on CPU", rsp->name); 1319 pr_err("INFO: %s self-detected stall on CPU", rcu_state.name);
1464 print_cpu_stall_info_begin(); 1320 print_cpu_stall_info_begin();
1465 raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); 1321 raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
1466 print_cpu_stall_info(rsp, smp_processor_id()); 1322 print_cpu_stall_info(smp_processor_id());
1467 raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); 1323 raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags);
1468 print_cpu_stall_info_end(); 1324 print_cpu_stall_info_end();
1469 for_each_possible_cpu(cpu) 1325 for_each_possible_cpu(cpu)
1470 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(rsp->rda, 1326 totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(&rcu_data,
1471 cpu)->cblist); 1327 cpu)->cblist);
1472 pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", 1328 pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n",
1473 jiffies - rsp->gp_start, 1329 jiffies - rcu_state.gp_start,
1474 (long)rcu_seq_current(&rsp->gp_seq), totqlen); 1330 (long)rcu_seq_current(&rcu_state.gp_seq), totqlen);
1475 1331
1476 rcu_check_gp_kthread_starvation(rsp); 1332 rcu_check_gp_kthread_starvation();
1477 1333
1478 rcu_dump_cpu_stacks(rsp); 1334 rcu_dump_cpu_stacks();
1479 1335
1480 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1336 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1481 /* Rewrite if needed in case of slow consoles. */ 1337 /* Rewrite if needed in case of slow consoles. */
1482 if (ULONG_CMP_GE(jiffies, READ_ONCE(rsp->jiffies_stall))) 1338 if (ULONG_CMP_GE(jiffies, READ_ONCE(rcu_state.jiffies_stall)))
1483 WRITE_ONCE(rsp->jiffies_stall, 1339 WRITE_ONCE(rcu_state.jiffies_stall,
1484 jiffies + 3 * rcu_jiffies_till_stall_check() + 3); 1340 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
1485 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1341 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1486 1342
@@ -1493,10 +1349,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
1493 * progress and it could be we're stuck in kernel space without context 1349 * progress and it could be we're stuck in kernel space without context
1494 * switches for an entirely unreasonable amount of time. 1350 * switches for an entirely unreasonable amount of time.
1495 */ 1351 */
1496 resched_cpu(smp_processor_id()); 1352 set_tsk_need_resched(current);
1353 set_preempt_need_resched();
1497} 1354}
1498 1355
1499static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp) 1356static void check_cpu_stall(struct rcu_data *rdp)
1500{ 1357{
1501 unsigned long gs1; 1358 unsigned long gs1;
1502 unsigned long gs2; 1359 unsigned long gs2;
@@ -1507,54 +1364,55 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1507 struct rcu_node *rnp; 1364 struct rcu_node *rnp;
1508 1365
1509 if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) || 1366 if ((rcu_cpu_stall_suppress && !rcu_kick_kthreads) ||
1510 !rcu_gp_in_progress(rsp)) 1367 !rcu_gp_in_progress())
1511 return; 1368 return;
1512 rcu_stall_kick_kthreads(rsp); 1369 rcu_stall_kick_kthreads();
1513 j = jiffies; 1370 j = jiffies;
1514 1371
1515 /* 1372 /*
1516 * Lots of memory barriers to reject false positives. 1373 * Lots of memory barriers to reject false positives.
1517 * 1374 *
1518 * The idea is to pick up rsp->gp_seq, then rsp->jiffies_stall, 1375 * The idea is to pick up rcu_state.gp_seq, then
1519 * then rsp->gp_start, and finally another copy of rsp->gp_seq. 1376 * rcu_state.jiffies_stall, then rcu_state.gp_start, and finally
1520 * These values are updated in the opposite order with memory 1377 * another copy of rcu_state.gp_seq. These values are updated in
1521 * barriers (or equivalent) during grace-period initialization 1378 * the opposite order with memory barriers (or equivalent) during
1522 * and cleanup. Now, a false positive can occur if we get a new 1379 * grace-period initialization and cleanup. Now, a false positive
1523 * value of rsp->gp_start and an old value of rsp->jiffies_stall. 1380 * can occur if we get a new value of rcu_state.gp_start and an old
1524 * But given the memory barriers, the only way that this can happen 1381 * value of rcu_state.jiffies_stall. But given the memory barriers,
1525 * is if one grace period ends and another starts between these 1382 * the only way that this can happen is if one grace period ends
1526 * two fetches. This is detected by comparing the second fetch 1383 * and another starts between these two fetches. This is detected
1527 * of rsp->gp_seq with the previous fetch from rsp->gp_seq. 1384 * by comparing the second fetch of rcu_state.gp_seq with the
1385 * previous fetch from rcu_state.gp_seq.
1528 * 1386 *
1529 * Given this check, comparisons of jiffies, rsp->jiffies_stall, 1387 * Given this check, comparisons of jiffies, rcu_state.jiffies_stall,
1530 * and rsp->gp_start suffice to forestall false positives. 1388 * and rcu_state.gp_start suffice to forestall false positives.
1531 */ 1389 */
1532 gs1 = READ_ONCE(rsp->gp_seq); 1390 gs1 = READ_ONCE(rcu_state.gp_seq);
1533 smp_rmb(); /* Pick up ->gp_seq first... */ 1391 smp_rmb(); /* Pick up ->gp_seq first... */
1534 js = READ_ONCE(rsp->jiffies_stall); 1392 js = READ_ONCE(rcu_state.jiffies_stall);
1535 smp_rmb(); /* ...then ->jiffies_stall before the rest... */ 1393 smp_rmb(); /* ...then ->jiffies_stall before the rest... */
1536 gps = READ_ONCE(rsp->gp_start); 1394 gps = READ_ONCE(rcu_state.gp_start);
1537 smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */ 1395 smp_rmb(); /* ...and finally ->gp_start before ->gp_seq again. */
1538 gs2 = READ_ONCE(rsp->gp_seq); 1396 gs2 = READ_ONCE(rcu_state.gp_seq);
1539 if (gs1 != gs2 || 1397 if (gs1 != gs2 ||
1540 ULONG_CMP_LT(j, js) || 1398 ULONG_CMP_LT(j, js) ||
1541 ULONG_CMP_GE(gps, js)) 1399 ULONG_CMP_GE(gps, js))
1542 return; /* No stall or GP completed since entering function. */ 1400 return; /* No stall or GP completed since entering function. */
1543 rnp = rdp->mynode; 1401 rnp = rdp->mynode;
1544 jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3; 1402 jn = jiffies + 3 * rcu_jiffies_till_stall_check() + 3;
1545 if (rcu_gp_in_progress(rsp) && 1403 if (rcu_gp_in_progress() &&
1546 (READ_ONCE(rnp->qsmask) & rdp->grpmask) && 1404 (READ_ONCE(rnp->qsmask) & rdp->grpmask) &&
1547 cmpxchg(&rsp->jiffies_stall, js, jn) == js) { 1405 cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
1548 1406
1549 /* We haven't checked in, so go dump stack. */ 1407 /* We haven't checked in, so go dump stack. */
1550 print_cpu_stall(rsp); 1408 print_cpu_stall();
1551 1409
1552 } else if (rcu_gp_in_progress(rsp) && 1410 } else if (rcu_gp_in_progress() &&
1553 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) && 1411 ULONG_CMP_GE(j, js + RCU_STALL_RAT_DELAY) &&
1554 cmpxchg(&rsp->jiffies_stall, js, jn) == js) { 1412 cmpxchg(&rcu_state.jiffies_stall, js, jn) == js) {
1555 1413
1556 /* They had a few time units to dump stack, so complain. */ 1414 /* They had a few time units to dump stack, so complain. */
1557 print_other_cpu_stall(rsp, gs2); 1415 print_other_cpu_stall(gs2);
1558 } 1416 }
1559} 1417}
1560 1418
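The comment block in check_cpu_stall() describes a snapshot protocol: read ->gp_seq, then ->jiffies_stall, then ->gp_start, then ->gp_seq again, each pair separated by smp_rmb(), while the grace-period kthread updates the same fields in the opposite order with release semantics. If the two ->gp_seq samples agree, the ->gp_start/->jiffies_stall pair describes a single grace period. A userspace C11 model of that pattern follows; the variable names mirror the fields above, but the atomics API is only an approximation of the kernel's READ_ONCE()/smp_rmb()/smp_store_release() primitives.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_ulong gp_seq, gp_start, jiffies_stall;

/* Writer: update in the opposite order of the reader's snapshots,
 * with release ordering between the stores. */
static void writer_start_gp(unsigned long now, unsigned long stall_delay)
{
	atomic_store_explicit(&gp_start, now, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&jiffies_stall, now + stall_delay, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
	atomic_store_explicit(&gp_seq,
			      atomic_load_explicit(&gp_seq, memory_order_relaxed) + 1,
			      memory_order_release);
}

/* Reader: two gp_seq samples bracketing the other reads. */
static bool consistent_snapshot(unsigned long *gps, unsigned long *js)
{
	unsigned long gs1, gs2;

	gs1 = atomic_load_explicit(&gp_seq, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb() */
	*js = atomic_load_explicit(&jiffies_stall, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb() */
	*gps = atomic_load_explicit(&gp_start, memory_order_relaxed);
	atomic_thread_fence(memory_order_acquire);	/* ~ smp_rmb() */
	gs2 = atomic_load_explicit(&gp_seq, memory_order_relaxed);
	return gs1 == gs2;	/* equal => *gps and *js belong to one GP */
}

int main(void)
{
	unsigned long gps, js;

	writer_start_gp(1000, 21000);
	printf("consistent=%d gp_start=%lu jiffies_stall=%lu\n",
	       consistent_snapshot(&gps, &js), gps, js);
	return 0;
}
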
@@ -1569,17 +1427,14 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
1569 */ 1427 */
1570void rcu_cpu_stall_reset(void) 1428void rcu_cpu_stall_reset(void)
1571{ 1429{
1572 struct rcu_state *rsp; 1430 WRITE_ONCE(rcu_state.jiffies_stall, jiffies + ULONG_MAX / 2);
1573
1574 for_each_rcu_flavor(rsp)
1575 WRITE_ONCE(rsp->jiffies_stall, jiffies + ULONG_MAX / 2);
1576} 1431}
1577 1432
1578/* Trace-event wrapper function for trace_rcu_future_grace_period. */ 1433/* Trace-event wrapper function for trace_rcu_future_grace_period. */
1579static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp, 1434static void trace_rcu_this_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1580 unsigned long gp_seq_req, const char *s) 1435 unsigned long gp_seq_req, const char *s)
1581{ 1436{
1582 trace_rcu_future_grace_period(rdp->rsp->name, rnp->gp_seq, gp_seq_req, 1437 trace_rcu_future_grace_period(rcu_state.name, rnp->gp_seq, gp_seq_req,
1583 rnp->level, rnp->grplo, rnp->grphi, s); 1438 rnp->level, rnp->grplo, rnp->grphi, s);
1584} 1439}
1585 1440
@@ -1603,7 +1458,6 @@ static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
1603 unsigned long gp_seq_req) 1458 unsigned long gp_seq_req)
1604{ 1459{
1605 bool ret = false; 1460 bool ret = false;
1606 struct rcu_state *rsp = rdp->rsp;
1607 struct rcu_node *rnp; 1461 struct rcu_node *rnp;
1608 1462
1609 /* 1463 /*
@@ -1647,18 +1501,18 @@ static bool rcu_start_this_gp(struct rcu_node *rnp_start, struct rcu_data *rdp,
1647 } 1501 }
1648 1502
1649 /* If GP already in progress, just leave, otherwise start one. */ 1503 /* If GP already in progress, just leave, otherwise start one. */
1650 if (rcu_gp_in_progress(rsp)) { 1504 if (rcu_gp_in_progress()) {
1651 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot")); 1505 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedleafroot"));
1652 goto unlock_out; 1506 goto unlock_out;
1653 } 1507 }
1654 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot")); 1508 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("Startedroot"));
1655 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags | RCU_GP_FLAG_INIT); 1509 WRITE_ONCE(rcu_state.gp_flags, rcu_state.gp_flags | RCU_GP_FLAG_INIT);
1656 rsp->gp_req_activity = jiffies; 1510 rcu_state.gp_req_activity = jiffies;
1657 if (!rsp->gp_kthread) { 1511 if (!rcu_state.gp_kthread) {
1658 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread")); 1512 trace_rcu_this_gp(rnp, rdp, gp_seq_req, TPS("NoGPkthread"));
1659 goto unlock_out; 1513 goto unlock_out;
1660 } 1514 }
1661 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), TPS("newreq")); 1515 trace_rcu_grace_period(rcu_state.name, READ_ONCE(rcu_state.gp_seq), TPS("newreq"));
1662 ret = true; /* Caller must wake GP kthread. */ 1516 ret = true; /* Caller must wake GP kthread. */
1663unlock_out: 1517unlock_out:
1664 /* Push furthest requested GP to leaf node and rcu_data structure. */ 1518 /* Push furthest requested GP to leaf node and rcu_data structure. */
@@ -1675,10 +1529,10 @@ unlock_out:
1675 * Clean up any old requests for the just-ended grace period. Also return 1529 * Clean up any old requests for the just-ended grace period. Also return
1676 * whether any additional grace periods have been requested. 1530 * whether any additional grace periods have been requested.
1677 */ 1531 */
1678static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp) 1532static bool rcu_future_gp_cleanup(struct rcu_node *rnp)
1679{ 1533{
1680 bool needmore; 1534 bool needmore;
1681 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 1535 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1682 1536
1683 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed); 1537 needmore = ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed);
1684 if (!needmore) 1538 if (!needmore)
@@ -1689,19 +1543,18 @@ static bool rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
1689} 1543}
1690 1544
1691/* 1545/*
1692 * Awaken the grace-period kthread for the specified flavor of RCU. 1546 * Awaken the grace-period kthread. Don't do a self-awaken, and don't
1693 * Don't do a self-awaken, and don't bother awakening when there is 1547 * bother awakening when there is nothing for the grace-period kthread
1694 * nothing for the grace-period kthread to do (as in several CPUs 1548 * to do (as in several CPUs raced to awaken, and we lost), and finally
1695 * raced to awaken, and we lost), and finally don't try to awaken 1549 * don't try to awaken a kthread that has not yet been created.
1696 * a kthread that has not yet been created.
1697 */ 1550 */
1698static void rcu_gp_kthread_wake(struct rcu_state *rsp) 1551static void rcu_gp_kthread_wake(void)
1699{ 1552{
1700 if (current == rsp->gp_kthread || 1553 if (current == rcu_state.gp_kthread ||
1701 !READ_ONCE(rsp->gp_flags) || 1554 !READ_ONCE(rcu_state.gp_flags) ||
1702 !rsp->gp_kthread) 1555 !rcu_state.gp_kthread)
1703 return; 1556 return;
1704 swake_up_one(&rsp->gp_wq); 1557 swake_up_one(&rcu_state.gp_wq);
1705} 1558}
1706 1559
1707/* 1560/*
@@ -1716,8 +1569,7 @@ static void rcu_gp_kthread_wake(struct rcu_state *rsp)
1716 * 1569 *
1717 * The caller must hold rnp->lock with interrupts disabled. 1570 * The caller must hold rnp->lock with interrupts disabled.
1718 */ 1571 */
1719static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1572static bool rcu_accelerate_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
1720 struct rcu_data *rdp)
1721{ 1573{
1722 unsigned long gp_seq_req; 1574 unsigned long gp_seq_req;
1723 bool ret = false; 1575 bool ret = false;
@@ -1738,15 +1590,15 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1738 * accelerating callback invocation to an earlier grace-period 1590 * accelerating callback invocation to an earlier grace-period
1739 * number. 1591 * number.
1740 */ 1592 */
1741 gp_seq_req = rcu_seq_snap(&rsp->gp_seq); 1593 gp_seq_req = rcu_seq_snap(&rcu_state.gp_seq);
1742 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req)) 1594 if (rcu_segcblist_accelerate(&rdp->cblist, gp_seq_req))
1743 ret = rcu_start_this_gp(rnp, rdp, gp_seq_req); 1595 ret = rcu_start_this_gp(rnp, rdp, gp_seq_req);
1744 1596
1745 /* Trace depending on how much we were able to accelerate. */ 1597 /* Trace depending on how much we were able to accelerate. */
1746 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL)) 1598 if (rcu_segcblist_restempty(&rdp->cblist, RCU_WAIT_TAIL))
1747 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccWaitCB")); 1599 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccWaitCB"));
1748 else 1600 else
1749 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("AccReadyCB")); 1601 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("AccReadyCB"));
1750 return ret; 1602 return ret;
1751} 1603}
1752 1604
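rcu_accelerate_cbs() leans on rcu_seq_snap(&rcu_state.gp_seq) to compute the grace-period sequence number after which the newly arrived callbacks are safe to invoke. A rough userspace model of that sequence-counter convention is below; the two-bit state encoding and the snap arithmetic are recalled from kernel/rcu/rcu.h and should be treated as an assumption rather than as part of this patch.

#include <stdio.h>

#define SEQ_CTR_SHIFT	2				/* assumed encoding */
#define SEQ_STATE_MASK	((1UL << SEQ_CTR_SHIFT) - 1)

static unsigned long seq_ctr(unsigned long s)   { return s >> SEQ_CTR_SHIFT; }

/* Mark a grace period as started: bump the state bits out of idle. */
static void seq_start(unsigned long *sp) { *sp += 1; }

/* Mark it as ended: round up to the next idle (state == 0) value. */
static void seq_end(unsigned long *sp)
{
	*sp = (seq_ctr(*sp) + 1) << SEQ_CTR_SHIFT;
}

/* Earliest value of *sp at which a full grace period has elapsed
 * for a callback queued now. */
static unsigned long seq_snap(unsigned long *sp)
{
	return (*sp + 2 * SEQ_STATE_MASK + 1) & ~SEQ_STATE_MASK;
}

int main(void)
{
	unsigned long gp_seq = 0;			/* idle */
	unsigned long need = seq_snap(&gp_seq);		/* callback queued now */

	seq_start(&gp_seq);
	seq_end(&gp_seq);
	printf("need=%lu done=%lu safe=%d\n", need, gp_seq, gp_seq >= need);
	return 0;
}
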
@@ -1757,25 +1609,24 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1757 * that a new grace-period request be made, invokes rcu_accelerate_cbs() 1609 * that a new grace-period request be made, invokes rcu_accelerate_cbs()
1758 * while holding the leaf rcu_node structure's ->lock. 1610 * while holding the leaf rcu_node structure's ->lock.
1759 */ 1611 */
1760static void rcu_accelerate_cbs_unlocked(struct rcu_state *rsp, 1612static void rcu_accelerate_cbs_unlocked(struct rcu_node *rnp,
1761 struct rcu_node *rnp,
1762 struct rcu_data *rdp) 1613 struct rcu_data *rdp)
1763{ 1614{
1764 unsigned long c; 1615 unsigned long c;
1765 bool needwake; 1616 bool needwake;
1766 1617
1767 lockdep_assert_irqs_disabled(); 1618 lockdep_assert_irqs_disabled();
1768 c = rcu_seq_snap(&rsp->gp_seq); 1619 c = rcu_seq_snap(&rcu_state.gp_seq);
1769 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { 1620 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
1770 /* Old request still live, so mark recent callbacks. */ 1621 /* Old request still live, so mark recent callbacks. */
1771 (void)rcu_segcblist_accelerate(&rdp->cblist, c); 1622 (void)rcu_segcblist_accelerate(&rdp->cblist, c);
1772 return; 1623 return;
1773 } 1624 }
1774 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 1625 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
1775 needwake = rcu_accelerate_cbs(rsp, rnp, rdp); 1626 needwake = rcu_accelerate_cbs(rnp, rdp);
1776 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ 1627 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
1777 if (needwake) 1628 if (needwake)
1778 rcu_gp_kthread_wake(rsp); 1629 rcu_gp_kthread_wake();
1779} 1630}
1780 1631
1781/* 1632/*
@@ -1788,8 +1639,7 @@ static void rcu_accelerate_cbs_unlocked(struct rcu_state *rsp,
1788 * 1639 *
1789 * The caller must hold rnp->lock with interrupts disabled. 1640 * The caller must hold rnp->lock with interrupts disabled.
1790 */ 1641 */
1791static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1642static bool rcu_advance_cbs(struct rcu_node *rnp, struct rcu_data *rdp)
1792 struct rcu_data *rdp)
1793{ 1643{
1794 raw_lockdep_assert_held_rcu_node(rnp); 1644 raw_lockdep_assert_held_rcu_node(rnp);
1795 1645
@@ -1804,7 +1654,7 @@ static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1804 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq); 1654 rcu_segcblist_advance(&rdp->cblist, rnp->gp_seq);
1805 1655
1806 /* Classify any remaining callbacks. */ 1656 /* Classify any remaining callbacks. */
1807 return rcu_accelerate_cbs(rsp, rnp, rdp); 1657 return rcu_accelerate_cbs(rnp, rdp);
1808} 1658}
1809 1659
1810/* 1660/*
@@ -1813,8 +1663,7 @@ static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1813 * structure corresponding to the current CPU, and must have irqs disabled. 1663 * structure corresponding to the current CPU, and must have irqs disabled.
1814 * Returns true if the grace-period kthread needs to be awakened. 1664 * Returns true if the grace-period kthread needs to be awakened.
1815 */ 1665 */
1816static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp, 1666static bool __note_gp_changes(struct rcu_node *rnp, struct rcu_data *rdp)
1817 struct rcu_data *rdp)
1818{ 1667{
1819 bool ret; 1668 bool ret;
1820 bool need_gp; 1669 bool need_gp;
@@ -1827,10 +1676,10 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1827 /* Handle the ends of any preceding grace periods first. */ 1676 /* Handle the ends of any preceding grace periods first. */
1828 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) || 1677 if (rcu_seq_completed_gp(rdp->gp_seq, rnp->gp_seq) ||
1829 unlikely(READ_ONCE(rdp->gpwrap))) { 1678 unlikely(READ_ONCE(rdp->gpwrap))) {
1830 ret = rcu_advance_cbs(rsp, rnp, rdp); /* Advance callbacks. */ 1679 ret = rcu_advance_cbs(rnp, rdp); /* Advance callbacks. */
1831 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuend")); 1680 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuend"));
1832 } else { 1681 } else {
1833 ret = rcu_accelerate_cbs(rsp, rnp, rdp); /* Recent callbacks. */ 1682 ret = rcu_accelerate_cbs(rnp, rdp); /* Recent callbacks. */
1834 } 1683 }
1835 1684
1836 /* Now handle the beginnings of any new-to-this-CPU grace periods. */ 1685 /* Now handle the beginnings of any new-to-this-CPU grace periods. */
@@ -1841,10 +1690,9 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1841 * set up to detect a quiescent state, otherwise don't 1690 * set up to detect a quiescent state, otherwise don't
1842 * go looking for one. 1691 * go looking for one.
1843 */ 1692 */
1844 trace_rcu_grace_period(rsp->name, rnp->gp_seq, TPS("cpustart")); 1693 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq, TPS("cpustart"));
1845 need_gp = !!(rnp->qsmask & rdp->grpmask); 1694 need_gp = !!(rnp->qsmask & rdp->grpmask);
1846 rdp->cpu_no_qs.b.norm = need_gp; 1695 rdp->cpu_no_qs.b.norm = need_gp;
1847 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
1848 rdp->core_needs_qs = need_gp; 1696 rdp->core_needs_qs = need_gp;
1849 zero_cpu_stall_ticks(rdp); 1697 zero_cpu_stall_ticks(rdp);
1850 } 1698 }
@@ -1856,7 +1704,7 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1856 return ret; 1704 return ret;
1857} 1705}
1858 1706
1859static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp) 1707static void note_gp_changes(struct rcu_data *rdp)
1860{ 1708{
1861 unsigned long flags; 1709 unsigned long flags;
1862 bool needwake; 1710 bool needwake;
@@ -1870,16 +1718,16 @@ static void note_gp_changes(struct rcu_state *rsp, struct rcu_data *rdp)
1870 local_irq_restore(flags); 1718 local_irq_restore(flags);
1871 return; 1719 return;
1872 } 1720 }
1873 needwake = __note_gp_changes(rsp, rnp, rdp); 1721 needwake = __note_gp_changes(rnp, rdp);
1874 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1722 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1875 if (needwake) 1723 if (needwake)
1876 rcu_gp_kthread_wake(rsp); 1724 rcu_gp_kthread_wake();
1877} 1725}
1878 1726
1879static void rcu_gp_slow(struct rcu_state *rsp, int delay) 1727static void rcu_gp_slow(int delay)
1880{ 1728{
1881 if (delay > 0 && 1729 if (delay > 0 &&
1882 !(rcu_seq_ctr(rsp->gp_seq) % 1730 !(rcu_seq_ctr(rcu_state.gp_seq) %
1883 (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay))) 1731 (rcu_num_nodes * PER_RCU_NODE_PERIOD * delay)))
1884 schedule_timeout_uninterruptible(delay); 1732 schedule_timeout_uninterruptible(delay);
1885} 1733}
@@ -1887,24 +1735,24 @@ static void rcu_gp_slow(struct rcu_state *rsp, int delay)
1887/* 1735/*
1888 * Initialize a new grace period. Return false if no grace period required. 1736 * Initialize a new grace period. Return false if no grace period required.
1889 */ 1737 */
1890static bool rcu_gp_init(struct rcu_state *rsp) 1738static bool rcu_gp_init(void)
1891{ 1739{
1892 unsigned long flags; 1740 unsigned long flags;
1893 unsigned long oldmask; 1741 unsigned long oldmask;
1894 unsigned long mask; 1742 unsigned long mask;
1895 struct rcu_data *rdp; 1743 struct rcu_data *rdp;
1896 struct rcu_node *rnp = rcu_get_root(rsp); 1744 struct rcu_node *rnp = rcu_get_root();
1897 1745
1898 WRITE_ONCE(rsp->gp_activity, jiffies); 1746 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1899 raw_spin_lock_irq_rcu_node(rnp); 1747 raw_spin_lock_irq_rcu_node(rnp);
1900 if (!READ_ONCE(rsp->gp_flags)) { 1748 if (!READ_ONCE(rcu_state.gp_flags)) {
1901 /* Spurious wakeup, tell caller to go back to sleep. */ 1749 /* Spurious wakeup, tell caller to go back to sleep. */
1902 raw_spin_unlock_irq_rcu_node(rnp); 1750 raw_spin_unlock_irq_rcu_node(rnp);
1903 return false; 1751 return false;
1904 } 1752 }
1905 WRITE_ONCE(rsp->gp_flags, 0); /* Clear all flags: New grace period. */ 1753 WRITE_ONCE(rcu_state.gp_flags, 0); /* Clear all flags: New GP. */
1906 1754
1907 if (WARN_ON_ONCE(rcu_gp_in_progress(rsp))) { 1755 if (WARN_ON_ONCE(rcu_gp_in_progress())) {
1908 /* 1756 /*
1909 * Grace period already in progress, don't start another. 1757 * Grace period already in progress, don't start another.
1910 * Not supposed to be able to happen. 1758 * Not supposed to be able to happen.
@@ -1914,10 +1762,10 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1914 } 1762 }
1915 1763
1916 /* Advance to a new grace period and initialize state. */ 1764 /* Advance to a new grace period and initialize state. */
1917 record_gp_stall_check_time(rsp); 1765 record_gp_stall_check_time();
1918 /* Record GP times before starting GP, hence rcu_seq_start(). */ 1766 /* Record GP times before starting GP, hence rcu_seq_start(). */
1919 rcu_seq_start(&rsp->gp_seq); 1767 rcu_seq_start(&rcu_state.gp_seq);
1920 trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("start")); 1768 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("start"));
1921 raw_spin_unlock_irq_rcu_node(rnp); 1769 raw_spin_unlock_irq_rcu_node(rnp);
1922 1770
1923 /* 1771 /*
@@ -1926,15 +1774,15 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1926 * for subsequent online CPUs, and that quiescent-state forcing 1774 * for subsequent online CPUs, and that quiescent-state forcing
1927 * will handle subsequent offline CPUs. 1775 * will handle subsequent offline CPUs.
1928 */ 1776 */
1929 rsp->gp_state = RCU_GP_ONOFF; 1777 rcu_state.gp_state = RCU_GP_ONOFF;
1930 rcu_for_each_leaf_node(rsp, rnp) { 1778 rcu_for_each_leaf_node(rnp) {
1931 spin_lock(&rsp->ofl_lock); 1779 raw_spin_lock(&rcu_state.ofl_lock);
1932 raw_spin_lock_irq_rcu_node(rnp); 1780 raw_spin_lock_irq_rcu_node(rnp);
1933 if (rnp->qsmaskinit == rnp->qsmaskinitnext && 1781 if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
1934 !rnp->wait_blkd_tasks) { 1782 !rnp->wait_blkd_tasks) {
1935 /* Nothing to do on this leaf rcu_node structure. */ 1783 /* Nothing to do on this leaf rcu_node structure. */
1936 raw_spin_unlock_irq_rcu_node(rnp); 1784 raw_spin_unlock_irq_rcu_node(rnp);
1937 spin_unlock(&rsp->ofl_lock); 1785 raw_spin_unlock(&rcu_state.ofl_lock);
1938 continue; 1786 continue;
1939 } 1787 }
1940 1788
@@ -1970,45 +1818,45 @@ static bool rcu_gp_init(struct rcu_state *rsp)
1970 } 1818 }
1971 1819
1972 raw_spin_unlock_irq_rcu_node(rnp); 1820 raw_spin_unlock_irq_rcu_node(rnp);
1973 spin_unlock(&rsp->ofl_lock); 1821 raw_spin_unlock(&rcu_state.ofl_lock);
1974 } 1822 }
1975 rcu_gp_slow(rsp, gp_preinit_delay); /* Races with CPU hotplug. */ 1823 rcu_gp_slow(gp_preinit_delay); /* Races with CPU hotplug. */
1976 1824
1977 /* 1825 /*
1978 * Set the quiescent-state-needed bits in all the rcu_node 1826 * Set the quiescent-state-needed bits in all the rcu_node
1979 * structures for all currently online CPUs in breadth-first order, 1827 * structures for all currently online CPUs in breadth-first
1980 * starting from the root rcu_node structure, relying on the layout 1828 * order, starting from the root rcu_node structure, relying on the
1981 * of the tree within the rsp->node[] array. Note that other CPUs 1829 * layout of the tree within the rcu_state.node[] array. Note that
1982 * will access only the leaves of the hierarchy, thus seeing that no 1830 * other CPUs will access only the leaves of the hierarchy, thus
1983 * grace period is in progress, at least until the corresponding 1831 * seeing that no grace period is in progress, at least until the
1984 * leaf node has been initialized. 1832 * corresponding leaf node has been initialized.
1985 * 1833 *
1986 * The grace period cannot complete until the initialization 1834 * The grace period cannot complete until the initialization
1987 * process finishes, because this kthread handles both. 1835 * process finishes, because this kthread handles both.
1988 */ 1836 */
1989 rsp->gp_state = RCU_GP_INIT; 1837 rcu_state.gp_state = RCU_GP_INIT;
1990 rcu_for_each_node_breadth_first(rsp, rnp) { 1838 rcu_for_each_node_breadth_first(rnp) {
1991 rcu_gp_slow(rsp, gp_init_delay); 1839 rcu_gp_slow(gp_init_delay);
1992 raw_spin_lock_irqsave_rcu_node(rnp, flags); 1840 raw_spin_lock_irqsave_rcu_node(rnp, flags);
1993 rdp = this_cpu_ptr(rsp->rda); 1841 rdp = this_cpu_ptr(&rcu_data);
1994 rcu_preempt_check_blocked_tasks(rsp, rnp); 1842 rcu_preempt_check_blocked_tasks(rnp);
1995 rnp->qsmask = rnp->qsmaskinit; 1843 rnp->qsmask = rnp->qsmaskinit;
1996 WRITE_ONCE(rnp->gp_seq, rsp->gp_seq); 1844 WRITE_ONCE(rnp->gp_seq, rcu_state.gp_seq);
1997 if (rnp == rdp->mynode) 1845 if (rnp == rdp->mynode)
1998 (void)__note_gp_changes(rsp, rnp, rdp); 1846 (void)__note_gp_changes(rnp, rdp);
1999 rcu_preempt_boost_start_gp(rnp); 1847 rcu_preempt_boost_start_gp(rnp);
2000 trace_rcu_grace_period_init(rsp->name, rnp->gp_seq, 1848 trace_rcu_grace_period_init(rcu_state.name, rnp->gp_seq,
2001 rnp->level, rnp->grplo, 1849 rnp->level, rnp->grplo,
2002 rnp->grphi, rnp->qsmask); 1850 rnp->grphi, rnp->qsmask);
2003 /* Quiescent states for tasks on any now-offline CPUs. */ 1851 /* Quiescent states for tasks on any now-offline CPUs. */
2004 mask = rnp->qsmask & ~rnp->qsmaskinitnext; 1852 mask = rnp->qsmask & ~rnp->qsmaskinitnext;
2005 rnp->rcu_gp_init_mask = mask; 1853 rnp->rcu_gp_init_mask = mask;
2006 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp)) 1854 if ((mask || rnp->wait_blkd_tasks) && rcu_is_leaf_node(rnp))
2007 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); 1855 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2008 else 1856 else
2009 raw_spin_unlock_irq_rcu_node(rnp); 1857 raw_spin_unlock_irq_rcu_node(rnp);
2010 cond_resched_tasks_rcu_qs(); 1858 cond_resched_tasks_rcu_qs();
2011 WRITE_ONCE(rsp->gp_activity, jiffies); 1859 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2012 } 1860 }
2013 1861
2014 return true; 1862 return true;
@@ -2018,12 +1866,12 @@ static bool rcu_gp_init(struct rcu_state *rsp)
2018 * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state 1866 * Helper function for swait_event_idle_exclusive() wakeup at force-quiescent-state
2019 * time. 1867 * time.
2020 */ 1868 */
2021static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp) 1869static bool rcu_gp_fqs_check_wake(int *gfp)
2022{ 1870{
2023 struct rcu_node *rnp = rcu_get_root(rsp); 1871 struct rcu_node *rnp = rcu_get_root();
2024 1872
2025 /* Someone like call_rcu() requested a force-quiescent-state scan. */ 1873 /* Someone like call_rcu() requested a force-quiescent-state scan. */
2026 *gfp = READ_ONCE(rsp->gp_flags); 1874 *gfp = READ_ONCE(rcu_state.gp_flags);
2027 if (*gfp & RCU_GP_FLAG_FQS) 1875 if (*gfp & RCU_GP_FLAG_FQS)
2028 return true; 1876 return true;
2029 1877
@@ -2037,45 +1885,110 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
2037/* 1885/*
2038 * Do one round of quiescent-state forcing. 1886 * Do one round of quiescent-state forcing.
2039 */ 1887 */
2040static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time) 1888static void rcu_gp_fqs(bool first_time)
2041{ 1889{
2042 struct rcu_node *rnp = rcu_get_root(rsp); 1890 struct rcu_node *rnp = rcu_get_root();
2043 1891
2044 WRITE_ONCE(rsp->gp_activity, jiffies); 1892 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2045 rsp->n_force_qs++; 1893 rcu_state.n_force_qs++;
2046 if (first_time) { 1894 if (first_time) {
2047 /* Collect dyntick-idle snapshots. */ 1895 /* Collect dyntick-idle snapshots. */
2048 force_qs_rnp(rsp, dyntick_save_progress_counter); 1896 force_qs_rnp(dyntick_save_progress_counter);
2049 } else { 1897 } else {
2050 /* Handle dyntick-idle and offline CPUs. */ 1898 /* Handle dyntick-idle and offline CPUs. */
2051 force_qs_rnp(rsp, rcu_implicit_dynticks_qs); 1899 force_qs_rnp(rcu_implicit_dynticks_qs);
2052 } 1900 }
2053 /* Clear flag to prevent immediate re-entry. */ 1901 /* Clear flag to prevent immediate re-entry. */
2054 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1902 if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
2055 raw_spin_lock_irq_rcu_node(rnp); 1903 raw_spin_lock_irq_rcu_node(rnp);
2056 WRITE_ONCE(rsp->gp_flags, 1904 WRITE_ONCE(rcu_state.gp_flags,
2057 READ_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS); 1905 READ_ONCE(rcu_state.gp_flags) & ~RCU_GP_FLAG_FQS);
2058 raw_spin_unlock_irq_rcu_node(rnp); 1906 raw_spin_unlock_irq_rcu_node(rnp);
2059 } 1907 }
2060} 1908}
2061 1909
2062/* 1910/*
1911 * Loop doing repeated quiescent-state forcing until the grace period ends.
1912 */
1913static void rcu_gp_fqs_loop(void)
1914{
1915 bool first_gp_fqs;
1916 int gf;
1917 unsigned long j;
1918 int ret;
1919 struct rcu_node *rnp = rcu_get_root();
1920
1921 first_gp_fqs = true;
1922 j = READ_ONCE(jiffies_till_first_fqs);
1923 ret = 0;
1924 for (;;) {
1925 if (!ret) {
1926 rcu_state.jiffies_force_qs = jiffies + j;
1927 WRITE_ONCE(rcu_state.jiffies_kick_kthreads,
1928 jiffies + 3 * j);
1929 }
1930 trace_rcu_grace_period(rcu_state.name,
1931 READ_ONCE(rcu_state.gp_seq),
1932 TPS("fqswait"));
1933 rcu_state.gp_state = RCU_GP_WAIT_FQS;
1934 ret = swait_event_idle_timeout_exclusive(
1935 rcu_state.gp_wq, rcu_gp_fqs_check_wake(&gf), j);
1936 rcu_state.gp_state = RCU_GP_DOING_FQS;
1937 /* Locking provides needed memory barriers. */
1938 /* If grace period done, leave loop. */
1939 if (!READ_ONCE(rnp->qsmask) &&
1940 !rcu_preempt_blocked_readers_cgp(rnp))
1941 break;
1942 /* If time for quiescent-state forcing, do it. */
1943 if (ULONG_CMP_GE(jiffies, rcu_state.jiffies_force_qs) ||
1944 (gf & RCU_GP_FLAG_FQS)) {
1945 trace_rcu_grace_period(rcu_state.name,
1946 READ_ONCE(rcu_state.gp_seq),
1947 TPS("fqsstart"));
1948 rcu_gp_fqs(first_gp_fqs);
1949 first_gp_fqs = false;
1950 trace_rcu_grace_period(rcu_state.name,
1951 READ_ONCE(rcu_state.gp_seq),
1952 TPS("fqsend"));
1953 cond_resched_tasks_rcu_qs();
1954 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1955 ret = 0; /* Force full wait till next FQS. */
1956 j = READ_ONCE(jiffies_till_next_fqs);
1957 } else {
1958 /* Deal with stray signal. */
1959 cond_resched_tasks_rcu_qs();
1960 WRITE_ONCE(rcu_state.gp_activity, jiffies);
1961 WARN_ON(signal_pending(current));
1962 trace_rcu_grace_period(rcu_state.name,
1963 READ_ONCE(rcu_state.gp_seq),
1964 TPS("fqswaitsig"));
1965 ret = 1; /* Keep old FQS timing. */
1966 j = jiffies;
1967 if (time_after(jiffies, rcu_state.jiffies_force_qs))
1968 j = 1;
1969 else
1970 j = rcu_state.jiffies_force_qs - j;
1971 }
1972 }
1973}
1974
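The timing bookkeeping in the new rcu_gp_fqs_loop() deserves a spelled-out example: ret == 0 means the swait timed out, so a fresh force-quiescent-state deadline is computed from jiffies_till_first_fqs or jiffies_till_next_fqs, whereas a stray wakeup keeps the standing deadline and waits only for the remainder. The sketch below models just that remaining-time computation in plain C; the parameter names echo the loop above but nothing else is carried over.

#include <stdio.h>

#define time_after(a, b)	((long)((b) - (a)) < 0)	/* kernel idiom, local copy */

/* How long to wait next, given the current jiffies value, the standing
 * FQS deadline, and whether the previous wait ended by timeout (ret == 0)
 * or by a stray wakeup (ret != 0). */
static unsigned long next_wait(int ret, unsigned long jiffies_now,
			       unsigned long *jiffies_force_qs,
			       unsigned long fqs_period)
{
	if (!ret) {					/* timed out: set new deadline */
		*jiffies_force_qs = jiffies_now + fqs_period;
		return fqs_period;
	}
	if (time_after(jiffies_now, *jiffies_force_qs))	/* already overdue */
		return 1;
	return *jiffies_force_qs - jiffies_now;		/* remainder of old deadline */
}

int main(void)
{
	unsigned long deadline = 0;

	printf("%lu\n", next_wait(0, 1000, &deadline, 300));	/* 300, deadline now 1300 */
	printf("%lu\n", next_wait(1, 1100, &deadline, 300));	/* 200 jiffies left */
	printf("%lu\n", next_wait(1, 1400, &deadline, 300));	/* overdue, so 1 */
	return 0;
}
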
1975/*
2063 * Clean up after the old grace period. 1976 * Clean up after the old grace period.
2064 */ 1977 */
2065static void rcu_gp_cleanup(struct rcu_state *rsp) 1978static void rcu_gp_cleanup(void)
2066{ 1979{
2067 unsigned long gp_duration; 1980 unsigned long gp_duration;
2068 bool needgp = false; 1981 bool needgp = false;
2069 unsigned long new_gp_seq; 1982 unsigned long new_gp_seq;
2070 struct rcu_data *rdp; 1983 struct rcu_data *rdp;
2071 struct rcu_node *rnp = rcu_get_root(rsp); 1984 struct rcu_node *rnp = rcu_get_root();
2072 struct swait_queue_head *sq; 1985 struct swait_queue_head *sq;
2073 1986
2074 WRITE_ONCE(rsp->gp_activity, jiffies); 1987 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2075 raw_spin_lock_irq_rcu_node(rnp); 1988 raw_spin_lock_irq_rcu_node(rnp);
2076 gp_duration = jiffies - rsp->gp_start; 1989 gp_duration = jiffies - rcu_state.gp_start;
2077 if (gp_duration > rsp->gp_max) 1990 if (gp_duration > rcu_state.gp_max)
2078 rsp->gp_max = gp_duration; 1991 rcu_state.gp_max = gp_duration;
2079 1992
2080 /* 1993 /*
2081 * We know the grace period is complete, but to everyone else 1994 * We know the grace period is complete, but to everyone else
@@ -2096,48 +2009,50 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2096 * the rcu_node structures before the beginning of the next grace 2009 * the rcu_node structures before the beginning of the next grace
2097 * period is recorded in any of the rcu_node structures. 2010 * period is recorded in any of the rcu_node structures.
2098 */ 2011 */
2099 new_gp_seq = rsp->gp_seq; 2012 new_gp_seq = rcu_state.gp_seq;
2100 rcu_seq_end(&new_gp_seq); 2013 rcu_seq_end(&new_gp_seq);
2101 rcu_for_each_node_breadth_first(rsp, rnp) { 2014 rcu_for_each_node_breadth_first(rnp) {
2102 raw_spin_lock_irq_rcu_node(rnp); 2015 raw_spin_lock_irq_rcu_node(rnp);
2103 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) 2016 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
2104 dump_blkd_tasks(rsp, rnp, 10); 2017 dump_blkd_tasks(rnp, 10);
2105 WARN_ON_ONCE(rnp->qsmask); 2018 WARN_ON_ONCE(rnp->qsmask);
2106 WRITE_ONCE(rnp->gp_seq, new_gp_seq); 2019 WRITE_ONCE(rnp->gp_seq, new_gp_seq);
2107 rdp = this_cpu_ptr(rsp->rda); 2020 rdp = this_cpu_ptr(&rcu_data);
2108 if (rnp == rdp->mynode) 2021 if (rnp == rdp->mynode)
2109 needgp = __note_gp_changes(rsp, rnp, rdp) || needgp; 2022 needgp = __note_gp_changes(rnp, rdp) || needgp;
2110 /* smp_mb() provided by prior unlock-lock pair. */ 2023 /* smp_mb() provided by prior unlock-lock pair. */
2111 needgp = rcu_future_gp_cleanup(rsp, rnp) || needgp; 2024 needgp = rcu_future_gp_cleanup(rnp) || needgp;
2112 sq = rcu_nocb_gp_get(rnp); 2025 sq = rcu_nocb_gp_get(rnp);
2113 raw_spin_unlock_irq_rcu_node(rnp); 2026 raw_spin_unlock_irq_rcu_node(rnp);
2114 rcu_nocb_gp_cleanup(sq); 2027 rcu_nocb_gp_cleanup(sq);
2115 cond_resched_tasks_rcu_qs(); 2028 cond_resched_tasks_rcu_qs();
2116 WRITE_ONCE(rsp->gp_activity, jiffies); 2029 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2117 rcu_gp_slow(rsp, gp_cleanup_delay); 2030 rcu_gp_slow(gp_cleanup_delay);
2118 } 2031 }
2119 rnp = rcu_get_root(rsp); 2032 rnp = rcu_get_root();
2120 raw_spin_lock_irq_rcu_node(rnp); /* GP before rsp->gp_seq update. */ 2033 raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */
2121 2034
2122 /* Declare grace period done. */ 2035 /* Declare grace period done. */
2123 rcu_seq_end(&rsp->gp_seq); 2036 rcu_seq_end(&rcu_state.gp_seq);
2124 trace_rcu_grace_period(rsp->name, rsp->gp_seq, TPS("end")); 2037 trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
2125 rsp->gp_state = RCU_GP_IDLE; 2038 rcu_state.gp_state = RCU_GP_IDLE;
2126 /* Check for GP requests since above loop. */ 2039 /* Check for GP requests since above loop. */
2127 rdp = this_cpu_ptr(rsp->rda); 2040 rdp = this_cpu_ptr(&rcu_data);
2128 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) { 2041 if (!needgp && ULONG_CMP_LT(rnp->gp_seq, rnp->gp_seq_needed)) {
2129 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed, 2042 trace_rcu_this_gp(rnp, rdp, rnp->gp_seq_needed,
2130 TPS("CleanupMore")); 2043 TPS("CleanupMore"));
2131 needgp = true; 2044 needgp = true;
2132 } 2045 }
2133 /* Advance CBs to reduce false positives below. */ 2046 /* Advance CBs to reduce false positives below. */
2134 if (!rcu_accelerate_cbs(rsp, rnp, rdp) && needgp) { 2047 if (!rcu_accelerate_cbs(rnp, rdp) && needgp) {
2135 WRITE_ONCE(rsp->gp_flags, RCU_GP_FLAG_INIT); 2048 WRITE_ONCE(rcu_state.gp_flags, RCU_GP_FLAG_INIT);
2136 rsp->gp_req_activity = jiffies; 2049 rcu_state.gp_req_activity = jiffies;
2137 trace_rcu_grace_period(rsp->name, READ_ONCE(rsp->gp_seq), 2050 trace_rcu_grace_period(rcu_state.name,
2051 READ_ONCE(rcu_state.gp_seq),
2138 TPS("newreq")); 2052 TPS("newreq"));
2139 } else { 2053 } else {
2140 WRITE_ONCE(rsp->gp_flags, rsp->gp_flags & RCU_GP_FLAG_INIT); 2054 WRITE_ONCE(rcu_state.gp_flags,
2055 rcu_state.gp_flags & RCU_GP_FLAG_INIT);
2141 } 2056 }
2142 raw_spin_unlock_irq_rcu_node(rnp); 2057 raw_spin_unlock_irq_rcu_node(rnp);
2143} 2058}
@@ -2145,116 +2060,60 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
2145/* 2060/*
2146 * Body of kthread that handles grace periods. 2061 * Body of kthread that handles grace periods.
2147 */ 2062 */
2148static int __noreturn rcu_gp_kthread(void *arg) 2063static int __noreturn rcu_gp_kthread(void *unused)
2149{ 2064{
2150 bool first_gp_fqs;
2151 int gf;
2152 unsigned long j;
2153 int ret;
2154 struct rcu_state *rsp = arg;
2155 struct rcu_node *rnp = rcu_get_root(rsp);
2156
2157 rcu_bind_gp_kthread(); 2065 rcu_bind_gp_kthread();
2158 for (;;) { 2066 for (;;) {
2159 2067
2160 /* Handle grace-period start. */ 2068 /* Handle grace-period start. */
2161 for (;;) { 2069 for (;;) {
2162 trace_rcu_grace_period(rsp->name, 2070 trace_rcu_grace_period(rcu_state.name,
2163 READ_ONCE(rsp->gp_seq), 2071 READ_ONCE(rcu_state.gp_seq),
2164 TPS("reqwait")); 2072 TPS("reqwait"));
2165 rsp->gp_state = RCU_GP_WAIT_GPS; 2073 rcu_state.gp_state = RCU_GP_WAIT_GPS;
2166 swait_event_idle_exclusive(rsp->gp_wq, READ_ONCE(rsp->gp_flags) & 2074 swait_event_idle_exclusive(rcu_state.gp_wq,
2167 RCU_GP_FLAG_INIT); 2075 READ_ONCE(rcu_state.gp_flags) &
2168 rsp->gp_state = RCU_GP_DONE_GPS; 2076 RCU_GP_FLAG_INIT);
2077 rcu_state.gp_state = RCU_GP_DONE_GPS;
2169 /* Locking provides needed memory barrier. */ 2078 /* Locking provides needed memory barrier. */
2170 if (rcu_gp_init(rsp)) 2079 if (rcu_gp_init())
2171 break; 2080 break;
2172 cond_resched_tasks_rcu_qs(); 2081 cond_resched_tasks_rcu_qs();
2173 WRITE_ONCE(rsp->gp_activity, jiffies); 2082 WRITE_ONCE(rcu_state.gp_activity, jiffies);
2174 WARN_ON(signal_pending(current)); 2083 WARN_ON(signal_pending(current));
2175 trace_rcu_grace_period(rsp->name, 2084 trace_rcu_grace_period(rcu_state.name,
2176 READ_ONCE(rsp->gp_seq), 2085 READ_ONCE(rcu_state.gp_seq),
2177 TPS("reqwaitsig")); 2086 TPS("reqwaitsig"));
2178 } 2087 }
2179 2088
2180 /* Handle quiescent-state forcing. */ 2089 /* Handle quiescent-state forcing. */
2181 first_gp_fqs = true; 2090 rcu_gp_fqs_loop();
2182 j = jiffies_till_first_fqs;
2183 ret = 0;
2184 for (;;) {
2185 if (!ret) {
2186 rsp->jiffies_force_qs = jiffies + j;
2187 WRITE_ONCE(rsp->jiffies_kick_kthreads,
2188 jiffies + 3 * j);
2189 }
2190 trace_rcu_grace_period(rsp->name,
2191 READ_ONCE(rsp->gp_seq),
2192 TPS("fqswait"));
2193 rsp->gp_state = RCU_GP_WAIT_FQS;
2194 ret = swait_event_idle_timeout_exclusive(rsp->gp_wq,
2195 rcu_gp_fqs_check_wake(rsp, &gf), j);
2196 rsp->gp_state = RCU_GP_DOING_FQS;
2197 /* Locking provides needed memory barriers. */
2198 /* If grace period done, leave loop. */
2199 if (!READ_ONCE(rnp->qsmask) &&
2200 !rcu_preempt_blocked_readers_cgp(rnp))
2201 break;
2202 /* If time for quiescent-state forcing, do it. */
2203 if (ULONG_CMP_GE(jiffies, rsp->jiffies_force_qs) ||
2204 (gf & RCU_GP_FLAG_FQS)) {
2205 trace_rcu_grace_period(rsp->name,
2206 READ_ONCE(rsp->gp_seq),
2207 TPS("fqsstart"));
2208 rcu_gp_fqs(rsp, first_gp_fqs);
2209 first_gp_fqs = false;
2210 trace_rcu_grace_period(rsp->name,
2211 READ_ONCE(rsp->gp_seq),
2212 TPS("fqsend"));
2213 cond_resched_tasks_rcu_qs();
2214 WRITE_ONCE(rsp->gp_activity, jiffies);
2215 ret = 0; /* Force full wait till next FQS. */
2216 j = jiffies_till_next_fqs;
2217 } else {
2218 /* Deal with stray signal. */
2219 cond_resched_tasks_rcu_qs();
2220 WRITE_ONCE(rsp->gp_activity, jiffies);
2221 WARN_ON(signal_pending(current));
2222 trace_rcu_grace_period(rsp->name,
2223 READ_ONCE(rsp->gp_seq),
2224 TPS("fqswaitsig"));
2225 ret = 1; /* Keep old FQS timing. */
2226 j = jiffies;
2227 if (time_after(jiffies, rsp->jiffies_force_qs))
2228 j = 1;
2229 else
2230 j = rsp->jiffies_force_qs - j;
2231 }
2232 }
2233 2091
2234 /* Handle grace-period end. */ 2092 /* Handle grace-period end. */
2235 rsp->gp_state = RCU_GP_CLEANUP; 2093 rcu_state.gp_state = RCU_GP_CLEANUP;
2236 rcu_gp_cleanup(rsp); 2094 rcu_gp_cleanup();
2237 rsp->gp_state = RCU_GP_CLEANED; 2095 rcu_state.gp_state = RCU_GP_CLEANED;
2238 } 2096 }
2239} 2097}
2240 2098
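With the per-flavor rsp argument gone, rcu_gp_kthread() reduces to a fixed walk through the RCU_GP_* states: wait for a grace-period request, initialize via rcu_gp_init(), run rcu_gp_fqs_loop(), then clean up. A compressed state-walk sketch; the enum values and names are stand-ins, and only the ordering is taken from the code above.

#include <stddef.h>
#include <stdio.h>

enum gp_state {		/* illustrative stand-ins for the RCU_GP_* values */
	GP_IDLE, GP_WAIT_GPS, GP_DONE_GPS, GP_ONOFF, GP_INIT,
	GP_WAIT_FQS, GP_DOING_FQS, GP_CLEANUP, GP_CLEANED,
};

static const char *names[] = {
	"idle", "wait-gps", "done-gps", "onoff", "init",
	"wait-fqs", "doing-fqs", "cleanup", "cleaned",
};

int main(void)
{
	/* One full trip around the grace-period kthread's outer loop. */
	enum gp_state trip[] = {
		GP_WAIT_GPS, GP_DONE_GPS,	/* woken by RCU_GP_FLAG_INIT */
		GP_ONOFF, GP_INIT,		/* rcu_gp_init() */
		GP_WAIT_FQS, GP_DOING_FQS,	/* rcu_gp_fqs_loop(), repeated */
		GP_CLEANUP, GP_IDLE,		/* rcu_gp_cleanup() sets IDLE */
		GP_CLEANED,			/* kthread marks cleanup done */
	};
	size_t n = sizeof(trip) / sizeof(trip[0]);

	for (size_t i = 0; i < n; i++)
		printf("%s%s", names[trip[i]], i + 1 < n ? " -> " : "\n");
	return 0;
}
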
2241/* 2099/*
2242 * Report a full set of quiescent states to the specified rcu_state data 2100 * Report a full set of quiescent states to the rcu_state data structure.
2243 * structure. Invoke rcu_gp_kthread_wake() to awaken the grace-period 2101 * Invoke rcu_gp_kthread_wake() to awaken the grace-period kthread if
2244 * kthread if another grace period is required. Whether we wake 2102 * another grace period is required. Whether we wake the grace-period
2245 * the grace-period kthread or it awakens itself for the next round 2103 * kthread or it awakens itself for the next round of quiescent-state
2246 * of quiescent-state forcing, that kthread will clean up after the 2104 * forcing, that kthread will clean up after the just-completed grace
2247 * just-completed grace period. Note that the caller must hold rnp->lock, 2105 * period. Note that the caller must hold rnp->lock, which is released
2248 * which is released before return. 2106 * before return.
2249 */ 2107 */
2250static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 2108static void rcu_report_qs_rsp(unsigned long flags)
2251 __releases(rcu_get_root(rsp)->lock) 2109 __releases(rcu_get_root()->lock)
2252{ 2110{
2253 raw_lockdep_assert_held_rcu_node(rcu_get_root(rsp)); 2111 raw_lockdep_assert_held_rcu_node(rcu_get_root());
2254 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 2112 WARN_ON_ONCE(!rcu_gp_in_progress());
2255 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); 2113 WRITE_ONCE(rcu_state.gp_flags,
2256 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); 2114 READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
2257 rcu_gp_kthread_wake(rsp); 2115 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(), flags);
2116 rcu_gp_kthread_wake();
2258} 2117}
2259 2118
2260/* 2119/*
@@ -2271,9 +2130,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
2271 * disabled. This allows propagating quiescent state due to resumed tasks 2130 * disabled. This allows propagating quiescent state due to resumed tasks
2272 * during grace-period initialization. 2131 * during grace-period initialization.
2273 */ 2132 */
2274static void 2133static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp,
2275rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp, 2134 unsigned long gps, unsigned long flags)
2276 struct rcu_node *rnp, unsigned long gps, unsigned long flags)
2277 __releases(rnp->lock) 2135 __releases(rnp->lock)
2278{ 2136{
2279 unsigned long oldmask = 0; 2137 unsigned long oldmask = 0;
@@ -2296,7 +2154,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2296 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) && 2154 WARN_ON_ONCE(!rcu_is_leaf_node(rnp) &&
2297 rcu_preempt_blocked_readers_cgp(rnp)); 2155 rcu_preempt_blocked_readers_cgp(rnp));
2298 rnp->qsmask &= ~mask; 2156 rnp->qsmask &= ~mask;
2299 trace_rcu_quiescent_state_report(rsp->name, rnp->gp_seq, 2157 trace_rcu_quiescent_state_report(rcu_state.name, rnp->gp_seq,
2300 mask, rnp->qsmask, rnp->level, 2158 mask, rnp->qsmask, rnp->level,
2301 rnp->grplo, rnp->grphi, 2159 rnp->grplo, rnp->grphi,
2302 !!rnp->gp_tasks); 2160 !!rnp->gp_tasks);
@@ -2326,19 +2184,18 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2326 * state for this grace period. Invoke rcu_report_qs_rsp() 2184 * state for this grace period. Invoke rcu_report_qs_rsp()
2327 * to clean up and start the next grace period if one is needed. 2185 * to clean up and start the next grace period if one is needed.
2328 */ 2186 */
2329 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */ 2187 rcu_report_qs_rsp(flags); /* releases rnp->lock. */
2330} 2188}
2331 2189
2332/* 2190/*
2333 * Record a quiescent state for all tasks that were previously queued 2191 * Record a quiescent state for all tasks that were previously queued
2334 * on the specified rcu_node structure and that were blocking the current 2192 * on the specified rcu_node structure and that were blocking the current
2335 * RCU grace period. The caller must hold the specified rnp->lock with 2193 * RCU grace period. The caller must hold the corresponding rnp->lock with
2336 * irqs disabled, and this lock is released upon return, but irqs remain 2194 * irqs disabled, and this lock is released upon return, but irqs remain
2337 * disabled. 2195 * disabled.
2338 */ 2196 */
2339static void __maybe_unused 2197static void __maybe_unused
2340rcu_report_unblock_qs_rnp(struct rcu_state *rsp, 2198rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
2341 struct rcu_node *rnp, unsigned long flags)
2342 __releases(rnp->lock) 2199 __releases(rnp->lock)
2343{ 2200{
2344 unsigned long gps; 2201 unsigned long gps;
@@ -2346,8 +2203,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2346 struct rcu_node *rnp_p; 2203 struct rcu_node *rnp_p;
2347 2204
2348 raw_lockdep_assert_held_rcu_node(rnp); 2205 raw_lockdep_assert_held_rcu_node(rnp);
2349 if (WARN_ON_ONCE(rcu_state_p == &rcu_sched_state) || 2206 if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)) ||
2350 WARN_ON_ONCE(rsp != rcu_state_p) ||
2351 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) || 2207 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)) ||
2352 rnp->qsmask != 0) { 2208 rnp->qsmask != 0) {
2353 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2209 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2361,7 +2217,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2361 * Only one rcu_node structure in the tree, so don't 2217 * Only one rcu_node structure in the tree, so don't
2362 * try to report up to its nonexistent parent! 2218 * try to report up to its nonexistent parent!
2363 */ 2219 */
2364 rcu_report_qs_rsp(rsp, flags); 2220 rcu_report_qs_rsp(flags);
2365 return; 2221 return;
2366 } 2222 }
2367 2223
@@ -2370,7 +2226,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2370 mask = rnp->grpmask; 2226 mask = rnp->grpmask;
2371 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ 2227 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
2372 raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */ 2228 raw_spin_lock_rcu_node(rnp_p); /* irqs already disabled. */
2373 rcu_report_qs_rnp(mask, rsp, rnp_p, gps, flags); 2229 rcu_report_qs_rnp(mask, rnp_p, gps, flags);
2374} 2230}
2375 2231
2376/* 2232/*
@@ -2378,7 +2234,7 @@ rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2378 * structure. This must be called from the specified CPU. 2234 * structure. This must be called from the specified CPU.
2379 */ 2235 */
2380static void 2236static void
2381rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) 2237rcu_report_qs_rdp(int cpu, struct rcu_data *rdp)
2382{ 2238{
2383 unsigned long flags; 2239 unsigned long flags;
2384 unsigned long mask; 2240 unsigned long mask;
@@ -2397,7 +2253,6 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2397 * within the current grace period. 2253 * within the current grace period.
2398 */ 2254 */
2399 rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */ 2255 rdp->cpu_no_qs.b.norm = true; /* need qs for new gp. */
2400 rdp->rcu_qs_ctr_snap = __this_cpu_read(rcu_dynticks.rcu_qs_ctr);
2401 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2256 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2402 return; 2257 return;
2403 } 2258 }
@@ -2411,12 +2266,12 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2411 * This GP can't end until cpu checks in, so all of our 2266 * This GP can't end until cpu checks in, so all of our
2412 * callbacks can be processed during the next GP. 2267 * callbacks can be processed during the next GP.
2413 */ 2268 */
2414 needwake = rcu_accelerate_cbs(rsp, rnp, rdp); 2269 needwake = rcu_accelerate_cbs(rnp, rdp);
2415 2270
2416 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); 2271 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2417 /* ^^^ Released rnp->lock */ 2272 /* ^^^ Released rnp->lock */
2418 if (needwake) 2273 if (needwake)
2419 rcu_gp_kthread_wake(rsp); 2274 rcu_gp_kthread_wake();
2420 } 2275 }
2421} 2276}
2422 2277
@@ -2427,10 +2282,10 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp)
2427 * quiescent state for this grace period, and record that fact if so. 2282 * quiescent state for this grace period, and record that fact if so.
2428 */ 2283 */
2429static void 2284static void
2430rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) 2285rcu_check_quiescent_state(struct rcu_data *rdp)
2431{ 2286{
2432 /* Check for grace-period ends and beginnings. */ 2287 /* Check for grace-period ends and beginnings. */
2433 note_gp_changes(rsp, rdp); 2288 note_gp_changes(rdp);
2434 2289
2435 /* 2290 /*
2436 * Does this CPU still need to do its part for current grace period? 2291 * Does this CPU still need to do its part for current grace period?
@@ -2450,24 +2305,26 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
2450 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 2305 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
2451 * judge of that). 2306 * judge of that).
2452 */ 2307 */
2453 rcu_report_qs_rdp(rdp->cpu, rsp, rdp); 2308 rcu_report_qs_rdp(rdp->cpu, rdp);
2454} 2309}
2455 2310
2456/* 2311/*
2457 * Trace the fact that this CPU is going offline. 2312 * Near the end of the offline process. Trace the fact that this CPU
2313 * is going offline.
2458 */ 2314 */
2459static void rcu_cleanup_dying_cpu(struct rcu_state *rsp) 2315int rcutree_dying_cpu(unsigned int cpu)
2460{ 2316{
2461 RCU_TRACE(bool blkd;) 2317 RCU_TRACE(bool blkd;)
2462 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(rsp->rda);) 2318 RCU_TRACE(struct rcu_data *rdp = this_cpu_ptr(&rcu_data);)
2463 RCU_TRACE(struct rcu_node *rnp = rdp->mynode;) 2319 RCU_TRACE(struct rcu_node *rnp = rdp->mynode;)
2464 2320
2465 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) 2321 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2466 return; 2322 return 0;
2467 2323
2468 RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);) 2324 RCU_TRACE(blkd = !!(rnp->qsmask & rdp->grpmask);)
2469 trace_rcu_grace_period(rsp->name, rnp->gp_seq, 2325 trace_rcu_grace_period(rcu_state.name, rnp->gp_seq,
2470 blkd ? TPS("cpuofl") : TPS("cpuofl-bgp")); 2326 blkd ? TPS("cpuofl") : TPS("cpuofl-bgp"));
2327 return 0;
2471} 2328}
2472 2329
2473/* 2330/*
@@ -2521,23 +2378,26 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2521 * There can only be one CPU hotplug operation at a time, so no need for 2378 * There can only be one CPU hotplug operation at a time, so no need for
2522 * explicit locking. 2379 * explicit locking.
2523 */ 2380 */
2524static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) 2381int rcutree_dead_cpu(unsigned int cpu)
2525{ 2382{
2526 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 2383 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
2527 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 2384 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
2528 2385
2529 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) 2386 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
2530 return; 2387 return 0;
2531 2388
2532 /* Adjust any no-longer-needed kthreads. */ 2389 /* Adjust any no-longer-needed kthreads. */
2533 rcu_boost_kthread_setaffinity(rnp, -1); 2390 rcu_boost_kthread_setaffinity(rnp, -1);
2391 /* Do any needed no-CB deferred wakeups from this CPU. */
2392 do_nocb_deferred_wakeup(per_cpu_ptr(&rcu_data, cpu));
2393 return 0;
2534} 2394}
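The two hotplug hooks above, rcutree_dying_cpu() and rcutree_dead_cpu(), now use the CPU-hotplug callback signature (int return, unsigned int cpu argument) and are invoked through the hotplug core's built-in state table rather than being registered at run time. For illustration only, a hypothetical driver registering a dynamic state with the same callback shape (the my_* names are made up and not part of this patch):

    #include <linux/cpuhotplug.h>
    #include <linux/init.h>
    #include <linux/printk.h>

    static int my_cpu_online(unsigned int cpu)
    {
            pr_info("my_drv: CPU %u coming up\n", cpu);
            return 0;               /* Nonzero aborts the hotplug transition. */
    }

    static int my_cpu_going_down(unsigned int cpu)
    {
            pr_info("my_drv: CPU %u going down\n", cpu);
            return 0;
    }

    static int __init my_drv_init(void)
    {
            int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "my_drv:online",
                                        my_cpu_online, my_cpu_going_down);

            return ret < 0 ? ret : 0;   /* Dynamic states return the state number. */
    }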
2535 2395
2536/* 2396/*
2537 * Invoke any RCU callbacks that have made it to the end of their grace 2397 * Invoke any RCU callbacks that have made it to the end of their grace
2538 * period. Throttle as specified by rdp->blimit. 2398 * period. Throttle as specified by rdp->blimit.
2539 */ 2399 */
2540static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) 2400static void rcu_do_batch(struct rcu_data *rdp)
2541{ 2401{
2542 unsigned long flags; 2402 unsigned long flags;
2543 struct rcu_head *rhp; 2403 struct rcu_head *rhp;
@@ -2546,10 +2406,10 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2546 2406
2547 /* If no callbacks are ready, just return. */ 2407 /* If no callbacks are ready, just return. */
2548 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) { 2408 if (!rcu_segcblist_ready_cbs(&rdp->cblist)) {
2549 trace_rcu_batch_start(rsp->name, 2409 trace_rcu_batch_start(rcu_state.name,
2550 rcu_segcblist_n_lazy_cbs(&rdp->cblist), 2410 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2551 rcu_segcblist_n_cbs(&rdp->cblist), 0); 2411 rcu_segcblist_n_cbs(&rdp->cblist), 0);
2552 trace_rcu_batch_end(rsp->name, 0, 2412 trace_rcu_batch_end(rcu_state.name, 0,
2553 !rcu_segcblist_empty(&rdp->cblist), 2413 !rcu_segcblist_empty(&rdp->cblist),
2554 need_resched(), is_idle_task(current), 2414 need_resched(), is_idle_task(current),
2555 rcu_is_callbacks_kthread()); 2415 rcu_is_callbacks_kthread());
@@ -2564,7 +2424,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2564 local_irq_save(flags); 2424 local_irq_save(flags);
2565 WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); 2425 WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
2566 bl = rdp->blimit; 2426 bl = rdp->blimit;
2567 trace_rcu_batch_start(rsp->name, rcu_segcblist_n_lazy_cbs(&rdp->cblist), 2427 trace_rcu_batch_start(rcu_state.name,
2428 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
2568 rcu_segcblist_n_cbs(&rdp->cblist), bl); 2429 rcu_segcblist_n_cbs(&rdp->cblist), bl);
2569 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl); 2430 rcu_segcblist_extract_done_cbs(&rdp->cblist, &rcl);
2570 local_irq_restore(flags); 2431 local_irq_restore(flags);
@@ -2573,7 +2434,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2573 rhp = rcu_cblist_dequeue(&rcl); 2434 rhp = rcu_cblist_dequeue(&rcl);
2574 for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) { 2435 for (; rhp; rhp = rcu_cblist_dequeue(&rcl)) {
2575 debug_rcu_head_unqueue(rhp); 2436 debug_rcu_head_unqueue(rhp);
2576 if (__rcu_reclaim(rsp->name, rhp)) 2437 if (__rcu_reclaim(rcu_state.name, rhp))
2577 rcu_cblist_dequeued_lazy(&rcl); 2438 rcu_cblist_dequeued_lazy(&rcl);
2578 /* 2439 /*
2579 * Stop only if limit reached and CPU has something to do. 2440 * Stop only if limit reached and CPU has something to do.
@@ -2587,7 +2448,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2587 2448
2588 local_irq_save(flags); 2449 local_irq_save(flags);
2589 count = -rcl.len; 2450 count = -rcl.len;
2590 trace_rcu_batch_end(rsp->name, count, !!rcl.head, need_resched(), 2451 trace_rcu_batch_end(rcu_state.name, count, !!rcl.head, need_resched(),
2591 is_idle_task(current), rcu_is_callbacks_kthread()); 2452 is_idle_task(current), rcu_is_callbacks_kthread());
2592 2453
2593 /* Update counts and requeue any remaining callbacks. */ 2454 /* Update counts and requeue any remaining callbacks. */
@@ -2603,7 +2464,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2603 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ 2464 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
2604 if (count == 0 && rdp->qlen_last_fqs_check != 0) { 2465 if (count == 0 && rdp->qlen_last_fqs_check != 0) {
2605 rdp->qlen_last_fqs_check = 0; 2466 rdp->qlen_last_fqs_check = 0;
2606 rdp->n_force_qs_snap = rsp->n_force_qs; 2467 rdp->n_force_qs_snap = rcu_state.n_force_qs;
2607 } else if (count < rdp->qlen_last_fqs_check - qhimark) 2468 } else if (count < rdp->qlen_last_fqs_check - qhimark)
2608 rdp->qlen_last_fqs_check = count; 2469 rdp->qlen_last_fqs_check = count;
2609 2470
@@ -2631,37 +2492,17 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
2631void rcu_check_callbacks(int user) 2492void rcu_check_callbacks(int user)
2632{ 2493{
2633 trace_rcu_utilization(TPS("Start scheduler-tick")); 2494 trace_rcu_utilization(TPS("Start scheduler-tick"));
2634 increment_cpu_stall_ticks(); 2495 raw_cpu_inc(rcu_data.ticks_this_gp);
2635 if (user || rcu_is_cpu_rrupt_from_idle()) { 2496 /* The load-acquire pairs with the store-release setting to true. */
2636 2497 if (smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
2637 /* 2498 /* Idle and userspace execution already are quiescent states. */
2638 * Get here if this CPU took its interrupt from user 2499 if (!rcu_is_cpu_rrupt_from_idle() && !user) {
2639 * mode or from the idle loop, and if this is not a 2500 set_tsk_need_resched(current);
2640 * nested interrupt. In this case, the CPU is in 2501 set_preempt_need_resched();
2641 * a quiescent state, so note it. 2502 }
2642 * 2503 __this_cpu_write(rcu_data.rcu_urgent_qs, false);
2643 * No memory barrier is required here because both
2644 * rcu_sched_qs() and rcu_bh_qs() reference only CPU-local
2645 * variables that other CPUs neither access nor modify,
2646 * at least not while the corresponding CPU is online.
2647 */
2648
2649 rcu_sched_qs();
2650 rcu_bh_qs();
2651 rcu_note_voluntary_context_switch(current);
2652
2653 } else if (!in_softirq()) {
2654
2655 /*
2656 * Get here if this CPU did not take its interrupt from
2657 * softirq, in other words, if it is not interrupting
2658 * a rcu_bh read-side critical section. This is an _bh
2659 * critical section, so note it.
2660 */
2661
2662 rcu_bh_qs();
2663 } 2504 }
2664 rcu_preempt_check_callbacks(); 2505 rcu_flavor_check_callbacks(user);
2665 if (rcu_pending()) 2506 if (rcu_pending())
2666 invoke_rcu_core(); 2507 invoke_rcu_core();
2667 2508
@@ -2675,20 +2516,19 @@ void rcu_check_callbacks(int user)
2675 * 2516 *
2676 * The caller must have suppressed start of new grace periods. 2517 * The caller must have suppressed start of new grace periods.
2677 */ 2518 */
2678static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp)) 2519static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
2679{ 2520{
2680 int cpu; 2521 int cpu;
2681 unsigned long flags; 2522 unsigned long flags;
2682 unsigned long mask; 2523 unsigned long mask;
2683 struct rcu_node *rnp; 2524 struct rcu_node *rnp;
2684 2525
2685 rcu_for_each_leaf_node(rsp, rnp) { 2526 rcu_for_each_leaf_node(rnp) {
2686 cond_resched_tasks_rcu_qs(); 2527 cond_resched_tasks_rcu_qs();
2687 mask = 0; 2528 mask = 0;
2688 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2529 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2689 if (rnp->qsmask == 0) { 2530 if (rnp->qsmask == 0) {
2690 if (rcu_state_p == &rcu_sched_state || 2531 if (!IS_ENABLED(CONFIG_PREEMPT) ||
2691 rsp != rcu_state_p ||
2692 rcu_preempt_blocked_readers_cgp(rnp)) { 2532 rcu_preempt_blocked_readers_cgp(rnp)) {
2693 /* 2533 /*
2694 * No point in scanning bits because they 2534 * No point in scanning bits because they
@@ -2705,13 +2545,13 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2705 for_each_leaf_node_possible_cpu(rnp, cpu) { 2545 for_each_leaf_node_possible_cpu(rnp, cpu) {
2706 unsigned long bit = leaf_node_cpu_bit(rnp, cpu); 2546 unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
2707 if ((rnp->qsmask & bit) != 0) { 2547 if ((rnp->qsmask & bit) != 0) {
2708 if (f(per_cpu_ptr(rsp->rda, cpu))) 2548 if (f(per_cpu_ptr(&rcu_data, cpu)))
2709 mask |= bit; 2549 mask |= bit;
2710 } 2550 }
2711 } 2551 }
2712 if (mask != 0) { 2552 if (mask != 0) {
2713 /* Idle/offline CPUs, report (releases rnp->lock). */ 2553 /* Idle/offline CPUs, report (releases rnp->lock). */
2714 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); 2554 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
2715 } else { 2555 } else {
2716 /* Nothing to do here, so just drop the lock. */ 2556 /* Nothing to do here, so just drop the lock. */
2717 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2557 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2723,7 +2563,7 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2723 * Force quiescent states on reluctant CPUs, and also detect which 2563 * Force quiescent states on reluctant CPUs, and also detect which
2724 * CPUs are in dyntick-idle mode. 2564 * CPUs are in dyntick-idle mode.
2725 */ 2565 */
2726static void force_quiescent_state(struct rcu_state *rsp) 2566static void force_quiescent_state(void)
2727{ 2567{
2728 unsigned long flags; 2568 unsigned long flags;
2729 bool ret; 2569 bool ret;
@@ -2731,9 +2571,9 @@ static void force_quiescent_state(struct rcu_state *rsp)
2731 struct rcu_node *rnp_old = NULL; 2571 struct rcu_node *rnp_old = NULL;
2732 2572
2733 /* Funnel through hierarchy to reduce memory contention. */ 2573 /* Funnel through hierarchy to reduce memory contention. */
2734 rnp = __this_cpu_read(rsp->rda->mynode); 2574 rnp = __this_cpu_read(rcu_data.mynode);
2735 for (; rnp != NULL; rnp = rnp->parent) { 2575 for (; rnp != NULL; rnp = rnp->parent) {
2736 ret = (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) || 2576 ret = (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) ||
2737 !raw_spin_trylock(&rnp->fqslock); 2577 !raw_spin_trylock(&rnp->fqslock);
2738 if (rnp_old != NULL) 2578 if (rnp_old != NULL)
2739 raw_spin_unlock(&rnp_old->fqslock); 2579 raw_spin_unlock(&rnp_old->fqslock);
@@ -2741,18 +2581,19 @@ static void force_quiescent_state(struct rcu_state *rsp)
2741 return; 2581 return;
2742 rnp_old = rnp; 2582 rnp_old = rnp;
2743 } 2583 }
2744 /* rnp_old == rcu_get_root(rsp), rnp == NULL. */ 2584 /* rnp_old == rcu_get_root(), rnp == NULL. */
2745 2585
2746 /* Reached the root of the rcu_node tree, acquire lock. */ 2586 /* Reached the root of the rcu_node tree, acquire lock. */
2747 raw_spin_lock_irqsave_rcu_node(rnp_old, flags); 2587 raw_spin_lock_irqsave_rcu_node(rnp_old, flags);
2748 raw_spin_unlock(&rnp_old->fqslock); 2588 raw_spin_unlock(&rnp_old->fqslock);
2749 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2589 if (READ_ONCE(rcu_state.gp_flags) & RCU_GP_FLAG_FQS) {
2750 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); 2590 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2751 return; /* Someone beat us to it. */ 2591 return; /* Someone beat us to it. */
2752 } 2592 }
2753 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); 2593 WRITE_ONCE(rcu_state.gp_flags,
2594 READ_ONCE(rcu_state.gp_flags) | RCU_GP_FLAG_FQS);
2754 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags); 2595 raw_spin_unlock_irqrestore_rcu_node(rnp_old, flags);
2755 rcu_gp_kthread_wake(rsp); 2596 rcu_gp_kthread_wake();
2756} 2597}
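The funnel locking in force_quiescent_state() keeps every CPU from piling onto the root rcu_node lock: a contender climbs toward the root one fqslock trylock at a time, and gives up as soon as it either loses a trylock or sees the FQS request flag already set. A distilled, stand-alone sketch of the idiom (struct funnel_node and funnel_request() are invented for illustration and are not part of this patch):

    #include <linux/atomic.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct funnel_node {
            spinlock_t lock;
            struct funnel_node *parent;     /* NULL at the root. */
    };

    /* Returns true if this caller reached the root and published the request. */
    static bool funnel_request(struct funnel_node *leaf, atomic_t *requested)
    {
            struct funnel_node *np, *np_old = NULL;

            for (np = leaf; np; np = np->parent) {
                    bool lose = atomic_read(requested) || !spin_trylock(&np->lock);

                    if (np_old)
                            spin_unlock(&np_old->lock);
                    if (lose)
                            return false;   /* Someone else owns the request. */
                    np_old = np;
            }
            atomic_set(requested, 1);       /* np_old is the locked root. */
            spin_unlock(&np_old->lock);
            return true;
    }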
2757 2598
2758/* 2599/*
@@ -2760,30 +2601,29 @@ static void force_quiescent_state(struct rcu_state *rsp)
2760 * RCU to come out of its idle mode. 2601 * RCU to come out of its idle mode.
2761 */ 2602 */
2762static void 2603static void
2763rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp, 2604rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp)
2764 struct rcu_data *rdp)
2765{ 2605{
2766 const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; 2606 const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ;
2767 unsigned long flags; 2607 unsigned long flags;
2768 unsigned long j; 2608 unsigned long j;
2769 struct rcu_node *rnp_root = rcu_get_root(rsp); 2609 struct rcu_node *rnp_root = rcu_get_root();
2770 static atomic_t warned = ATOMIC_INIT(0); 2610 static atomic_t warned = ATOMIC_INIT(0);
2771 2611
2772 if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress(rsp) || 2612 if (!IS_ENABLED(CONFIG_PROVE_RCU) || rcu_gp_in_progress() ||
2773 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed)) 2613 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed))
2774 return; 2614 return;
2775 j = jiffies; /* Expensive access, and in common case don't get here. */ 2615 j = jiffies; /* Expensive access, and in common case don't get here. */
2776 if (time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) || 2616 if (time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
2777 time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) || 2617 time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
2778 atomic_read(&warned)) 2618 atomic_read(&warned))
2779 return; 2619 return;
2780 2620
2781 raw_spin_lock_irqsave_rcu_node(rnp, flags); 2621 raw_spin_lock_irqsave_rcu_node(rnp, flags);
2782 j = jiffies; 2622 j = jiffies;
2783 if (rcu_gp_in_progress(rsp) || 2623 if (rcu_gp_in_progress() ||
2784 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || 2624 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
2785 time_before(j, READ_ONCE(rsp->gp_req_activity) + gpssdelay) || 2625 time_before(j, READ_ONCE(rcu_state.gp_req_activity) + gpssdelay) ||
2786 time_before(j, READ_ONCE(rsp->gp_activity) + gpssdelay) || 2626 time_before(j, READ_ONCE(rcu_state.gp_activity) + gpssdelay) ||
2787 atomic_read(&warned)) { 2627 atomic_read(&warned)) {
2788 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2628 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2789 return; 2629 return;
@@ -2793,21 +2633,21 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
2793 if (rnp_root != rnp) 2633 if (rnp_root != rnp)
2794 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ 2634 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
2795 j = jiffies; 2635 j = jiffies;
2796 if (rcu_gp_in_progress(rsp) || 2636 if (rcu_gp_in_progress() ||
2797 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) || 2637 ULONG_CMP_GE(rnp_root->gp_seq, rnp_root->gp_seq_needed) ||
2798 time_before(j, rsp->gp_req_activity + gpssdelay) || 2638 time_before(j, rcu_state.gp_req_activity + gpssdelay) ||
2799 time_before(j, rsp->gp_activity + gpssdelay) || 2639 time_before(j, rcu_state.gp_activity + gpssdelay) ||
2800 atomic_xchg(&warned, 1)) { 2640 atomic_xchg(&warned, 1)) {
2801 raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */ 2641 raw_spin_unlock_rcu_node(rnp_root); /* irqs remain disabled. */
2802 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2642 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2803 return; 2643 return;
2804 } 2644 }
2805 pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n", 2645 pr_alert("%s: g%ld->%ld gar:%lu ga:%lu f%#x gs:%d %s->state:%#lx\n",
2806 __func__, (long)READ_ONCE(rsp->gp_seq), 2646 __func__, (long)READ_ONCE(rcu_state.gp_seq),
2807 (long)READ_ONCE(rnp_root->gp_seq_needed), 2647 (long)READ_ONCE(rnp_root->gp_seq_needed),
2808 j - rsp->gp_req_activity, j - rsp->gp_activity, 2648 j - rcu_state.gp_req_activity, j - rcu_state.gp_activity,
2809 rsp->gp_flags, rsp->gp_state, rsp->name, 2649 rcu_state.gp_flags, rcu_state.gp_state, rcu_state.name,
2810 rsp->gp_kthread ? rsp->gp_kthread->state : 0x1ffffL); 2650 rcu_state.gp_kthread ? rcu_state.gp_kthread->state : 0x1ffffL);
2811 WARN_ON(1); 2651 WARN_ON(1);
2812 if (rnp_root != rnp) 2652 if (rnp_root != rnp)
2813 raw_spin_unlock_rcu_node(rnp_root); 2653 raw_spin_unlock_rcu_node(rnp_root);
@@ -2815,69 +2655,65 @@ rcu_check_gp_start_stall(struct rcu_state *rsp, struct rcu_node *rnp,
2815} 2655}
2816 2656
2817/* 2657/*
2818 * This does the RCU core processing work for the specified rcu_state 2658 * This does the RCU core processing work for the specified rcu_data
2819 * and rcu_data structures. This may be called only from the CPU to 2659 * structures. This may be called only from the CPU to whom the rdp
2820 * whom the rdp belongs. 2660 * belongs.
2821 */ 2661 */
2822static void 2662static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
2823__rcu_process_callbacks(struct rcu_state *rsp)
2824{ 2663{
2825 unsigned long flags; 2664 unsigned long flags;
2826 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2665 struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
2827 struct rcu_node *rnp = rdp->mynode; 2666 struct rcu_node *rnp = rdp->mynode;
2828 2667
2668 if (cpu_is_offline(smp_processor_id()))
2669 return;
2670 trace_rcu_utilization(TPS("Start RCU core"));
2829 WARN_ON_ONCE(!rdp->beenonline); 2671 WARN_ON_ONCE(!rdp->beenonline);
2830 2672
2673 /* Report any deferred quiescent states if preemption enabled. */
2674 if (!(preempt_count() & PREEMPT_MASK)) {
2675 rcu_preempt_deferred_qs(current);
2676 } else if (rcu_preempt_need_deferred_qs(current)) {
2677 set_tsk_need_resched(current);
2678 set_preempt_need_resched();
2679 }
2680
2831 /* Update RCU state based on any recent quiescent states. */ 2681 /* Update RCU state based on any recent quiescent states. */
2832 rcu_check_quiescent_state(rsp, rdp); 2682 rcu_check_quiescent_state(rdp);
2833 2683
2834 /* No grace period and unregistered callbacks? */ 2684 /* No grace period and unregistered callbacks? */
2835 if (!rcu_gp_in_progress(rsp) && 2685 if (!rcu_gp_in_progress() &&
2836 rcu_segcblist_is_enabled(&rdp->cblist)) { 2686 rcu_segcblist_is_enabled(&rdp->cblist)) {
2837 local_irq_save(flags); 2687 local_irq_save(flags);
2838 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) 2688 if (!rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
2839 rcu_accelerate_cbs_unlocked(rsp, rnp, rdp); 2689 rcu_accelerate_cbs_unlocked(rnp, rdp);
2840 local_irq_restore(flags); 2690 local_irq_restore(flags);
2841 } 2691 }
2842 2692
2843 rcu_check_gp_start_stall(rsp, rnp, rdp); 2693 rcu_check_gp_start_stall(rnp, rdp);
2844 2694
2845 /* If there are callbacks ready, invoke them. */ 2695 /* If there are callbacks ready, invoke them. */
2846 if (rcu_segcblist_ready_cbs(&rdp->cblist)) 2696 if (rcu_segcblist_ready_cbs(&rdp->cblist))
2847 invoke_rcu_callbacks(rsp, rdp); 2697 invoke_rcu_callbacks(rdp);
2848 2698
2849 /* Do any needed deferred wakeups of rcuo kthreads. */ 2699 /* Do any needed deferred wakeups of rcuo kthreads. */
2850 do_nocb_deferred_wakeup(rdp); 2700 do_nocb_deferred_wakeup(rdp);
2851}
2852
2853/*
2854 * Do RCU core processing for the current CPU.
2855 */
2856static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
2857{
2858 struct rcu_state *rsp;
2859
2860 if (cpu_is_offline(smp_processor_id()))
2861 return;
2862 trace_rcu_utilization(TPS("Start RCU core"));
2863 for_each_rcu_flavor(rsp)
2864 __rcu_process_callbacks(rsp);
2865 trace_rcu_utilization(TPS("End RCU core")); 2701 trace_rcu_utilization(TPS("End RCU core"));
2866} 2702}
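For context, rcu_process_callbacks() now does all of the per-CPU core processing itself instead of looping over flavors, and it runs as the RCU_SOFTIRQ action. Roughly how it is wired up elsewhere in this file (a sketch, not part of this hunk):

    /* In rcu_init(): register the handler above as the RCU_SOFTIRQ action. */
    open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);

    /* invoke_rcu_core(), used by the scheduler-tick and call_rcu() paths,
     * raises that softirq whenever core processing is needed. */
    static void invoke_rcu_core(void)
    {
            if (cpu_online(smp_processor_id()))
                    raise_softirq(RCU_SOFTIRQ);
    }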
2867 2703
2868/* 2704/*
2869 * Schedule RCU callback invocation. If the specified type of RCU 2705 * Schedule RCU callback invocation. If the running implementation of RCU
2870 * does not support RCU priority boosting, just do a direct call, 2706 * does not support RCU priority boosting, just do a direct call, otherwise
2871 * otherwise wake up the per-CPU kernel kthread. Note that because we 2707 * wake up the per-CPU kernel kthread. Note that because we are running
2872 * are running on the current CPU with softirqs disabled, the 2708 * on the current CPU with softirqs disabled, the rcu_cpu_kthread_task
2873 * rcu_cpu_kthread_task cannot disappear out from under us. 2709 * cannot disappear out from under us.
2874 */ 2710 */
2875static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 2711static void invoke_rcu_callbacks(struct rcu_data *rdp)
2876{ 2712{
2877 if (unlikely(!READ_ONCE(rcu_scheduler_fully_active))) 2713 if (unlikely(!READ_ONCE(rcu_scheduler_fully_active)))
2878 return; 2714 return;
2879 if (likely(!rsp->boost)) { 2715 if (likely(!rcu_state.boost)) {
2880 rcu_do_batch(rsp, rdp); 2716 rcu_do_batch(rdp);
2881 return; 2717 return;
2882 } 2718 }
2883 invoke_rcu_callbacks_kthread(); 2719 invoke_rcu_callbacks_kthread();
@@ -2892,8 +2728,8 @@ static void invoke_rcu_core(void)
2892/* 2728/*
2893 * Handle any core-RCU processing required by a call_rcu() invocation. 2729 * Handle any core-RCU processing required by a call_rcu() invocation.
2894 */ 2730 */
2895static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, 2731static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head,
2896 struct rcu_head *head, unsigned long flags) 2732 unsigned long flags)
2897{ 2733{
2898 /* 2734 /*
2899 * If called from an extended quiescent state, invoke the RCU 2735 * If called from an extended quiescent state, invoke the RCU
@@ -2917,18 +2753,18 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
2917 rdp->qlen_last_fqs_check + qhimark)) { 2753 rdp->qlen_last_fqs_check + qhimark)) {
2918 2754
2919 /* Are we ignoring a completed grace period? */ 2755 /* Are we ignoring a completed grace period? */
2920 note_gp_changes(rsp, rdp); 2756 note_gp_changes(rdp);
2921 2757
2922 /* Start a new grace period if one not already started. */ 2758 /* Start a new grace period if one not already started. */
2923 if (!rcu_gp_in_progress(rsp)) { 2759 if (!rcu_gp_in_progress()) {
2924 rcu_accelerate_cbs_unlocked(rsp, rdp->mynode, rdp); 2760 rcu_accelerate_cbs_unlocked(rdp->mynode, rdp);
2925 } else { 2761 } else {
2926 /* Give the grace period a kick. */ 2762 /* Give the grace period a kick. */
2927 rdp->blimit = LONG_MAX; 2763 rdp->blimit = LONG_MAX;
2928 if (rsp->n_force_qs == rdp->n_force_qs_snap && 2764 if (rcu_state.n_force_qs == rdp->n_force_qs_snap &&
2929 rcu_segcblist_first_pend_cb(&rdp->cblist) != head) 2765 rcu_segcblist_first_pend_cb(&rdp->cblist) != head)
2930 force_quiescent_state(rsp); 2766 force_quiescent_state();
2931 rdp->n_force_qs_snap = rsp->n_force_qs; 2767 rdp->n_force_qs_snap = rcu_state.n_force_qs;
2932 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist); 2768 rdp->qlen_last_fqs_check = rcu_segcblist_n_cbs(&rdp->cblist);
2933 } 2769 }
2934 } 2770 }
@@ -2944,12 +2780,11 @@ static void rcu_leak_callback(struct rcu_head *rhp)
2944/* 2780/*
2945 * Helper function for call_rcu() and friends. The cpu argument will 2781 * Helper function for call_rcu() and friends. The cpu argument will
2946 * normally be -1, indicating "currently running CPU". It may specify 2782 * normally be -1, indicating "currently running CPU". It may specify
2947 * a CPU only if that CPU is a no-CBs CPU. Currently, only _rcu_barrier() 2783 * a CPU only if that CPU is a no-CBs CPU. Currently, only rcu_barrier()
2948 * is expected to specify a CPU. 2784 * is expected to specify a CPU.
2949 */ 2785 */
2950static void 2786static void
2951__call_rcu(struct rcu_head *head, rcu_callback_t func, 2787__call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy)
2952 struct rcu_state *rsp, int cpu, bool lazy)
2953{ 2788{
2954 unsigned long flags; 2789 unsigned long flags;
2955 struct rcu_data *rdp; 2790 struct rcu_data *rdp;
@@ -2971,14 +2806,14 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
2971 head->func = func; 2806 head->func = func;
2972 head->next = NULL; 2807 head->next = NULL;
2973 local_irq_save(flags); 2808 local_irq_save(flags);
2974 rdp = this_cpu_ptr(rsp->rda); 2809 rdp = this_cpu_ptr(&rcu_data);
2975 2810
2976 /* Add the callback to our list. */ 2811 /* Add the callback to our list. */
2977 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) { 2812 if (unlikely(!rcu_segcblist_is_enabled(&rdp->cblist)) || cpu != -1) {
2978 int offline; 2813 int offline;
2979 2814
2980 if (cpu != -1) 2815 if (cpu != -1)
2981 rdp = per_cpu_ptr(rsp->rda, cpu); 2816 rdp = per_cpu_ptr(&rcu_data, cpu);
2982 if (likely(rdp->mynode)) { 2817 if (likely(rdp->mynode)) {
2983 /* Post-boot, so this should be for a no-CBs CPU. */ 2818 /* Post-boot, so this should be for a no-CBs CPU. */
2984 offline = !__call_rcu_nocb(rdp, head, lazy, flags); 2819 offline = !__call_rcu_nocb(rdp, head, lazy, flags);
@@ -3001,72 +2836,60 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
3001 rcu_idle_count_callbacks_posted(); 2836 rcu_idle_count_callbacks_posted();
3002 2837
3003 if (__is_kfree_rcu_offset((unsigned long)func)) 2838 if (__is_kfree_rcu_offset((unsigned long)func))
3004 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func, 2839 trace_rcu_kfree_callback(rcu_state.name, head,
2840 (unsigned long)func,
3005 rcu_segcblist_n_lazy_cbs(&rdp->cblist), 2841 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
3006 rcu_segcblist_n_cbs(&rdp->cblist)); 2842 rcu_segcblist_n_cbs(&rdp->cblist));
3007 else 2843 else
3008 trace_rcu_callback(rsp->name, head, 2844 trace_rcu_callback(rcu_state.name, head,
3009 rcu_segcblist_n_lazy_cbs(&rdp->cblist), 2845 rcu_segcblist_n_lazy_cbs(&rdp->cblist),
3010 rcu_segcblist_n_cbs(&rdp->cblist)); 2846 rcu_segcblist_n_cbs(&rdp->cblist));
3011 2847
3012 /* Go handle any RCU core processing required. */ 2848 /* Go handle any RCU core processing required. */
3013 __call_rcu_core(rsp, rdp, head, flags); 2849 __call_rcu_core(rdp, head, flags);
3014 local_irq_restore(flags); 2850 local_irq_restore(flags);
3015} 2851}
3016 2852
3017/** 2853/**
3018 * call_rcu_sched() - Queue an RCU for invocation after sched grace period. 2854 * call_rcu() - Queue an RCU callback for invocation after a grace period.
3019 * @head: structure to be used for queueing the RCU updates.
3020 * @func: actual callback function to be invoked after the grace period
3021 *
3022 * The callback function will be invoked some time after a full grace
3023 * period elapses, in other words after all currently executing RCU
3024 * read-side critical sections have completed. call_rcu_sched() assumes
3025 * that the read-side critical sections end on enabling of preemption
3026 * or on voluntary preemption.
3027 * RCU read-side critical sections are delimited by:
3028 *
3029 * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
3030 * - anything that disables preemption.
3031 *
3032 * These may be nested.
3033 *
3034 * See the description of call_rcu() for more detailed information on
3035 * memory ordering guarantees.
3036 */
3037void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
3038{
3039 __call_rcu(head, func, &rcu_sched_state, -1, 0);
3040}
3041EXPORT_SYMBOL_GPL(call_rcu_sched);
3042
3043/**
3044 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
3045 * @head: structure to be used for queueing the RCU updates. 2855 * @head: structure to be used for queueing the RCU updates.
3046 * @func: actual callback function to be invoked after the grace period 2856 * @func: actual callback function to be invoked after the grace period
3047 * 2857 *
3048 * The callback function will be invoked some time after a full grace 2858 * The callback function will be invoked some time after a full grace
3049 * period elapses, in other words after all currently executing RCU 2859 * period elapses, in other words after all pre-existing RCU read-side
3050 * read-side critical sections have completed. call_rcu_bh() assumes 2860 * critical sections have completed. However, the callback function
3051 * that the read-side critical sections end on completion of a softirq 2861 * might well execute concurrently with RCU read-side critical sections
3052 * handler. This means that read-side critical sections in process 2862 * that started after call_rcu() was invoked. RCU read-side critical
3053 * context must not be interrupted by softirqs. This interface is to be 2863 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), and
3054 * used when most of the read-side critical sections are in softirq context. 2864 * may be nested. In addition, regions of code across which interrupts,
3055 * RCU read-side critical sections are delimited by: 2865 * preemption, or softirqs have been disabled also serve as RCU read-side
3056 * 2866 * critical sections. This includes hardware interrupt handlers, softirq
3057 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context, OR 2867 * handlers, and NMI handlers.
3058 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context. 2868 *
3059 * 2869 * Note that all CPUs must agree that the grace period extended beyond
3060 * These may be nested. 2870 * all pre-existing RCU read-side critical sections. On systems with more
3061 * 2871 * than one CPU, this means that when "func()" is invoked, each CPU is
3062 * See the description of call_rcu() for more detailed information on 2872 * guaranteed to have executed a full memory barrier since the end of its
3063 * memory ordering guarantees. 2873 * last RCU read-side critical section whose beginning preceded the call
3064 */ 2874 * to call_rcu(). It also means that each CPU executing an RCU read-side
3065void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) 2875 * critical section that continues beyond the start of "func()" must have
3066{ 2876 * executed a memory barrier after the call_rcu() but before the beginning
3067 __call_rcu(head, func, &rcu_bh_state, -1, 0); 2877 * of that RCU read-side critical section. Note that these guarantees
3068} 2878 * include CPUs that are offline, idle, or executing in user mode, as
3069EXPORT_SYMBOL_GPL(call_rcu_bh); 2879 * well as CPUs that are executing in the kernel.
2880 *
2881 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
2882 * resulting RCU callback function "func()", then both CPU A and CPU B are
2883 * guaranteed to execute a full memory barrier during the time interval
2884 * between the call to call_rcu() and the invocation of "func()" -- even
2885 * if CPU A and CPU B are the same CPU (but again only if the system has
2886 * more than one CPU).
2887 */
2888void call_rcu(struct rcu_head *head, rcu_callback_t func)
2889{
2890 __call_rcu(head, func, -1, 0);
2891}
2892EXPORT_SYMBOL_GPL(call_rcu);
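A minimal usage sketch of the consolidated call_rcu() (struct foo, free_foo_cb(), and retire_foo() are invented names, not part of this patch). Readers traverse the protected pointer under rcu_read_lock()/rcu_dereference(), so the callback can safely free the old object once the grace period ends:

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct foo {
            int data;
            struct rcu_head rh;
    };

    static void free_foo_cb(struct rcu_head *rhp)
    {
            struct foo *fp = container_of(rhp, struct foo, rh);

            kfree(fp);      /* Runs only after all pre-existing readers finish. */
    }

    static void retire_foo(struct foo *fp)
    {
            /* Caller has already unpublished fp, e.g. via rcu_assign_pointer(). */
            call_rcu(&fp->rh, free_foo_cb);
    }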
3070 2893
3071/* 2894/*
3072 * Queue an RCU callback for lazy invocation after a grace period. 2895 * Queue an RCU callback for lazy invocation after a grace period.
@@ -3075,110 +2898,12 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
3075 * callbacks in the list of pending callbacks. Until then, this 2898 * callbacks in the list of pending callbacks. Until then, this
3076 * function may only be called from __kfree_rcu(). 2899 * function may only be called from __kfree_rcu().
3077 */ 2900 */
3078void kfree_call_rcu(struct rcu_head *head, 2901void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
3079 rcu_callback_t func)
3080{ 2902{
3081 __call_rcu(head, func, rcu_state_p, -1, 1); 2903 __call_rcu(head, func, -1, 1);
3082} 2904}
3083EXPORT_SYMBOL_GPL(kfree_call_rcu); 2905EXPORT_SYMBOL_GPL(kfree_call_rcu);
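When the callback would do nothing but kfree() the enclosing object, the usual interface is the kfree_rcu() wrapper, which funnels into kfree_call_rcu() by encoding the rcu_head offset as the "function". A sketch reusing the hypothetical struct foo from the call_rcu() example above:

    static void retire_foo_lazily(struct foo *fp)
    {
            /* Equivalent to call_rcu() plus kfree(), but counted as a lazy callback. */
            kfree_rcu(fp, rh);
    }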
3084 2906
3085/*
3086 * Because a context switch is a grace period for RCU-sched and RCU-bh,
3087 * any blocking grace-period wait automatically implies a grace period
3088 * if there is only one CPU online at any point time during execution
3089 * of either synchronize_sched() or synchronize_rcu_bh(). It is OK to
3090 * occasionally incorrectly indicate that there are multiple CPUs online
3091 * when there was in fact only one the whole time, as this just adds
3092 * some overhead: RCU still operates correctly.
3093 */
3094static int rcu_blocking_is_gp(void)
3095{
3096 int ret;
3097
3098 might_sleep(); /* Check for RCU read-side critical section. */
3099 preempt_disable();
3100 ret = num_online_cpus() <= 1;
3101 preempt_enable();
3102 return ret;
3103}
3104
3105/**
3106 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
3107 *
3108 * Control will return to the caller some time after a full rcu-sched
3109 * grace period has elapsed, in other words after all currently executing
3110 * rcu-sched read-side critical sections have completed. These read-side
3111 * critical sections are delimited by rcu_read_lock_sched() and
3112 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
3113 * local_irq_disable(), and so on may be used in place of
3114 * rcu_read_lock_sched().
3115 *
3116 * This means that all preempt_disable code sequences, including NMI and
3117 * non-threaded hardware-interrupt handlers, in progress on entry will
3118 * have completed before this primitive returns. However, this does not
3119 * guarantee that softirq handlers will have completed, since in some
3120 * kernels, these handlers can run in process context, and can block.
3121 *
3122 * Note that this guarantee implies further memory-ordering guarantees.
3123 * On systems with more than one CPU, when synchronize_sched() returns,
3124 * each CPU is guaranteed to have executed a full memory barrier since the
3125 * end of its last RCU-sched read-side critical section whose beginning
3126 * preceded the call to synchronize_sched(). In addition, each CPU having
3127 * an RCU read-side critical section that extends beyond the return from
3128 * synchronize_sched() is guaranteed to have executed a full memory barrier
3129 * after the beginning of synchronize_sched() and before the beginning of
3130 * that RCU read-side critical section. Note that these guarantees include
3131 * CPUs that are offline, idle, or executing in user mode, as well as CPUs
3132 * that are executing in the kernel.
3133 *
3134 * Furthermore, if CPU A invoked synchronize_sched(), which returned
3135 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
3136 * to have executed a full memory barrier during the execution of
3137 * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
3138 * again only if the system has more than one CPU).
3139 */
3140void synchronize_sched(void)
3141{
3142 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
3143 lock_is_held(&rcu_lock_map) ||
3144 lock_is_held(&rcu_sched_lock_map),
3145 "Illegal synchronize_sched() in RCU-sched read-side critical section");
3146 if (rcu_blocking_is_gp())
3147 return;
3148 if (rcu_gp_is_expedited())
3149 synchronize_sched_expedited();
3150 else
3151 wait_rcu_gp(call_rcu_sched);
3152}
3153EXPORT_SYMBOL_GPL(synchronize_sched);
3154
3155/**
3156 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
3157 *
3158 * Control will return to the caller some time after a full rcu_bh grace
3159 * period has elapsed, in other words after all currently executing rcu_bh
3160 * read-side critical sections have completed. RCU read-side critical
3161 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
3162 * and may be nested.
3163 *
3164 * See the description of synchronize_sched() for more detailed information
3165 * on memory ordering guarantees.
3166 */
3167void synchronize_rcu_bh(void)
3168{
3169 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
3170 lock_is_held(&rcu_lock_map) ||
3171 lock_is_held(&rcu_sched_lock_map),
3172 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
3173 if (rcu_blocking_is_gp())
3174 return;
3175 if (rcu_gp_is_expedited())
3176 synchronize_rcu_bh_expedited();
3177 else
3178 wait_rcu_gp(call_rcu_bh);
3179}
3180EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
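With the flavors consolidated, the classic updater pattern that used to pick between synchronize_rcu(), synchronize_rcu_bh(), and synchronize_sched() now simply uses synchronize_rcu(). A sketch of that pattern, again reusing the hypothetical struct foo (gp, gp_lock, and update_foo() are likewise invented for illustration):

    static struct foo __rcu *gp;
    static DEFINE_SPINLOCK(gp_lock);

    static void update_foo(struct foo *newp)
    {
            struct foo *oldp;

            spin_lock(&gp_lock);
            oldp = rcu_dereference_protected(gp, lockdep_is_held(&gp_lock));
            rcu_assign_pointer(gp, newp);   /* Publish the replacement. */
            spin_unlock(&gp_lock);

            synchronize_rcu();              /* Wait out all pre-existing readers. */
            kfree(oldp);                    /* No reader can still see oldp. */
    }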
3181
3182/** 2907/**
3183 * get_state_synchronize_rcu - Snapshot current RCU state 2908 * get_state_synchronize_rcu - Snapshot current RCU state
3184 * 2909 *
@@ -3193,7 +2918,7 @@ unsigned long get_state_synchronize_rcu(void)
3193 * before the load from ->gp_seq. 2918 * before the load from ->gp_seq.
3194 */ 2919 */
3195 smp_mb(); /* ^^^ */ 2920 smp_mb(); /* ^^^ */
3196 return rcu_seq_snap(&rcu_state_p->gp_seq); 2921 return rcu_seq_snap(&rcu_state.gp_seq);
3197} 2922}
3198EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); 2923EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
3199 2924
@@ -3213,70 +2938,30 @@ EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
3213 */ 2938 */
3214void cond_synchronize_rcu(unsigned long oldstate) 2939void cond_synchronize_rcu(unsigned long oldstate)
3215{ 2940{
3216 if (!rcu_seq_done(&rcu_state_p->gp_seq, oldstate)) 2941 if (!rcu_seq_done(&rcu_state.gp_seq, oldstate))
3217 synchronize_rcu(); 2942 synchronize_rcu();
3218 else 2943 else
3219 smp_mb(); /* Ensure GP ends before subsequent accesses. */ 2944 smp_mb(); /* Ensure GP ends before subsequent accesses. */
3220} 2945}
3221EXPORT_SYMBOL_GPL(cond_synchronize_rcu); 2946EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
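get_state_synchronize_rcu() and cond_synchronize_rcu() let an updater snapshot the grace-period sequence, do unrelated work, and then block only if no full grace period has elapsed in the meantime. A small illustrative sketch (expensive_update() is an invented name):

    static void expensive_update(void)
    {
            unsigned long cookie = get_state_synchronize_rcu();

            /* ... potentially long-running preparation work ... */

            /* Sleeps only if a full grace period has not already elapsed. */
            cond_synchronize_rcu(cookie);
    }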
3222 2947
3223/**
3224 * get_state_synchronize_sched - Snapshot current RCU-sched state
3225 *
3226 * Returns a cookie that is used by a later call to cond_synchronize_sched()
3227 * to determine whether or not a full grace period has elapsed in the
3228 * meantime.
3229 */
3230unsigned long get_state_synchronize_sched(void)
3231{
3232 /*
3233 * Any prior manipulation of RCU-protected data must happen
3234 * before the load from ->gp_seq.
3235 */
3236 smp_mb(); /* ^^^ */
3237 return rcu_seq_snap(&rcu_sched_state.gp_seq);
3238}
3239EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
3240
3241/**
3242 * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
3243 *
3244 * @oldstate: return value from earlier call to get_state_synchronize_sched()
3245 *
3246 * If a full RCU-sched grace period has elapsed since the earlier call to
3247 * get_state_synchronize_sched(), just return. Otherwise, invoke
3248 * synchronize_sched() to wait for a full grace period.
3249 *
3250 * Yes, this function does not take counter wrap into account. But
3251 * counter wrap is harmless. If the counter wraps, we have waited for
3252 * more than 2 billion grace periods (and way more on a 64-bit system!),
3253 * so waiting for one additional grace period should be just fine.
3254 */
3255void cond_synchronize_sched(unsigned long oldstate)
3256{
3257 if (!rcu_seq_done(&rcu_sched_state.gp_seq, oldstate))
3258 synchronize_sched();
3259 else
3260 smp_mb(); /* Ensure GP ends before subsequent accesses. */
3261}
3262EXPORT_SYMBOL_GPL(cond_synchronize_sched);
3263
3264/* 2948/*
3265 * Check to see if there is any immediate RCU-related work to be done 2949 * Check to see if there is any immediate RCU-related work to be done by
3266 * by the current CPU, for the specified type of RCU, returning 1 if so. 2950 * the current CPU, returning 1 if so and zero otherwise. The checks are
3267 * The checks are in order of increasing expense: checks that can be 2951 * in order of increasing expense: checks that can be carried out against
3268 * carried out against CPU-local state are performed first. However, 2952 * CPU-local state are performed first. However, we must check for CPU
3269 * we must check for CPU stalls first, else we might not get a chance. 2953 * stalls first, else we might not get a chance.
3270 */ 2954 */
3271static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 2955static int rcu_pending(void)
3272{ 2956{
2957 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
3273 struct rcu_node *rnp = rdp->mynode; 2958 struct rcu_node *rnp = rdp->mynode;
3274 2959
3275 /* Check for CPU stalls, if enabled. */ 2960 /* Check for CPU stalls, if enabled. */
3276 check_cpu_stall(rsp, rdp); 2961 check_cpu_stall(rdp);
3277 2962
3278 /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */ 2963 /* Is this CPU a NO_HZ_FULL CPU that should ignore RCU? */
3279 if (rcu_nohz_full_cpu(rsp)) 2964 if (rcu_nohz_full_cpu())
3280 return 0; 2965 return 0;
3281 2966
3282 /* Is the RCU core waiting for a quiescent state from this CPU? */ 2967 /* Is the RCU core waiting for a quiescent state from this CPU? */
@@ -3288,7 +2973,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3288 return 1; 2973 return 1;
3289 2974
3290 /* Has RCU gone idle with this CPU needing another grace period? */ 2975 /* Has RCU gone idle with this CPU needing another grace period? */
3291 if (!rcu_gp_in_progress(rsp) && 2976 if (!rcu_gp_in_progress() &&
3292 rcu_segcblist_is_enabled(&rdp->cblist) && 2977 rcu_segcblist_is_enabled(&rdp->cblist) &&
3293 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL)) 2978 !rcu_segcblist_restempty(&rdp->cblist, RCU_NEXT_READY_TAIL))
3294 return 1; 2979 return 1;
@@ -3307,21 +2992,6 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
3307} 2992}
3308 2993
3309/* 2994/*
3310 * Check to see if there is any immediate RCU-related work to be done
3311 * by the current CPU, returning 1 if so. This function is part of the
3312 * RCU implementation; it is -not- an exported member of the RCU API.
3313 */
3314static int rcu_pending(void)
3315{
3316 struct rcu_state *rsp;
3317
3318 for_each_rcu_flavor(rsp)
3319 if (__rcu_pending(rsp, this_cpu_ptr(rsp->rda)))
3320 return 1;
3321 return 0;
3322}
3323
3324/*
3325 * Return true if the specified CPU has any callback. If all_lazy is 2995 * Return true if the specified CPU has any callback. If all_lazy is
3326 * non-NULL, store an indication of whether all callbacks are lazy. 2996 * non-NULL, store an indication of whether all callbacks are lazy.
3327 * (If there are no callbacks, all of them are deemed to be lazy.) 2997 * (If there are no callbacks, all of them are deemed to be lazy.)
@@ -3331,17 +3001,12 @@ static bool rcu_cpu_has_callbacks(bool *all_lazy)
3331 bool al = true; 3001 bool al = true;
3332 bool hc = false; 3002 bool hc = false;
3333 struct rcu_data *rdp; 3003 struct rcu_data *rdp;
3334 struct rcu_state *rsp;
3335 3004
3336 for_each_rcu_flavor(rsp) { 3005 rdp = this_cpu_ptr(&rcu_data);
3337 rdp = this_cpu_ptr(rsp->rda); 3006 if (!rcu_segcblist_empty(&rdp->cblist)) {
3338 if (rcu_segcblist_empty(&rdp->cblist))
3339 continue;
3340 hc = true; 3007 hc = true;
3341 if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist) || !all_lazy) { 3008 if (rcu_segcblist_n_nonlazy_cbs(&rdp->cblist))
3342 al = false; 3009 al = false;
3343 break;
3344 }
3345 } 3010 }
3346 if (all_lazy) 3011 if (all_lazy)
3347 *all_lazy = al; 3012 *all_lazy = al;
@@ -3349,81 +3014,80 @@ static bool rcu_cpu_has_callbacks(bool *all_lazy)
3349} 3014}
3350 3015
3351/* 3016/*
3352 * Helper function for _rcu_barrier() tracing. If tracing is disabled, 3017 * Helper function for rcu_barrier() tracing. If tracing is disabled,
3353 * the compiler is expected to optimize this away. 3018 * the compiler is expected to optimize this away.
3354 */ 3019 */
3355static void _rcu_barrier_trace(struct rcu_state *rsp, const char *s, 3020static void rcu_barrier_trace(const char *s, int cpu, unsigned long done)
3356 int cpu, unsigned long done)
3357{ 3021{
3358 trace_rcu_barrier(rsp->name, s, cpu, 3022 trace_rcu_barrier(rcu_state.name, s, cpu,
3359 atomic_read(&rsp->barrier_cpu_count), done); 3023 atomic_read(&rcu_state.barrier_cpu_count), done);
3360} 3024}
3361 3025
3362/* 3026/*
3363 * RCU callback function for _rcu_barrier(). If we are last, wake 3027 * RCU callback function for rcu_barrier(). If we are last, wake
3364 * up the task executing _rcu_barrier(). 3028 * up the task executing rcu_barrier().
3365 */ 3029 */
3366static void rcu_barrier_callback(struct rcu_head *rhp) 3030static void rcu_barrier_callback(struct rcu_head *rhp)
3367{ 3031{
3368 struct rcu_data *rdp = container_of(rhp, struct rcu_data, barrier_head); 3032 if (atomic_dec_and_test(&rcu_state.barrier_cpu_count)) {
3369 struct rcu_state *rsp = rdp->rsp; 3033 rcu_barrier_trace(TPS("LastCB"), -1,
3370 3034 rcu_state.barrier_sequence);
3371 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) { 3035 complete(&rcu_state.barrier_completion);
3372 _rcu_barrier_trace(rsp, TPS("LastCB"), -1,
3373 rsp->barrier_sequence);
3374 complete(&rsp->barrier_completion);
3375 } else { 3036 } else {
3376 _rcu_barrier_trace(rsp, TPS("CB"), -1, rsp->barrier_sequence); 3037 rcu_barrier_trace(TPS("CB"), -1, rcu_state.barrier_sequence);
3377 } 3038 }
3378} 3039}
3379 3040
3380/* 3041/*
3381 * Called with preemption disabled, and from cross-cpu IRQ context. 3042 * Called with preemption disabled, and from cross-cpu IRQ context.
3382 */ 3043 */
3383static void rcu_barrier_func(void *type) 3044static void rcu_barrier_func(void *unused)
3384{ 3045{
3385 struct rcu_state *rsp = type; 3046 struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
3386 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
3387 3047
3388 _rcu_barrier_trace(rsp, TPS("IRQ"), -1, rsp->barrier_sequence); 3048 rcu_barrier_trace(TPS("IRQ"), -1, rcu_state.barrier_sequence);
3389 rdp->barrier_head.func = rcu_barrier_callback; 3049 rdp->barrier_head.func = rcu_barrier_callback;
3390 debug_rcu_head_queue(&rdp->barrier_head); 3050 debug_rcu_head_queue(&rdp->barrier_head);
3391 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) { 3051 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
3392 atomic_inc(&rsp->barrier_cpu_count); 3052 atomic_inc(&rcu_state.barrier_cpu_count);
3393 } else { 3053 } else {
3394 debug_rcu_head_unqueue(&rdp->barrier_head); 3054 debug_rcu_head_unqueue(&rdp->barrier_head);
3395 _rcu_barrier_trace(rsp, TPS("IRQNQ"), -1, 3055 rcu_barrier_trace(TPS("IRQNQ"), -1,
3396 rsp->barrier_sequence); 3056 rcu_state.barrier_sequence);
3397 } 3057 }
3398} 3058}
3399 3059
3400/* 3060/**
3401 * Orchestrate the specified type of RCU barrier, waiting for all 3061 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
3402 * RCU callbacks of the specified type to complete. 3062 *
3063 * Note that this primitive does not necessarily wait for an RCU grace period
3064 * to complete. For example, if there are no RCU callbacks queued anywhere
3065 * in the system, then rcu_barrier() is within its rights to return
3066 * immediately, without waiting for anything, much less an RCU grace period.
3403 */ 3067 */
3404static void _rcu_barrier(struct rcu_state *rsp) 3068void rcu_barrier(void)
3405{ 3069{
3406 int cpu; 3070 int cpu;
3407 struct rcu_data *rdp; 3071 struct rcu_data *rdp;
3408 unsigned long s = rcu_seq_snap(&rsp->barrier_sequence); 3072 unsigned long s = rcu_seq_snap(&rcu_state.barrier_sequence);
3409 3073
3410 _rcu_barrier_trace(rsp, TPS("Begin"), -1, s); 3074 rcu_barrier_trace(TPS("Begin"), -1, s);
3411 3075
3412 /* Take mutex to serialize concurrent rcu_barrier() requests. */ 3076 /* Take mutex to serialize concurrent rcu_barrier() requests. */
3413 mutex_lock(&rsp->barrier_mutex); 3077 mutex_lock(&rcu_state.barrier_mutex);
3414 3078
3415 /* Did someone else do our work for us? */ 3079 /* Did someone else do our work for us? */
3416 if (rcu_seq_done(&rsp->barrier_sequence, s)) { 3080 if (rcu_seq_done(&rcu_state.barrier_sequence, s)) {
3417 _rcu_barrier_trace(rsp, TPS("EarlyExit"), -1, 3081 rcu_barrier_trace(TPS("EarlyExit"), -1,
3418 rsp->barrier_sequence); 3082 rcu_state.barrier_sequence);
3419 smp_mb(); /* caller's subsequent code after above check. */ 3083 smp_mb(); /* caller's subsequent code after above check. */
3420 mutex_unlock(&rsp->barrier_mutex); 3084 mutex_unlock(&rcu_state.barrier_mutex);
3421 return; 3085 return;
3422 } 3086 }
3423 3087
3424 /* Mark the start of the barrier operation. */ 3088 /* Mark the start of the barrier operation. */
3425 rcu_seq_start(&rsp->barrier_sequence); 3089 rcu_seq_start(&rcu_state.barrier_sequence);
3426 _rcu_barrier_trace(rsp, TPS("Inc1"), -1, rsp->barrier_sequence); 3090 rcu_barrier_trace(TPS("Inc1"), -1, rcu_state.barrier_sequence);
3427 3091
3428 /* 3092 /*
3429 * Initialize the count to one rather than to zero in order to 3093 * Initialize the count to one rather than to zero in order to
@@ -3431,8 +3095,8 @@ static void _rcu_barrier(struct rcu_state *rsp)
3431 * (or preemption of this task). Exclude CPU-hotplug operations 3095 * (or preemption of this task). Exclude CPU-hotplug operations
3432 * to ensure that no offline CPU has callbacks queued. 3096 * to ensure that no offline CPU has callbacks queued.
3433 */ 3097 */
3434 init_completion(&rsp->barrier_completion); 3098 init_completion(&rcu_state.barrier_completion);
3435 atomic_set(&rsp->barrier_cpu_count, 1); 3099 atomic_set(&rcu_state.barrier_cpu_count, 1);
3436 get_online_cpus(); 3100 get_online_cpus();
3437 3101
3438 /* 3102 /*
@@ -3443,26 +3107,26 @@ static void _rcu_barrier(struct rcu_state *rsp)
3443 for_each_possible_cpu(cpu) { 3107 for_each_possible_cpu(cpu) {
3444 if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu)) 3108 if (!cpu_online(cpu) && !rcu_is_nocb_cpu(cpu))
3445 continue; 3109 continue;
3446 rdp = per_cpu_ptr(rsp->rda, cpu); 3110 rdp = per_cpu_ptr(&rcu_data, cpu);
3447 if (rcu_is_nocb_cpu(cpu)) { 3111 if (rcu_is_nocb_cpu(cpu)) {
3448 if (!rcu_nocb_cpu_needs_barrier(rsp, cpu)) { 3112 if (!rcu_nocb_cpu_needs_barrier(cpu)) {
3449 _rcu_barrier_trace(rsp, TPS("OfflineNoCB"), cpu, 3113 rcu_barrier_trace(TPS("OfflineNoCB"), cpu,
3450 rsp->barrier_sequence); 3114 rcu_state.barrier_sequence);
3451 } else { 3115 } else {
3452 _rcu_barrier_trace(rsp, TPS("OnlineNoCB"), cpu, 3116 rcu_barrier_trace(TPS("OnlineNoCB"), cpu,
3453 rsp->barrier_sequence); 3117 rcu_state.barrier_sequence);
3454 smp_mb__before_atomic(); 3118 smp_mb__before_atomic();
3455 atomic_inc(&rsp->barrier_cpu_count); 3119 atomic_inc(&rcu_state.barrier_cpu_count);
3456 __call_rcu(&rdp->barrier_head, 3120 __call_rcu(&rdp->barrier_head,
3457 rcu_barrier_callback, rsp, cpu, 0); 3121 rcu_barrier_callback, cpu, 0);
3458 } 3122 }
3459 } else if (rcu_segcblist_n_cbs(&rdp->cblist)) { 3123 } else if (rcu_segcblist_n_cbs(&rdp->cblist)) {
3460 _rcu_barrier_trace(rsp, TPS("OnlineQ"), cpu, 3124 rcu_barrier_trace(TPS("OnlineQ"), cpu,
3461 rsp->barrier_sequence); 3125 rcu_state.barrier_sequence);
3462 smp_call_function_single(cpu, rcu_barrier_func, rsp, 1); 3126 smp_call_function_single(cpu, rcu_barrier_func, NULL, 1);
3463 } else { 3127 } else {
3464 _rcu_barrier_trace(rsp, TPS("OnlineNQ"), cpu, 3128 rcu_barrier_trace(TPS("OnlineNQ"), cpu,
3465 rsp->barrier_sequence); 3129 rcu_state.barrier_sequence);
3466 } 3130 }
3467 } 3131 }
3468 put_online_cpus(); 3132 put_online_cpus();
@@ -3471,37 +3135,20 @@ static void _rcu_barrier(struct rcu_state *rsp)
3471 * Now that we have an rcu_barrier_callback() callback on each 3135 * Now that we have an rcu_barrier_callback() callback on each
3472 * CPU, and thus each counted, remove the initial count. 3136 * CPU, and thus each counted, remove the initial count.
3473 */ 3137 */
3474 if (atomic_dec_and_test(&rsp->barrier_cpu_count)) 3138 if (atomic_dec_and_test(&rcu_state.barrier_cpu_count))
3475 complete(&rsp->barrier_completion); 3139 complete(&rcu_state.barrier_completion);
3476 3140
3477 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */ 3141 /* Wait for all rcu_barrier_callback() callbacks to be invoked. */
3478 wait_for_completion(&rsp->barrier_completion); 3142 wait_for_completion(&rcu_state.barrier_completion);
3479 3143
3480 /* Mark the end of the barrier operation. */ 3144 /* Mark the end of the barrier operation. */
3481 _rcu_barrier_trace(rsp, TPS("Inc2"), -1, rsp->barrier_sequence); 3145 rcu_barrier_trace(TPS("Inc2"), -1, rcu_state.barrier_sequence);
3482 rcu_seq_end(&rsp->barrier_sequence); 3146 rcu_seq_end(&rcu_state.barrier_sequence);
3483 3147
3484 /* Other rcu_barrier() invocations can now safely proceed. */ 3148 /* Other rcu_barrier() invocations can now safely proceed. */
3485 mutex_unlock(&rsp->barrier_mutex); 3149 mutex_unlock(&rcu_state.barrier_mutex);
3486}
3487
3488/**
3489 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
3490 */
3491void rcu_barrier_bh(void)
3492{
3493 _rcu_barrier(&rcu_bh_state);
3494}
3495EXPORT_SYMBOL_GPL(rcu_barrier_bh);
3496
3497/**
3498 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
3499 */
3500void rcu_barrier_sched(void)
3501{
3502 _rcu_barrier(&rcu_sched_state);
3503} 3150}
3504EXPORT_SYMBOL_GPL(rcu_barrier_sched); 3151EXPORT_SYMBOL_GPL(rcu_barrier);
3505 3152
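The counting scheme above starts ->barrier_cpu_count at one so the completion cannot fire while barrier callbacks are still being posted; the initial reference is dropped only after every CPU has been visited. A stand-alone sketch of that pattern using C11 atomics in place of the kernel's atomic_t and completion primitives (barrier_put(), barrier_done, and the fixed four-CPU loop are illustrative names, not kernel APIs):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int barrier_count = 1;    /* starts at one: the orchestrator's reference */
static atomic_bool barrier_done;        /* stands in for rcu_state.barrier_completion */

/* Counterpart of rcu_barrier_callback(): drop one reference, complete on zero. */
static void barrier_put(void)
{
        if (atomic_fetch_sub(&barrier_count, 1) == 1)
                atomic_store(&barrier_done, true);
}

int main(void)
{
        int ncpus = 4;                          /* pretend four CPUs have callbacks queued */

        for (int cpu = 0; cpu < ncpus; cpu++)   /* "entrain" one barrier callback per CPU */
                atomic_fetch_add(&barrier_count, 1);
        barrier_put();                          /* all queued: drop the initial reference */
        for (int cpu = 0; cpu < ncpus; cpu++)   /* callbacks run later and drop theirs */
                barrier_put();
        printf("barrier complete: %d\n", atomic_load(&barrier_done));   /* prints 1 */
        return 0;
}

Only once both the initial reference and every per-CPU reference are gone does the completion fire, which is exactly why a zero-initialized count could complete too early.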
3506/* 3153/*
3507 * Propagate ->qsmaskinit bits up the rcu_node tree to account for the 3154 * Propagate ->qsmaskinit bits up the rcu_node tree to account for the
@@ -3535,46 +3182,46 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
3535 * Do boot-time initialization of a CPU's per-CPU RCU data. 3182 * Do boot-time initialization of a CPU's per-CPU RCU data.
3536 */ 3183 */
3537static void __init 3184static void __init
3538rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp) 3185rcu_boot_init_percpu_data(int cpu)
3539{ 3186{
3540 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3187 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
3541 3188
3542 /* Set up local state, ensuring consistent view of global state. */ 3189 /* Set up local state, ensuring consistent view of global state. */
3543 rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu); 3190 rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
3544 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 3191 WARN_ON_ONCE(rdp->dynticks_nesting != 1);
3545 WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != 1); 3192 WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp)));
3546 WARN_ON_ONCE(rcu_dynticks_in_eqs(rcu_dynticks_snap(rdp->dynticks))); 3193 rdp->rcu_ofl_gp_seq = rcu_state.gp_seq;
3547 rdp->rcu_ofl_gp_seq = rsp->gp_seq;
3548 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED; 3194 rdp->rcu_ofl_gp_flags = RCU_GP_CLEANED;
3549 rdp->rcu_onl_gp_seq = rsp->gp_seq; 3195 rdp->rcu_onl_gp_seq = rcu_state.gp_seq;
3550 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED; 3196 rdp->rcu_onl_gp_flags = RCU_GP_CLEANED;
3551 rdp->cpu = cpu; 3197 rdp->cpu = cpu;
3552 rdp->rsp = rsp;
3553 rcu_boot_init_nocb_percpu_data(rdp); 3198 rcu_boot_init_nocb_percpu_data(rdp);
3554} 3199}
3555 3200
3556/* 3201/*
3557 * Initialize a CPU's per-CPU RCU data. Note that only one online or 3202 * Invoked early in the CPU-online process, when pretty much all services
3203 * are available. The incoming CPU is not present.
3204 *
3205 * Initializes a CPU's per-CPU RCU data. Note that only one online or
3558 * offline event can be happening at a given time. Note also that we can 3206 * offline event can be happening at a given time. Note also that we can
3559 * accept some slop in the rsp->gp_seq access due to the fact that this 3207 * accept some slop in the ->gp_seq access due to the fact that this
3560 * CPU cannot possibly have any RCU callbacks in flight yet. 3208 * CPU cannot possibly have any RCU callbacks in flight yet.
3561 */ 3209 */
3562static void 3210int rcutree_prepare_cpu(unsigned int cpu)
3563rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3564{ 3211{
3565 unsigned long flags; 3212 unsigned long flags;
3566 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3213 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
3567 struct rcu_node *rnp = rcu_get_root(rsp); 3214 struct rcu_node *rnp = rcu_get_root();
3568 3215
3569 /* Set up local state, ensuring consistent view of global state. */ 3216 /* Set up local state, ensuring consistent view of global state. */
3570 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3217 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3571 rdp->qlen_last_fqs_check = 0; 3218 rdp->qlen_last_fqs_check = 0;
3572 rdp->n_force_qs_snap = rsp->n_force_qs; 3219 rdp->n_force_qs_snap = rcu_state.n_force_qs;
3573 rdp->blimit = blimit; 3220 rdp->blimit = blimit;
3574 if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */ 3221 if (rcu_segcblist_empty(&rdp->cblist) && /* No early-boot CBs? */
3575 !init_nocb_callback_list(rdp)) 3222 !init_nocb_callback_list(rdp))
3576 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ 3223 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */
3577 rdp->dynticks->dynticks_nesting = 1; /* CPU not up, no tearing. */ 3224 rdp->dynticks_nesting = 1; /* CPU not up, no tearing. */
3578 rcu_dynticks_eqs_online(); 3225 rcu_dynticks_eqs_online();
3579 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ 3226 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
3580 3227
@@ -3589,25 +3236,11 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3589 rdp->gp_seq = rnp->gp_seq; 3236 rdp->gp_seq = rnp->gp_seq;
3590 rdp->gp_seq_needed = rnp->gp_seq; 3237 rdp->gp_seq_needed = rnp->gp_seq;
3591 rdp->cpu_no_qs.b.norm = true; 3238 rdp->cpu_no_qs.b.norm = true;
3592 rdp->rcu_qs_ctr_snap = per_cpu(rcu_dynticks.rcu_qs_ctr, cpu);
3593 rdp->core_needs_qs = false; 3239 rdp->core_needs_qs = false;
3594 rdp->rcu_iw_pending = false; 3240 rdp->rcu_iw_pending = false;
3595 rdp->rcu_iw_gp_seq = rnp->gp_seq - 1; 3241 rdp->rcu_iw_gp_seq = rnp->gp_seq - 1;
3596 trace_rcu_grace_period(rsp->name, rdp->gp_seq, TPS("cpuonl")); 3242 trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl"));
3597 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3243 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3598}
3599
3600/*
3601 * Invoked early in the CPU-online process, when pretty much all
3602 * services are available. The incoming CPU is not present.
3603 */
3604int rcutree_prepare_cpu(unsigned int cpu)
3605{
3606 struct rcu_state *rsp;
3607
3608 for_each_rcu_flavor(rsp)
3609 rcu_init_percpu_data(cpu, rsp);
3610
3611 rcu_prepare_kthreads(cpu); 3244 rcu_prepare_kthreads(cpu);
3612 rcu_spawn_all_nocb_kthreads(cpu); 3245 rcu_spawn_all_nocb_kthreads(cpu);
3613 3246
@@ -3619,7 +3252,7 @@ int rcutree_prepare_cpu(unsigned int cpu)
3619 */ 3252 */
3620static void rcutree_affinity_setting(unsigned int cpu, int outgoing) 3253static void rcutree_affinity_setting(unsigned int cpu, int outgoing)
3621{ 3254{
3622 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); 3255 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
3623 3256
3624 rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); 3257 rcu_boost_kthread_setaffinity(rdp->mynode, outgoing);
3625} 3258}
@@ -3633,15 +3266,12 @@ int rcutree_online_cpu(unsigned int cpu)
3633 unsigned long flags; 3266 unsigned long flags;
3634 struct rcu_data *rdp; 3267 struct rcu_data *rdp;
3635 struct rcu_node *rnp; 3268 struct rcu_node *rnp;
3636 struct rcu_state *rsp;
3637 3269
3638 for_each_rcu_flavor(rsp) { 3270 rdp = per_cpu_ptr(&rcu_data, cpu);
3639 rdp = per_cpu_ptr(rsp->rda, cpu); 3271 rnp = rdp->mynode;
3640 rnp = rdp->mynode; 3272 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3641 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3273 rnp->ffmask |= rdp->grpmask;
3642 rnp->ffmask |= rdp->grpmask; 3274 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3643 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3644 }
3645 if (IS_ENABLED(CONFIG_TREE_SRCU)) 3275 if (IS_ENABLED(CONFIG_TREE_SRCU))
3646 srcu_online_cpu(cpu); 3276 srcu_online_cpu(cpu);
3647 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) 3277 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
@@ -3660,15 +3290,12 @@ int rcutree_offline_cpu(unsigned int cpu)
3660 unsigned long flags; 3290 unsigned long flags;
3661 struct rcu_data *rdp; 3291 struct rcu_data *rdp;
3662 struct rcu_node *rnp; 3292 struct rcu_node *rnp;
3663 struct rcu_state *rsp;
3664 3293
3665 for_each_rcu_flavor(rsp) { 3294 rdp = per_cpu_ptr(&rcu_data, cpu);
3666 rdp = per_cpu_ptr(rsp->rda, cpu); 3295 rnp = rdp->mynode;
3667 rnp = rdp->mynode; 3296 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3668 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3297 rnp->ffmask &= ~rdp->grpmask;
3669 rnp->ffmask &= ~rdp->grpmask; 3298 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3670 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3671 }
3672 3299
3673 rcutree_affinity_setting(cpu, cpu); 3300 rcutree_affinity_setting(cpu, cpu);
3674 if (IS_ENABLED(CONFIG_TREE_SRCU)) 3301 if (IS_ENABLED(CONFIG_TREE_SRCU))
@@ -3676,32 +3303,6 @@ int rcutree_offline_cpu(unsigned int cpu)
3676 return 0; 3303 return 0;
3677} 3304}
3678 3305
3679/*
3680 * Near the end of the offline process. We do only tracing here.
3681 */
3682int rcutree_dying_cpu(unsigned int cpu)
3683{
3684 struct rcu_state *rsp;
3685
3686 for_each_rcu_flavor(rsp)
3687 rcu_cleanup_dying_cpu(rsp);
3688 return 0;
3689}
3690
3691/*
3692 * The outgoing CPU is gone and we are running elsewhere.
3693 */
3694int rcutree_dead_cpu(unsigned int cpu)
3695{
3696 struct rcu_state *rsp;
3697
3698 for_each_rcu_flavor(rsp) {
3699 rcu_cleanup_dead_cpu(cpu, rsp);
3700 do_nocb_deferred_wakeup(per_cpu_ptr(rsp->rda, cpu));
3701 }
3702 return 0;
3703}
3704
3705static DEFINE_PER_CPU(int, rcu_cpu_started); 3306static DEFINE_PER_CPU(int, rcu_cpu_started);
3706 3307
3707/* 3308/*
@@ -3723,137 +3324,113 @@ void rcu_cpu_starting(unsigned int cpu)
3723 unsigned long oldmask; 3324 unsigned long oldmask;
3724 struct rcu_data *rdp; 3325 struct rcu_data *rdp;
3725 struct rcu_node *rnp; 3326 struct rcu_node *rnp;
3726 struct rcu_state *rsp;
3727 3327
3728 if (per_cpu(rcu_cpu_started, cpu)) 3328 if (per_cpu(rcu_cpu_started, cpu))
3729 return; 3329 return;
3730 3330
3731 per_cpu(rcu_cpu_started, cpu) = 1; 3331 per_cpu(rcu_cpu_started, cpu) = 1;
3732 3332
3733 for_each_rcu_flavor(rsp) { 3333 rdp = per_cpu_ptr(&rcu_data, cpu);
3734 rdp = per_cpu_ptr(rsp->rda, cpu); 3334 rnp = rdp->mynode;
3735 rnp = rdp->mynode; 3335 mask = rdp->grpmask;
3736 mask = rdp->grpmask; 3336 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3737 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3337 rnp->qsmaskinitnext |= mask;
3738 rnp->qsmaskinitnext |= mask; 3338 oldmask = rnp->expmaskinitnext;
3739 oldmask = rnp->expmaskinitnext; 3339 rnp->expmaskinitnext |= mask;
3740 rnp->expmaskinitnext |= mask; 3340 oldmask ^= rnp->expmaskinitnext;
3741 oldmask ^= rnp->expmaskinitnext; 3341 nbits = bitmap_weight(&oldmask, BITS_PER_LONG);
3742 nbits = bitmap_weight(&oldmask, BITS_PER_LONG); 3342 /* Allow lockless access for expedited grace periods. */
3743 /* Allow lockless access for expedited grace periods. */ 3343 smp_store_release(&rcu_state.ncpus, rcu_state.ncpus + nbits); /* ^^^ */
3744 smp_store_release(&rsp->ncpus, rsp->ncpus + nbits); /* ^^^ */ 3344 rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */
3745 rcu_gpnum_ovf(rnp, rdp); /* Offline-induced counter wrap? */ 3345 rdp->rcu_onl_gp_seq = READ_ONCE(rcu_state.gp_seq);
3746 rdp->rcu_onl_gp_seq = READ_ONCE(rsp->gp_seq); 3346 rdp->rcu_onl_gp_flags = READ_ONCE(rcu_state.gp_flags);
3747 rdp->rcu_onl_gp_flags = READ_ONCE(rsp->gp_flags); 3347 if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */
3748 if (rnp->qsmask & mask) { /* RCU waiting on incoming CPU? */ 3348 /* Report QS -after- changing ->qsmaskinitnext! */
3749 /* Report QS -after- changing ->qsmaskinitnext! */ 3349 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
3750 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); 3350 } else {
3751 } else { 3351 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3752 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3753 }
3754 } 3352 }
3755 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */ 3353 smp_mb(); /* Ensure RCU read-side usage follows above initialization. */
3756} 3354}
3757 3355
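rcu_cpu_starting() above ORs the incoming CPU's bit into ->expmaskinitnext, XORs the old and new masks, and uses the population count of the difference to bump ->ncpus, publishing the result with a release store so expedited grace periods can read it locklessly. The bit-counting idiom in isolation (mask_or_count_new() is a made-up helper; __builtin_popcountl stands in for bitmap_weight()):

#include <stdio.h>

/* OR @add into @*mask and return how many bits were newly set. */
static int mask_or_count_new(unsigned long *mask, unsigned long add)
{
        unsigned long old = *mask;

        *mask |= add;
        return __builtin_popcountl(old ^ *mask);        /* bits that went 0 -> 1 */
}

int main(void)
{
        unsigned long expmaskinitnext = 0x5;    /* CPUs 0 and 2 already accounted for */

        /* CPU 1 comes online: exactly one new bit, so ncpus grows by one. */
        printf("new bits: %d\n", mask_or_count_new(&expmaskinitnext, 0x2));
        return 0;
}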
3758#ifdef CONFIG_HOTPLUG_CPU 3356#ifdef CONFIG_HOTPLUG_CPU
3759/* 3357/*
3760 * The CPU is exiting the idle loop into the arch_cpu_idle_dead() 3358 * The outgoing CPU has no further need of RCU, so remove it from
3761 * function. We now remove it from the rcu_node tree's ->qsmaskinitnext 3359 * the rcu_node tree's ->qsmaskinitnext bit masks.
3762 * bit masks. 3360 *
3361 * Note that this function is special in that it is invoked directly
3362 * from the outgoing CPU rather than from the cpuhp_step mechanism.
3363 * This is because this function must be invoked at a precise location.
3763 */ 3364 */
3764static void rcu_cleanup_dying_idle_cpu(int cpu, struct rcu_state *rsp) 3365void rcu_report_dead(unsigned int cpu)
3765{ 3366{
3766 unsigned long flags; 3367 unsigned long flags;
3767 unsigned long mask; 3368 unsigned long mask;
3768 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3369 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
3769 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ 3370 struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */
3770 3371
3372 /* QS for any half-done expedited grace period. */
3373 preempt_disable();
3374 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
3375 preempt_enable();
3376 rcu_preempt_deferred_qs(current);
3377
3771 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */ 3378 /* Remove outgoing CPU from mask in the leaf rcu_node structure. */
3772 mask = rdp->grpmask; 3379 mask = rdp->grpmask;
3773 spin_lock(&rsp->ofl_lock); 3380 raw_spin_lock(&rcu_state.ofl_lock);
3774 raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */ 3381 raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Enforce GP memory-order guarantee. */
3775 rdp->rcu_ofl_gp_seq = READ_ONCE(rsp->gp_seq); 3382 rdp->rcu_ofl_gp_seq = READ_ONCE(rcu_state.gp_seq);
3776 rdp->rcu_ofl_gp_flags = READ_ONCE(rsp->gp_flags); 3383 rdp->rcu_ofl_gp_flags = READ_ONCE(rcu_state.gp_flags);
3777 if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */ 3384 if (rnp->qsmask & mask) { /* RCU waiting on outgoing CPU? */
3778 /* Report quiescent state -before- changing ->qsmaskinitnext! */ 3385 /* Report quiescent state -before- changing ->qsmaskinitnext! */
3779 rcu_report_qs_rnp(mask, rsp, rnp, rnp->gp_seq, flags); 3386 rcu_report_qs_rnp(mask, rnp, rnp->gp_seq, flags);
3780 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3387 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3781 } 3388 }
3782 rnp->qsmaskinitnext &= ~mask; 3389 rnp->qsmaskinitnext &= ~mask;
3783 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 3390 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3784 spin_unlock(&rsp->ofl_lock); 3391 raw_spin_unlock(&rcu_state.ofl_lock);
3785}
3786
3787/*
3788 * The outgoing CPU has no further need of RCU, so remove it from
3789 * the list of CPUs that RCU must track.
3790 *
3791 * Note that this function is special in that it is invoked directly
3792 * from the outgoing CPU rather than from the cpuhp_step mechanism.
3793 * This is because this function must be invoked at a precise location.
3794 */
3795void rcu_report_dead(unsigned int cpu)
3796{
3797 struct rcu_state *rsp;
3798
3799 /* QS for any half-done expedited RCU-sched GP. */
3800 preempt_disable();
3801 rcu_report_exp_rdp(&rcu_sched_state,
3802 this_cpu_ptr(rcu_sched_state.rda), true);
3803 preempt_enable();
3804 for_each_rcu_flavor(rsp)
3805 rcu_cleanup_dying_idle_cpu(cpu, rsp);
3806 3392
3807 per_cpu(rcu_cpu_started, cpu) = 0; 3393 per_cpu(rcu_cpu_started, cpu) = 0;
3808} 3394}
3809 3395
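Both rcu_cpu_starting() and rcu_report_dead() toggle a single bit in the leaf rcu_node's ->qsmaskinitnext, computed from the CPU number relative to that leaf's ->grplo (the leaf_node_cpu_bit() macro later in tree.h). A minimal model of that bookkeeping, with a toy struct and no locking:

#include <stdio.h>

#define BIT(n) (1UL << (n))

struct leaf_node {                      /* toy stand-in for struct rcu_node */
        int grplo, grphi;               /* lowest/highest CPU served by this leaf */
        unsigned long qsmaskinitnext;   /* one bit per online CPU */
};

/* Same idea as leaf_node_cpu_bit(): bits are relative to the leaf's CPU range. */
static unsigned long cpu_bit(const struct leaf_node *rnp, int cpu)
{
        return BIT(cpu - rnp->grplo);
}

int main(void)
{
        struct leaf_node rnp = { .grplo = 16, .grphi = 31, .qsmaskinitnext = 0 };

        rnp.qsmaskinitnext |= cpu_bit(&rnp, 18);        /* rcu_cpu_starting() analogue */
        rnp.qsmaskinitnext &= ~cpu_bit(&rnp, 18);       /* rcu_report_dead() analogue */
        printf("mask now %#lx\n", rnp.qsmaskinitnext);  /* back to 0 */
        return 0;
}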
3810/* Migrate the dead CPU's callbacks to the current CPU. */ 3396/*
3811static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp) 3397 * The outgoing CPU has just passed through the dying-idle state, and we
3398 * are being invoked from the CPU that was IPIed to continue the offline
3399 * operation. Migrate the outgoing CPU's callbacks to the current CPU.
3400 */
3401void rcutree_migrate_callbacks(int cpu)
3812{ 3402{
3813 unsigned long flags; 3403 unsigned long flags;
3814 struct rcu_data *my_rdp; 3404 struct rcu_data *my_rdp;
3815 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 3405 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
3816 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 3406 struct rcu_node *rnp_root = rcu_get_root();
3817 bool needwake; 3407 bool needwake;
3818 3408
3819 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist)) 3409 if (rcu_is_nocb_cpu(cpu) || rcu_segcblist_empty(&rdp->cblist))
3820 return; /* No callbacks to migrate. */ 3410 return; /* No callbacks to migrate. */
3821 3411
3822 local_irq_save(flags); 3412 local_irq_save(flags);
3823 my_rdp = this_cpu_ptr(rsp->rda); 3413 my_rdp = this_cpu_ptr(&rcu_data);
3824 if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) { 3414 if (rcu_nocb_adopt_orphan_cbs(my_rdp, rdp, flags)) {
3825 local_irq_restore(flags); 3415 local_irq_restore(flags);
3826 return; 3416 return;
3827 } 3417 }
3828 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */ 3418 raw_spin_lock_rcu_node(rnp_root); /* irqs already disabled. */
3829 /* Leverage recent GPs and set GP for new callbacks. */ 3419 /* Leverage recent GPs and set GP for new callbacks. */
3830 needwake = rcu_advance_cbs(rsp, rnp_root, rdp) || 3420 needwake = rcu_advance_cbs(rnp_root, rdp) ||
3831 rcu_advance_cbs(rsp, rnp_root, my_rdp); 3421 rcu_advance_cbs(rnp_root, my_rdp);
3832 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist); 3422 rcu_segcblist_merge(&my_rdp->cblist, &rdp->cblist);
3833 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) != 3423 WARN_ON_ONCE(rcu_segcblist_empty(&my_rdp->cblist) !=
3834 !rcu_segcblist_n_cbs(&my_rdp->cblist)); 3424 !rcu_segcblist_n_cbs(&my_rdp->cblist));
3835 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags); 3425 raw_spin_unlock_irqrestore_rcu_node(rnp_root, flags);
3836 if (needwake) 3426 if (needwake)
3837 rcu_gp_kthread_wake(rsp); 3427 rcu_gp_kthread_wake();
3838 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 || 3428 WARN_ONCE(rcu_segcblist_n_cbs(&rdp->cblist) != 0 ||
3839 !rcu_segcblist_empty(&rdp->cblist), 3429 !rcu_segcblist_empty(&rdp->cblist),
3840 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n", 3430 "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, 1stCB=%p\n",
3841 cpu, rcu_segcblist_n_cbs(&rdp->cblist), 3431 cpu, rcu_segcblist_n_cbs(&rdp->cblist),
3842 rcu_segcblist_first_cb(&rdp->cblist)); 3432 rcu_segcblist_first_cb(&rdp->cblist));
3843} 3433}
3844
3845/*
3846 * The outgoing CPU has just passed through the dying-idle state,
3847 * and we are being invoked from the CPU that was IPIed to continue the
3848 * offline operation. We need to migrate the outgoing CPU's callbacks.
3849 */
3850void rcutree_migrate_callbacks(int cpu)
3851{
3852 struct rcu_state *rsp;
3853
3854 for_each_rcu_flavor(rsp)
3855 rcu_migrate_callbacks(cpu, rsp);
3856}
3857#endif 3434#endif
3858 3435
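rcutree_migrate_callbacks() above hands the dead CPU's callbacks to the surviving CPU through rcu_segcblist_merge(). Leaving the per-segment accounting aside, the core of such a merge is an O(1) splice of one tail-pointer list onto another; a simplified sketch with hypothetical types, not the kernel's rcu_segcblist implementation:

#include <stdio.h>
#include <stddef.h>

struct cb {
        struct cb *next;
};

struct cblist {                 /* head pointer plus pointer to the tail link */
        struct cb *head;
        struct cb **tail;
        long len;
};

static void cblist_init(struct cblist *l)
{
        l->head = NULL;
        l->tail = &l->head;
        l->len = 0;
}

static void cblist_enqueue(struct cblist *l, struct cb *c)
{
        c->next = NULL;
        *l->tail = c;
        l->tail = &c->next;
        l->len++;
}

/* Splice all of @src onto the end of @dst in O(1), leaving @src empty. */
static void cblist_merge(struct cblist *dst, struct cblist *src)
{
        if (!src->head)
                return;
        *dst->tail = src->head;
        dst->tail = src->tail;
        dst->len += src->len;
        cblist_init(src);
}

int main(void)
{
        struct cblist mine, dead;
        struct cb a, b, c;

        cblist_init(&mine);
        cblist_init(&dead);
        cblist_enqueue(&mine, &a);
        cblist_enqueue(&dead, &b);
        cblist_enqueue(&dead, &c);
        cblist_merge(&mine, &dead);
        printf("mine: %ld callbacks, dead: %ld\n", mine.len, dead.len);  /* 3 and 0 */
        return 0;
}

The tail pointer is what makes the splice constant-time: the donor's whole chain is linked in by rewriting a single pointer, and the donor list is then reinitialized to empty, matching the WARN_ONCE() check above that the source list really ends up empty.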
3859/* 3436/*
@@ -3881,14 +3458,13 @@ static int rcu_pm_notify(struct notifier_block *self,
3881} 3458}
3882 3459
3883/* 3460/*
3884 * Spawn the kthreads that handle each RCU flavor's grace periods. 3461 * Spawn the kthreads that handle RCU's grace periods.
3885 */ 3462 */
3886static int __init rcu_spawn_gp_kthread(void) 3463static int __init rcu_spawn_gp_kthread(void)
3887{ 3464{
3888 unsigned long flags; 3465 unsigned long flags;
3889 int kthread_prio_in = kthread_prio; 3466 int kthread_prio_in = kthread_prio;
3890 struct rcu_node *rnp; 3467 struct rcu_node *rnp;
3891 struct rcu_state *rsp;
3892 struct sched_param sp; 3468 struct sched_param sp;
3893 struct task_struct *t; 3469 struct task_struct *t;
3894 3470
@@ -3908,19 +3484,17 @@ static int __init rcu_spawn_gp_kthread(void)
3908 kthread_prio, kthread_prio_in); 3484 kthread_prio, kthread_prio_in);
3909 3485
3910 rcu_scheduler_fully_active = 1; 3486 rcu_scheduler_fully_active = 1;
3911 for_each_rcu_flavor(rsp) { 3487 t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name);
3912 t = kthread_create(rcu_gp_kthread, rsp, "%s", rsp->name); 3488 BUG_ON(IS_ERR(t));
3913 BUG_ON(IS_ERR(t)); 3489 rnp = rcu_get_root();
3914 rnp = rcu_get_root(rsp); 3490 raw_spin_lock_irqsave_rcu_node(rnp, flags);
3915 raw_spin_lock_irqsave_rcu_node(rnp, flags); 3491 rcu_state.gp_kthread = t;
3916 rsp->gp_kthread = t; 3492 if (kthread_prio) {
3917 if (kthread_prio) { 3493 sp.sched_priority = kthread_prio;
3918 sp.sched_priority = kthread_prio; 3494 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
3919 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
3920 }
3921 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3922 wake_up_process(t);
3923 } 3495 }
3496 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
3497 wake_up_process(t);
3924 rcu_spawn_nocb_kthreads(); 3498 rcu_spawn_nocb_kthreads();
3925 rcu_spawn_boost_kthreads(); 3499 rcu_spawn_boost_kthreads();
3926 return 0; 3500 return 0;
@@ -3947,9 +3521,9 @@ void rcu_scheduler_starting(void)
3947} 3521}
3948 3522
3949/* 3523/*
3950 * Helper function for rcu_init() that initializes one rcu_state structure. 3524 * Helper function for rcu_init() that initializes the rcu_state structure.
3951 */ 3525 */
3952static void __init rcu_init_one(struct rcu_state *rsp) 3526static void __init rcu_init_one(void)
3953{ 3527{
3954 static const char * const buf[] = RCU_NODE_NAME_INIT; 3528 static const char * const buf[] = RCU_NODE_NAME_INIT;
3955 static const char * const fqs[] = RCU_FQS_NAME_INIT; 3529 static const char * const fqs[] = RCU_FQS_NAME_INIT;
@@ -3971,14 +3545,15 @@ static void __init rcu_init_one(struct rcu_state *rsp)
3971 /* Initialize the level-tracking arrays. */ 3545 /* Initialize the level-tracking arrays. */
3972 3546
3973 for (i = 1; i < rcu_num_lvls; i++) 3547 for (i = 1; i < rcu_num_lvls; i++)
3974 rsp->level[i] = rsp->level[i - 1] + num_rcu_lvl[i - 1]; 3548 rcu_state.level[i] =
3549 rcu_state.level[i - 1] + num_rcu_lvl[i - 1];
3975 rcu_init_levelspread(levelspread, num_rcu_lvl); 3550 rcu_init_levelspread(levelspread, num_rcu_lvl);
3976 3551
3977 /* Initialize the elements themselves, starting from the leaves. */ 3552 /* Initialize the elements themselves, starting from the leaves. */
3978 3553
3979 for (i = rcu_num_lvls - 1; i >= 0; i--) { 3554 for (i = rcu_num_lvls - 1; i >= 0; i--) {
3980 cpustride *= levelspread[i]; 3555 cpustride *= levelspread[i];
3981 rnp = rsp->level[i]; 3556 rnp = rcu_state.level[i];
3982 for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) { 3557 for (j = 0; j < num_rcu_lvl[i]; j++, rnp++) {
3983 raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock)); 3558 raw_spin_lock_init(&ACCESS_PRIVATE(rnp, lock));
3984 lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock), 3559 lockdep_set_class_and_name(&ACCESS_PRIVATE(rnp, lock),
@@ -3986,9 +3561,9 @@ static void __init rcu_init_one(struct rcu_state *rsp)
3986 raw_spin_lock_init(&rnp->fqslock); 3561 raw_spin_lock_init(&rnp->fqslock);
3987 lockdep_set_class_and_name(&rnp->fqslock, 3562 lockdep_set_class_and_name(&rnp->fqslock,
3988 &rcu_fqs_class[i], fqs[i]); 3563 &rcu_fqs_class[i], fqs[i]);
3989 rnp->gp_seq = rsp->gp_seq; 3564 rnp->gp_seq = rcu_state.gp_seq;
3990 rnp->gp_seq_needed = rsp->gp_seq; 3565 rnp->gp_seq_needed = rcu_state.gp_seq;
3991 rnp->completedqs = rsp->gp_seq; 3566 rnp->completedqs = rcu_state.gp_seq;
3992 rnp->qsmask = 0; 3567 rnp->qsmask = 0;
3993 rnp->qsmaskinit = 0; 3568 rnp->qsmaskinit = 0;
3994 rnp->grplo = j * cpustride; 3569 rnp->grplo = j * cpustride;
@@ -4001,8 +3576,8 @@ static void __init rcu_init_one(struct rcu_state *rsp)
4001 rnp->parent = NULL; 3576 rnp->parent = NULL;
4002 } else { 3577 } else {
4003 rnp->grpnum = j % levelspread[i - 1]; 3578 rnp->grpnum = j % levelspread[i - 1];
4004 rnp->grpmask = 1UL << rnp->grpnum; 3579 rnp->grpmask = BIT(rnp->grpnum);
4005 rnp->parent = rsp->level[i - 1] + 3580 rnp->parent = rcu_state.level[i - 1] +
4006 j / levelspread[i - 1]; 3581 j / levelspread[i - 1];
4007 } 3582 }
4008 rnp->level = i; 3583 rnp->level = i;
@@ -4016,16 +3591,15 @@ static void __init rcu_init_one(struct rcu_state *rsp)
4016 } 3591 }
4017 } 3592 }
4018 3593
4019 init_swait_queue_head(&rsp->gp_wq); 3594 init_swait_queue_head(&rcu_state.gp_wq);
4020 init_swait_queue_head(&rsp->expedited_wq); 3595 init_swait_queue_head(&rcu_state.expedited_wq);
4021 rnp = rcu_first_leaf_node(rsp); 3596 rnp = rcu_first_leaf_node();
4022 for_each_possible_cpu(i) { 3597 for_each_possible_cpu(i) {
4023 while (i > rnp->grphi) 3598 while (i > rnp->grphi)
4024 rnp++; 3599 rnp++;
4025 per_cpu_ptr(rsp->rda, i)->mynode = rnp; 3600 per_cpu_ptr(&rcu_data, i)->mynode = rnp;
4026 rcu_boot_init_percpu_data(i, rsp); 3601 rcu_boot_init_percpu_data(i);
4027 } 3602 }
4028 list_add(&rsp->flavors, &rcu_struct_flavors);
4029} 3603}
4030 3604
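rcu_init_one() above strings the per-level rcu_node arrays together from num_rcu_lvl[] and levelspread[], leaves at the bottom and a single root on top. A rough stand-alone model of how many nodes such a combining tree needs per level, assuming example fanouts of 16 (leaf) and 64 (interior) and ignoring the boot-parameter handling and bounds checks in rcu_init_geometry():

#include <stdio.h>

/* How many rcu_node structures would a combining tree need for nr_cpus?
 * (Illustrative only; the fanouts are plain parameters here, not RCU_FANOUT*.) */
static void tree_geometry(int nr_cpus, int fanout_leaf, int fanout)
{
        int level_cnt[8];
        int levels = 0;

        level_cnt[levels++] = (nr_cpus + fanout_leaf - 1) / fanout_leaf;
        while (level_cnt[levels - 1] > 1 && levels < 8) {
                level_cnt[levels] = (level_cnt[levels - 1] + fanout - 1) / fanout;
                levels++;
        }
        for (int i = levels - 1; i >= 0; i--)           /* print root (level 0) first */
                printf("level %d: %d node(s)\n", levels - 1 - i, level_cnt[i]);
}

int main(void)
{
        tree_geometry(256, 16, 64);     /* 16 leaves under one root */
        return 0;
}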
4031/* 3605/*
@@ -4051,6 +3625,8 @@ static void __init rcu_init_geometry(void)
4051 jiffies_till_first_fqs = d; 3625 jiffies_till_first_fqs = d;
4052 if (jiffies_till_next_fqs == ULONG_MAX) 3626 if (jiffies_till_next_fqs == ULONG_MAX)
4053 jiffies_till_next_fqs = d; 3627 jiffies_till_next_fqs = d;
3628 if (jiffies_till_sched_qs == ULONG_MAX)
3629 adjust_jiffies_till_sched_qs();
4054 3630
4055 /* If the compile-time values are accurate, just leave. */ 3631 /* If the compile-time values are accurate, just leave. */
4056 if (rcu_fanout_leaf == RCU_FANOUT_LEAF && 3632 if (rcu_fanout_leaf == RCU_FANOUT_LEAF &&
@@ -4109,16 +3685,16 @@ static void __init rcu_init_geometry(void)
4109 3685
4110/* 3686/*
4111 * Dump out the structure of the rcu_node combining tree associated 3687 * Dump out the structure of the rcu_node combining tree associated
4112 * with the rcu_state structure referenced by rsp. 3688 * with the rcu_state structure.
4113 */ 3689 */
4114static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp) 3690static void __init rcu_dump_rcu_node_tree(void)
4115{ 3691{
4116 int level = 0; 3692 int level = 0;
4117 struct rcu_node *rnp; 3693 struct rcu_node *rnp;
4118 3694
4119 pr_info("rcu_node tree layout dump\n"); 3695 pr_info("rcu_node tree layout dump\n");
4120 pr_info(" "); 3696 pr_info(" ");
4121 rcu_for_each_node_breadth_first(rsp, rnp) { 3697 rcu_for_each_node_breadth_first(rnp) {
4122 if (rnp->level != level) { 3698 if (rnp->level != level) {
4123 pr_cont("\n"); 3699 pr_cont("\n");
4124 pr_info(" "); 3700 pr_info(" ");
@@ -4140,11 +3716,9 @@ void __init rcu_init(void)
4140 3716
4141 rcu_bootup_announce(); 3717 rcu_bootup_announce();
4142 rcu_init_geometry(); 3718 rcu_init_geometry();
4143 rcu_init_one(&rcu_bh_state); 3719 rcu_init_one();
4144 rcu_init_one(&rcu_sched_state);
4145 if (dump_tree) 3720 if (dump_tree)
4146 rcu_dump_rcu_node_tree(&rcu_sched_state); 3721 rcu_dump_rcu_node_tree();
4147 __rcu_init_preempt();
4148 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 3722 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
4149 3723
4150 /* 3724 /*
@@ -4164,6 +3738,7 @@ void __init rcu_init(void)
4164 WARN_ON(!rcu_gp_wq); 3738 WARN_ON(!rcu_gp_wq);
4165 rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); 3739 rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0);
4166 WARN_ON(!rcu_par_gp_wq); 3740 WARN_ON(!rcu_par_gp_wq);
3741 srcu_init();
4167} 3742}
4168 3743
4169#include "tree_exp.h" 3744#include "tree_exp.h"
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4e74df768c57..703e19ff532d 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -34,34 +34,9 @@
34 34
35#include "rcu_segcblist.h" 35#include "rcu_segcblist.h"
36 36
37/*
38 * Dynticks per-CPU state.
39 */
40struct rcu_dynticks {
41 long dynticks_nesting; /* Track process nesting level. */
42 long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */
43 atomic_t dynticks; /* Even value for idle, else odd. */
44 bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */
45 unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */
46 bool rcu_urgent_qs; /* GP old need light quiescent state. */
47#ifdef CONFIG_RCU_FAST_NO_HZ
48 bool all_lazy; /* Are all CPU's CBs lazy? */
49 unsigned long nonlazy_posted;
50 /* # times non-lazy CBs posted to CPU. */
51 unsigned long nonlazy_posted_snap;
52 /* idle-period nonlazy_posted snapshot. */
53 unsigned long last_accelerate;
54 /* Last jiffy CBs were accelerated. */
55 unsigned long last_advance_all;
56 /* Last jiffy CBs were all advanced. */
57 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
58#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
59};
60
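These fields are not going away; they move into struct rcu_data later in this header. The ->dynticks counter keeps the convention noted above: even while the CPU is idle (an extended quiescent state), odd otherwise, so the grace-period machinery can tell from a snapshot whether the CPU has since been idle. A conceptual model of that check only, not the kernel's rcu_dynticks_in_eqs*() implementation:

#include <stdio.h>
#include <stdbool.h>

/* Toy per-CPU counter: even while idle (extended quiescent state), else odd. */
struct toy_dynticks {
        unsigned long dynticks;
};

static void eqs_enter(struct toy_dynticks *t) { t->dynticks++; }  /* odd -> even */
static void eqs_exit(struct toy_dynticks *t)  { t->dynticks++; }  /* even -> odd */

static bool snap_in_eqs(unsigned long snap)
{
        return !(snap & 0x1);                   /* even snapshot: CPU was idle */
}

/* Was the CPU idle at the snapshot, or has it transitioned since it was taken? */
static bool in_eqs_since(const struct toy_dynticks *t, unsigned long snap)
{
        return snap_in_eqs(snap) || t->dynticks != snap;
}

int main(void)
{
        struct toy_dynticks t = { .dynticks = 1 };      /* CPU currently busy */
        unsigned long snap = t.dynticks;

        eqs_enter(&t);                                  /* CPU goes idle */
        printf("quiescent since snapshot? %d\n", in_eqs_since(&t, snap));  /* 1 */
        eqs_exit(&t);                                   /* CPU resumes work */
        return 0;
}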
61/* Communicate arguments to a workqueue handler. */ 37/* Communicate arguments to a workqueue handler. */
62struct rcu_exp_work { 38struct rcu_exp_work {
63 smp_call_func_t rew_func; 39 smp_call_func_t rew_func;
64 struct rcu_state *rew_rsp;
65 unsigned long rew_s; 40 unsigned long rew_s;
66 struct work_struct rew_work; 41 struct work_struct rew_work;
67}; 42};
@@ -170,7 +145,7 @@ struct rcu_node {
170 * are indexed relative to this interval rather than the global CPU ID space. 145 * are indexed relative to this interval rather than the global CPU ID space.
171 * This generates the bit for a CPU in node-local masks. 146 * This generates the bit for a CPU in node-local masks.
172 */ 147 */
173#define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo)) 148#define leaf_node_cpu_bit(rnp, cpu) (BIT((cpu) - (rnp)->grplo))
174 149
175/* 150/*
176 * Union to allow "aggregate OR" operation on the need for a quiescent 151 * Union to allow "aggregate OR" operation on the need for a quiescent
@@ -189,12 +164,11 @@ struct rcu_data {
189 /* 1) quiescent-state and grace-period handling : */ 164 /* 1) quiescent-state and grace-period handling : */
190 unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */ 165 unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */
191 unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ 166 unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */
192 unsigned long rcu_qs_ctr_snap;/* Snapshot of rcu_qs_ctr to check */
193 /* for rcu_all_qs() invocations. */
194 union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ 167 union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */
195 bool core_needs_qs; /* Core waits for quiesc state. */ 168 bool core_needs_qs; /* Core waits for quiesc state. */
196 bool beenonline; /* CPU online at least once. */ 169 bool beenonline; /* CPU online at least once. */
197 bool gpwrap; /* Possible ->gp_seq wrap. */ 170 bool gpwrap; /* Possible ->gp_seq wrap. */
171 bool deferred_qs; /* This CPU awaiting a deferred QS? */
198 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */ 172 struct rcu_node *mynode; /* This CPU's leaf of hierarchy */
199 unsigned long grpmask; /* Mask to apply to leaf qsmask. */ 173 unsigned long grpmask; /* Mask to apply to leaf qsmask. */
200 unsigned long ticks_this_gp; /* The number of scheduling-clock */ 174 unsigned long ticks_this_gp; /* The number of scheduling-clock */
@@ -213,23 +187,27 @@ struct rcu_data {
213 long blimit; /* Upper limit on a processed batch */ 187 long blimit; /* Upper limit on a processed batch */
214 188
215 /* 3) dynticks interface. */ 189 /* 3) dynticks interface. */
216 struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */
217 int dynticks_snap; /* Per-GP tracking for dynticks. */ 190 int dynticks_snap; /* Per-GP tracking for dynticks. */
218 191 long dynticks_nesting; /* Track process nesting level. */
219 /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ 192 long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */
220 unsigned long dynticks_fqs; /* Kicked due to dynticks idle. */ 193 atomic_t dynticks; /* Even value for idle, else odd. */
221 unsigned long cond_resched_completed; 194 bool rcu_need_heavy_qs; /* GP old, so heavy quiescent state! */
222 /* Grace period that needs help */ 195 bool rcu_urgent_qs; /* GP old need light quiescent state. */
223 /* from cond_resched(). */
224
225 /* 5) _rcu_barrier(), OOM callbacks, and expediting. */
226 struct rcu_head barrier_head;
227#ifdef CONFIG_RCU_FAST_NO_HZ 196#ifdef CONFIG_RCU_FAST_NO_HZ
228 struct rcu_head oom_head; 197 bool all_lazy; /* Are all CPU's CBs lazy? */
198 unsigned long nonlazy_posted; /* # times non-lazy CB posted to CPU. */
199 unsigned long nonlazy_posted_snap;
200 /* Nonlazy_posted snapshot. */
201 unsigned long last_accelerate; /* Last jiffy CBs were accelerated. */
202 unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */
203 int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */
229#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 204#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
205
206 /* 4) rcu_barrier(), OOM callbacks, and expediting. */
207 struct rcu_head barrier_head;
230 int exp_dynticks_snap; /* Double-check need for IPI. */ 208 int exp_dynticks_snap; /* Double-check need for IPI. */
231 209
232 /* 6) Callback offloading. */ 210 /* 5) Callback offloading. */
233#ifdef CONFIG_RCU_NOCB_CPU 211#ifdef CONFIG_RCU_NOCB_CPU
234 struct rcu_head *nocb_head; /* CBs waiting for kthread. */ 212 struct rcu_head *nocb_head; /* CBs waiting for kthread. */
235 struct rcu_head **nocb_tail; 213 struct rcu_head **nocb_tail;
@@ -256,7 +234,7 @@ struct rcu_data {
256 /* Leader CPU takes GP-end wakeups. */ 234 /* Leader CPU takes GP-end wakeups. */
257#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 235#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
258 236
259 /* 7) Diagnostic data, including RCU CPU stall warnings. */ 237 /* 6) Diagnostic data, including RCU CPU stall warnings. */
260 unsigned int softirq_snap; /* Snapshot of softirq activity. */ 238 unsigned int softirq_snap; /* Snapshot of softirq activity. */
261 /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */ 239 /* ->rcu_iw* fields protected by leaf rcu_node ->lock. */
262 struct irq_work rcu_iw; /* Check for non-irq activity. */ 240 struct irq_work rcu_iw; /* Check for non-irq activity. */
@@ -266,9 +244,9 @@ struct rcu_data {
266 short rcu_ofl_gp_flags; /* ->gp_flags at last offline. */ 244 short rcu_ofl_gp_flags; /* ->gp_flags at last offline. */
267 unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */ 245 unsigned long rcu_onl_gp_seq; /* ->gp_seq at last online. */
268 short rcu_onl_gp_flags; /* ->gp_flags at last online. */ 246 short rcu_onl_gp_flags; /* ->gp_flags at last online. */
247 unsigned long last_fqs_resched; /* Time of last rcu_resched(). */
269 248
270 int cpu; 249 int cpu;
271 struct rcu_state *rsp;
272}; 250};
273 251
274/* Values for nocb_defer_wakeup field in struct rcu_data. */ 252/* Values for nocb_defer_wakeup field in struct rcu_data. */
@@ -314,8 +292,6 @@ struct rcu_state {
314 struct rcu_node *level[RCU_NUM_LVLS + 1]; 292 struct rcu_node *level[RCU_NUM_LVLS + 1];
315 /* Hierarchy levels (+1 to */ 293 /* Hierarchy levels (+1 to */
316 /* shut bogus gcc warning) */ 294 /* shut bogus gcc warning) */
317 struct rcu_data __percpu *rda; /* pointer to per-CPU rcu_data. */
318 call_rcu_func_t call; /* call_rcu() flavor. */
319 int ncpus; /* # CPUs seen so far. */ 295 int ncpus; /* # CPUs seen so far. */
320 296
321 /* The following fields are guarded by the root rcu_node's lock. */ 297 /* The following fields are guarded by the root rcu_node's lock. */
@@ -334,7 +310,7 @@ struct rcu_state {
334 atomic_t barrier_cpu_count; /* # CPUs waiting on. */ 310 atomic_t barrier_cpu_count; /* # CPUs waiting on. */
335 struct completion barrier_completion; /* Wake at barrier end. */ 311 struct completion barrier_completion; /* Wake at barrier end. */
336 unsigned long barrier_sequence; /* ++ at start and end of */ 312 unsigned long barrier_sequence; /* ++ at start and end of */
337 /* _rcu_barrier(). */ 313 /* rcu_barrier(). */
338 /* End of fields guarded by barrier_mutex. */ 314 /* End of fields guarded by barrier_mutex. */
339 315
340 struct mutex exp_mutex; /* Serialize expedited GP. */ 316 struct mutex exp_mutex; /* Serialize expedited GP. */
@@ -366,9 +342,8 @@ struct rcu_state {
366 /* jiffies. */ 342 /* jiffies. */
367 const char *name; /* Name of structure. */ 343 const char *name; /* Name of structure. */
368 char abbr; /* Abbreviated name. */ 344 char abbr; /* Abbreviated name. */
369 struct list_head flavors; /* List of RCU flavors. */
370 345
371 spinlock_t ofl_lock ____cacheline_internodealigned_in_smp; 346 raw_spinlock_t ofl_lock ____cacheline_internodealigned_in_smp;
372 /* Synchronize offline with */ 347 /* Synchronize offline with */
373 /* GP pre-initialization. */ 348 /* GP pre-initialization. */
374}; 349};
@@ -388,7 +363,6 @@ struct rcu_state {
388#define RCU_GP_CLEANUP 7 /* Grace-period cleanup started. */ 363#define RCU_GP_CLEANUP 7 /* Grace-period cleanup started. */
389#define RCU_GP_CLEANED 8 /* Grace-period cleanup complete. */ 364#define RCU_GP_CLEANED 8 /* Grace-period cleanup complete. */
390 365
391#ifndef RCU_TREE_NONCORE
392static const char * const gp_state_names[] = { 366static const char * const gp_state_names[] = {
393 "RCU_GP_IDLE", 367 "RCU_GP_IDLE",
394 "RCU_GP_WAIT_GPS", 368 "RCU_GP_WAIT_GPS",
@@ -400,13 +374,29 @@ static const char * const gp_state_names[] = {
400 "RCU_GP_CLEANUP", 374 "RCU_GP_CLEANUP",
401 "RCU_GP_CLEANED", 375 "RCU_GP_CLEANED",
402}; 376};
403#endif /* #ifndef RCU_TREE_NONCORE */
404
405extern struct list_head rcu_struct_flavors;
406 377
407/* Sequence through rcu_state structures for each RCU flavor. */ 378/*
408#define for_each_rcu_flavor(rsp) \ 379 * In order to export the rcu_state name to the tracing tools, it
409 list_for_each_entry((rsp), &rcu_struct_flavors, flavors) 380 * needs to be added to the __tracepoint_string section.
381 * This requires defining a separate variable tp_<sname>_varname
382 * that points to the string being used, which allows the
383 * userspace tracing tools to resolve the string address to
384 * the matching string.
385 */
386#ifdef CONFIG_PREEMPT_RCU
387#define RCU_ABBR 'p'
388#define RCU_NAME_RAW "rcu_preempt"
389#else /* #ifdef CONFIG_PREEMPT_RCU */
390#define RCU_ABBR 's'
391#define RCU_NAME_RAW "rcu_sched"
392#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
393#ifndef CONFIG_TRACING
394#define RCU_NAME RCU_NAME_RAW
395#else /* #ifdef CONFIG_TRACING */
396static char rcu_name[] = RCU_NAME_RAW;
397static const char *tp_rcu_varname __used __tracepoint_string = rcu_name;
398#define RCU_NAME rcu_name
399#endif /* #else #ifdef CONFIG_TRACING */
410 400
411/* 401/*
412 * RCU implementation internal declarations: 402 * RCU implementation internal declarations:
@@ -419,7 +409,7 @@ extern struct rcu_state rcu_bh_state;
419extern struct rcu_state rcu_preempt_state; 409extern struct rcu_state rcu_preempt_state;
420#endif /* #ifdef CONFIG_PREEMPT_RCU */ 410#endif /* #ifdef CONFIG_PREEMPT_RCU */
421 411
422int rcu_dynticks_snap(struct rcu_dynticks *rdtp); 412int rcu_dynticks_snap(struct rcu_data *rdp);
423 413
424#ifdef CONFIG_RCU_BOOST 414#ifdef CONFIG_RCU_BOOST
425DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 415DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -428,45 +418,37 @@ DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
428DECLARE_PER_CPU(char, rcu_cpu_has_work); 418DECLARE_PER_CPU(char, rcu_cpu_has_work);
429#endif /* #ifdef CONFIG_RCU_BOOST */ 419#endif /* #ifdef CONFIG_RCU_BOOST */
430 420
431#ifndef RCU_TREE_NONCORE
432
433/* Forward declarations for rcutree_plugin.h */ 421/* Forward declarations for rcutree_plugin.h */
434static void rcu_bootup_announce(void); 422static void rcu_bootup_announce(void);
435static void rcu_preempt_note_context_switch(bool preempt); 423static void rcu_qs(void);
436static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 424static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
437#ifdef CONFIG_HOTPLUG_CPU 425#ifdef CONFIG_HOTPLUG_CPU
438static bool rcu_preempt_has_tasks(struct rcu_node *rnp); 426static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
439#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 427#endif /* #ifdef CONFIG_HOTPLUG_CPU */
440static void rcu_print_detail_task_stall(struct rcu_state *rsp); 428static void rcu_print_detail_task_stall(void);
441static int rcu_print_task_stall(struct rcu_node *rnp); 429static int rcu_print_task_stall(struct rcu_node *rnp);
442static int rcu_print_task_exp_stall(struct rcu_node *rnp); 430static int rcu_print_task_exp_stall(struct rcu_node *rnp);
443static void rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, 431static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
444 struct rcu_node *rnp); 432static void rcu_flavor_check_callbacks(int user);
445static void rcu_preempt_check_callbacks(void);
446void call_rcu(struct rcu_head *head, rcu_callback_t func); 433void call_rcu(struct rcu_head *head, rcu_callback_t func);
447static void __init __rcu_init_preempt(void); 434static void dump_blkd_tasks(struct rcu_node *rnp, int ncheck);
448static void dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp,
449 int ncheck);
450static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 435static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
451static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 436static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
452static void invoke_rcu_callbacks_kthread(void); 437static void invoke_rcu_callbacks_kthread(void);
453static bool rcu_is_callbacks_kthread(void); 438static bool rcu_is_callbacks_kthread(void);
454#ifdef CONFIG_RCU_BOOST
455static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
456 struct rcu_node *rnp);
457#endif /* #ifdef CONFIG_RCU_BOOST */
458static void __init rcu_spawn_boost_kthreads(void); 439static void __init rcu_spawn_boost_kthreads(void);
459static void rcu_prepare_kthreads(int cpu); 440static void rcu_prepare_kthreads(int cpu);
460static void rcu_cleanup_after_idle(void); 441static void rcu_cleanup_after_idle(void);
461static void rcu_prepare_for_idle(void); 442static void rcu_prepare_for_idle(void);
462static void rcu_idle_count_callbacks_posted(void); 443static void rcu_idle_count_callbacks_posted(void);
463static bool rcu_preempt_has_tasks(struct rcu_node *rnp); 444static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
445static bool rcu_preempt_need_deferred_qs(struct task_struct *t);
446static void rcu_preempt_deferred_qs(struct task_struct *t);
464static void print_cpu_stall_info_begin(void); 447static void print_cpu_stall_info_begin(void);
465static void print_cpu_stall_info(struct rcu_state *rsp, int cpu); 448static void print_cpu_stall_info(int cpu);
466static void print_cpu_stall_info_end(void); 449static void print_cpu_stall_info_end(void);
467static void zero_cpu_stall_ticks(struct rcu_data *rdp); 450static void zero_cpu_stall_ticks(struct rcu_data *rdp);
468static void increment_cpu_stall_ticks(void); 451static bool rcu_nocb_cpu_needs_barrier(int cpu);
469static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
470static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp); 452static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
471static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq); 453static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
472static void rcu_init_one_nocb(struct rcu_node *rnp); 454static void rcu_init_one_nocb(struct rcu_node *rnp);
@@ -481,11 +463,11 @@ static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
481static void rcu_spawn_all_nocb_kthreads(int cpu); 463static void rcu_spawn_all_nocb_kthreads(int cpu);
482static void __init rcu_spawn_nocb_kthreads(void); 464static void __init rcu_spawn_nocb_kthreads(void);
483#ifdef CONFIG_RCU_NOCB_CPU 465#ifdef CONFIG_RCU_NOCB_CPU
484static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp); 466static void __init rcu_organize_nocb_kthreads(void);
485#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 467#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
486static bool init_nocb_callback_list(struct rcu_data *rdp); 468static bool init_nocb_callback_list(struct rcu_data *rdp);
487static void rcu_bind_gp_kthread(void); 469static void rcu_bind_gp_kthread(void);
488static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 470static bool rcu_nohz_full_cpu(void);
489static void rcu_dynticks_task_enter(void); 471static void rcu_dynticks_task_enter(void);
490static void rcu_dynticks_task_exit(void); 472static void rcu_dynticks_task_exit(void);
491 473
@@ -496,5 +478,3 @@ void srcu_offline_cpu(unsigned int cpu);
496void srcu_online_cpu(unsigned int cpu) { } 478void srcu_online_cpu(unsigned int cpu) { }
497void srcu_offline_cpu(unsigned int cpu) { } 479void srcu_offline_cpu(unsigned int cpu) { }
498#endif /* #else #ifdef CONFIG_SRCU */ 480#endif /* #else #ifdef CONFIG_SRCU */
499
500#endif /* #ifndef RCU_TREE_NONCORE */
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index 0b2c2ad69629..8d18c1014e2b 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -25,39 +25,39 @@
25/* 25/*
26 * Record the start of an expedited grace period. 26 * Record the start of an expedited grace period.
27 */ 27 */
28static void rcu_exp_gp_seq_start(struct rcu_state *rsp) 28static void rcu_exp_gp_seq_start(void)
29{ 29{
30 rcu_seq_start(&rsp->expedited_sequence); 30 rcu_seq_start(&rcu_state.expedited_sequence);
31} 31}
32 32
33/* 33/*
34 * Return the value that the expedited-grace-period counter will have 34 * Return the value that the expedited-grace-period counter will have
35 * at the end of the current grace period. 35 * at the end of the current grace period.
36 */ 36 */
37static __maybe_unused unsigned long rcu_exp_gp_seq_endval(struct rcu_state *rsp) 37static __maybe_unused unsigned long rcu_exp_gp_seq_endval(void)
38{ 38{
39 return rcu_seq_endval(&rsp->expedited_sequence); 39 return rcu_seq_endval(&rcu_state.expedited_sequence);
40} 40}
41 41
42/* 42/*
43 * Record the end of an expedited grace period. 43 * Record the end of an expedited grace period.
44 */ 44 */
45static void rcu_exp_gp_seq_end(struct rcu_state *rsp) 45static void rcu_exp_gp_seq_end(void)
46{ 46{
47 rcu_seq_end(&rsp->expedited_sequence); 47 rcu_seq_end(&rcu_state.expedited_sequence);
48 smp_mb(); /* Ensure that consecutive grace periods serialize. */ 48 smp_mb(); /* Ensure that consecutive grace periods serialize. */
49} 49}
50 50
51/* 51/*
52 * Take a snapshot of the expedited-grace-period counter. 52 * Take a snapshot of the expedited-grace-period counter.
53 */ 53 */
54static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp) 54static unsigned long rcu_exp_gp_seq_snap(void)
55{ 55{
56 unsigned long s; 56 unsigned long s;
57 57
58 smp_mb(); /* Caller's modifications seen first by other CPUs. */ 58 smp_mb(); /* Caller's modifications seen first by other CPUs. */
59 s = rcu_seq_snap(&rsp->expedited_sequence); 59 s = rcu_seq_snap(&rcu_state.expedited_sequence);
60 trace_rcu_exp_grace_period(rsp->name, s, TPS("snap")); 60 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("snap"));
61 return s; 61 return s;
62} 62}
63 63
@@ -66,9 +66,9 @@ static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
66 * if a full expedited grace period has elapsed since that snapshot 66 * if a full expedited grace period has elapsed since that snapshot
67 * was taken. 67 * was taken.
68 */ 68 */
69static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s) 69static bool rcu_exp_gp_seq_done(unsigned long s)
70{ 70{
71 return rcu_seq_done(&rsp->expedited_sequence, s); 71 return rcu_seq_done(&rcu_state.expedited_sequence, s);
72} 72}
73 73
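rcu_exp_gp_seq_snap() and rcu_exp_gp_seq_done() implement the usual rcu_seq pattern: record the counter value that a grace period starting after the snapshot must reach, then later check whether the counter has passed it; rcu_barrier() uses the same pair on ->barrier_sequence earlier in this patch. A simplified model of the arithmetic, assuming two low state bits and ignoring counter wrap (the real helpers are the rcu_seq_*() functions in kernel/rcu/rcu.h):

#include <stdio.h>
#include <stdbool.h>

#define SEQ_STATE_MASK 0x3UL    /* low bits: phase of any period in progress */

/* Counter value at which a grace period started after "now" will have ended. */
static unsigned long seq_snap(unsigned long seq)
{
        return (seq + 2 * SEQ_STATE_MASK + 1) & ~SEQ_STATE_MASK;
}

static bool seq_done(unsigned long seq, unsigned long snap)
{
        return seq >= snap;     /* the kernel uses ULONG_CMP_GE() to tolerate wrap */
}

int main(void)
{
        unsigned long seq = 5;                  /* a period is already in flight */
        unsigned long snap = seq_snap(seq);     /* 12: end of a later full period */

        printf("done immediately? %d\n", seq_done(seq, snap));  /* 0 */
        printf("done once seq=12? %d\n", seq_done(12, snap));   /* 1 */
        return 0;
}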
74/* 74/*
@@ -78,26 +78,26 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
78 * ever been online. This means that this function normally takes its 78 * ever been online. This means that this function normally takes its
79 * no-work-to-do fastpath. 79 * no-work-to-do fastpath.
80 */ 80 */
81static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp) 81static void sync_exp_reset_tree_hotplug(void)
82{ 82{
83 bool done; 83 bool done;
84 unsigned long flags; 84 unsigned long flags;
85 unsigned long mask; 85 unsigned long mask;
86 unsigned long oldmask; 86 unsigned long oldmask;
87 int ncpus = smp_load_acquire(&rsp->ncpus); /* Order against locking. */ 87 int ncpus = smp_load_acquire(&rcu_state.ncpus); /* Order vs. locking. */
88 struct rcu_node *rnp; 88 struct rcu_node *rnp;
89 struct rcu_node *rnp_up; 89 struct rcu_node *rnp_up;
90 90
91 /* If no new CPUs onlined since last time, nothing to do. */ 91 /* If no new CPUs onlined since last time, nothing to do. */
92 if (likely(ncpus == rsp->ncpus_snap)) 92 if (likely(ncpus == rcu_state.ncpus_snap))
93 return; 93 return;
94 rsp->ncpus_snap = ncpus; 94 rcu_state.ncpus_snap = ncpus;
95 95
96 /* 96 /*
97 * Each pass through the following loop propagates newly onlined 97 * Each pass through the following loop propagates newly onlined
98 * CPUs for the current rcu_node structure up the rcu_node tree. 98 * CPUs for the current rcu_node structure up the rcu_node tree.
99 */ 99 */
100 rcu_for_each_leaf_node(rsp, rnp) { 100 rcu_for_each_leaf_node(rnp) {
101 raw_spin_lock_irqsave_rcu_node(rnp, flags); 101 raw_spin_lock_irqsave_rcu_node(rnp, flags);
102 if (rnp->expmaskinit == rnp->expmaskinitnext) { 102 if (rnp->expmaskinit == rnp->expmaskinitnext) {
103 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 103 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -135,13 +135,13 @@ static void sync_exp_reset_tree_hotplug(struct rcu_state *rsp)
135 * Reset the ->expmask values in the rcu_node tree in preparation for 135 * Reset the ->expmask values in the rcu_node tree in preparation for
136 * a new expedited grace period. 136 * a new expedited grace period.
137 */ 137 */
138static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp) 138static void __maybe_unused sync_exp_reset_tree(void)
139{ 139{
140 unsigned long flags; 140 unsigned long flags;
141 struct rcu_node *rnp; 141 struct rcu_node *rnp;
142 142
143 sync_exp_reset_tree_hotplug(rsp); 143 sync_exp_reset_tree_hotplug();
144 rcu_for_each_node_breadth_first(rsp, rnp) { 144 rcu_for_each_node_breadth_first(rnp) {
145 raw_spin_lock_irqsave_rcu_node(rnp, flags); 145 raw_spin_lock_irqsave_rcu_node(rnp, flags);
146 WARN_ON_ONCE(rnp->expmask); 146 WARN_ON_ONCE(rnp->expmask);
147 rnp->expmask = rnp->expmaskinit; 147 rnp->expmask = rnp->expmaskinit;
@@ -194,7 +194,7 @@ static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
194 * 194 *
195 * Caller must hold the specified rcu_node structure's ->lock. 195 * Caller must hold the specified rcu_node structure's ->lock.
196 */ 196 */
197static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp, 197static void __rcu_report_exp_rnp(struct rcu_node *rnp,
198 bool wake, unsigned long flags) 198 bool wake, unsigned long flags)
199 __releases(rnp->lock) 199 __releases(rnp->lock)
200{ 200{
@@ -212,7 +212,7 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
212 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 212 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
213 if (wake) { 213 if (wake) {
214 smp_mb(); /* EGP done before wake_up(). */ 214 smp_mb(); /* EGP done before wake_up(). */
215 swake_up_one(&rsp->expedited_wq); 215 swake_up_one(&rcu_state.expedited_wq);
216 } 216 }
217 break; 217 break;
218 } 218 }
@@ -229,20 +229,19 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
229 * Report expedited quiescent state for specified node. This is a 229 * Report expedited quiescent state for specified node. This is a
230 * lock-acquisition wrapper function for __rcu_report_exp_rnp(). 230 * lock-acquisition wrapper function for __rcu_report_exp_rnp().
231 */ 231 */
232static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp, 232static void __maybe_unused rcu_report_exp_rnp(struct rcu_node *rnp, bool wake)
233 struct rcu_node *rnp, bool wake)
234{ 233{
235 unsigned long flags; 234 unsigned long flags;
236 235
237 raw_spin_lock_irqsave_rcu_node(rnp, flags); 236 raw_spin_lock_irqsave_rcu_node(rnp, flags);
238 __rcu_report_exp_rnp(rsp, rnp, wake, flags); 237 __rcu_report_exp_rnp(rnp, wake, flags);
239} 238}
240 239
241/* 240/*
242 * Report expedited quiescent state for multiple CPUs, all covered by the 241 * Report expedited quiescent state for multiple CPUs, all covered by the
243 * specified leaf rcu_node structure. 242 * specified leaf rcu_node structure.
244 */ 243 */
245static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp, 244static void rcu_report_exp_cpu_mult(struct rcu_node *rnp,
246 unsigned long mask, bool wake) 245 unsigned long mask, bool wake)
247{ 246{
248 unsigned long flags; 247 unsigned long flags;
@@ -253,23 +252,23 @@ static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
253 return; 252 return;
254 } 253 }
255 rnp->expmask &= ~mask; 254 rnp->expmask &= ~mask;
256 __rcu_report_exp_rnp(rsp, rnp, wake, flags); /* Releases rnp->lock. */ 255 __rcu_report_exp_rnp(rnp, wake, flags); /* Releases rnp->lock. */
257} 256}
258 257
259/* 258/*
260 * Report expedited quiescent state for specified rcu_data (CPU). 259 * Report expedited quiescent state for specified rcu_data (CPU).
261 */ 260 */
262static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp, 261static void rcu_report_exp_rdp(struct rcu_data *rdp)
263 bool wake)
264{ 262{
265 rcu_report_exp_cpu_mult(rsp, rdp->mynode, rdp->grpmask, wake); 263 WRITE_ONCE(rdp->deferred_qs, false);
264 rcu_report_exp_cpu_mult(rdp->mynode, rdp->grpmask, true);
266} 265}
267 266
268/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */ 267/* Common code for work-done checking. */
269static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s) 268static bool sync_exp_work_done(unsigned long s)
270{ 269{
271 if (rcu_exp_gp_seq_done(rsp, s)) { 270 if (rcu_exp_gp_seq_done(s)) {
272 trace_rcu_exp_grace_period(rsp->name, s, TPS("done")); 271 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("done"));
273 /* Ensure test happens before caller kfree(). */ 272 /* Ensure test happens before caller kfree(). */
274 smp_mb__before_atomic(); /* ^^^ */ 273 smp_mb__before_atomic(); /* ^^^ */
275 return true; 274 return true;
@@ -284,28 +283,28 @@ static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
284 * with the mutex held, indicating that the caller must actually do the 283 * with the mutex held, indicating that the caller must actually do the
285 * expedited grace period. 284 * expedited grace period.
286 */ 285 */
287static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) 286static bool exp_funnel_lock(unsigned long s)
288{ 287{
289 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); 288 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
290 struct rcu_node *rnp = rdp->mynode; 289 struct rcu_node *rnp = rdp->mynode;
291 struct rcu_node *rnp_root = rcu_get_root(rsp); 290 struct rcu_node *rnp_root = rcu_get_root();
292 291
293 /* Low-contention fastpath. */ 292 /* Low-contention fastpath. */
294 if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) && 293 if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s) &&
295 (rnp == rnp_root || 294 (rnp == rnp_root ||
296 ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) && 295 ULONG_CMP_LT(READ_ONCE(rnp_root->exp_seq_rq), s)) &&
297 mutex_trylock(&rsp->exp_mutex)) 296 mutex_trylock(&rcu_state.exp_mutex))
298 goto fastpath; 297 goto fastpath;
299 298
300 /* 299 /*
301 * Each pass through the following loop works its way up 300 * Each pass through the following loop works its way up
302 * the rcu_node tree, returning if others have done the work or 301 * the rcu_node tree, returning if others have done the work or
303 * otherwise falls through to acquire rsp->exp_mutex. The mapping 302 * otherwise falls through to acquire ->exp_mutex. The mapping
304 * from CPU to rcu_node structure can be inexact, as it is just 303 * from CPU to rcu_node structure can be inexact, as it is just
305 * promoting locality and is not strictly needed for correctness. 304 * promoting locality and is not strictly needed for correctness.
306 */ 305 */
307 for (; rnp != NULL; rnp = rnp->parent) { 306 for (; rnp != NULL; rnp = rnp->parent) {
308 if (sync_exp_work_done(rsp, s)) 307 if (sync_exp_work_done(s))
309 return true; 308 return true;
310 309
311 /* Work not done, either wait here or go up. */ 310 /* Work not done, either wait here or go up. */
@@ -314,68 +313,29 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
314 313
315 /* Someone else doing GP, so wait for them. */ 314 /* Someone else doing GP, so wait for them. */
316 spin_unlock(&rnp->exp_lock); 315 spin_unlock(&rnp->exp_lock);
317 trace_rcu_exp_funnel_lock(rsp->name, rnp->level, 316 trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
318 rnp->grplo, rnp->grphi, 317 rnp->grplo, rnp->grphi,
319 TPS("wait")); 318 TPS("wait"));
320 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 319 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
321 sync_exp_work_done(rsp, s)); 320 sync_exp_work_done(s));
322 return true; 321 return true;
323 } 322 }
324 rnp->exp_seq_rq = s; /* Followers can wait on us. */ 323 rnp->exp_seq_rq = s; /* Followers can wait on us. */
325 spin_unlock(&rnp->exp_lock); 324 spin_unlock(&rnp->exp_lock);
326 trace_rcu_exp_funnel_lock(rsp->name, rnp->level, rnp->grplo, 325 trace_rcu_exp_funnel_lock(rcu_state.name, rnp->level,
327 rnp->grphi, TPS("nxtlvl")); 326 rnp->grplo, rnp->grphi, TPS("nxtlvl"));
328 } 327 }
329 mutex_lock(&rsp->exp_mutex); 328 mutex_lock(&rcu_state.exp_mutex);
330fastpath: 329fastpath:
331 if (sync_exp_work_done(rsp, s)) { 330 if (sync_exp_work_done(s)) {
332 mutex_unlock(&rsp->exp_mutex); 331 mutex_unlock(&rcu_state.exp_mutex);
333 return true; 332 return true;
334 } 333 }
335 rcu_exp_gp_seq_start(rsp); 334 rcu_exp_gp_seq_start();
336 trace_rcu_exp_grace_period(rsp->name, s, TPS("start")); 335 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("start"));
337 return false; 336 return false;
338} 337}
339 338
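[Illustrative aside] exp_funnel_lock() and sync_exp_work_done() above rest on wrap-safe sequence-counter arithmetic: a caller snapshots the expedited sequence and later treats "the counter has reached my snapshot" as proof that some other caller already completed the grace period it needed. Below is a minimal stand-alone model of that arithmetic, assuming a single in-progress bit rather than the kernel's exact rcu_seq encoding; gp_seq, seq_snap(), and seq_done() are names local to this sketch:

/* Stand-alone model of wrap-safe "has the grace period I need finished?".
 * gp_seq is odd while a grace period runs; names are local to this sketch. */
#include <stdbool.h>
#include <stdio.h>

#define ULONG_CMP_LT(a, b)	((unsigned long)((a) - (b)) > (~0UL >> 1))

static unsigned long gp_seq;

/* First sequence value at which a full grace period after "now" has ended. */
static unsigned long seq_snap(void)
{
	return (gp_seq + 3) & ~0x1UL;
}

/* Has the grace period associated with snapshot s completed? */
static bool seq_done(unsigned long s)
{
	return !ULONG_CMP_LT(gp_seq, s);
}

int main(void)
{
	unsigned long s = seq_snap();		/* caller's snapshot */

	printf("done before any GP? %d\n", seq_done(s));	/* prints 0 */
	gp_seq++;				/* grace period starts */
	gp_seq++;				/* grace period ends */
	printf("done after one GP?  %d\n", seq_done(s));	/* prints 1 */
	return 0;
}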
340/* Invoked on each online non-idle CPU for expedited quiescent state. */
341static void sync_sched_exp_handler(void *data)
342{
343 struct rcu_data *rdp;
344 struct rcu_node *rnp;
345 struct rcu_state *rsp = data;
346
347 rdp = this_cpu_ptr(rsp->rda);
348 rnp = rdp->mynode;
349 if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
350 __this_cpu_read(rcu_sched_data.cpu_no_qs.b.exp))
351 return;
352 if (rcu_is_cpu_rrupt_from_idle()) {
353 rcu_report_exp_rdp(&rcu_sched_state,
354 this_cpu_ptr(&rcu_sched_data), true);
355 return;
356 }
357 __this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true);
358 /* Store .exp before .rcu_urgent_qs. */
359 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
360 resched_cpu(smp_processor_id());
361}
362
363/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
364static void sync_sched_exp_online_cleanup(int cpu)
365{
366 struct rcu_data *rdp;
367 int ret;
368 struct rcu_node *rnp;
369 struct rcu_state *rsp = &rcu_sched_state;
370
371 rdp = per_cpu_ptr(rsp->rda, cpu);
372 rnp = rdp->mynode;
373 if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
374 return;
375 ret = smp_call_function_single(cpu, sync_sched_exp_handler, rsp, 0);
376 WARN_ON_ONCE(ret);
377}
378
379/* 339/*
380 * Select the CPUs within the specified rcu_node that the upcoming 340 * Select the CPUs within the specified rcu_node that the upcoming
381 * expedited grace period needs to wait for. 341 * expedited grace period needs to wait for.
@@ -391,7 +351,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
391 struct rcu_exp_work *rewp = 351 struct rcu_exp_work *rewp =
392 container_of(wp, struct rcu_exp_work, rew_work); 352 container_of(wp, struct rcu_exp_work, rew_work);
393 struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew); 353 struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
394 struct rcu_state *rsp = rewp->rew_rsp;
395 354
396 func = rewp->rew_func; 355 func = rewp->rew_func;
397 raw_spin_lock_irqsave_rcu_node(rnp, flags); 356 raw_spin_lock_irqsave_rcu_node(rnp, flags);
@@ -400,15 +359,14 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
400 mask_ofl_test = 0; 359 mask_ofl_test = 0;
401 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { 360 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
402 unsigned long mask = leaf_node_cpu_bit(rnp, cpu); 361 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
403 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 362 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
404 struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
405 int snap; 363 int snap;
406 364
407 if (raw_smp_processor_id() == cpu || 365 if (raw_smp_processor_id() == cpu ||
408 !(rnp->qsmaskinitnext & mask)) { 366 !(rnp->qsmaskinitnext & mask)) {
409 mask_ofl_test |= mask; 367 mask_ofl_test |= mask;
410 } else { 368 } else {
411 snap = rcu_dynticks_snap(rdtp); 369 snap = rcu_dynticks_snap(rdp);
412 if (rcu_dynticks_in_eqs(snap)) 370 if (rcu_dynticks_in_eqs(snap))
413 mask_ofl_test |= mask; 371 mask_ofl_test |= mask;
414 else 372 else
@@ -429,17 +387,16 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
429 /* IPI the remaining CPUs for expedited quiescent state. */ 387 /* IPI the remaining CPUs for expedited quiescent state. */
430 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) { 388 for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
431 unsigned long mask = leaf_node_cpu_bit(rnp, cpu); 389 unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
432 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 390 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
433 391
434 if (!(mask_ofl_ipi & mask)) 392 if (!(mask_ofl_ipi & mask))
435 continue; 393 continue;
436retry_ipi: 394retry_ipi:
437 if (rcu_dynticks_in_eqs_since(rdp->dynticks, 395 if (rcu_dynticks_in_eqs_since(rdp, rdp->exp_dynticks_snap)) {
438 rdp->exp_dynticks_snap)) {
439 mask_ofl_test |= mask; 396 mask_ofl_test |= mask;
440 continue; 397 continue;
441 } 398 }
442 ret = smp_call_function_single(cpu, func, rsp, 0); 399 ret = smp_call_function_single(cpu, func, NULL, 0);
443 if (!ret) { 400 if (!ret) {
444 mask_ofl_ipi &= ~mask; 401 mask_ofl_ipi &= ~mask;
445 continue; 402 continue;
@@ -450,7 +407,7 @@ retry_ipi:
450 (rnp->expmask & mask)) { 407 (rnp->expmask & mask)) {
451 /* Online, so delay for a bit and try again. */ 408 /* Online, so delay for a bit and try again. */
452 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 409 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
453 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl")); 410 trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("selectofl"));
454 schedule_timeout_uninterruptible(1); 411 schedule_timeout_uninterruptible(1);
455 goto retry_ipi; 412 goto retry_ipi;
456 } 413 }
@@ -462,33 +419,31 @@ retry_ipi:
462 /* Report quiescent states for those that went offline. */ 419 /* Report quiescent states for those that went offline. */
463 mask_ofl_test |= mask_ofl_ipi; 420 mask_ofl_test |= mask_ofl_ipi;
464 if (mask_ofl_test) 421 if (mask_ofl_test)
465 rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false); 422 rcu_report_exp_cpu_mult(rnp, mask_ofl_test, false);
466} 423}
467 424
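[Illustrative aside] The CPU-selection pass above snapshots each target CPU's dyntick counter, skips CPUs already in an extended quiescent state, and later treats any counter change as evidence that the CPU has since passed through one. Here is a simplified stand-alone model of that even/odd counter protocol; the real counter is manipulated atomically and carries extra control bits, and all names below are local to the sketch:

/* Simplified dyntick-counter model: the counter is even while the CPU is
 * in an extended quiescent state (idle/userspace) and odd otherwise; it is
 * incremented on every transition. */
#include <stdbool.h>
#include <stdio.h>

static unsigned long dynticks = 1;	/* odd: CPU currently non-idle */

static unsigned long dynticks_snap(void)
{
	return dynticks;
}

static bool in_eqs(unsigned long snap)
{
	return !(snap & 0x1);		/* even counter value => EQS */
}

static bool in_eqs_since(unsigned long snap)
{
	/* Any change from a non-EQS snapshot means the CPU entered EQS. */
	return snap != dynticks_snap();
}

int main(void)
{
	unsigned long snap = dynticks_snap();

	printf("needs IPI now?  %d\n", !in_eqs(snap));		/* prints 1 */
	dynticks++;			/* CPU enters idle */
	printf("quiesced since? %d\n", in_eqs_since(snap));	/* prints 1 */
	return 0;
}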
468/* 425/*
469 * Select the nodes that the upcoming expedited grace period needs 426 * Select the nodes that the upcoming expedited grace period needs
470 * to wait for. 427 * to wait for.
471 */ 428 */
472static void sync_rcu_exp_select_cpus(struct rcu_state *rsp, 429static void sync_rcu_exp_select_cpus(smp_call_func_t func)
473 smp_call_func_t func)
474{ 430{
475 int cpu; 431 int cpu;
476 struct rcu_node *rnp; 432 struct rcu_node *rnp;
477 433
478 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset")); 434 trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("reset"));
479 sync_exp_reset_tree(rsp); 435 sync_exp_reset_tree();
480 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select")); 436 trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("select"));
481 437
482 /* Schedule work for each leaf rcu_node structure. */ 438 /* Schedule work for each leaf rcu_node structure. */
483 rcu_for_each_leaf_node(rsp, rnp) { 439 rcu_for_each_leaf_node(rnp) {
484 rnp->exp_need_flush = false; 440 rnp->exp_need_flush = false;
485 if (!READ_ONCE(rnp->expmask)) 441 if (!READ_ONCE(rnp->expmask))
486 continue; /* Avoid early boot non-existent wq. */ 442 continue; /* Avoid early boot non-existent wq. */
487 rnp->rew.rew_func = func; 443 rnp->rew.rew_func = func;
488 rnp->rew.rew_rsp = rsp;
489 if (!READ_ONCE(rcu_par_gp_wq) || 444 if (!READ_ONCE(rcu_par_gp_wq) ||
490 rcu_scheduler_active != RCU_SCHEDULER_RUNNING || 445 rcu_scheduler_active != RCU_SCHEDULER_RUNNING ||
491 rcu_is_last_leaf_node(rsp, rnp)) { 446 rcu_is_last_leaf_node(rnp)) {
492 /* No workqueues yet or last leaf, do direct call. */ 447 /* No workqueues yet or last leaf, do direct call. */
493 sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work); 448 sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
494 continue; 449 continue;
@@ -505,12 +460,12 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
505 } 460 }
506 461
507 /* Wait for workqueue jobs (if any) to complete. */ 462 /* Wait for workqueue jobs (if any) to complete. */
508 rcu_for_each_leaf_node(rsp, rnp) 463 rcu_for_each_leaf_node(rnp)
509 if (rnp->exp_need_flush) 464 if (rnp->exp_need_flush)
510 flush_work(&rnp->rew.rew_work); 465 flush_work(&rnp->rew.rew_work);
511} 466}
512 467
513static void synchronize_sched_expedited_wait(struct rcu_state *rsp) 468static void synchronize_sched_expedited_wait(void)
514{ 469{
515 int cpu; 470 int cpu;
516 unsigned long jiffies_stall; 471 unsigned long jiffies_stall;
@@ -518,16 +473,16 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
518 unsigned long mask; 473 unsigned long mask;
519 int ndetected; 474 int ndetected;
520 struct rcu_node *rnp; 475 struct rcu_node *rnp;
521 struct rcu_node *rnp_root = rcu_get_root(rsp); 476 struct rcu_node *rnp_root = rcu_get_root();
522 int ret; 477 int ret;
523 478
524 trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("startwait")); 479 trace_rcu_exp_grace_period(rcu_state.name, rcu_exp_gp_seq_endval(), TPS("startwait"));
525 jiffies_stall = rcu_jiffies_till_stall_check(); 480 jiffies_stall = rcu_jiffies_till_stall_check();
526 jiffies_start = jiffies; 481 jiffies_start = jiffies;
527 482
528 for (;;) { 483 for (;;) {
529 ret = swait_event_timeout_exclusive( 484 ret = swait_event_timeout_exclusive(
530 rsp->expedited_wq, 485 rcu_state.expedited_wq,
531 sync_rcu_preempt_exp_done_unlocked(rnp_root), 486 sync_rcu_preempt_exp_done_unlocked(rnp_root),
532 jiffies_stall); 487 jiffies_stall);
533 if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root)) 488 if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
@@ -537,9 +492,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
537 continue; 492 continue;
538 panic_on_rcu_stall(); 493 panic_on_rcu_stall();
539 pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {", 494 pr_err("INFO: %s detected expedited stalls on CPUs/tasks: {",
540 rsp->name); 495 rcu_state.name);
541 ndetected = 0; 496 ndetected = 0;
542 rcu_for_each_leaf_node(rsp, rnp) { 497 rcu_for_each_leaf_node(rnp) {
543 ndetected += rcu_print_task_exp_stall(rnp); 498 ndetected += rcu_print_task_exp_stall(rnp);
544 for_each_leaf_node_possible_cpu(rnp, cpu) { 499 for_each_leaf_node_possible_cpu(rnp, cpu) {
545 struct rcu_data *rdp; 500 struct rcu_data *rdp;
@@ -548,7 +503,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
548 if (!(rnp->expmask & mask)) 503 if (!(rnp->expmask & mask))
549 continue; 504 continue;
550 ndetected++; 505 ndetected++;
551 rdp = per_cpu_ptr(rsp->rda, cpu); 506 rdp = per_cpu_ptr(&rcu_data, cpu);
552 pr_cont(" %d-%c%c%c", cpu, 507 pr_cont(" %d-%c%c%c", cpu,
553 "O."[!!cpu_online(cpu)], 508 "O."[!!cpu_online(cpu)],
554 "o."[!!(rdp->grpmask & rnp->expmaskinit)], 509 "o."[!!(rdp->grpmask & rnp->expmaskinit)],
@@ -556,11 +511,11 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
556 } 511 }
557 } 512 }
558 pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n", 513 pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
559 jiffies - jiffies_start, rsp->expedited_sequence, 514 jiffies - jiffies_start, rcu_state.expedited_sequence,
560 rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]); 515 rnp_root->expmask, ".T"[!!rnp_root->exp_tasks]);
561 if (ndetected) { 516 if (ndetected) {
562 pr_err("blocking rcu_node structures:"); 517 pr_err("blocking rcu_node structures:");
563 rcu_for_each_node_breadth_first(rsp, rnp) { 518 rcu_for_each_node_breadth_first(rnp) {
564 if (rnp == rnp_root) 519 if (rnp == rnp_root)
565 continue; /* printed unconditionally */ 520 continue; /* printed unconditionally */
566 if (sync_rcu_preempt_exp_done_unlocked(rnp)) 521 if (sync_rcu_preempt_exp_done_unlocked(rnp))
@@ -572,7 +527,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
572 } 527 }
573 pr_cont("\n"); 528 pr_cont("\n");
574 } 529 }
575 rcu_for_each_leaf_node(rsp, rnp) { 530 rcu_for_each_leaf_node(rnp) {
576 for_each_leaf_node_possible_cpu(rnp, cpu) { 531 for_each_leaf_node_possible_cpu(rnp, cpu) {
577 mask = leaf_node_cpu_bit(rnp, cpu); 532 mask = leaf_node_cpu_bit(rnp, cpu);
578 if (!(rnp->expmask & mask)) 533 if (!(rnp->expmask & mask))
@@ -590,21 +545,21 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
590 * grace period. Also update all the ->exp_seq_rq counters as needed 545 * grace period. Also update all the ->exp_seq_rq counters as needed
591 * in order to avoid counter-wrap problems. 546 * in order to avoid counter-wrap problems.
592 */ 547 */
593static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) 548static void rcu_exp_wait_wake(unsigned long s)
594{ 549{
595 struct rcu_node *rnp; 550 struct rcu_node *rnp;
596 551
597 synchronize_sched_expedited_wait(rsp); 552 synchronize_sched_expedited_wait();
598 rcu_exp_gp_seq_end(rsp); 553 rcu_exp_gp_seq_end();
599 trace_rcu_exp_grace_period(rsp->name, s, TPS("end")); 554 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("end"));
600 555
601 /* 556 /*
602 * Switch over to wakeup mode, allowing the next GP, but -only- the 557 * Switch over to wakeup mode, allowing the next GP, but -only- the
603 * next GP, to proceed. 558 * next GP, to proceed.
604 */ 559 */
605 mutex_lock(&rsp->exp_wake_mutex); 560 mutex_lock(&rcu_state.exp_wake_mutex);
606 561
607 rcu_for_each_node_breadth_first(rsp, rnp) { 562 rcu_for_each_node_breadth_first(rnp) {
608 if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) { 563 if (ULONG_CMP_LT(READ_ONCE(rnp->exp_seq_rq), s)) {
609 spin_lock(&rnp->exp_lock); 564 spin_lock(&rnp->exp_lock);
610 /* Recheck, avoid hang in case someone just arrived. */ 565 /* Recheck, avoid hang in case someone just arrived. */
@@ -613,24 +568,23 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
613 spin_unlock(&rnp->exp_lock); 568 spin_unlock(&rnp->exp_lock);
614 } 569 }
615 smp_mb(); /* All above changes before wakeup. */ 570 smp_mb(); /* All above changes before wakeup. */
616 wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]); 571 wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rcu_state.expedited_sequence) & 0x3]);
617 } 572 }
618 trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); 573 trace_rcu_exp_grace_period(rcu_state.name, s, TPS("endwake"));
619 mutex_unlock(&rsp->exp_wake_mutex); 574 mutex_unlock(&rcu_state.exp_wake_mutex);
620} 575}
621 576
622/* 577/*
623 * Common code to drive an expedited grace period forward, used by 578 * Common code to drive an expedited grace period forward, used by
624 * workqueues and mid-boot-time tasks. 579 * workqueues and mid-boot-time tasks.
625 */ 580 */
626static void rcu_exp_sel_wait_wake(struct rcu_state *rsp, 581static void rcu_exp_sel_wait_wake(smp_call_func_t func, unsigned long s)
627 smp_call_func_t func, unsigned long s)
628{ 582{
629 /* Initialize the rcu_node tree in preparation for the wait. */ 583 /* Initialize the rcu_node tree in preparation for the wait. */
630 sync_rcu_exp_select_cpus(rsp, func); 584 sync_rcu_exp_select_cpus(func);
631 585
632 /* Wait and clean up, including waking everyone. */ 586 /* Wait and clean up, including waking everyone. */
633 rcu_exp_wait_wake(rsp, s); 587 rcu_exp_wait_wake(s);
634} 588}
635 589
636/* 590/*
@@ -641,15 +595,14 @@ static void wait_rcu_exp_gp(struct work_struct *wp)
641 struct rcu_exp_work *rewp; 595 struct rcu_exp_work *rewp;
642 596
643 rewp = container_of(wp, struct rcu_exp_work, rew_work); 597 rewp = container_of(wp, struct rcu_exp_work, rew_work);
644 rcu_exp_sel_wait_wake(rewp->rew_rsp, rewp->rew_func, rewp->rew_s); 598 rcu_exp_sel_wait_wake(rewp->rew_func, rewp->rew_s);
645} 599}
646 600
647/* 601/*
648 * Given an rcu_state pointer and a smp_call_function() handler, kick 602 * Given a smp_call_function() handler, kick off the specified
649 * off the specified flavor of expedited grace period. 603 * implementation of expedited grace period.
650 */ 604 */
651static void _synchronize_rcu_expedited(struct rcu_state *rsp, 605static void _synchronize_rcu_expedited(smp_call_func_t func)
652 smp_call_func_t func)
653{ 606{
654 struct rcu_data *rdp; 607 struct rcu_data *rdp;
655 struct rcu_exp_work rew; 608 struct rcu_exp_work rew;
@@ -658,71 +611,37 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
658 611
659 /* If expedited grace periods are prohibited, fall back to normal. */ 612 /* If expedited grace periods are prohibited, fall back to normal. */
660 if (rcu_gp_is_normal()) { 613 if (rcu_gp_is_normal()) {
661 wait_rcu_gp(rsp->call); 614 wait_rcu_gp(call_rcu);
662 return; 615 return;
663 } 616 }
664 617
665 /* Take a snapshot of the sequence number. */ 618 /* Take a snapshot of the sequence number. */
666 s = rcu_exp_gp_seq_snap(rsp); 619 s = rcu_exp_gp_seq_snap();
667 if (exp_funnel_lock(rsp, s)) 620 if (exp_funnel_lock(s))
668 return; /* Someone else did our work for us. */ 621 return; /* Someone else did our work for us. */
669 622
670 /* Ensure that load happens before action based on it. */ 623 /* Ensure that load happens before action based on it. */
671 if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { 624 if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) {
672 /* Direct call during scheduler init and early_initcalls(). */ 625 /* Direct call during scheduler init and early_initcalls(). */
673 rcu_exp_sel_wait_wake(rsp, func, s); 626 rcu_exp_sel_wait_wake(func, s);
674 } else { 627 } else {
675 /* Marshall arguments & schedule the expedited grace period. */ 628 /* Marshall arguments & schedule the expedited grace period. */
676 rew.rew_func = func; 629 rew.rew_func = func;
677 rew.rew_rsp = rsp;
678 rew.rew_s = s; 630 rew.rew_s = s;
679 INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); 631 INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp);
680 queue_work(rcu_gp_wq, &rew.rew_work); 632 queue_work(rcu_gp_wq, &rew.rew_work);
681 } 633 }
682 634
683 /* Wait for expedited grace period to complete. */ 635 /* Wait for expedited grace period to complete. */
684 rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); 636 rdp = per_cpu_ptr(&rcu_data, raw_smp_processor_id());
685 rnp = rcu_get_root(rsp); 637 rnp = rcu_get_root();
686 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], 638 wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
687 sync_exp_work_done(rsp, s)); 639 sync_exp_work_done(s));
688 smp_mb(); /* Workqueue actions happen before return. */ 640 smp_mb(); /* Workqueue actions happen before return. */
689 641
690 /* Let the next expedited grace period start. */ 642 /* Let the next expedited grace period start. */
691 mutex_unlock(&rsp->exp_mutex); 643 mutex_unlock(&rcu_state.exp_mutex);
692}
693
694/**
695 * synchronize_sched_expedited - Brute-force RCU-sched grace period
696 *
697 * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
698 * approach to force the grace period to end quickly. This consumes
699 * significant time on all CPUs and is unfriendly to real-time workloads,
700 * so is thus not recommended for any sort of common-case code. In fact,
701 * if you are using synchronize_sched_expedited() in a loop, please
702 * restructure your code to batch your updates, and then use a single
703 * synchronize_sched() instead.
704 *
705 * This implementation can be thought of as an application of sequence
706 * locking to expedited grace periods, but using the sequence counter to
707 * determine when someone else has already done the work instead of for
708 * retrying readers.
709 */
710void synchronize_sched_expedited(void)
711{
712 struct rcu_state *rsp = &rcu_sched_state;
713
714 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
715 lock_is_held(&rcu_lock_map) ||
716 lock_is_held(&rcu_sched_lock_map),
717 "Illegal synchronize_sched_expedited() in RCU read-side critical section");
718
719 /* If only one CPU, this is automatically a grace period. */
720 if (rcu_blocking_is_gp())
721 return;
722
723 _synchronize_rcu_expedited(rsp, sync_sched_exp_handler);
724} 644}
725EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
726 645
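[Illustrative aside] _synchronize_rcu_expedited() above marshals its arguments into an on-stack work item, queues it, and then sleeps until the work-done check for its sequence snapshot succeeds. As a loose user-space analogue of that on-stack-request-plus-completion-wait shape, using POSIX threads instead of kernel workqueues (struct exp_request and exp_worker are invented for this sketch; compile with -pthread):

/* Loose analogue of an on-stack request handed to a worker and waited on.
 * Illustration only, not the kernel API. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct exp_request {
	unsigned long s;		/* snapshot the caller waits on */
	bool done;
	pthread_mutex_t lock;
	pthread_cond_t cv;
};

static void *exp_worker(void *arg)
{
	struct exp_request *req = arg;

	/* ... the expedited work for req->s would happen here ... */
	pthread_mutex_lock(&req->lock);
	req->done = true;
	pthread_cond_signal(&req->cv);
	pthread_mutex_unlock(&req->lock);
	return NULL;
}

int main(void)
{
	struct exp_request req = { .s = 2, .done = false };
	pthread_t tid;

	pthread_mutex_init(&req.lock, NULL);
	pthread_cond_init(&req.cv, NULL);
	pthread_create(&tid, NULL, exp_worker, &req);	/* "queue" the on-stack request */
	pthread_mutex_lock(&req.lock);
	while (!req.done)				/* wait for completion */
		pthread_cond_wait(&req.cv, &req.lock);
	pthread_mutex_unlock(&req.lock);
	pthread_join(tid, NULL);
	printf("expedited request %lu completed\n", req.s);
	return 0;
}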
727#ifdef CONFIG_PREEMPT_RCU 646#ifdef CONFIG_PREEMPT_RCU
728 647
@@ -733,34 +652,78 @@ EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
733 * ->expmask fields in the rcu_node tree. Otherwise, immediately 652 * ->expmask fields in the rcu_node tree. Otherwise, immediately
734 * report the quiescent state. 653 * report the quiescent state.
735 */ 654 */
736static void sync_rcu_exp_handler(void *info) 655static void sync_rcu_exp_handler(void *unused)
737{ 656{
738 struct rcu_data *rdp; 657 unsigned long flags;
739 struct rcu_state *rsp = info; 658 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
659 struct rcu_node *rnp = rdp->mynode;
740 struct task_struct *t = current; 660 struct task_struct *t = current;
741 661
742 /* 662 /*
743 * Within an RCU read-side critical section, request that the next 663 * First, the common case of not being in an RCU read-side
744 * rcu_read_unlock() report. Unless this RCU read-side critical 664 * critical section. If also enabled or idle, immediately
745 * section has already blocked, in which case it is already set 665 * report the quiescent state, otherwise defer.
746 * up for the expedited grace period to wait on it.
747 */ 666 */
748 if (t->rcu_read_lock_nesting > 0 && 667 if (!t->rcu_read_lock_nesting) {
749 !t->rcu_read_unlock_special.b.blocked) { 668 if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
750 t->rcu_read_unlock_special.b.exp_need_qs = true; 669 rcu_dynticks_curr_cpu_in_eqs()) {
670 rcu_report_exp_rdp(rdp);
671 } else {
672 rdp->deferred_qs = true;
673 set_tsk_need_resched(t);
674 set_preempt_need_resched();
675 }
751 return; 676 return;
752 } 677 }
753 678
754 /* 679 /*
755 * We are either exiting an RCU read-side critical section (negative 680 * Second, the less-common case of being in an RCU read-side
756 * values of t->rcu_read_lock_nesting) or are not in one at all 681 * critical section. In this case we can count on a future
757 * (zero value of t->rcu_read_lock_nesting). Or we are in an RCU 682 * rcu_read_unlock(). However, this rcu_read_unlock() might
758 * read-side critical section that blocked before this expedited 683 * execute on some other CPU, but in that case there will be
759 * grace period started. Either way, we can immediately report 684 * a future context switch. Either way, if the expedited
760 * the quiescent state. 685 * grace period is still waiting on this CPU, set ->deferred_qs
686 * so that the eventual quiescent state will be reported.
687 * Note that there is a large group of race conditions that
688 * can have caused this quiescent state to already have been
689 * reported, so we really do need to check ->expmask.
690 */
691 if (t->rcu_read_lock_nesting > 0) {
692 raw_spin_lock_irqsave_rcu_node(rnp, flags);
693 if (rnp->expmask & rdp->grpmask)
694 rdp->deferred_qs = true;
695 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
696 }
697
698 /*
699 * The final and least likely case is where the interrupted
700 * code was just about to or just finished exiting the RCU-preempt
701 * read-side critical section, and no, we can't tell which.
702 * So either way, set ->deferred_qs to flag later code that
703 * a quiescent state is required.
704 *
705 * If the CPU is fully enabled (or if some buggy RCU-preempt
706 * read-side critical section is being used from idle), just
 707 * invoke rcu_preempt_deferred_qs() to immediately report the
708 * quiescent state. We cannot use rcu_read_unlock_special()
709 * because we are in an interrupt handler, which will cause that
710 * function to take an early exit without doing anything.
711 *
712 * Otherwise, force a context switch after the CPU enables everything.
761 */ 713 */
762 rdp = this_cpu_ptr(rsp->rda); 714 rdp->deferred_qs = true;
763 rcu_report_exp_rdp(rsp, rdp, true); 715 if (!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)) ||
716 WARN_ON_ONCE(rcu_dynticks_curr_cpu_in_eqs())) {
717 rcu_preempt_deferred_qs(t);
718 } else {
719 set_tsk_need_resched(t);
720 set_preempt_need_resched();
721 }
722}
723
724/* PREEMPT=y, so no PREEMPT=n expedited grace period to clean up after. */
725static void sync_sched_exp_online_cleanup(int cpu)
726{
764} 727}
765 728
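[Illustrative aside] sync_rcu_exp_handler() above either reports the quiescent state on the spot or, when the interrupted context cannot safely do so, records a deferred request in ->deferred_qs and asks for a reschedule so that a later safe point performs the report. The same defer-to-the-next-safe-point shape, reduced to a tiny cooperative user-space sketch with invented names:

/* Tiny model of deferring work flagged in an unsafe context until the
 * next explicitly safe point.  Illustration only. */
#include <stdbool.h>
#include <stdio.h>

static bool deferred_qs;	/* set where reporting is not allowed */
static int nesting;		/* >0 models "inside a read-side section" */

static void handler(void)	/* models the IPI handler */
{
	if (nesting == 0)
		printf("report quiescent state immediately\n");
	else
		deferred_qs = true;	/* report later, at a safe point */
}

static void safe_point(void)	/* models unlock/context-switch paths */
{
	if (deferred_qs) {
		deferred_qs = false;
		printf("report deferred quiescent state\n");
	}
}

int main(void)
{
	nesting = 1;		/* inside a critical section */
	handler();		/* cannot report now, defers */
	nesting = 0;		/* critical section ends */
	safe_point();		/* deferred report happens here */

	handler();		/* now reports immediately */
	return 0;
}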
766/** 729/**
@@ -780,11 +743,11 @@ static void sync_rcu_exp_handler(void *info)
780 * you are using synchronize_rcu_expedited() in a loop, please restructure 743 * you are using synchronize_rcu_expedited() in a loop, please restructure
 781 * your code to batch your updates, and then use a single synchronize_rcu() 744 * your code to batch your updates, and then use a single synchronize_rcu()
782 * instead. 745 * instead.
746 *
747 * This has the same semantics as (but is more brutal than) synchronize_rcu().
783 */ 748 */
784void synchronize_rcu_expedited(void) 749void synchronize_rcu_expedited(void)
785{ 750{
786 struct rcu_state *rsp = rcu_state_p;
787
788 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || 751 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
789 lock_is_held(&rcu_lock_map) || 752 lock_is_held(&rcu_lock_map) ||
790 lock_is_held(&rcu_sched_lock_map), 753 lock_is_held(&rcu_sched_lock_map),
@@ -792,19 +755,82 @@ void synchronize_rcu_expedited(void)
792 755
793 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) 756 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
794 return; 757 return;
795 _synchronize_rcu_expedited(rsp, sync_rcu_exp_handler); 758 _synchronize_rcu_expedited(sync_rcu_exp_handler);
796} 759}
797EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 760EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
798 761
799#else /* #ifdef CONFIG_PREEMPT_RCU */ 762#else /* #ifdef CONFIG_PREEMPT_RCU */
800 763
764/* Invoked on each online non-idle CPU for expedited quiescent state. */
765static void sync_sched_exp_handler(void *unused)
766{
767 struct rcu_data *rdp;
768 struct rcu_node *rnp;
769
770 rdp = this_cpu_ptr(&rcu_data);
771 rnp = rdp->mynode;
772 if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
773 __this_cpu_read(rcu_data.cpu_no_qs.b.exp))
774 return;
775 if (rcu_is_cpu_rrupt_from_idle()) {
776 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
777 return;
778 }
779 __this_cpu_write(rcu_data.cpu_no_qs.b.exp, true);
780 /* Store .exp before .rcu_urgent_qs. */
781 smp_store_release(this_cpu_ptr(&rcu_data.rcu_urgent_qs), true);
782 set_tsk_need_resched(current);
783 set_preempt_need_resched();
784}
785
786/* Send IPI for expedited cleanup if needed at end of CPU-hotplug operation. */
787static void sync_sched_exp_online_cleanup(int cpu)
788{
789 struct rcu_data *rdp;
790 int ret;
791 struct rcu_node *rnp;
792
793 rdp = per_cpu_ptr(&rcu_data, cpu);
794 rnp = rdp->mynode;
795 if (!(READ_ONCE(rnp->expmask) & rdp->grpmask))
796 return;
797 ret = smp_call_function_single(cpu, sync_sched_exp_handler, NULL, 0);
798 WARN_ON_ONCE(ret);
799}
800
801/* 801/*
802 * Wait for an rcu-preempt grace period, but make it happen quickly. 802 * Because a context switch is a grace period for !PREEMPT, any
803 * But because preemptible RCU does not exist, map to rcu-sched. 803 * blocking grace-period wait automatically implies a grace period if
 804 * there is only one CPU online at any point in time during execution of
805 * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to
806 * occasionally incorrectly indicate that there are multiple CPUs online
807 * when there was in fact only one the whole time, as this just adds some
808 * overhead: RCU still operates correctly.
804 */ 809 */
810static int rcu_blocking_is_gp(void)
811{
812 int ret;
813
814 might_sleep(); /* Check for RCU read-side critical section. */
815 preempt_disable();
816 ret = num_online_cpus() <= 1;
817 preempt_enable();
818 return ret;
819}
820
821/* PREEMPT=n implementation of synchronize_rcu_expedited(). */
805void synchronize_rcu_expedited(void) 822void synchronize_rcu_expedited(void)
806{ 823{
807 synchronize_sched_expedited(); 824 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
825 lock_is_held(&rcu_lock_map) ||
826 lock_is_held(&rcu_sched_lock_map),
827 "Illegal synchronize_rcu_expedited() in RCU read-side critical section");
828
829 /* If only one CPU, this is automatically a grace period. */
830 if (rcu_blocking_is_gp())
831 return;
832
833 _synchronize_rcu_expedited(sync_sched_exp_handler);
808} 834}
809EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); 835EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
810 836
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a97c20ea9bce..05915e536336 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -38,8 +38,7 @@
38#include "../locking/rtmutex_common.h" 38#include "../locking/rtmutex_common.h"
39 39
40/* 40/*
41 * Control variables for per-CPU and per-rcu_node kthreads. These 41 * Control variables for per-CPU and per-rcu_node kthreads.
42 * handle all flavors of RCU.
43 */ 42 */
44static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); 43static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task);
45DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); 44DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
@@ -106,6 +105,8 @@ static void __init rcu_bootup_announce_oddness(void)
106 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs); 105 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
107 if (jiffies_till_next_fqs != ULONG_MAX) 106 if (jiffies_till_next_fqs != ULONG_MAX)
108 pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs); 107 pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
108 if (jiffies_till_sched_qs != ULONG_MAX)
109 pr_info("\tBoot-time adjustment of scheduler-enlistment delay to %ld jiffies.\n", jiffies_till_sched_qs);
109 if (rcu_kick_kthreads) 110 if (rcu_kick_kthreads)
110 pr_info("\tKick kthreads if too-long grace period.\n"); 111 pr_info("\tKick kthreads if too-long grace period.\n");
111 if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD)) 112 if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
@@ -123,12 +124,7 @@ static void __init rcu_bootup_announce_oddness(void)
123 124
124#ifdef CONFIG_PREEMPT_RCU 125#ifdef CONFIG_PREEMPT_RCU
125 126
126RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu); 127static void rcu_report_exp_rnp(struct rcu_node *rnp, bool wake);
127static struct rcu_state *const rcu_state_p = &rcu_preempt_state;
128static struct rcu_data __percpu *const rcu_data_p = &rcu_preempt_data;
129
130static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
131 bool wake);
132static void rcu_read_unlock_special(struct task_struct *t); 128static void rcu_read_unlock_special(struct task_struct *t);
133 129
134/* 130/*
@@ -284,13 +280,10 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
284 * no need to check for a subsequent expedited GP. (Though we are 280 * no need to check for a subsequent expedited GP. (Though we are
285 * still in a quiescent state in any case.) 281 * still in a quiescent state in any case.)
286 */ 282 */
287 if (blkd_state & RCU_EXP_BLKD && 283 if (blkd_state & RCU_EXP_BLKD && rdp->deferred_qs)
288 t->rcu_read_unlock_special.b.exp_need_qs) { 284 rcu_report_exp_rdp(rdp);
289 t->rcu_read_unlock_special.b.exp_need_qs = false; 285 else
290 rcu_report_exp_rdp(rdp->rsp, rdp, true); 286 WARN_ON_ONCE(rdp->deferred_qs);
291 } else {
292 WARN_ON_ONCE(t->rcu_read_unlock_special.b.exp_need_qs);
293 }
294} 287}
295 288
296/* 289/*
@@ -306,15 +299,15 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
306 * 299 *
307 * Callers to this function must disable preemption. 300 * Callers to this function must disable preemption.
308 */ 301 */
309static void rcu_preempt_qs(void) 302static void rcu_qs(void)
310{ 303{
311 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n"); 304 RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!\n");
312 if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { 305 if (__this_cpu_read(rcu_data.cpu_no_qs.s)) {
313 trace_rcu_grace_period(TPS("rcu_preempt"), 306 trace_rcu_grace_period(TPS("rcu_preempt"),
314 __this_cpu_read(rcu_data_p->gp_seq), 307 __this_cpu_read(rcu_data.gp_seq),
315 TPS("cpuqs")); 308 TPS("cpuqs"));
316 __this_cpu_write(rcu_data_p->cpu_no_qs.b.norm, false); 309 __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
317 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ 310 barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */
318 current->rcu_read_unlock_special.b.need_qs = false; 311 current->rcu_read_unlock_special.b.need_qs = false;
319 } 312 }
320} 313}
@@ -332,19 +325,20 @@ static void rcu_preempt_qs(void)
332 * 325 *
333 * Caller must disable interrupts. 326 * Caller must disable interrupts.
334 */ 327 */
335static void rcu_preempt_note_context_switch(bool preempt) 328void rcu_note_context_switch(bool preempt)
336{ 329{
337 struct task_struct *t = current; 330 struct task_struct *t = current;
338 struct rcu_data *rdp; 331 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
339 struct rcu_node *rnp; 332 struct rcu_node *rnp;
340 333
334 barrier(); /* Avoid RCU read-side critical sections leaking down. */
335 trace_rcu_utilization(TPS("Start context switch"));
341 lockdep_assert_irqs_disabled(); 336 lockdep_assert_irqs_disabled();
342 WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0); 337 WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
343 if (t->rcu_read_lock_nesting > 0 && 338 if (t->rcu_read_lock_nesting > 0 &&
344 !t->rcu_read_unlock_special.b.blocked) { 339 !t->rcu_read_unlock_special.b.blocked) {
345 340
346 /* Possibly blocking in an RCU read-side critical section. */ 341 /* Possibly blocking in an RCU read-side critical section. */
347 rdp = this_cpu_ptr(rcu_state_p->rda);
348 rnp = rdp->mynode; 342 rnp = rdp->mynode;
349 raw_spin_lock_rcu_node(rnp); 343 raw_spin_lock_rcu_node(rnp);
350 t->rcu_read_unlock_special.b.blocked = true; 344 t->rcu_read_unlock_special.b.blocked = true;
@@ -357,7 +351,7 @@ static void rcu_preempt_note_context_switch(bool preempt)
357 */ 351 */
358 WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0); 352 WARN_ON_ONCE((rdp->grpmask & rcu_rnp_online_cpus(rnp)) == 0);
359 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry)); 353 WARN_ON_ONCE(!list_empty(&t->rcu_node_entry));
360 trace_rcu_preempt_task(rdp->rsp->name, 354 trace_rcu_preempt_task(rcu_state.name,
361 t->pid, 355 t->pid,
362 (rnp->qsmask & rdp->grpmask) 356 (rnp->qsmask & rdp->grpmask)
363 ? rnp->gp_seq 357 ? rnp->gp_seq
@@ -371,6 +365,9 @@ static void rcu_preempt_note_context_switch(bool preempt)
371 * behalf of preempted instance of __rcu_read_unlock(). 365 * behalf of preempted instance of __rcu_read_unlock().
372 */ 366 */
373 rcu_read_unlock_special(t); 367 rcu_read_unlock_special(t);
368 rcu_preempt_deferred_qs(t);
369 } else {
370 rcu_preempt_deferred_qs(t);
374 } 371 }
375 372
376 /* 373 /*
@@ -382,8 +379,13 @@ static void rcu_preempt_note_context_switch(bool preempt)
382 * grace period, then the fact that the task has been enqueued 379 * grace period, then the fact that the task has been enqueued
383 * means that we continue to block the current grace period. 380 * means that we continue to block the current grace period.
384 */ 381 */
385 rcu_preempt_qs(); 382 rcu_qs();
383 if (rdp->deferred_qs)
384 rcu_report_exp_rdp(rdp);
385 trace_rcu_utilization(TPS("End context switch"));
386 barrier(); /* Avoid RCU read-side critical sections leaking up. */
386} 387}
388EXPORT_SYMBOL_GPL(rcu_note_context_switch);
387 389
388/* 390/*
389 * Check for preempted RCU readers blocking the current grace period 391 * Check for preempted RCU readers blocking the current grace period
@@ -464,74 +466,56 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
464} 466}
465 467
466/* 468/*
467 * Handle special cases during rcu_read_unlock(), such as needing to 469 * Report deferred quiescent states. The deferral time can
468 * notify RCU core processing or task having blocked during the RCU 470 * be quite short, for example, in the case of the call from
469 * read-side critical section. 471 * rcu_read_unlock_special().
470 */ 472 */
471static void rcu_read_unlock_special(struct task_struct *t) 473static void
474rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
472{ 475{
473 bool empty_exp; 476 bool empty_exp;
474 bool empty_norm; 477 bool empty_norm;
475 bool empty_exp_now; 478 bool empty_exp_now;
476 unsigned long flags;
477 struct list_head *np; 479 struct list_head *np;
478 bool drop_boost_mutex = false; 480 bool drop_boost_mutex = false;
479 struct rcu_data *rdp; 481 struct rcu_data *rdp;
480 struct rcu_node *rnp; 482 struct rcu_node *rnp;
481 union rcu_special special; 483 union rcu_special special;
482 484
483 /* NMI handlers cannot block and cannot safely manipulate state. */
484 if (in_nmi())
485 return;
486
487 local_irq_save(flags);
488
489 /* 485 /*
490 * If RCU core is waiting for this CPU to exit its critical section, 486 * If RCU core is waiting for this CPU to exit its critical section,
491 * report the fact that it has exited. Because irqs are disabled, 487 * report the fact that it has exited. Because irqs are disabled,
492 * t->rcu_read_unlock_special cannot change. 488 * t->rcu_read_unlock_special cannot change.
493 */ 489 */
494 special = t->rcu_read_unlock_special; 490 special = t->rcu_read_unlock_special;
491 rdp = this_cpu_ptr(&rcu_data);
492 if (!special.s && !rdp->deferred_qs) {
493 local_irq_restore(flags);
494 return;
495 }
495 if (special.b.need_qs) { 496 if (special.b.need_qs) {
496 rcu_preempt_qs(); 497 rcu_qs();
497 t->rcu_read_unlock_special.b.need_qs = false; 498 t->rcu_read_unlock_special.b.need_qs = false;
498 if (!t->rcu_read_unlock_special.s) { 499 if (!t->rcu_read_unlock_special.s && !rdp->deferred_qs) {
499 local_irq_restore(flags); 500 local_irq_restore(flags);
500 return; 501 return;
501 } 502 }
502 } 503 }
503 504
504 /* 505 /*
505 * Respond to a request for an expedited grace period, but only if 506 * Respond to a request by an expedited grace period for a
506 * we were not preempted, meaning that we were running on the same 507 * quiescent state from this CPU. Note that requests from
507 * CPU throughout. If we were preempted, the exp_need_qs flag 508 * tasks are handled when removing the task from the
508 * would have been cleared at the time of the first preemption, 509 * blocked-tasks list below.
509 * and the quiescent state would be reported when we were dequeued.
510 */ 510 */
511 if (special.b.exp_need_qs) { 511 if (rdp->deferred_qs) {
512 WARN_ON_ONCE(special.b.blocked); 512 rcu_report_exp_rdp(rdp);
513 t->rcu_read_unlock_special.b.exp_need_qs = false;
514 rdp = this_cpu_ptr(rcu_state_p->rda);
515 rcu_report_exp_rdp(rcu_state_p, rdp, true);
516 if (!t->rcu_read_unlock_special.s) { 513 if (!t->rcu_read_unlock_special.s) {
517 local_irq_restore(flags); 514 local_irq_restore(flags);
518 return; 515 return;
519 } 516 }
520 } 517 }
521 518
522 /* Hardware IRQ handlers cannot block, complain if they get here. */
523 if (in_irq() || in_serving_softirq()) {
524 lockdep_rcu_suspicious(__FILE__, __LINE__,
525 "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
526 pr_alert("->rcu_read_unlock_special: %#x (b: %d, enq: %d nq: %d)\n",
527 t->rcu_read_unlock_special.s,
528 t->rcu_read_unlock_special.b.blocked,
529 t->rcu_read_unlock_special.b.exp_need_qs,
530 t->rcu_read_unlock_special.b.need_qs);
531 local_irq_restore(flags);
532 return;
533 }
534
535 /* Clean up if blocked during RCU read-side critical section. */ 519 /* Clean up if blocked during RCU read-side critical section. */
536 if (special.b.blocked) { 520 if (special.b.blocked) {
537 t->rcu_read_unlock_special.b.blocked = false; 521 t->rcu_read_unlock_special.b.blocked = false;
@@ -582,7 +566,7 @@ static void rcu_read_unlock_special(struct task_struct *t)
582 rnp->grplo, 566 rnp->grplo,
583 rnp->grphi, 567 rnp->grphi,
584 !!rnp->gp_tasks); 568 !!rnp->gp_tasks);
585 rcu_report_unblock_qs_rnp(rcu_state_p, rnp, flags); 569 rcu_report_unblock_qs_rnp(rnp, flags);
586 } else { 570 } else {
587 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 571 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
588 } 572 }
@@ -596,13 +580,79 @@ static void rcu_read_unlock_special(struct task_struct *t)
596 * then we need to report up the rcu_node hierarchy. 580 * then we need to report up the rcu_node hierarchy.
597 */ 581 */
598 if (!empty_exp && empty_exp_now) 582 if (!empty_exp && empty_exp_now)
599 rcu_report_exp_rnp(rcu_state_p, rnp, true); 583 rcu_report_exp_rnp(rnp, true);
600 } else { 584 } else {
601 local_irq_restore(flags); 585 local_irq_restore(flags);
602 } 586 }
603} 587}
604 588
605/* 589/*
590 * Is a deferred quiescent-state pending, and are we also not in
591 * an RCU read-side critical section? It is the caller's responsibility
592 * to ensure it is otherwise safe to report any deferred quiescent
593 * states. The reason for this is that it is safe to report a
594 * quiescent state during context switch even though preemption
595 * is disabled. This function cannot be expected to understand these
596 * nuances, so the caller must handle them.
597 */
598static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
599{
600 return (this_cpu_ptr(&rcu_data)->deferred_qs ||
601 READ_ONCE(t->rcu_read_unlock_special.s)) &&
602 t->rcu_read_lock_nesting <= 0;
603}
604
605/*
606 * Report a deferred quiescent state if needed and safe to do so.
607 * As with rcu_preempt_need_deferred_qs(), "safe" involves only
608 * not being in an RCU read-side critical section. The caller must
609 * evaluate safety in terms of interrupt, softirq, and preemption
610 * disabling.
611 */
612static void rcu_preempt_deferred_qs(struct task_struct *t)
613{
614 unsigned long flags;
615 bool couldrecurse = t->rcu_read_lock_nesting >= 0;
616
617 if (!rcu_preempt_need_deferred_qs(t))
618 return;
619 if (couldrecurse)
620 t->rcu_read_lock_nesting -= INT_MIN;
621 local_irq_save(flags);
622 rcu_preempt_deferred_qs_irqrestore(t, flags);
623 if (couldrecurse)
624 t->rcu_read_lock_nesting += INT_MIN;
625}
626
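[Illustrative aside] rcu_preempt_deferred_qs() above temporarily biases ->rcu_read_lock_nesting by INT_MIN so that any code it reaches which re-enters the unlock path sees a negative nesting count and takes the early exit, preventing recursion; the bias is removed before returning. A stand-alone sketch of that counter-biasing idea follows, with invented names and the bias applied in the direction that stays within defined signed-integer behaviour in ordinary user-space C:

/* Temporarily bias a nesting counter so nested calls bail out early. */
#include <limits.h>
#include <stdio.h>

static int nesting;		/* >= 0 in normal operation */

static void unlock_path(void)
{
	if (nesting < 0)
		return;		/* biased: suppress recursion */
	printf("unlock path runs (nesting=%d)\n", nesting);
}

static void deferred_work(void)
{
	int couldrecurse = nesting >= 0;

	if (couldrecurse)
		nesting += INT_MIN;	/* bias: counter is now negative */
	unlock_path();			/* takes the early exit while biased */
	if (couldrecurse)
		nesting -= INT_MIN;	/* remove the bias */
}

int main(void)
{
	deferred_work();	/* unlock_path() is suppressed inside */
	unlock_path();		/* runs normally once unbiased */
	return 0;
}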
627/*
628 * Handle special cases during rcu_read_unlock(), such as needing to
629 * notify RCU core processing or task having blocked during the RCU
630 * read-side critical section.
631 */
632static void rcu_read_unlock_special(struct task_struct *t)
633{
634 unsigned long flags;
635 bool preempt_bh_were_disabled =
636 !!(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
637 bool irqs_were_disabled;
638
639 /* NMI handlers cannot block and cannot safely manipulate state. */
640 if (in_nmi())
641 return;
642
643 local_irq_save(flags);
644 irqs_were_disabled = irqs_disabled_flags(flags);
645 if ((preempt_bh_were_disabled || irqs_were_disabled) &&
646 t->rcu_read_unlock_special.b.blocked) {
647 /* Need to defer quiescent state until everything is enabled. */
648 raise_softirq_irqoff(RCU_SOFTIRQ);
649 local_irq_restore(flags);
650 return;
651 }
652 rcu_preempt_deferred_qs_irqrestore(t, flags);
653}
654
655/*
606 * Dump detailed information for all tasks blocking the current RCU 656 * Dump detailed information for all tasks blocking the current RCU
607 * grace period on the specified rcu_node structure. 657 * grace period on the specified rcu_node structure.
608 */ 658 */
@@ -633,12 +683,12 @@ static void rcu_print_detail_task_stall_rnp(struct rcu_node *rnp)
633 * Dump detailed information for all tasks blocking the current RCU 683 * Dump detailed information for all tasks blocking the current RCU
634 * grace period. 684 * grace period.
635 */ 685 */
636static void rcu_print_detail_task_stall(struct rcu_state *rsp) 686static void rcu_print_detail_task_stall(void)
637{ 687{
638 struct rcu_node *rnp = rcu_get_root(rsp); 688 struct rcu_node *rnp = rcu_get_root();
639 689
640 rcu_print_detail_task_stall_rnp(rnp); 690 rcu_print_detail_task_stall_rnp(rnp);
641 rcu_for_each_leaf_node(rsp, rnp) 691 rcu_for_each_leaf_node(rnp)
642 rcu_print_detail_task_stall_rnp(rnp); 692 rcu_print_detail_task_stall_rnp(rnp);
643} 693}
644 694
@@ -706,14 +756,13 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
706 * Also, if there are blocked tasks on the list, they automatically 756 * Also, if there are blocked tasks on the list, they automatically
707 * block the newly created grace period, so set up ->gp_tasks accordingly. 757 * block the newly created grace period, so set up ->gp_tasks accordingly.
708 */ 758 */
709static void 759static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
710rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
711{ 760{
712 struct task_struct *t; 761 struct task_struct *t;
713 762
714 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n"); 763 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
715 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp))) 764 if (WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)))
716 dump_blkd_tasks(rsp, rnp, 10); 765 dump_blkd_tasks(rnp, 10);
717 if (rcu_preempt_has_tasks(rnp) && 766 if (rcu_preempt_has_tasks(rnp) &&
718 (rnp->qsmaskinit || rnp->wait_blkd_tasks)) { 767 (rnp->qsmaskinit || rnp->wait_blkd_tasks)) {
719 rnp->gp_tasks = rnp->blkd_tasks.next; 768 rnp->gp_tasks = rnp->blkd_tasks.next;
@@ -732,62 +781,38 @@ rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
732 * 781 *
733 * Caller must disable hard irqs. 782 * Caller must disable hard irqs.
734 */ 783 */
735static void rcu_preempt_check_callbacks(void) 784static void rcu_flavor_check_callbacks(int user)
736{ 785{
737 struct rcu_state *rsp = &rcu_preempt_state;
738 struct task_struct *t = current; 786 struct task_struct *t = current;
739 787
740 if (t->rcu_read_lock_nesting == 0) { 788 if (user || rcu_is_cpu_rrupt_from_idle()) {
741 rcu_preempt_qs(); 789 rcu_note_voluntary_context_switch(current);
790 }
791 if (t->rcu_read_lock_nesting > 0 ||
792 (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
793 /* No QS, force context switch if deferred. */
794 if (rcu_preempt_need_deferred_qs(t)) {
795 set_tsk_need_resched(t);
796 set_preempt_need_resched();
797 }
798 } else if (rcu_preempt_need_deferred_qs(t)) {
799 rcu_preempt_deferred_qs(t); /* Report deferred QS. */
800 return;
801 } else if (!t->rcu_read_lock_nesting) {
802 rcu_qs(); /* Report immediate QS. */
742 return; 803 return;
743 } 804 }
805
806 /* If GP is oldish, ask for help from rcu_read_unlock_special(). */
744 if (t->rcu_read_lock_nesting > 0 && 807 if (t->rcu_read_lock_nesting > 0 &&
745 __this_cpu_read(rcu_data_p->core_needs_qs) && 808 __this_cpu_read(rcu_data.core_needs_qs) &&
746 __this_cpu_read(rcu_data_p->cpu_no_qs.b.norm) && 809 __this_cpu_read(rcu_data.cpu_no_qs.b.norm) &&
747 !t->rcu_read_unlock_special.b.need_qs && 810 !t->rcu_read_unlock_special.b.need_qs &&
748 time_after(jiffies, rsp->gp_start + HZ)) 811 time_after(jiffies, rcu_state.gp_start + HZ))
749 t->rcu_read_unlock_special.b.need_qs = true; 812 t->rcu_read_unlock_special.b.need_qs = true;
750} 813}
751 814
752/** 815/**
753 * call_rcu() - Queue an RCU callback for invocation after a grace period.
754 * @head: structure to be used for queueing the RCU updates.
755 * @func: actual callback function to be invoked after the grace period
756 *
757 * The callback function will be invoked some time after a full grace
758 * period elapses, in other words after all pre-existing RCU read-side
759 * critical sections have completed. However, the callback function
760 * might well execute concurrently with RCU read-side critical sections
761 * that started after call_rcu() was invoked. RCU read-side critical
762 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
763 * and may be nested.
764 *
765 * Note that all CPUs must agree that the grace period extended beyond
766 * all pre-existing RCU read-side critical section. On systems with more
767 * than one CPU, this means that when "func()" is invoked, each CPU is
768 * guaranteed to have executed a full memory barrier since the end of its
769 * last RCU read-side critical section whose beginning preceded the call
770 * to call_rcu(). It also means that each CPU executing an RCU read-side
771 * critical section that continues beyond the start of "func()" must have
772 * executed a memory barrier after the call_rcu() but before the beginning
773 * of that RCU read-side critical section. Note that these guarantees
774 * include CPUs that are offline, idle, or executing in user mode, as
775 * well as CPUs that are executing in the kernel.
776 *
777 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
778 * resulting RCU callback function "func()", then both CPU A and CPU B are
779 * guaranteed to execute a full memory barrier during the time interval
780 * between the call to call_rcu() and the invocation of "func()" -- even
781 * if CPU A and CPU B are the same CPU (but again only if the system has
782 * more than one CPU).
783 */
784void call_rcu(struct rcu_head *head, rcu_callback_t func)
785{
786 __call_rcu(head, func, rcu_state_p, -1, 0);
787}
788EXPORT_SYMBOL_GPL(call_rcu);
789
790/**
791 * synchronize_rcu - wait until a grace period has elapsed. 816 * synchronize_rcu - wait until a grace period has elapsed.
792 * 817 *
793 * Control will return to the caller some time after a full grace 818 * Control will return to the caller some time after a full grace
@@ -797,14 +822,28 @@ EXPORT_SYMBOL_GPL(call_rcu);
797 * concurrently with new RCU read-side critical sections that began while 822 * concurrently with new RCU read-side critical sections that began while
798 * synchronize_rcu() was waiting. RCU read-side critical sections are 823 * synchronize_rcu() was waiting. RCU read-side critical sections are
799 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 824 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
825 * In addition, regions of code across which interrupts, preemption, or
826 * softirqs have been disabled also serve as RCU read-side critical
827 * sections. This includes hardware interrupt handlers, softirq handlers,
828 * and NMI handlers.
829 *
830 * Note that this guarantee implies further memory-ordering guarantees.
831 * On systems with more than one CPU, when synchronize_rcu() returns,
832 * each CPU is guaranteed to have executed a full memory barrier since
833 * the end of its last RCU read-side critical section whose beginning
834 * preceded the call to synchronize_rcu(). In addition, each CPU having
835 * an RCU read-side critical section that extends beyond the return from
836 * synchronize_rcu() is guaranteed to have executed a full memory barrier
837 * after the beginning of synchronize_rcu() and before the beginning of
838 * that RCU read-side critical section. Note that these guarantees include
839 * CPUs that are offline, idle, or executing in user mode, as well as CPUs
840 * that are executing in the kernel.
800 * 841 *
801 * See the description of synchronize_sched() for more detailed 842 * Furthermore, if CPU A invoked synchronize_rcu(), which returned
802 * information on memory-ordering guarantees. However, please note 843 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
803 * that -only- the memory-ordering guarantees apply. For example, 844 * to have executed a full memory barrier during the execution of
804 * synchronize_rcu() is -not- guaranteed to wait on things like code 845 * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but
805 * protected by preempt_disable(), instead, synchronize_rcu() is -only- 846 * again only if the system has more than one CPU).
806 * guaranteed to wait on RCU read-side critical sections, that is, sections
807 * of code protected by rcu_read_lock().
808 */ 847 */
809void synchronize_rcu(void) 848void synchronize_rcu(void)
810{ 849{
@@ -821,28 +860,6 @@ void synchronize_rcu(void)
821} 860}
822EXPORT_SYMBOL_GPL(synchronize_rcu); 861EXPORT_SYMBOL_GPL(synchronize_rcu);
823 862
824/**
825 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
826 *
827 * Note that this primitive does not necessarily wait for an RCU grace period
828 * to complete. For example, if there are no RCU callbacks queued anywhere
829 * in the system, then rcu_barrier() is within its rights to return
830 * immediately, without waiting for anything, much less an RCU grace period.
831 */
832void rcu_barrier(void)
833{
834 _rcu_barrier(rcu_state_p);
835}
836EXPORT_SYMBOL_GPL(rcu_barrier);
837
838/*
839 * Initialize preemptible RCU's state structures.
840 */
841static void __init __rcu_init_preempt(void)
842{
843 rcu_init_one(rcu_state_p);
844}
845
846/* 863/*
847 * Check for a task exiting while in a preemptible-RCU read-side 864 * Check for a task exiting while in a preemptible-RCU read-side
848 * critical section, clean up if so. No need to issue warnings, 865 * critical section, clean up if so. No need to issue warnings,
@@ -859,6 +876,7 @@ void exit_rcu(void)
859 barrier(); 876 barrier();
860 t->rcu_read_unlock_special.b.blocked = true; 877 t->rcu_read_unlock_special.b.blocked = true;
861 __rcu_read_unlock(); 878 __rcu_read_unlock();
879 rcu_preempt_deferred_qs(current);
862} 880}
863 881
864/* 882/*
@@ -866,7 +884,7 @@ void exit_rcu(void)
866 * specified number of elements. 884 * specified number of elements.
867 */ 885 */
868static void 886static void
869dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck) 887dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
870{ 888{
871 int cpu; 889 int cpu;
872 int i; 890 int i;
@@ -893,7 +911,7 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
893 } 911 }
894 pr_cont("\n"); 912 pr_cont("\n");
895 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) { 913 for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
896 rdp = per_cpu_ptr(rsp->rda, cpu); 914 rdp = per_cpu_ptr(&rcu_data, cpu);
897 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp)); 915 onl = !!(rdp->grpmask & rcu_rnp_online_cpus(rnp));
898 pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n", 916 pr_info("\t%d: %c online: %ld(%d) offline: %ld(%d)\n",
899 cpu, ".o"[onl], 917 cpu, ".o"[onl],
@@ -904,8 +922,6 @@ dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck)
904 922
905#else /* #ifdef CONFIG_PREEMPT_RCU */ 923#else /* #ifdef CONFIG_PREEMPT_RCU */
906 924
907static struct rcu_state *const rcu_state_p = &rcu_sched_state;
908
909/* 925/*
910 * Tell them what RCU they are running. 926 * Tell them what RCU they are running.
911 */ 927 */
@@ -916,14 +932,85 @@ static void __init rcu_bootup_announce(void)
916} 932}
917 933
918/* 934/*
919 * Because preemptible RCU does not exist, we never have to check for 935 * Note a quiescent state for PREEMPT=n. Because we do not need to know
920 * CPUs being in quiescent states. 936 * how many quiescent states passed, just if there was at least one since
937 * the start of the grace period, this just sets a flag. The caller must
938 * have disabled preemption.
921 */ 939 */
922static void rcu_preempt_note_context_switch(bool preempt) 940static void rcu_qs(void)
923{ 941{
942 RCU_LOCKDEP_WARN(preemptible(), "rcu_qs() invoked with preemption enabled!!!");
943 if (!__this_cpu_read(rcu_data.cpu_no_qs.s))
944 return;
945 trace_rcu_grace_period(TPS("rcu_sched"),
946 __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs"));
947 __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false);
948 if (!__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
949 return;
950 __this_cpu_write(rcu_data.cpu_no_qs.b.exp, false);
951 rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
924} 952}
925 953
926/* 954/*
955 * Register an urgently needed quiescent state. If there is an
956 * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
957 * dyntick-idle quiescent state visible to other CPUs, which will in
958 * some cases serve for expedited as well as normal grace periods.
959 * Either way, register a lightweight quiescent state.
960 *
 961 * The barrier() calls are redundant in the common case when this is
 962 * called externally, but they are kept in case this function is called
 963 * from within this file.
964 *
965 */
966void rcu_all_qs(void)
967{
968 unsigned long flags;
969
970 if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
971 return;
972 preempt_disable();
973 /* Load rcu_urgent_qs before other flags. */
974 if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
975 preempt_enable();
976 return;
977 }
978 this_cpu_write(rcu_data.rcu_urgent_qs, false);
979 barrier(); /* Avoid RCU read-side critical sections leaking down. */
980 if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs))) {
981 local_irq_save(flags);
982 rcu_momentary_dyntick_idle();
983 local_irq_restore(flags);
984 }
985 rcu_qs();
986 barrier(); /* Avoid RCU read-side critical sections leaking up. */
987 preempt_enable();
988}
989EXPORT_SYMBOL_GPL(rcu_all_qs);
990
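For context, rcu_all_qs() is rarely called directly; on PREEMPT=n kernels it is typically reached via cond_resched(), which lets long-running kernel loops report quiescent states. A minimal sketch, with frob_one_item() and struct frob_ctx purely hypothetical:

	/* Hypothetical long-running loop that keeps grace periods moving. */
	static void frob_process_all(struct frob_ctx *ctx)
	{
		while (frob_one_item(ctx)) {
			/*
			 * On PREEMPT=n kernels, cond_resched() ends up in
			 * rcu_all_qs(), so this loop cannot stall RCU even
			 * if it runs for a long time without scheduling.
			 */
			cond_resched();
		}
	}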
991/*
992 * Note a PREEMPT=n context switch. The caller must have disabled interrupts.
993 */
994void rcu_note_context_switch(bool preempt)
995{
996 barrier(); /* Avoid RCU read-side critical sections leaking down. */
997 trace_rcu_utilization(TPS("Start context switch"));
998 rcu_qs();
999 /* Load rcu_urgent_qs before other flags. */
1000 if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs)))
1001 goto out;
1002 this_cpu_write(rcu_data.rcu_urgent_qs, false);
1003 if (unlikely(raw_cpu_read(rcu_data.rcu_need_heavy_qs)))
1004 rcu_momentary_dyntick_idle();
1005 if (!preempt)
1006 rcu_tasks_qs(current);
1007out:
1008 trace_rcu_utilization(TPS("End context switch"));
1009 barrier(); /* Avoid RCU read-side critical sections leaking up. */
1010}
1011EXPORT_SYMBOL_GPL(rcu_note_context_switch);
1012
1013/*
927 * Because preemptible RCU does not exist, there are never any preempted 1014 * Because preemptible RCU does not exist, there are never any preempted
928 * RCU readers. 1015 * RCU readers.
929 */ 1016 */
@@ -941,10 +1028,20 @@ static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
941} 1028}
942 1029
943/* 1030/*
1031 * Because there is no preemptible RCU, there can be no deferred quiescent
1032 * states.
1033 */
1034static bool rcu_preempt_need_deferred_qs(struct task_struct *t)
1035{
1036 return false;
1037}
1038static void rcu_preempt_deferred_qs(struct task_struct *t) { }
1039
1040/*
944 * Because preemptible RCU does not exist, we never have to check for 1041 * Because preemptible RCU does not exist, we never have to check for
945 * tasks blocked within RCU read-side critical sections. 1042 * tasks blocked within RCU read-side critical sections.
946 */ 1043 */
947static void rcu_print_detail_task_stall(struct rcu_state *rsp) 1044static void rcu_print_detail_task_stall(void)
948{ 1045{
949} 1046}
950 1047
@@ -972,36 +1069,54 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
972 * so there is no need to check for blocked tasks. So check only for 1069 * so there is no need to check for blocked tasks. So check only for
973 * bogus qsmask values. 1070 * bogus qsmask values.
974 */ 1071 */
975static void 1072static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
976rcu_preempt_check_blocked_tasks(struct rcu_state *rsp, struct rcu_node *rnp)
977{ 1073{
978 WARN_ON_ONCE(rnp->qsmask); 1074 WARN_ON_ONCE(rnp->qsmask);
979} 1075}
980 1076
981/* 1077/*
982 * Because preemptible RCU does not exist, it never has any callbacks 1078 * Check to see if this CPU is in a non-context-switch quiescent state
983 * to check. 1079 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1080 * Also schedule RCU core processing.
1081 *
1082 * This function must be called from hardirq context. It is normally
1083 * invoked from the scheduling-clock interrupt.
984 */ 1084 */
985static void rcu_preempt_check_callbacks(void) 1085static void rcu_flavor_check_callbacks(int user)
986{ 1086{
987} 1087 if (user || rcu_is_cpu_rrupt_from_idle()) {
988 1088
989/* 1089 /*
990 * Because preemptible RCU does not exist, rcu_barrier() is just 1090 * Get here if this CPU took its interrupt from user
991 * another name for rcu_barrier_sched(). 1091 * mode or from the idle loop, and if this is not a
992 */ 1092 * nested interrupt. In this case, the CPU is in
993void rcu_barrier(void) 1093 * a quiescent state, so note it.
994{ 1094 *
995 rcu_barrier_sched(); 1095 * No memory barrier is required here because rcu_qs()
1096 * references only CPU-local variables that other CPUs
1097 * neither access nor modify, at least not while the
1098 * corresponding CPU is online.
1099 */
1100
1101 rcu_qs();
1102 }
996} 1103}
997EXPORT_SYMBOL_GPL(rcu_barrier);
998 1104
999/* 1105/* PREEMPT=n implementation of synchronize_rcu(). */
1000 * Because preemptible RCU does not exist, it need not be initialized. 1106void synchronize_rcu(void)
1001 */
1002static void __init __rcu_init_preempt(void)
1003{ 1107{
1108 RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
1109 lock_is_held(&rcu_lock_map) ||
1110 lock_is_held(&rcu_sched_lock_map),
1111 "Illegal synchronize_rcu() in RCU read-side critical section");
1112 if (rcu_blocking_is_gp())
1113 return;
1114 if (rcu_gp_is_expedited())
1115 synchronize_rcu_expedited();
1116 else
1117 wait_rcu_gp(call_rcu);
1004} 1118}
1119EXPORT_SYMBOL_GPL(synchronize_rcu);
1005 1120
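Which branch the PREEMPT=n synchronize_rcu() above takes is decided by rcu_gp_is_expedited(). Code that temporarily needs faster grace periods can bracket a latency-sensitive region with rcu_expedite_gp()/rcu_unexpedite_gp(); the sketch below is illustrative only, with do_latency_critical_update() purely hypothetical:

	/* Hypothetical latency-sensitive reconfiguration path. */
	static void frob_fast_reconfigure(void)
	{
		rcu_expedite_gp();	/* bias grace periods toward the expedited path */
		do_latency_critical_update();
		synchronize_rcu();	/* likely serviced by synchronize_rcu_expedited() */
		rcu_unexpedite_gp();
	}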
1006/* 1121/*
1007 * Because preemptible RCU does not exist, tasks cannot possibly exit 1122 * Because preemptible RCU does not exist, tasks cannot possibly exit
@@ -1015,7 +1130,7 @@ void exit_rcu(void)
1015 * Dump the guaranteed-empty blocked-tasks state. Trust but verify. 1130 * Dump the guaranteed-empty blocked-tasks state. Trust but verify.
1016 */ 1131 */
1017static void 1132static void
1018dump_blkd_tasks(struct rcu_state *rsp, struct rcu_node *rnp, int ncheck) 1133dump_blkd_tasks(struct rcu_node *rnp, int ncheck)
1019{ 1134{
1020 WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks)); 1135 WARN_ON_ONCE(!list_empty(&rnp->blkd_tasks));
1021} 1136}
@@ -1212,21 +1327,20 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
1212 * already exist. We only create this kthread for preemptible RCU. 1327 * already exist. We only create this kthread for preemptible RCU.
1213 * Returns zero if all is well, a negated errno otherwise. 1328 * Returns zero if all is well, a negated errno otherwise.
1214 */ 1329 */
1215static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 1330static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp)
1216 struct rcu_node *rnp)
1217{ 1331{
1218 int rnp_index = rnp - &rsp->node[0]; 1332 int rnp_index = rnp - rcu_get_root();
1219 unsigned long flags; 1333 unsigned long flags;
1220 struct sched_param sp; 1334 struct sched_param sp;
1221 struct task_struct *t; 1335 struct task_struct *t;
1222 1336
1223 if (rcu_state_p != rsp) 1337 if (!IS_ENABLED(CONFIG_PREEMPT_RCU))
1224 return 0; 1338 return 0;
1225 1339
1226 if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0) 1340 if (!rcu_scheduler_fully_active || rcu_rnp_online_cpus(rnp) == 0)
1227 return 0; 1341 return 0;
1228 1342
1229 rsp->boost = 1; 1343 rcu_state.boost = 1;
1230 if (rnp->boost_kthread_task != NULL) 1344 if (rnp->boost_kthread_task != NULL)
1231 return 0; 1345 return 0;
1232 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1346 t = kthread_create(rcu_boost_kthread, (void *)rnp,
@@ -1244,9 +1358,7 @@ static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1244 1358
1245static void rcu_kthread_do_work(void) 1359static void rcu_kthread_do_work(void)
1246{ 1360{
1247 rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data)); 1361 rcu_do_batch(this_cpu_ptr(&rcu_data));
1248 rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
1249 rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
1250} 1362}
1251 1363
1252static void rcu_cpu_kthread_setup(unsigned int cpu) 1364static void rcu_cpu_kthread_setup(unsigned int cpu)
@@ -1268,9 +1380,9 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu)
1268} 1380}
1269 1381
1270/* 1382/*
1271 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1383 * Per-CPU kernel thread that invokes RCU callbacks. This replaces
1272 * RCU softirq used in flavors and configurations of RCU that do not 1384 * the RCU softirq used in configurations of RCU that do not support RCU
1273 * support RCU priority boosting. 1385 * priority boosting.
1274 */ 1386 */
1275static void rcu_cpu_kthread(unsigned int cpu) 1387static void rcu_cpu_kthread(unsigned int cpu)
1276{ 1388{
@@ -1353,18 +1465,18 @@ static void __init rcu_spawn_boost_kthreads(void)
1353 for_each_possible_cpu(cpu) 1465 for_each_possible_cpu(cpu)
1354 per_cpu(rcu_cpu_has_work, cpu) = 0; 1466 per_cpu(rcu_cpu_has_work, cpu) = 0;
1355 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1467 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
1356 rcu_for_each_leaf_node(rcu_state_p, rnp) 1468 rcu_for_each_leaf_node(rnp)
1357 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1469 (void)rcu_spawn_one_boost_kthread(rnp);
1358} 1470}
1359 1471
1360static void rcu_prepare_kthreads(int cpu) 1472static void rcu_prepare_kthreads(int cpu)
1361{ 1473{
1362 struct rcu_data *rdp = per_cpu_ptr(rcu_state_p->rda, cpu); 1474 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
1363 struct rcu_node *rnp = rdp->mynode; 1475 struct rcu_node *rnp = rdp->mynode;
1364 1476
1365 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */ 1477 /* Fire up the incoming CPU's kthread and leaf rcu_node kthread. */
1366 if (rcu_scheduler_fully_active) 1478 if (rcu_scheduler_fully_active)
1367 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1479 (void)rcu_spawn_one_boost_kthread(rnp);
1368} 1480}
1369 1481
1370#else /* #ifdef CONFIG_RCU_BOOST */ 1482#else /* #ifdef CONFIG_RCU_BOOST */
@@ -1411,8 +1523,8 @@ static void rcu_prepare_kthreads(int cpu)
1411 * 1 if so. This function is part of the RCU implementation; it is -not- 1523 * 1 if so. This function is part of the RCU implementation; it is -not-
1412 * an exported member of the RCU API. 1524 * an exported member of the RCU API.
1413 * 1525 *
1414 * Because we do not have RCU_FAST_NO_HZ, just check whether this CPU needs 1526 * Because we do not have RCU_FAST_NO_HZ, just check whether or not this
1415 * any flavor of RCU. 1527 * CPU has RCU callbacks queued.
1416 */ 1528 */
1417int rcu_needs_cpu(u64 basemono, u64 *nextevt) 1529int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1418{ 1530{
@@ -1478,41 +1590,36 @@ static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
1478module_param(rcu_idle_lazy_gp_delay, int, 0644); 1590module_param(rcu_idle_lazy_gp_delay, int, 0644);
1479 1591
1480/* 1592/*
1481 * Try to advance callbacks for all flavors of RCU on the current CPU, but 1593 * Try to advance callbacks on the current CPU, but only if it has been
1482 * only if it has been a while since the last time we did so. Afterwards, 1594 * a while since the last time we did so. Afterwards, if there are any
1483 * if there are any callbacks ready for immediate invocation, return true. 1595 * callbacks ready for immediate invocation, return true.
1484 */ 1596 */
1485static bool __maybe_unused rcu_try_advance_all_cbs(void) 1597static bool __maybe_unused rcu_try_advance_all_cbs(void)
1486{ 1598{
1487 bool cbs_ready = false; 1599 bool cbs_ready = false;
1488 struct rcu_data *rdp; 1600 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1489 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
1490 struct rcu_node *rnp; 1601 struct rcu_node *rnp;
1491 struct rcu_state *rsp;
1492 1602
1493 /* Exit early if we advanced recently. */ 1603 /* Exit early if we advanced recently. */
1494 if (jiffies == rdtp->last_advance_all) 1604 if (jiffies == rdp->last_advance_all)
1495 return false; 1605 return false;
1496 rdtp->last_advance_all = jiffies; 1606 rdp->last_advance_all = jiffies;
1497 1607
1498 for_each_rcu_flavor(rsp) { 1608 rnp = rdp->mynode;
1499 rdp = this_cpu_ptr(rsp->rda);
1500 rnp = rdp->mynode;
1501 1609
1502 /* 1610 /*
1503 * Don't bother checking unless a grace period has 1611 * Don't bother checking unless a grace period has
1504 * completed since we last checked and there are 1612 * completed since we last checked and there are
1505 * callbacks not yet ready to invoke. 1613 * callbacks not yet ready to invoke.
1506 */ 1614 */
1507 if ((rcu_seq_completed_gp(rdp->gp_seq, 1615 if ((rcu_seq_completed_gp(rdp->gp_seq,
1508 rcu_seq_current(&rnp->gp_seq)) || 1616 rcu_seq_current(&rnp->gp_seq)) ||
1509 unlikely(READ_ONCE(rdp->gpwrap))) && 1617 unlikely(READ_ONCE(rdp->gpwrap))) &&
1510 rcu_segcblist_pend_cbs(&rdp->cblist)) 1618 rcu_segcblist_pend_cbs(&rdp->cblist))
1511 note_gp_changes(rsp, rdp); 1619 note_gp_changes(rdp);
1512 1620
1513 if (rcu_segcblist_ready_cbs(&rdp->cblist)) 1621 if (rcu_segcblist_ready_cbs(&rdp->cblist))
1514 cbs_ready = true; 1622 cbs_ready = true;
1515 }
1516 return cbs_ready; 1623 return cbs_ready;
1517} 1624}
1518 1625
@@ -1526,16 +1633,16 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1526 */ 1633 */
1527int rcu_needs_cpu(u64 basemono, u64 *nextevt) 1634int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1528{ 1635{
1529 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 1636 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1530 unsigned long dj; 1637 unsigned long dj;
1531 1638
1532 lockdep_assert_irqs_disabled(); 1639 lockdep_assert_irqs_disabled();
1533 1640
1534 /* Snapshot to detect later posting of non-lazy callback. */ 1641 /* Snapshot to detect later posting of non-lazy callback. */
1535 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1642 rdp->nonlazy_posted_snap = rdp->nonlazy_posted;
1536 1643
1537 /* If no callbacks, RCU doesn't need the CPU. */ 1644 /* If no callbacks, RCU doesn't need the CPU. */
1538 if (!rcu_cpu_has_callbacks(&rdtp->all_lazy)) { 1645 if (!rcu_cpu_has_callbacks(&rdp->all_lazy)) {
1539 *nextevt = KTIME_MAX; 1646 *nextevt = KTIME_MAX;
1540 return 0; 1647 return 0;
1541 } 1648 }
@@ -1546,10 +1653,10 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1546 invoke_rcu_core(); 1653 invoke_rcu_core();
1547 return 1; 1654 return 1;
1548 } 1655 }
1549 rdtp->last_accelerate = jiffies; 1656 rdp->last_accelerate = jiffies;
1550 1657
1551 /* Request timer delay depending on laziness, and round. */ 1658 /* Request timer delay depending on laziness, and round. */
1552 if (!rdtp->all_lazy) { 1659 if (!rdp->all_lazy) {
1553 dj = round_up(rcu_idle_gp_delay + jiffies, 1660 dj = round_up(rcu_idle_gp_delay + jiffies,
1554 rcu_idle_gp_delay) - jiffies; 1661 rcu_idle_gp_delay) - jiffies;
1555 } else { 1662 } else {
@@ -1572,10 +1679,8 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1572static void rcu_prepare_for_idle(void) 1679static void rcu_prepare_for_idle(void)
1573{ 1680{
1574 bool needwake; 1681 bool needwake;
1575 struct rcu_data *rdp; 1682 struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
1576 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
1577 struct rcu_node *rnp; 1683 struct rcu_node *rnp;
1578 struct rcu_state *rsp;
1579 int tne; 1684 int tne;
1580 1685
1581 lockdep_assert_irqs_disabled(); 1686 lockdep_assert_irqs_disabled();
@@ -1584,10 +1689,10 @@ static void rcu_prepare_for_idle(void)
1584 1689
1585 /* Handle nohz enablement switches conservatively. */ 1690 /* Handle nohz enablement switches conservatively. */
1586 tne = READ_ONCE(tick_nohz_active); 1691 tne = READ_ONCE(tick_nohz_active);
1587 if (tne != rdtp->tick_nohz_enabled_snap) { 1692 if (tne != rdp->tick_nohz_enabled_snap) {
1588 if (rcu_cpu_has_callbacks(NULL)) 1693 if (rcu_cpu_has_callbacks(NULL))
1589 invoke_rcu_core(); /* force nohz to see update. */ 1694 invoke_rcu_core(); /* force nohz to see update. */
1590 rdtp->tick_nohz_enabled_snap = tne; 1695 rdp->tick_nohz_enabled_snap = tne;
1591 return; 1696 return;
1592 } 1697 }
1593 if (!tne) 1698 if (!tne)
@@ -1598,10 +1703,10 @@ static void rcu_prepare_for_idle(void)
1598 * callbacks, invoke RCU core for the side-effect of recalculating 1703 * callbacks, invoke RCU core for the side-effect of recalculating
1599 * idle duration on re-entry to idle. 1704 * idle duration on re-entry to idle.
1600 */ 1705 */
1601 if (rdtp->all_lazy && 1706 if (rdp->all_lazy &&
1602 rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { 1707 rdp->nonlazy_posted != rdp->nonlazy_posted_snap) {
1603 rdtp->all_lazy = false; 1708 rdp->all_lazy = false;
1604 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1709 rdp->nonlazy_posted_snap = rdp->nonlazy_posted;
1605 invoke_rcu_core(); 1710 invoke_rcu_core();
1606 return; 1711 return;
1607 } 1712 }
@@ -1610,19 +1715,16 @@ static void rcu_prepare_for_idle(void)
1610 * If we have not yet accelerated this jiffy, accelerate all 1715 * If we have not yet accelerated this jiffy, accelerate all
1611 * callbacks on this CPU. 1716 * callbacks on this CPU.
1612 */ 1717 */
1613 if (rdtp->last_accelerate == jiffies) 1718 if (rdp->last_accelerate == jiffies)
1614 return; 1719 return;
1615 rdtp->last_accelerate = jiffies; 1720 rdp->last_accelerate = jiffies;
1616 for_each_rcu_flavor(rsp) { 1721 if (rcu_segcblist_pend_cbs(&rdp->cblist)) {
1617 rdp = this_cpu_ptr(rsp->rda);
1618 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1619 continue;
1620 rnp = rdp->mynode; 1722 rnp = rdp->mynode;
1621 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ 1723 raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
1622 needwake = rcu_accelerate_cbs(rsp, rnp, rdp); 1724 needwake = rcu_accelerate_cbs(rnp, rdp);
1623 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ 1725 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
1624 if (needwake) 1726 if (needwake)
1625 rcu_gp_kthread_wake(rsp); 1727 rcu_gp_kthread_wake();
1626 } 1728 }
1627} 1729}
1628 1730
@@ -1650,104 +1752,23 @@ static void rcu_cleanup_after_idle(void)
1650 */ 1752 */
1651static void rcu_idle_count_callbacks_posted(void) 1753static void rcu_idle_count_callbacks_posted(void)
1652{ 1754{
1653 __this_cpu_add(rcu_dynticks.nonlazy_posted, 1); 1755 __this_cpu_add(rcu_data.nonlazy_posted, 1);
1654}
1655
1656/*
1657 * Data for flushing lazy RCU callbacks at OOM time.
1658 */
1659static atomic_t oom_callback_count;
1660static DECLARE_WAIT_QUEUE_HEAD(oom_callback_wq);
1661
1662/*
1663 * RCU OOM callback -- decrement the outstanding count and deliver the
1664 * wake-up if we are the last one.
1665 */
1666static void rcu_oom_callback(struct rcu_head *rhp)
1667{
1668 if (atomic_dec_and_test(&oom_callback_count))
1669 wake_up(&oom_callback_wq);
1670}
1671
1672/*
1673 * Post an rcu_oom_notify callback on the current CPU if it has at
1674 * least one lazy callback. This will unnecessarily post callbacks
1675 * to CPUs that already have a non-lazy callback at the end of their
1676 * callback list, but this is an infrequent operation, so accept some
1677 * extra overhead to keep things simple.
1678 */
1679static void rcu_oom_notify_cpu(void *unused)
1680{
1681 struct rcu_state *rsp;
1682 struct rcu_data *rdp;
1683
1684 for_each_rcu_flavor(rsp) {
1685 rdp = raw_cpu_ptr(rsp->rda);
1686 if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) {
1687 atomic_inc(&oom_callback_count);
1688 rsp->call(&rdp->oom_head, rcu_oom_callback);
1689 }
1690 }
1691}
1692
1693/*
1694 * If low on memory, ensure that each CPU has a non-lazy callback.
1695 * This will wake up CPUs that have only lazy callbacks, in turn
1696 * ensuring that they free up the corresponding memory in a timely manner.
1697 * Because an uncertain amount of memory will be freed in some uncertain
1698 * timeframe, we do not claim to have freed anything.
1699 */
1700static int rcu_oom_notify(struct notifier_block *self,
1701 unsigned long notused, void *nfreed)
1702{
1703 int cpu;
1704
1705 /* Wait for callbacks from earlier instance to complete. */
1706 wait_event(oom_callback_wq, atomic_read(&oom_callback_count) == 0);
1707 smp_mb(); /* Ensure callback reuse happens after callback invocation. */
1708
1709 /*
1710 * Prevent premature wakeup: ensure that all increments happen
1711 * before there is a chance of the counter reaching zero.
1712 */
1713 atomic_set(&oom_callback_count, 1);
1714
1715 for_each_online_cpu(cpu) {
1716 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1717 cond_resched_tasks_rcu_qs();
1718 }
1719
1720 /* Unconditionally decrement: no need to wake ourselves up. */
1721 atomic_dec(&oom_callback_count);
1722
1723 return NOTIFY_OK;
1724} 1756}
1725 1757
1726static struct notifier_block rcu_oom_nb = {
1727 .notifier_call = rcu_oom_notify
1728};
1729
1730static int __init rcu_register_oom_notifier(void)
1731{
1732 register_oom_notifier(&rcu_oom_nb);
1733 return 0;
1734}
1735early_initcall(rcu_register_oom_notifier);
1736
1737#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1758#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1738 1759
1739#ifdef CONFIG_RCU_FAST_NO_HZ 1760#ifdef CONFIG_RCU_FAST_NO_HZ
1740 1761
1741static void print_cpu_stall_fast_no_hz(char *cp, int cpu) 1762static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
1742{ 1763{
1743 struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); 1764 struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
1744 unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; 1765 unsigned long nlpd = rdp->nonlazy_posted - rdp->nonlazy_posted_snap;
1745 1766
1746 sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", 1767 sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
1747 rdtp->last_accelerate & 0xffff, jiffies & 0xffff, 1768 rdp->last_accelerate & 0xffff, jiffies & 0xffff,
1748 ulong2long(nlpd), 1769 ulong2long(nlpd),
1749 rdtp->all_lazy ? 'L' : '.', 1770 rdp->all_lazy ? 'L' : '.',
1750 rdtp->tick_nohz_enabled_snap ? '.' : 'D'); 1771 rdp->tick_nohz_enabled_snap ? '.' : 'D');
1751} 1772}
1752 1773
1753#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ 1774#else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -1768,21 +1789,19 @@ static void print_cpu_stall_info_begin(void)
1768/* 1789/*
1769 * Print out diagnostic information for the specified stalled CPU. 1790 * Print out diagnostic information for the specified stalled CPU.
1770 * 1791 *
1771 * If the specified CPU is aware of the current RCU grace period 1792 * If the specified CPU is aware of the current RCU grace period, then
1772 * (flavor specified by rsp), then print the number of scheduling 1793 * print the number of scheduling clock interrupts the CPU has taken
1773 * clock interrupts the CPU has taken during the time that it has 1794 * during the time that it has been aware. Otherwise, print the number
1774 * been aware. Otherwise, print the number of RCU grace periods 1795 * of RCU grace periods that this CPU is ignorant of, for example, "1"
1775 * that this CPU is ignorant of, for example, "1" if the CPU was 1796 * if the CPU was aware of the previous grace period.
1776 * aware of the previous grace period.
1777 * 1797 *
1778 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info. 1798 * Also print out idle and (if CONFIG_RCU_FAST_NO_HZ) idle-entry info.
1779 */ 1799 */
1780static void print_cpu_stall_info(struct rcu_state *rsp, int cpu) 1800static void print_cpu_stall_info(int cpu)
1781{ 1801{
1782 unsigned long delta; 1802 unsigned long delta;
1783 char fast_no_hz[72]; 1803 char fast_no_hz[72];
1784 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1804 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
1785 struct rcu_dynticks *rdtp = rdp->dynticks;
1786 char *ticks_title; 1805 char *ticks_title;
1787 unsigned long ticks_value; 1806 unsigned long ticks_value;
1788 1807
@@ -1792,7 +1811,7 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1792 */ 1811 */
1793 touch_nmi_watchdog(); 1812 touch_nmi_watchdog();
1794 1813
1795 ticks_value = rcu_seq_ctr(rsp->gp_seq - rdp->gp_seq); 1814 ticks_value = rcu_seq_ctr(rcu_state.gp_seq - rdp->gp_seq);
1796 if (ticks_value) { 1815 if (ticks_value) {
1797 ticks_title = "GPs behind"; 1816 ticks_title = "GPs behind";
1798 } else { 1817 } else {
@@ -1810,10 +1829,10 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
1810 rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' : 1829 rdp->rcu_iw_pending ? (int)min(delta, 9UL) + '0' :
1811 "!."[!delta], 1830 "!."[!delta],
1812 ticks_value, ticks_title, 1831 ticks_value, ticks_title,
1813 rcu_dynticks_snap(rdtp) & 0xfff, 1832 rcu_dynticks_snap(rdp) & 0xfff,
1814 rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting, 1833 rdp->dynticks_nesting, rdp->dynticks_nmi_nesting,
1815 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu), 1834 rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
1816 READ_ONCE(rsp->n_force_qs) - rsp->n_force_qs_gpstart, 1835 READ_ONCE(rcu_state.n_force_qs) - rcu_state.n_force_qs_gpstart,
1817 fast_no_hz); 1836 fast_no_hz);
1818} 1837}
1819 1838
@@ -1823,20 +1842,12 @@ static void print_cpu_stall_info_end(void)
1823 pr_err("\t"); 1842 pr_err("\t");
1824} 1843}
1825 1844
1826/* Zero ->ticks_this_gp for all flavors of RCU. */ 1845/* Zero ->ticks_this_gp and snapshot the number of RCU softirq handlers. */
1827static void zero_cpu_stall_ticks(struct rcu_data *rdp) 1846static void zero_cpu_stall_ticks(struct rcu_data *rdp)
1828{ 1847{
1829 rdp->ticks_this_gp = 0; 1848 rdp->ticks_this_gp = 0;
1830 rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id()); 1849 rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
1831} 1850 WRITE_ONCE(rdp->last_fqs_resched, jiffies);
1832
1833/* Increment ->ticks_this_gp for all flavors of RCU. */
1834static void increment_cpu_stall_ticks(void)
1835{
1836 struct rcu_state *rsp;
1837
1838 for_each_rcu_flavor(rsp)
1839 raw_cpu_inc(rsp->rda->ticks_this_gp);
1840} 1851}
1841 1852
1842#ifdef CONFIG_RCU_NOCB_CPU 1853#ifdef CONFIG_RCU_NOCB_CPU
@@ -1958,17 +1969,17 @@ static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype,
1958 if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) 1969 if (rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT)
1959 mod_timer(&rdp->nocb_timer, jiffies + 1); 1970 mod_timer(&rdp->nocb_timer, jiffies + 1);
1960 WRITE_ONCE(rdp->nocb_defer_wakeup, waketype); 1971 WRITE_ONCE(rdp->nocb_defer_wakeup, waketype);
1961 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, reason); 1972 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, reason);
1962 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); 1973 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
1963} 1974}
1964 1975
1965/* 1976/*
1966 * Does the specified CPU need an RCU callback for the specified flavor 1977 * Does the specified CPU need an RCU callback for this invocation
1967 * of rcu_barrier()? 1978 * of rcu_barrier()?
1968 */ 1979 */
1969static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) 1980static bool rcu_nocb_cpu_needs_barrier(int cpu)
1970{ 1981{
1971 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); 1982 struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
1972 unsigned long ret; 1983 unsigned long ret;
1973#ifdef CONFIG_PROVE_RCU 1984#ifdef CONFIG_PROVE_RCU
1974 struct rcu_head *rhp; 1985 struct rcu_head *rhp;
@@ -1979,7 +1990,7 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
1979 * There needs to be a barrier before this function is called, 1990 * There needs to be a barrier before this function is called,
1980 * but associated with a prior determination that no more 1991 * but associated with a prior determination that no more
1981 * callbacks would be posted. In the worst case, the first 1992 * callbacks would be posted. In the worst case, the first
1982 * barrier in _rcu_barrier() suffices (but the caller cannot 1993 * barrier in rcu_barrier() suffices (but the caller cannot
1983 * necessarily rely on this, not a substitute for the caller 1994 * necessarily rely on this, not a substitute for the caller
1984 * getting the concurrency design right!). There must also be 1995 * getting the concurrency design right!). There must also be
1985 * a barrier between the following load and posting of a callback 1996
@@ -2037,7 +2048,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2037 /* If we are not being polled and there is a kthread, awaken it ... */ 2048 /* If we are not being polled and there is a kthread, awaken it ... */
2038 t = READ_ONCE(rdp->nocb_kthread); 2049 t = READ_ONCE(rdp->nocb_kthread);
2039 if (rcu_nocb_poll || !t) { 2050 if (rcu_nocb_poll || !t) {
2040 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2051 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2041 TPS("WakeNotPoll")); 2052 TPS("WakeNotPoll"));
2042 return; 2053 return;
2043 } 2054 }
@@ -2046,7 +2057,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2046 if (!irqs_disabled_flags(flags)) { 2057 if (!irqs_disabled_flags(flags)) {
2047 /* ... if queue was empty ... */ 2058 /* ... if queue was empty ... */
2048 wake_nocb_leader(rdp, false); 2059 wake_nocb_leader(rdp, false);
2049 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2060 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2050 TPS("WakeEmpty")); 2061 TPS("WakeEmpty"));
2051 } else { 2062 } else {
2052 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE, 2063 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
@@ -2057,7 +2068,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2057 /* ... or if many callbacks queued. */ 2068 /* ... or if many callbacks queued. */
2058 if (!irqs_disabled_flags(flags)) { 2069 if (!irqs_disabled_flags(flags)) {
2059 wake_nocb_leader(rdp, true); 2070 wake_nocb_leader(rdp, true);
2060 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2071 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2061 TPS("WakeOvf")); 2072 TPS("WakeOvf"));
2062 } else { 2073 } else {
2063 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE, 2074 wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
@@ -2065,7 +2076,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2065 } 2076 }
2066 rdp->qlen_last_fqs_check = LONG_MAX / 2; 2077 rdp->qlen_last_fqs_check = LONG_MAX / 2;
2067 } else { 2078 } else {
2068 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); 2079 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot"));
2069 } 2080 }
2070 return; 2081 return;
2071} 2082}
@@ -2087,12 +2098,12 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2087 return false; 2098 return false;
2088 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); 2099 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
2089 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2100 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2090 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2101 trace_rcu_kfree_callback(rcu_state.name, rhp,
2091 (unsigned long)rhp->func, 2102 (unsigned long)rhp->func,
2092 -atomic_long_read(&rdp->nocb_q_count_lazy), 2103 -atomic_long_read(&rdp->nocb_q_count_lazy),
2093 -atomic_long_read(&rdp->nocb_q_count)); 2104 -atomic_long_read(&rdp->nocb_q_count));
2094 else 2105 else
2095 trace_rcu_callback(rdp->rsp->name, rhp, 2106 trace_rcu_callback(rcu_state.name, rhp,
2096 -atomic_long_read(&rdp->nocb_q_count_lazy), 2107 -atomic_long_read(&rdp->nocb_q_count_lazy),
2097 -atomic_long_read(&rdp->nocb_q_count)); 2108 -atomic_long_read(&rdp->nocb_q_count));
2098 2109
@@ -2142,7 +2153,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2142 struct rcu_node *rnp = rdp->mynode; 2153 struct rcu_node *rnp = rdp->mynode;
2143 2154
2144 local_irq_save(flags); 2155 local_irq_save(flags);
2145 c = rcu_seq_snap(&rdp->rsp->gp_seq); 2156 c = rcu_seq_snap(&rcu_state.gp_seq);
2146 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) { 2157 if (!rdp->gpwrap && ULONG_CMP_GE(rdp->gp_seq_needed, c)) {
2147 local_irq_restore(flags); 2158 local_irq_restore(flags);
2148 } else { 2159 } else {
@@ -2150,7 +2161,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2150 needwake = rcu_start_this_gp(rnp, rdp, c); 2161 needwake = rcu_start_this_gp(rnp, rdp, c);
2151 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2162 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
2152 if (needwake) 2163 if (needwake)
2153 rcu_gp_kthread_wake(rdp->rsp); 2164 rcu_gp_kthread_wake();
2154 } 2165 }
2155 2166
2156 /* 2167 /*
@@ -2187,7 +2198,7 @@ wait_again:
2187 2198
2188 /* Wait for callbacks to appear. */ 2199 /* Wait for callbacks to appear. */
2189 if (!rcu_nocb_poll) { 2200 if (!rcu_nocb_poll) {
2190 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Sleep")); 2201 trace_rcu_nocb_wake(rcu_state.name, my_rdp->cpu, TPS("Sleep"));
2191 swait_event_interruptible_exclusive(my_rdp->nocb_wq, 2202 swait_event_interruptible_exclusive(my_rdp->nocb_wq,
2192 !READ_ONCE(my_rdp->nocb_leader_sleep)); 2203 !READ_ONCE(my_rdp->nocb_leader_sleep));
2193 raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags); 2204 raw_spin_lock_irqsave(&my_rdp->nocb_lock, flags);
@@ -2197,7 +2208,7 @@ wait_again:
2197 raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags); 2208 raw_spin_unlock_irqrestore(&my_rdp->nocb_lock, flags);
2198 } else if (firsttime) { 2209 } else if (firsttime) {
2199 firsttime = false; /* Don't drown trace log with "Poll"! */ 2210 firsttime = false; /* Don't drown trace log with "Poll"! */
2200 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, TPS("Poll")); 2211 trace_rcu_nocb_wake(rcu_state.name, my_rdp->cpu, TPS("Poll"));
2201 } 2212 }
2202 2213
2203 /* 2214 /*
@@ -2224,7 +2235,7 @@ wait_again:
2224 if (rcu_nocb_poll) { 2235 if (rcu_nocb_poll) {
2225 schedule_timeout_interruptible(1); 2236 schedule_timeout_interruptible(1);
2226 } else { 2237 } else {
2227 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, 2238 trace_rcu_nocb_wake(rcu_state.name, my_rdp->cpu,
2228 TPS("WokeEmpty")); 2239 TPS("WokeEmpty"));
2229 } 2240 }
2230 goto wait_again; 2241 goto wait_again;
@@ -2269,7 +2280,7 @@ wait_again:
2269static void nocb_follower_wait(struct rcu_data *rdp) 2280static void nocb_follower_wait(struct rcu_data *rdp)
2270{ 2281{
2271 for (;;) { 2282 for (;;) {
2272 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("FollowerSleep")); 2283 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("FollowerSleep"));
2273 swait_event_interruptible_exclusive(rdp->nocb_wq, 2284 swait_event_interruptible_exclusive(rdp->nocb_wq,
2274 READ_ONCE(rdp->nocb_follower_head)); 2285 READ_ONCE(rdp->nocb_follower_head));
2275 if (smp_load_acquire(&rdp->nocb_follower_head)) { 2286 if (smp_load_acquire(&rdp->nocb_follower_head)) {
@@ -2277,7 +2288,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2277 return; 2288 return;
2278 } 2289 }
2279 WARN_ON(signal_pending(current)); 2290 WARN_ON(signal_pending(current));
2280 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeEmpty")); 2291 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty"));
2281 } 2292 }
2282} 2293}
2283 2294
@@ -2312,10 +2323,10 @@ static int rcu_nocb_kthread(void *arg)
2312 rdp->nocb_follower_tail = &rdp->nocb_follower_head; 2323 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2313 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); 2324 raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags);
2314 BUG_ON(!list); 2325 BUG_ON(!list);
2315 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WokeNonEmpty")); 2326 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty"));
2316 2327
2317 /* Each pass through the following loop invokes a callback. */ 2328 /* Each pass through the following loop invokes a callback. */
2318 trace_rcu_batch_start(rdp->rsp->name, 2329 trace_rcu_batch_start(rcu_state.name,
2319 atomic_long_read(&rdp->nocb_q_count_lazy), 2330 atomic_long_read(&rdp->nocb_q_count_lazy),
2320 atomic_long_read(&rdp->nocb_q_count), -1); 2331 atomic_long_read(&rdp->nocb_q_count), -1);
2321 c = cl = 0; 2332 c = cl = 0;
@@ -2323,23 +2334,23 @@ static int rcu_nocb_kthread(void *arg)
2323 next = list->next; 2334 next = list->next;
2324 /* Wait for enqueuing to complete, if needed. */ 2335 /* Wait for enqueuing to complete, if needed. */
2325 while (next == NULL && &list->next != tail) { 2336 while (next == NULL && &list->next != tail) {
2326 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2337 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2327 TPS("WaitQueue")); 2338 TPS("WaitQueue"));
2328 schedule_timeout_interruptible(1); 2339 schedule_timeout_interruptible(1);
2329 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2340 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu,
2330 TPS("WokeQueue")); 2341 TPS("WokeQueue"));
2331 next = list->next; 2342 next = list->next;
2332 } 2343 }
2333 debug_rcu_head_unqueue(list); 2344 debug_rcu_head_unqueue(list);
2334 local_bh_disable(); 2345 local_bh_disable();
2335 if (__rcu_reclaim(rdp->rsp->name, list)) 2346 if (__rcu_reclaim(rcu_state.name, list))
2336 cl++; 2347 cl++;
2337 c++; 2348 c++;
2338 local_bh_enable(); 2349 local_bh_enable();
2339 cond_resched_tasks_rcu_qs(); 2350 cond_resched_tasks_rcu_qs();
2340 list = next; 2351 list = next;
2341 } 2352 }
2342 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2353 trace_rcu_batch_end(rcu_state.name, c, !!list, 0, 0, 1);
2343 smp_mb__before_atomic(); /* _add after CB invocation. */ 2354 smp_mb__before_atomic(); /* _add after CB invocation. */
2344 atomic_long_add(-c, &rdp->nocb_q_count); 2355 atomic_long_add(-c, &rdp->nocb_q_count);
2345 atomic_long_add(-cl, &rdp->nocb_q_count_lazy); 2356 atomic_long_add(-cl, &rdp->nocb_q_count_lazy);
@@ -2367,7 +2378,7 @@ static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp)
2367 ndw = READ_ONCE(rdp->nocb_defer_wakeup); 2378 ndw = READ_ONCE(rdp->nocb_defer_wakeup);
2368 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); 2379 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
2369 __wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); 2380 __wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags);
2370 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); 2381 trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake"));
2371} 2382}
2372 2383
2373/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ 2384/* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */
@@ -2393,7 +2404,6 @@ void __init rcu_init_nohz(void)
2393{ 2404{
2394 int cpu; 2405 int cpu;
2395 bool need_rcu_nocb_mask = false; 2406 bool need_rcu_nocb_mask = false;
2396 struct rcu_state *rsp;
2397 2407
2398#if defined(CONFIG_NO_HZ_FULL) 2408#if defined(CONFIG_NO_HZ_FULL)
2399 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) 2409 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
@@ -2427,11 +2437,9 @@ void __init rcu_init_nohz(void)
2427 if (rcu_nocb_poll) 2437 if (rcu_nocb_poll)
2428 pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); 2438 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2429 2439
2430 for_each_rcu_flavor(rsp) { 2440 for_each_cpu(cpu, rcu_nocb_mask)
2431 for_each_cpu(cpu, rcu_nocb_mask) 2441 init_nocb_callback_list(per_cpu_ptr(&rcu_data, cpu));
2432 init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu)); 2442 rcu_organize_nocb_kthreads();
2433 rcu_organize_nocb_kthreads(rsp);
2434 }
2435} 2443}
2436 2444
2437/* Initialize per-rcu_data variables for no-CBs CPUs. */ 2445/* Initialize per-rcu_data variables for no-CBs CPUs. */
@@ -2446,16 +2454,15 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2446 2454
2447/* 2455/*
2448 * If the specified CPU is a no-CBs CPU that does not already have its 2456 * If the specified CPU is a no-CBs CPU that does not already have its
2449 * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are 2457 * rcuo kthread, spawn it. If the CPUs are brought online out of order,
2450 * brought online out of order, this can require re-organizing the 2458 * this can require re-organizing the leader-follower relationships.
2451 * leader-follower relationships.
2452 */ 2459 */
2453static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu) 2460static void rcu_spawn_one_nocb_kthread(int cpu)
2454{ 2461{
2455 struct rcu_data *rdp; 2462 struct rcu_data *rdp;
2456 struct rcu_data *rdp_last; 2463 struct rcu_data *rdp_last;
2457 struct rcu_data *rdp_old_leader; 2464 struct rcu_data *rdp_old_leader;
2458 struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu); 2465 struct rcu_data *rdp_spawn = per_cpu_ptr(&rcu_data, cpu);
2459 struct task_struct *t; 2466 struct task_struct *t;
2460 2467
2461 /* 2468 /*
@@ -2485,9 +2492,9 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
2485 rdp_spawn->nocb_next_follower = rdp_old_leader; 2492 rdp_spawn->nocb_next_follower = rdp_old_leader;
2486 } 2493 }
2487 2494
2488 /* Spawn the kthread for this CPU and RCU flavor. */ 2495 /* Spawn the kthread for this CPU. */
2489 t = kthread_run(rcu_nocb_kthread, rdp_spawn, 2496 t = kthread_run(rcu_nocb_kthread, rdp_spawn,
2490 "rcuo%c/%d", rsp->abbr, cpu); 2497 "rcuo%c/%d", rcu_state.abbr, cpu);
2491 BUG_ON(IS_ERR(t)); 2498 BUG_ON(IS_ERR(t));
2492 WRITE_ONCE(rdp_spawn->nocb_kthread, t); 2499 WRITE_ONCE(rdp_spawn->nocb_kthread, t);
2493} 2500}
@@ -2498,11 +2505,8 @@ static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
2498 */ 2505 */
2499static void rcu_spawn_all_nocb_kthreads(int cpu) 2506static void rcu_spawn_all_nocb_kthreads(int cpu)
2500{ 2507{
2501 struct rcu_state *rsp;
2502
2503 if (rcu_scheduler_fully_active) 2508 if (rcu_scheduler_fully_active)
2504 for_each_rcu_flavor(rsp) 2509 rcu_spawn_one_nocb_kthread(cpu);
2505 rcu_spawn_one_nocb_kthread(rsp, cpu);
2506} 2510}
2507 2511
2508/* 2512/*
@@ -2526,7 +2530,7 @@ module_param(rcu_nocb_leader_stride, int, 0444);
2526/* 2530/*
2527 * Initialize leader-follower relationships for all no-CBs CPU. 2531 * Initialize leader-follower relationships for all no-CBs CPU.
2528 */ 2532 */
2529static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp) 2533static void __init rcu_organize_nocb_kthreads(void)
2530{ 2534{
2531 int cpu; 2535 int cpu;
2532 int ls = rcu_nocb_leader_stride; 2536 int ls = rcu_nocb_leader_stride;
@@ -2548,7 +2552,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
2548 * we will spawn the needed set of rcu_nocb_kthread() kthreads. 2552 * we will spawn the needed set of rcu_nocb_kthread() kthreads.
2549 */ 2553 */
2550 for_each_cpu(cpu, rcu_nocb_mask) { 2554 for_each_cpu(cpu, rcu_nocb_mask) {
2551 rdp = per_cpu_ptr(rsp->rda, cpu); 2555 rdp = per_cpu_ptr(&rcu_data, cpu);
2552 if (rdp->cpu >= nl) { 2556 if (rdp->cpu >= nl) {
2553 /* New leader, set up for followers & next leader. */ 2557 /* New leader, set up for followers & next leader. */
2554 nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls; 2558 nl = DIV_ROUND_UP(rdp->cpu + 1, ls) * ls;
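As a worked example (assuming the usual initialization in which the first no-CBs CPU becomes a leader): with eight no-CBs CPUs and rcu_nocb_leader_stride=4, CPU 0 satisfies rdp->cpu >= nl and becomes a leader, setting nl = DIV_ROUND_UP(0 + 1, 4) * 4 = 4, so CPUs 1-3 become its followers; CPU 4 then satisfies rdp->cpu >= nl, becomes the second leader with nl = 8, and CPUs 5-7 follow it.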
@@ -2585,7 +2589,7 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2585 2589
2586#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 2590#else /* #ifdef CONFIG_RCU_NOCB_CPU */
2587 2591
2588static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) 2592static bool rcu_nocb_cpu_needs_barrier(int cpu)
2589{ 2593{
2590 WARN_ON_ONCE(1); /* Should be dead code. */ 2594 WARN_ON_ONCE(1); /* Should be dead code. */
2591 return false; 2595 return false;
@@ -2654,12 +2658,12 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
2654 * This code relies on the fact that all NO_HZ_FULL CPUs are also 2658 * This code relies on the fact that all NO_HZ_FULL CPUs are also
2655 * CONFIG_RCU_NOCB_CPU CPUs. 2659 * CONFIG_RCU_NOCB_CPU CPUs.
2656 */ 2660 */
2657static bool rcu_nohz_full_cpu(struct rcu_state *rsp) 2661static bool rcu_nohz_full_cpu(void)
2658{ 2662{
2659#ifdef CONFIG_NO_HZ_FULL 2663#ifdef CONFIG_NO_HZ_FULL
2660 if (tick_nohz_full_cpu(smp_processor_id()) && 2664 if (tick_nohz_full_cpu(smp_processor_id()) &&
2661 (!rcu_gp_in_progress(rsp) || 2665 (!rcu_gp_in_progress() ||
2662 ULONG_CMP_LT(jiffies, READ_ONCE(rsp->gp_start) + HZ))) 2666 ULONG_CMP_LT(jiffies, READ_ONCE(rcu_state.gp_start) + HZ)))
2663 return true; 2667 return true;
2664#endif /* #ifdef CONFIG_NO_HZ_FULL */ 2668#endif /* #ifdef CONFIG_NO_HZ_FULL */
2665 return false; 2669 return false;
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 39cb23d22109..f203b94f6b5b 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -203,11 +203,7 @@ void rcu_test_sync_prims(void)
203 if (!IS_ENABLED(CONFIG_PROVE_RCU)) 203 if (!IS_ENABLED(CONFIG_PROVE_RCU))
204 return; 204 return;
205 synchronize_rcu(); 205 synchronize_rcu();
206 synchronize_rcu_bh();
207 synchronize_sched();
208 synchronize_rcu_expedited(); 206 synchronize_rcu_expedited();
209 synchronize_rcu_bh_expedited();
210 synchronize_sched_expedited();
211} 207}
212 208
213#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) 209#if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU)
@@ -298,7 +294,7 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_held);
298 * 294 *
299 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot. 295 * Check debug_lockdep_rcu_enabled() to prevent false positives during boot.
300 * 296 *
301 * Note that rcu_read_lock() is disallowed if the CPU is either idle or 297 * Note that rcu_read_lock_bh() is disallowed if the CPU is either idle or
302 * offline from an RCU perspective, so check for those as well. 298 * offline from an RCU perspective, so check for those as well.
303 */ 299 */
304int rcu_read_lock_bh_held(void) 300int rcu_read_lock_bh_held(void)
@@ -336,7 +332,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
336 int i; 332 int i;
337 int j; 333 int j;
338 334
339 /* Initialize and register callbacks for each flavor specified. */ 335 /* Initialize and register callbacks for each crcu_array element. */
340 for (i = 0; i < n; i++) { 336 for (i = 0; i < n; i++) {
341 if (checktiny && 337 if (checktiny &&
342 (crcu_array[i] == call_rcu || 338 (crcu_array[i] == call_rcu ||
@@ -472,6 +468,7 @@ int rcu_jiffies_till_stall_check(void)
472 } 468 }
473 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA; 469 return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
474} 470}
471EXPORT_SYMBOL_GPL(rcu_jiffies_till_stall_check);
475 472
476void rcu_sysrq_start(void) 473void rcu_sysrq_start(void)
477{ 474{
@@ -701,19 +698,19 @@ static int __noreturn rcu_tasks_kthread(void *arg)
701 698
702 /* 699 /*
703 * Wait for all pre-existing t->on_rq and t->nvcsw 700 * Wait for all pre-existing t->on_rq and t->nvcsw
704 * transitions to complete. Invoking synchronize_sched() 701 * transitions to complete. Invoking synchronize_rcu()
705 * suffices because all these transitions occur with 702 * suffices because all these transitions occur with
706 * interrupts disabled. Without this synchronize_sched(), 703 * interrupts disabled. Without this synchronize_rcu(),
707 * a read-side critical section that started before the 704 * a read-side critical section that started before the
708 * grace period might be incorrectly seen as having started 705 * grace period might be incorrectly seen as having started
709 * after the grace period. 706 * after the grace period.
710 * 707 *
711 * This synchronize_sched() also dispenses with the 708 * This synchronize_rcu() also dispenses with the
712 * need for a memory barrier on the first store to 709 * need for a memory barrier on the first store to
713 * ->rcu_tasks_holdout, as it forces the store to happen 710 * ->rcu_tasks_holdout, as it forces the store to happen
714 * after the beginning of the grace period. 711 * after the beginning of the grace period.
715 */ 712 */
716 synchronize_sched(); 713 synchronize_rcu();
717 714
718 /* 715 /*
719 * There were callbacks, so we need to wait for an 716 * There were callbacks, so we need to wait for an
@@ -740,7 +737,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
740 * This does only part of the job, ensuring that all 737 * This does only part of the job, ensuring that all
741 * tasks that were previously exiting reach the point 738 * tasks that were previously exiting reach the point
742 * where they have disabled preemption, allowing the 739 * where they have disabled preemption, allowing the
743 * later synchronize_sched() to finish the job. 740 * later synchronize_rcu() to finish the job.
744 */ 741 */
745 synchronize_srcu(&tasks_rcu_exit_srcu); 742 synchronize_srcu(&tasks_rcu_exit_srcu);
746 743
@@ -790,20 +787,20 @@ static int __noreturn rcu_tasks_kthread(void *arg)
790 * cause their RCU-tasks read-side critical sections to 787 * cause their RCU-tasks read-side critical sections to
791 * extend past the end of the grace period. However, 788 * extend past the end of the grace period. However,
792 * because these ->nvcsw updates are carried out with 789 * because these ->nvcsw updates are carried out with
793 * interrupts disabled, we can use synchronize_sched() 790 * interrupts disabled, we can use synchronize_rcu()
794 * to force the needed ordering on all such CPUs. 791 * to force the needed ordering on all such CPUs.
795 * 792 *
796 * This synchronize_sched() also confines all 793 * This synchronize_rcu() also confines all
797 * ->rcu_tasks_holdout accesses to be within the grace 794 * ->rcu_tasks_holdout accesses to be within the grace
798 * period, avoiding the need for memory barriers for 795 * period, avoiding the need for memory barriers for
799 * ->rcu_tasks_holdout accesses. 796 * ->rcu_tasks_holdout accesses.
800 * 797 *
801 * In addition, this synchronize_sched() waits for exiting 798 * In addition, this synchronize_rcu() waits for exiting
802 * tasks to complete their final preempt_disable() region 799 * tasks to complete their final preempt_disable() region
803 * of execution, cleaning up after the synchronize_srcu() 800 * of execution, cleaning up after the synchronize_srcu()
804 * above. 801 * above.
805 */ 802 */
806 synchronize_sched(); 803 synchronize_rcu();
807 804
808 /* Invoke the callbacks. */ 805 /* Invoke the callbacks. */
809 while (list) { 806 while (list) {
@@ -870,15 +867,10 @@ static void __init rcu_tasks_bootup_oddness(void)
870#ifdef CONFIG_PROVE_RCU 867#ifdef CONFIG_PROVE_RCU
871 868
872/* 869/*
873 * Early boot self test parameters, one for each flavor 870 * Early boot self test parameters.
874 */ 871 */
875static bool rcu_self_test; 872static bool rcu_self_test;
876static bool rcu_self_test_bh;
877static bool rcu_self_test_sched;
878
879module_param(rcu_self_test, bool, 0444); 873module_param(rcu_self_test, bool, 0444);
880module_param(rcu_self_test_bh, bool, 0444);
881module_param(rcu_self_test_sched, bool, 0444);
882 874
883static int rcu_self_test_counter; 875static int rcu_self_test_counter;
884 876
@@ -888,25 +880,16 @@ static void test_callback(struct rcu_head *r)
888 pr_info("RCU test callback executed %d\n", rcu_self_test_counter); 880 pr_info("RCU test callback executed %d\n", rcu_self_test_counter);
889} 881}
890 882
883DEFINE_STATIC_SRCU(early_srcu);
884
891static void early_boot_test_call_rcu(void) 885static void early_boot_test_call_rcu(void)
892{ 886{
893 static struct rcu_head head; 887 static struct rcu_head head;
888 static struct rcu_head shead;
894 889
895 call_rcu(&head, test_callback); 890 call_rcu(&head, test_callback);
896} 891 if (IS_ENABLED(CONFIG_SRCU))
897 892 call_srcu(&early_srcu, &shead, test_callback);
898static void early_boot_test_call_rcu_bh(void)
899{
900 static struct rcu_head head;
901
902 call_rcu_bh(&head, test_callback);
903}
904
905static void early_boot_test_call_rcu_sched(void)
906{
907 static struct rcu_head head;
908
909 call_rcu_sched(&head, test_callback);
910} 893}
911 894
912void rcu_early_boot_tests(void) 895void rcu_early_boot_tests(void)
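These self-tests run only when explicitly requested via the rcu_self_test module parameter (for built-in code this is normally given on the kernel command line as rcupdate.rcu_self_test=1; treat the exact prefix as an assumption and confirm against Documentation/admin-guide/kernel-parameters.txt). With CONFIG_SRCU enabled, the same flag now also posts an early call_srcu(), whose completion is later verified with srcu_barrier(&early_srcu).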
@@ -915,10 +898,6 @@ void rcu_early_boot_tests(void)
915 898
916 if (rcu_self_test) 899 if (rcu_self_test)
917 early_boot_test_call_rcu(); 900 early_boot_test_call_rcu();
918 if (rcu_self_test_bh)
919 early_boot_test_call_rcu_bh();
920 if (rcu_self_test_sched)
921 early_boot_test_call_rcu_sched();
922 rcu_test_sync_prims(); 901 rcu_test_sync_prims();
923} 902}
924 903
@@ -930,16 +909,11 @@ static int rcu_verify_early_boot_tests(void)
930 if (rcu_self_test) { 909 if (rcu_self_test) {
931 early_boot_test_counter++; 910 early_boot_test_counter++;
932 rcu_barrier(); 911 rcu_barrier();
912 if (IS_ENABLED(CONFIG_SRCU)) {
913 early_boot_test_counter++;
914 srcu_barrier(&early_srcu);
915 }
933 } 916 }
934 if (rcu_self_test_bh) {
935 early_boot_test_counter++;
936 rcu_barrier_bh();
937 }
938 if (rcu_self_test_sched) {
939 early_boot_test_counter++;
940 rcu_barrier_sched();
941 }
942
943 if (rcu_self_test_counter != early_boot_test_counter) { 917 if (rcu_self_test_counter != early_boot_test_counter) {
944 WARN_ON(1); 918 WARN_ON(1);
945 ret = -1; 919 ret = -1;
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 6f584861d329..7a0720a20003 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -301,7 +301,8 @@ restart:
301 pending >>= softirq_bit; 301 pending >>= softirq_bit;
302 } 302 }
303 303
304 rcu_bh_qs(); 304 if (__this_cpu_read(ksoftirqd) == current)
305 rcu_softirq_qs();
305 local_irq_disable(); 306 local_irq_disable();
306 307
307 pending = local_softirq_pending(); 308 pending = local_softirq_pending();
diff --git a/kernel/torture.c b/kernel/torture.c
index 1ac24a826589..17d91f5fba2a 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -573,7 +573,7 @@ static int stutter;
573 * Block until the stutter interval ends. This must be called periodically 573 * Block until the stutter interval ends. This must be called periodically
574 * by all running kthreads that need to be subject to stuttering. 574 * by all running kthreads that need to be subject to stuttering.
575 */ 575 */
576void stutter_wait(const char *title) 576bool stutter_wait(const char *title)
577{ 577{
578 int spt; 578 int spt;
579 579
@@ -590,6 +590,7 @@ void stutter_wait(const char *title)
590 } 590 }
591 torture_shutdown_absorb(title); 591 torture_shutdown_absorb(title);
592 } 592 }
593 return !!spt;
593} 594}
594EXPORT_SYMBOL_GPL(stutter_wait); 595EXPORT_SYMBOL_GPL(stutter_wait);
595 596
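With stutter_wait() now reporting whether the caller actually paused, torture kthreads can note when they resume after a stutter interval. A minimal sketch of a consumer, with do_one_torture_op() and the "example" naming purely hypothetical:

	/* Hypothetical torture kthread using stutter_wait()'s new return value. */
	static int example_torture_kthread(void *arg)
	{
		do {
			do_one_torture_op();
			if (stutter_wait("example_torture_kthread"))
				pr_info("example_torture_kthread: resumed after stutter\n");
		} while (!torture_must_stop());
		torture_kthread_stopping("example_torture_kthread");
		return 0;
	}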