-rw-r--r--  Documentation/atomic_ops.txt                   |  45
-rw-r--r--  Documentation/kernel-parameters.txt            |  14
-rw-r--r--  Documentation/kernel-per-CPU-kthreads.txt      |  34
-rw-r--r--  Documentation/memory-barriers.txt              |  42
-rw-r--r--  Documentation/timers/NO_HZ.txt                 |  10
-rw-r--r--  include/linux/lockdep.h                        |   7
-rw-r--r--  include/linux/rcupdate.h                       |  38
-rw-r--r--  include/linux/srcu.h                           |   2
-rw-r--r--  init/Kconfig                                   |  13
-rw-r--r--  kernel/rcu/rcutorture.c                        |  27
-rw-r--r--  kernel/rcu/srcu.c                              |  19
-rw-r--r--  kernel/rcu/tiny.c                              |  14
-rw-r--r--  kernel/rcu/tree.c                              | 129
-rw-r--r--  kernel/rcu/tree_plugin.h                       | 100
-rw-r--r--  kernel/rcu/update.c                            |  72
-rw-r--r--  lib/Kconfig.debug                              |  11
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh  |   2
17 files changed, 357 insertions, 222 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 183e41bdcb69..dab6da3382d9 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -201,11 +201,11 @@ These routines add 1 and subtract 1, respectively, from the given
 atomic_t and return the new counter value after the operation is
 performed.
 
-Unlike the above routines, it is required that explicit memory
-barriers are performed before and after the operation.  It must be
-done such that all memory operations before and after the atomic
-operation calls are strongly ordered with respect to the atomic
-operation itself.
+Unlike the above routines, it is required that these primitives
+include explicit memory barriers that are performed before and after
+the operation.  It must be done such that all memory operations before
+and after the atomic operation calls are strongly ordered with respect
+to the atomic operation itself.
 
 For example, it should behave as if a smp_mb() call existed both
 before and after the atomic operation.
@@ -233,21 +233,21 @@ These two routines increment and decrement by 1, respectively, the
 given atomic counter.  They return a boolean indicating whether the
 resulting counter value was zero or not.
 
-It requires explicit memory barrier semantics around the operation as
-above.
+Again, these primitives provide explicit memory barrier semantics around
+the atomic operation.
 
 	int atomic_sub_and_test(int i, atomic_t *v);
 
 This is identical to atomic_dec_and_test() except that an explicit
-decrement is given instead of the implicit "1".  It requires explicit
-memory barrier semantics around the operation.
+decrement is given instead of the implicit "1".  This primitive must
+provide explicit memory barrier semantics around the operation.
 
 	int atomic_add_negative(int i, atomic_t *v);
 
-The given increment is added to the given atomic counter value.  A
-boolean is return which indicates whether the resulting counter value
-is negative.  It requires explicit memory barrier semantics around the
-operation.
+The given increment is added to the given atomic counter value.  A boolean
+is return which indicates whether the resulting counter value is negative.
+This primitive must provide explicit memory barrier semantics around
+the operation.
 
 Then:
 
@@ -257,7 +257,7 @@ This performs an atomic exchange operation on the atomic variable v, setting
 the given new value.  It returns the old value that the atomic variable v had
 just before the operation.
 
-atomic_xchg requires explicit memory barriers around the operation.
+atomic_xchg must provide explicit memory barriers around the operation.
 
 	int atomic_cmpxchg(atomic_t *v, int old, int new);
 
@@ -266,7 +266,7 @@ with the given old and new values. Like all atomic_xxx operations,
 atomic_cmpxchg will only satisfy its atomicity semantics as long as all
 other accesses of *v are performed through atomic_xxx operations.
 
-atomic_cmpxchg requires explicit memory barriers around the operation.
+atomic_cmpxchg must provide explicit memory barriers around the operation.
 
 The semantics for atomic_cmpxchg are the same as those defined for 'cas'
 below.
@@ -279,8 +279,8 @@ If the atomic value v is not equal to u, this function adds a to v, and
 returns non zero. If v is equal to u then it returns zero. This is done as
 an atomic operation.
 
-atomic_add_unless requires explicit memory barriers around the operation
-unless it fails (returns 0).
+atomic_add_unless must provide explicit memory barriers around the
+operation unless it fails (returns 0).
 
 atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0)
 
@@ -460,9 +460,9 @@ the return value into an int. There are other places where things
 like this occur as well.
 
 These routines, like the atomic_t counter operations returning values,
-require explicit memory barrier semantics around their execution.  All
-memory operations before the atomic bit operation call must be made
-visible globally before the atomic bit operation is made visible.
+must provide explicit memory barrier semantics around their execution.
+All memory operations before the atomic bit operation call must be
+made visible globally before the atomic bit operation is made visible.
 Likewise, the atomic bit operation must be visible globally before any
 subsequent memory operation is made visible.  For example:
 
@@ -536,8 +536,9 @@ except that two underscores are prefixed to the interface name.
 These non-atomic variants also do not require any special memory
 barrier semantics.
 
-The routines xchg() and cmpxchg() need the same exact memory barriers
-as the atomic and bit operations returning values.
+The routines xchg() and cmpxchg() must provide the same exact
+memory-barrier semantics as the atomic and bit operations returning
+values.
 
 Spinlocks and rwlocks have memory barrier expectations as well.
 The rule to follow is simple:
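
The guarantee spelled out above is what lets callers use the value-returning
atomic operations for lock-free handoffs without adding barriers of their own.
A minimal sketch (not part of this patch; "struct foo" and foo_put() are
hypothetical names) of the classic reference-count release built on the
smp_mb() semantics that atomic_dec_and_test() must provide:

	struct foo {
		atomic_t refcount;
		/* ... payload initialized before publication ... */
	};

	static void foo_put(struct foo *p)
	{
		/*
		 * All of this CPU's prior accesses to *p are ordered
		 * before the decrement, and the kfree() is ordered
		 * after it, because atomic_dec_and_test() must act as
		 * if an smp_mb() preceded and followed it.
		 */
		if (atomic_dec_and_test(&p->refcount))
			kfree(p);
	}
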
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 94de410ec341..5368ba701de2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2997,11 +2997,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			value is one, and maximum value is HZ.
 
 	rcutree.kthread_prio= 	[KNL,BOOT]
-			Set the SCHED_FIFO priority of the RCU
-			per-CPU kthreads (rcuc/N). This value is also
-			used for the priority of the RCU boost threads
-			(rcub/N). Valid values are 1-99 and the default
-			is 1 (the least-favored priority).
+			Set the SCHED_FIFO priority of the RCU per-CPU
+			kthreads (rcuc/N). This value is also used for
+			the priority of the RCU boost threads (rcub/N)
+			and for the RCU grace-period kthreads (rcu_bh,
+			rcu_preempt, and rcu_sched). If RCU_BOOST is
+			set, valid values are 1-99 and the default is 1
+			(the least-favored priority). Otherwise, when
+			RCU_BOOST is not set, valid values are 0-99 and
+			the default is zero (non-realtime operation).
 
 	rcutree.rcu_nocb_leader_stride= [KNL]
 			Set the number of NOCB kthread groups, which
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index f3cd299fcc41..f4cbfe0ba108 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -190,20 +190,24 @@ To reduce its OS jitter, do any of the following:
 		on each CPU, including cs_dbs_timer() and od_dbs_timer().
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
-	d.	It is not possible to entirely get rid of OS jitter
-		from vmstat_update() on CONFIG_SMP=y systems, but you
-		can decrease its frequency by writing a large value
-		to /proc/sys/vm/stat_interval.  The default value is
-		HZ, for an interval of one second.  Of course, larger
-		values will make your virtual-memory statistics update
-		more slowly.  Of course, you can also run your workload
-		at a real-time priority, thus preempting vmstat_update(),
+	d.	As of v3.18, Christoph Lameter's on-demand vmstat workers
+		commit prevents OS jitter due to vmstat_update() on
+		CONFIG_SMP=y systems.  Before v3.18, is not possible
+		to entirely get rid of the OS jitter, but you can
+		decrease its frequency by writing a large value to
+		/proc/sys/vm/stat_interval.  The default value is HZ,
+		for an interval of one second.  Of course, larger values
+		will make your virtual-memory statistics update more
+		slowly.  Of course, you can also run your workload at
+		a real-time priority, thus preempting vmstat_update(),
 		but if your workload is CPU-bound, this is a bad idea.
 		However, there is an RFC patch from Christoph Lameter
 		(based on an earlier one from Gilad Ben-Yossef) that
 		reduces or even eliminates vmstat overhead for some
 		workloads at https://lkml.org/lkml/2013/9/4/379.
-	e.	If running on high-end powerpc servers, build with
+	e.	Boot with "elevator=noop" to avoid workqueue use by
+		the block layer.
+	f.	If running on high-end powerpc servers, build with
 		CONFIG_PPC_RTAS_DAEMON=n.  This prevents the RTAS
 		daemon from running on each CPU every second or so.
 		(This will require editing Kconfig files and will defeat
@@ -211,12 +215,12 @@ To reduce its OS jitter, do any of the following:
 		due to the rtas_event_scan() function.
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
-	f.	If running on Cell Processor, build your kernel with
+	g.	If running on Cell Processor, build your kernel with
 		CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from
 		spu_gov_work().
 		WARNING:  Please check your CPU specifications to
 		make sure that this is safe on your particular system.
-	g.	If running on PowerMAC, build your kernel with
+	h.	If running on PowerMAC, build your kernel with
 		CONFIG_PMAC_RACKMETER=n to disable the CPU-meter,
 		avoiding OS jitter from rackmeter_do_timer().
 
@@ -258,8 +262,12 @@ Purpose: Detect software lockups on each CPU.
 To reduce its OS jitter, do at least one of the following:
 1.	Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these
 	kthreads from being created in the first place.
-2.	Echo a zero to /proc/sys/kernel/watchdog to disable the
+2.	Boot with "nosoftlockup=0", which will also prevent these kthreads
+	from being created.  Other related watchdog and softlockup boot
+	parameters may be found in Documentation/kernel-parameters.txt
+	and Documentation/watchdog/watchdog-parameters.txt.
+3.	Echo a zero to /proc/sys/kernel/watchdog to disable the
 	watchdog timer.
-3.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
+4.	Echo a large number of /proc/sys/kernel/watchdog_thresh in
 	order to reduce the frequency of OS jitter due to the watchdog
 	timer down to a level that is acceptable for your workload.
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index ca2387ef27ab..6974f1c2b4e1 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -592,9 +592,9 @@ See also the subsection on "Cache Coherency" for a more thorough example.
 CONTROL DEPENDENCIES
 --------------------
 
-A control dependency requires a full read memory barrier, not simply a data
-dependency barrier to make it work correctly.  Consider the following bit of
-code:
+A load-load control dependency requires a full read memory barrier, not
+simply a data dependency barrier to make it work correctly.  Consider the
+following bit of code:
 
 	q = ACCESS_ONCE(a);
 	if (q) {
@@ -615,14 +615,15 @@ case what's actually required is:
 	}
 
 However, stores are not speculated.  This means that ordering -is- provided
-in the following example:
+for load-store control dependencies, as in the following example:
 
 	q = ACCESS_ONCE(a);
 	if (q) {
 		ACCESS_ONCE(b) = p;
 	}
 
-Please note that ACCESS_ONCE() is not optional!  Without the
+Control dependencies pair normally with other types of barriers.
+That said, please note that ACCESS_ONCE() is not optional!  Without the
 ACCESS_ONCE(), might combine the load from 'a' with other loads from
 'a', and the store to 'b' with other stores to 'b', with possible highly
 counterintuitive effects on ordering.
@@ -813,6 +814,8 @@ In summary:
      barrier() can help to preserve your control dependency.  Please
      see the Compiler Barrier section for more information.
 
+ (*) Control dependencies pair normally with other types of barriers.
+
 (*) Control dependencies do -not- provide transitivity.  If you
     need transitivity, use smp_mb().
 
@@ -823,14 +826,14 @@ SMP BARRIER PAIRING
 When dealing with CPU-CPU interactions, certain types of memory barrier should
 always be paired.  A lack of appropriate pairing is almost certainly an error.
 
-General barriers pair with each other, though they also pair with
-most other types of barriers, albeit without transitivity.  An acquire
-barrier pairs with a release barrier, but both may also pair with other
-barriers, including of course general barriers.  A write barrier pairs
-with a data dependency barrier, an acquire barrier, a release barrier,
-a read barrier, or a general barrier.  Similarly a read barrier or a
-data dependency barrier pairs with a write barrier, an acquire barrier,
-a release barrier, or a general barrier:
+General barriers pair with each other, though they also pair with most
+other types of barriers, albeit without transitivity.  An acquire barrier
+pairs with a release barrier, but both may also pair with other barriers,
+including of course general barriers.  A write barrier pairs with a data
+dependency barrier, a control dependency, an acquire barrier, a release
+barrier, a read barrier, or a general barrier.  Similarly a read barrier,
+control dependency, or a data dependency barrier pairs with a write
+barrier, an acquire barrier, a release barrier, or a general barrier:
 
 	CPU 1		      CPU 2
 	===============	      ===============
@@ -850,6 +853,19 @@ Or:
 			      <data dependency barrier>
 			      y = *x;
 
+Or even:
+
+	CPU 1		      CPU 2
+	===============	      ===============================
+	r1 = ACCESS_ONCE(y);
+	<general barrier>
+	ACCESS_ONCE(y) = 1;   if (r2 = ACCESS_ONCE(x)) {
+			         <implicit control dependency>
+			         ACCESS_ONCE(y) = 1;
+			      }
+
+	assert(r1 == 0 || r2 == 0);
+
 Basically, the read barrier always has to be there, even though it can be of
 the "weaker" type.
 
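
As a concrete, purely hypothetical use of the load-store control dependency
discussed above (ctx, cmd_ready, and doorbell are invented names, not from
this patch), a driver might rely on the conditional store being ordered after
the load, with ACCESS_ONCE() keeping the compiler from defeating the
dependency:

	if (ACCESS_ONCE(ctx->cmd_ready))	/* load ... */
		ACCESS_ONCE(ctx->doorbell) = 1;	/* ... orders this store */
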
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index cca122f25120..6eaf576294f3 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -158,13 +158,9 @@ not come for free:
 	to the need to inform kernel subsystems (such as RCU) about
 	the change in mode.
 
-3.	POSIX CPU timers on adaptive-tick CPUs may miss their deadlines
-	(perhaps indefinitely) because they currently rely on
-	scheduling-tick interrupts.  This will likely be fixed in
-	one of two ways: (1) Prevent CPUs with POSIX CPU timers from
-	entering adaptive-tick mode, or (2) Use hrtimers or other
-	adaptive-ticks-immune mechanism to cause the POSIX CPU timer to
-	fire properly.
+3.	POSIX CPU timers prevent CPUs from entering adaptive-tick mode.
+	Real-time applications needing to take actions based on CPU time
+	consumption need to use other means of doing so.
 
 4.	If there are more perf events pending than the hardware can
 	accommodate, they are normally round-robined so as to collect
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 74ab23176e9b..066ba4157541 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -531,8 +531,13 @@ do { \
 # define might_lock_read(lock) do { } while (0)
 #endif
 
-#ifdef CONFIG_PROVE_RCU
+#ifdef CONFIG_LOCKDEP
 void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
+#else
+static inline void
+lockdep_rcu_suspicious(const char *file, const int line, const char *s)
+{
+}
 #endif
 
 #endif /* __LINUX_LOCKDEP_H */
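
With the no-op stub above, callers no longer need to wrap the call in an
#ifdef of their own; when lockdep is disabled the call compiles away.  A
minimal sketch of such a caller (hypothetical, not from this patch):

	#include <linux/lockdep.h>

	static void complain_about_misuse(bool misused)
	{
		if (misused)
			lockdep_rcu_suspicious(__FILE__, __LINE__,
					       "suspicious RCU usage");
	}
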
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 762022f07afd..573a5afd5ed8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,6 +48,26 @@
 
 extern int rcu_expedited; /* for sysctl */
 
+#ifdef CONFIG_TINY_RCU
+/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
+static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
+{
+	return false;
+}
+
+static inline void rcu_expedite_gp(void)
+{
+}
+
+static inline void rcu_unexpedite_gp(void)
+{
+}
+#else /* #ifdef CONFIG_TINY_RCU */
+bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
+void rcu_expedite_gp(void);
+void rcu_unexpedite_gp(void);
+#endif /* #else #ifdef CONFIG_TINY_RCU */
+
 enum rcutorture_type {
 	RCU_FLAVOR,
 	RCU_BH_FLAVOR,
@@ -195,6 +215,15 @@ void call_rcu_sched(struct rcu_head *head,
 
 void synchronize_sched(void);
 
+/*
+ * Structure allowing asynchronous waiting on RCU.
+ */
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+void wakeme_after_rcu(struct rcu_head *head);
+
 /**
  * call_rcu_tasks() - Queue an RCU for invocation task-based grace period
  * @head: structure to be used for queueing the RCU updates.
@@ -258,6 +287,7 @@ static inline int rcu_preempt_depth(void)
 
 /* Internal to kernel */
 void rcu_init(void);
+void rcu_end_inkernel_boot(void);
 void rcu_sched_qs(void);
 void rcu_bh_qs(void);
 void rcu_check_callbacks(int user);
@@ -722,7 +752,7 @@ static inline void rcu_preempt_sleep_check(void)
  * annotated as __rcu.
  */
 #define rcu_dereference_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu)
+	__rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu)
 
 /**
  * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking
@@ -732,7 +762,7 @@ static inline void rcu_preempt_sleep_check(void)
  * This is the RCU-bh counterpart to rcu_dereference_check().
  */
 #define rcu_dereference_bh_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu)
+	__rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu)
 
 /**
  * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking
@@ -742,7 +772,7 @@ static inline void rcu_preempt_sleep_check(void)
  * This is the RCU-sched counterpart to rcu_dereference_check().
  */
 #define rcu_dereference_sched_check(p, c) \
-	__rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \
+	__rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \
 				__rcu)
 
 #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/
@@ -935,9 +965,9 @@ static inline void rcu_read_unlock(void)
 {
 	rcu_lockdep_assert(rcu_is_watching(),
 			   "rcu_read_unlock() used illegally while idle");
-	rcu_lock_release(&rcu_lock_map);
 	__release(RCU);
 	__rcu_read_unlock();
+	rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */
 }
 
 /**
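
Exporting struct rcu_synchronize and wakeme_after_rcu() lets RCU flavors share
one implementation of the "wait synchronously for a grace period" pattern
instead of each carrying a private copy (the SRCU copy is removed later in
this series).  A minimal sketch of that pattern, assuming an ordinary RCU
user (my_synchronize_rcu() is a hypothetical name, not a kernel API):

	#include <linux/rcupdate.h>
	#include <linux/completion.h>

	static void my_synchronize_rcu(void)
	{
		struct rcu_synchronize rcu;

		init_rcu_head_on_stack(&rcu.head);
		init_completion(&rcu.completion);
		/* wakeme_after_rcu() completes rcu.completion after a GP. */
		call_rcu(&rcu.head, wakeme_after_rcu);
		wait_for_completion(&rcu.completion);
		destroy_rcu_head_on_stack(&rcu.head);
	}
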
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9cfd9623fb03..bdeb4567b71e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -182,7 +182,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp)
  * lockdep_is_held() calls.
  */
 #define srcu_dereference_check(p, sp, c) \
-	__rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu)
+	__rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu)
 
 /**
  * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d4261b..9a0592516f48 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -791,6 +791,19 @@ config RCU_NOCB_CPU_ALL
 
 endchoice
 
+config RCU_EXPEDITE_BOOT
+	bool
+	default n
+	help
+	  This option enables expedited grace periods at boot time,
+	  as if rcu_expedite_gp() had been invoked early in boot.
+	  The corresponding rcu_unexpedite_gp() is invoked from
+	  rcu_end_inkernel_boot(), which is intended to be invoked
+	  at the end of the kernel-only boot sequence, just before
+	  init is exec'ed.
+
+	  Accept the default if unsure.
+
 endmenu # "RCU Subsystem"
 
 config BUILD_BIN2C
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 30d42aa55d83..8dbe27611ec3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -853,6 +853,8 @@ rcu_torture_fqs(void *arg)
 static int
 rcu_torture_writer(void *arg)
 {
+	bool can_expedite = !rcu_gp_is_expedited();
+	int expediting = 0;
 	unsigned long gp_snap;
 	bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal;
 	bool gp_sync1 = gp_sync;
@@ -865,9 +867,15 @@ rcu_torture_writer(void *arg)
 	int nsynctypes = 0;
 
 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started");
+	pr_alert("%s" TORTURE_FLAG
+		 " Grace periods expedited from boot/sysfs for %s,\n",
+		 torture_type, cur_ops->name);
+	pr_alert("%s" TORTURE_FLAG
+		 " Testing of dynamic grace-period expediting diabled.\n",
+		 torture_type);
 
 	/* Initialize synctype[] array.  If none set, take default. */
-	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync)
+	if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1)
 		gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true;
 	if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync)
 		synctype[nsynctypes++] = RTWS_COND_GET;
@@ -949,9 +957,26 @@ rcu_torture_writer(void *arg)
 			}
 		}
 		rcutorture_record_progress(++rcu_torture_current_version);
+		/* Cycle through nesting levels of rcu_expedite_gp() calls. */
+		if (can_expedite &&
+		    !(torture_random(&rand) & 0xff & (!!expediting - 1))) {
+			WARN_ON_ONCE(expediting == 0 && rcu_gp_is_expedited());
+			if (expediting >= 0)
+				rcu_expedite_gp();
+			else
+				rcu_unexpedite_gp();
+			if (++expediting > 3)
+				expediting = -expediting;
+		}
 		rcu_torture_writer_state = RTWS_STUTTER;
 		stutter_wait("rcu_torture_writer");
 	} while (!torture_must_stop());
+	/* Reset expediting back to unexpedited. */
+	if (expediting > 0)
+		expediting = -expediting;
+	while (can_expedite && expediting++ < 0)
+		rcu_unexpedite_gp();
+	WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited());
 	rcu_torture_writer_state = RTWS_STOPPING;
 	torture_kthread_stopping("rcu_torture_writer");
 	return 0;
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 445bf8ffe3fb..cad76e76b4e7 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -402,23 +402,6 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
 }
 EXPORT_SYMBOL_GPL(call_srcu);
 
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-
-/*
- * Awaken the corresponding synchronize_srcu() instance now that a
- * grace period has elapsed.
- */
-static void wakeme_after_rcu(struct rcu_head *head)
-{
-	struct rcu_synchronize *rcu;
-
-	rcu = container_of(head, struct rcu_synchronize, head);
-	complete(&rcu->completion);
-}
-
 static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
 static void srcu_reschedule(struct srcu_struct *sp);
 
@@ -507,7 +490,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
  */
 void synchronize_srcu(struct srcu_struct *sp)
 {
-	__synchronize_srcu(sp, rcu_expedited
+	__synchronize_srcu(sp, rcu_gp_is_expedited()
 			   ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
 			   : SYNCHRONIZE_SRCU_TRYCOUNT);
 }
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index cc9ceca7bde1..069742d61c68 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -103,8 +103,7 @@ EXPORT_SYMBOL(__rcu_is_watching);
 static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
 {
 	RCU_TRACE(reset_cpu_stall_ticks(rcp));
-	if (rcp->rcucblist != NULL &&
-	    rcp->donetail != rcp->curtail) {
+	if (rcp->donetail != rcp->curtail) {
 		rcp->donetail = rcp->curtail;
 		return 1;
 	}
@@ -169,17 +168,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
 	unsigned long flags;
 	RCU_TRACE(int cb_count = 0);
 
-	/* If no RCU callbacks ready to invoke, just return. */
-	if (&rcp->rcucblist == rcp->donetail) {
-		RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1));
-		RCU_TRACE(trace_rcu_batch_end(rcp->name, 0,
-					      !!ACCESS_ONCE(rcp->rcucblist),
-					      need_resched(),
-					      is_idle_task(current),
-					      false));
-		return;
-	}
-
 	/* Move the ready-to-invoke callbacks to a local list. */
 	local_irq_save(flags);
 	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1));
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8fcc64ed858c..233165da782f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,8 +91,10 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var
 
 #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
 DEFINE_RCU_TPS(sname) \
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \
 struct rcu_state sname##_state = { \
 	.level = { &sname##_state.node[0] }, \
+	.rda = &sname##_data, \
 	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = 0UL - 300UL, \
@@ -103,8 +105,7 @@ struct rcu_state sname##_state = { \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
 	.name = RCU_STATE_NAME(sname), \
 	.abbr = sabbr, \
-}; \
-DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data)
+}
 
 RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched);
 RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh);
@@ -310,10 +311,10 @@ void rcu_note_context_switch(void)
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 /*
- * Register a quiesecent state for all RCU flavors.  If there is an
+ * Register a quiescent state for all RCU flavors.  If there is an
  * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight
  * dyntick-idle quiescent state visible to other CPUs (but only for those
- * RCU flavors in desparate need of a quiescent state, which will normally
+ * RCU flavors in desperate need of a quiescent state, which will normally
  * be none of them).  Either way, do a lightweight quiescent state for
  * all RCU flavors.
  */
@@ -428,6 +429,15 @@ void rcu_bh_force_quiescent_state(void)
 EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
 
 /*
+ * Force a quiescent state for RCU-sched.
+ */
+void rcu_sched_force_quiescent_state(void)
+{
+	force_quiescent_state(&rcu_sched_state);
+}
+EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
+
+/*
  * Show the state of the grace-period kthreads.
  */
 void show_rcu_gp_kthreads(void)
@@ -501,15 +511,6 @@ void rcutorture_record_progress(unsigned long vernum)
 EXPORT_SYMBOL_GPL(rcutorture_record_progress);
 
 /*
- * Force a quiescent state for RCU-sched.
- */
-void rcu_sched_force_quiescent_state(void)
-{
-	force_quiescent_state(&rcu_sched_state);
-}
-EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
-
-/*
  * Does the CPU have callbacks ready to be invoked?
  */
 static int
@@ -1347,20 +1348,30 @@ void rcu_cpu_stall_reset(void)
 }
 
 /*
- * Initialize the specified rcu_data structure's callback list to empty.
+ * Initialize the specified rcu_data structure's default callback list
+ * to empty.  The default callback list is the one that is not used by
+ * no-callbacks CPUs.
  */
-static void init_callback_list(struct rcu_data *rdp)
+static void init_default_callback_list(struct rcu_data *rdp)
 {
 	int i;
 
-	if (init_nocb_callback_list(rdp))
-		return;
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
 }
 
 /*
+ * Initialize the specified rcu_data structure's callback list to empty.
+ */
+static void init_callback_list(struct rcu_data *rdp)
+{
+	if (init_nocb_callback_list(rdp))
+		return;
+	init_default_callback_list(rdp);
+}
+
+/*
  * Determine the value that ->completed will have at the end of the
  * next subsequent grace period.  This is used to tag callbacks so that
  * a CPU can invoke callbacks in a timely fashion even if that CPU has
@@ -1727,7 +1738,6 @@ static int rcu_gp_init(struct rcu_state *rsp)
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
 	ACCESS_ONCE(rsp->gp_activity) = jiffies;
-	rcu_bind_gp_kthread();
 	raw_spin_lock_irq(&rnp->lock);
 	smp_mb__after_unlock_lock();
 	if (!ACCESS_ONCE(rsp->gp_flags)) {
@@ -1822,8 +1832,8 @@ static int rcu_gp_init(struct rcu_state *rsp)
 		rcu_preempt_check_blocked_tasks(rnp);
 		rnp->qsmask = rnp->qsmaskinit;
 		ACCESS_ONCE(rnp->gpnum) = rsp->gpnum;
-		WARN_ON_ONCE(rnp->completed != rsp->completed);
-		ACCESS_ONCE(rnp->completed) = rsp->completed;
+		if (WARN_ON_ONCE(rnp->completed != rsp->completed))
+			ACCESS_ONCE(rnp->completed) = rsp->completed;
 		if (rnp == rdp->mynode)
 			(void)__note_gp_changes(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
@@ -1866,7 +1876,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
 		fqs_state = RCU_FORCE_QS;
 	} else {
 		/* Handle dyntick-idle and offline CPUs. */
-		isidle = false;
+		isidle = true;
 		force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
 	}
 	/* Clear flag to prevent immediate re-entry. */
@@ -1965,6 +1975,7 @@ static int __noreturn rcu_gp_kthread(void *arg)
 	struct rcu_state *rsp = arg;
 	struct rcu_node *rnp = rcu_get_root(rsp);
 
+	rcu_bind_gp_kthread();
 	for (;;) {
 
 		/* Handle grace-period start. */
@@ -2750,8 +2761,8 @@ static void force_qs_rnp(struct rcu_state *rsp,
 		bit = 1;
 		for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
 			if ((rnp->qsmask & bit) != 0) {
-				if ((rnp->qsmaskinit & bit) != 0)
-					*isidle = false;
+				if ((rnp->qsmaskinit & bit) == 0)
+					*isidle = false; /* Pending hotplug. */
 				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
 					mask |= bit;
 			}
@@ -2895,7 +2906,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	 * If called from an extended quiescent state, invoke the RCU
 	 * core in order to force a re-evaluation of RCU's idleness.
 	 */
-	if (!rcu_is_watching() && cpu_online(smp_processor_id()))
+	if (!rcu_is_watching())
 		invoke_rcu_core();
 
 	/* If interrupts were disabled or CPU offline, don't invoke RCU core. */
@@ -2981,11 +2992,22 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 
 		if (cpu != -1)
 			rdp = per_cpu_ptr(rsp->rda, cpu);
-		offline = !__call_rcu_nocb(rdp, head, lazy, flags);
-		WARN_ON_ONCE(offline);
-		/* _call_rcu() is illegal on offline CPU; leak the callback. */
-		local_irq_restore(flags);
-		return;
+		if (likely(rdp->mynode)) {
+			/* Post-boot, so this should be for a no-CBs CPU. */
+			offline = !__call_rcu_nocb(rdp, head, lazy, flags);
+			WARN_ON_ONCE(offline);
+			/* Offline CPU, _call_rcu() illegal, leak callback. */
+			local_irq_restore(flags);
+			return;
+		}
+		/*
+		 * Very early boot, before rcu_init().  Initialize if needed
+		 * and then drop through to queue the callback.
+		 */
+		BUG_ON(cpu != -1);
+		WARN_ON_ONCE(!rcu_is_watching());
+		if (!likely(rdp->nxtlist))
+			init_default_callback_list(rdp);
 	}
 	ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1;
 	if (lazy)
@@ -3108,7 +3130,7 @@ void synchronize_sched(void)
 			 "Illegal synchronize_sched() in RCU-sched read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
-	if (rcu_expedited)
+	if (rcu_gp_is_expedited())
 		synchronize_sched_expedited();
 	else
 		wait_rcu_gp(call_rcu_sched);
@@ -3135,7 +3157,7 @@ void synchronize_rcu_bh(void)
 			 "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
-	if (rcu_expedited)
+	if (rcu_gp_is_expedited())
 		synchronize_rcu_bh_expedited();
 	else
 		wait_rcu_gp(call_rcu_bh);
@@ -3735,7 +3757,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
+	if (!rdp->nxtlist)
+		init_callback_list(rdp); /* Re-enable callbacks on this CPU. */
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	rcu_sysidle_init_percpu_data(rdp->dynticks);
 	atomic_set(&rdp->dynticks->dynticks,
@@ -3826,11 +3849,12 @@ static int rcu_pm_notify(struct notifier_block *self,
 	case PM_HIBERNATION_PREPARE:
 	case PM_SUSPEND_PREPARE:
 		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
-			rcu_expedited = 1;
+			rcu_expedite_gp();
 		break;
 	case PM_POST_HIBERNATION:
 	case PM_POST_SUSPEND:
-		rcu_expedited = 0;
+		if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */
+			rcu_unexpedite_gp();
 		break;
 	default:
 		break;
@@ -3900,30 +3924,26 @@ void rcu_scheduler_starting(void)
  * Compute the per-level fanout, either using the exact fanout specified
  * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
  */
-#ifdef CONFIG_RCU_FANOUT_EXACT
 static void __init rcu_init_levelspread(struct rcu_state *rsp)
 {
 	int i;
 
-	rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
-	for (i = rcu_num_lvls - 2; i >= 0; i--)
-		rsp->levelspread[i] = CONFIG_RCU_FANOUT;
-}
-#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
-static void __init rcu_init_levelspread(struct rcu_state *rsp)
-{
-	int ccur;
-	int cprv;
-	int i;
-
-	cprv = nr_cpu_ids;
-	for (i = rcu_num_lvls - 1; i >= 0; i--) {
-		ccur = rsp->levelcnt[i];
-		rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
-		cprv = ccur;
+	if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) {
+		rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf;
+		for (i = rcu_num_lvls - 2; i >= 0; i--)
+			rsp->levelspread[i] = CONFIG_RCU_FANOUT;
+	} else {
+		int ccur;
+		int cprv;
+
+		cprv = nr_cpu_ids;
+		for (i = rcu_num_lvls - 1; i >= 0; i--) {
+			ccur = rsp->levelcnt[i];
+			rsp->levelspread[i] = (cprv + ccur - 1) / ccur;
+			cprv = ccur;
+		}
 	}
 }
-#endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */
 
 /*
  * Helper function for rcu_init() that initializes one rcu_state structure.
@@ -3999,7 +4019,6 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 		}
 	}
 
-	rsp->rda = rda;
 	init_waitqueue_head(&rsp->gp_wq);
 	rnp = rsp->level[rcu_num_lvls - 1];
 	for_each_possible_cpu(i) {
@@ -4092,6 +4111,8 @@ void __init rcu_init(void)
 {
 	int cpu;
 
+	rcu_early_boot_tests();
+
 	rcu_bootup_announce();
 	rcu_init_geometry();
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
@@ -4108,8 +4129,6 @@ void __init rcu_init(void)
 	pm_notifier(rcu_pm_notify, 0);
 	for_each_online_cpu(cpu)
 		rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-
-	rcu_early_boot_tests();
 }
 
 #include "tree_plugin.h"
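
The __call_rcu() change above makes it legal to post a callback before
rcu_init() has run, with rcu_early_boot_tests() moved ahead of the rest of
initialization so that such callbacks are exercised.  A hypothetical sketch
of an early-boot caller (queue_early_callback() and early_cb() are invented
names, not part of this patch):

	static struct rcu_head early_head;

	static void early_cb(struct rcu_head *rhp)
	{
		pr_info("early callback ran after the first grace period\n");
	}

	/* May now be called even before rcu_init(). */
	void __init queue_early_callback(void)
	{
		call_rcu_sched(&early_head, early_cb);
	}
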
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index d45e961515c1..8c0ec0f5a027 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -58,38 +58,33 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
  */
 static void __init rcu_bootup_announce_oddness(void)
 {
-#ifdef CONFIG_RCU_TRACE
-	pr_info("\tRCU debugfs-based tracing is enabled.\n");
-#endif
-#if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)
-	pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
-	       CONFIG_RCU_FANOUT);
-#endif
-#ifdef CONFIG_RCU_FANOUT_EXACT
-	pr_info("\tHierarchical RCU autobalancing is disabled.\n");
-#endif
-#ifdef CONFIG_RCU_FAST_NO_HZ
-	pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
-#endif
-#ifdef CONFIG_PROVE_RCU
-	pr_info("\tRCU lockdep checking is enabled.\n");
-#endif
-#ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE
-	pr_info("\tRCU torture testing starts during boot.\n");
-#endif
-#if defined(CONFIG_RCU_CPU_STALL_INFO)
-	pr_info("\tAdditional per-CPU info printed with stalls.\n");
-#endif
-#if NUM_RCU_LVL_4 != 0
-	pr_info("\tFour-level hierarchy is enabled.\n");
-#endif
+	if (IS_ENABLED(CONFIG_RCU_TRACE))
+		pr_info("\tRCU debugfs-based tracing is enabled.\n");
+	if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) ||
+	    (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32))
+		pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
+			CONFIG_RCU_FANOUT);
+	if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT))
+		pr_info("\tHierarchical RCU autobalancing is disabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ))
+		pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n");
+	if (IS_ENABLED(CONFIG_PROVE_RCU))
+		pr_info("\tRCU lockdep checking is enabled.\n");
+	if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE))
+		pr_info("\tRCU torture testing starts during boot.\n");
+	if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO))
+		pr_info("\tAdditional per-CPU info printed with stalls.\n");
+	if (NUM_RCU_LVL_4 != 0)
+		pr_info("\tFour-level hierarchy is enabled.\n");
+	if (CONFIG_RCU_FANOUT_LEAF != 16)
+		pr_info("\tBuild-time adjustment of leaf fanout to %d.\n",
+			CONFIG_RCU_FANOUT_LEAF);
 	if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF)
 		pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
 	if (nr_cpu_ids != NR_CPUS)
 		pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
-#ifdef CONFIG_RCU_BOOST
-	pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
-#endif
+	if (IS_ENABLED(CONFIG_RCU_BOOST))
+		pr_info("\tRCU kthread priority: %d.\n", kthread_prio);
 }
 
 #ifdef CONFIG_PREEMPT_RCU
@@ -296,7 +291,13 @@ void rcu_read_unlock_special(struct task_struct *t)
 	}
 
 	/* Hardware IRQ handlers cannot block, complain if they get here. */
-	if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) {
+	if (in_irq() || in_serving_softirq()) {
+		lockdep_rcu_suspicious(__FILE__, __LINE__,
+				       "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n");
+		pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n",
+			 t->rcu_read_unlock_special.s,
+			 t->rcu_read_unlock_special.b.blocked,
+			 t->rcu_read_unlock_special.b.need_qs);
 		local_irq_restore(flags);
 		return;
 	}
@@ -535,7 +536,7 @@ void synchronize_rcu(void)
 			 "Illegal synchronize_rcu() in RCU read-side critical section");
 	if (!rcu_scheduler_active)
 		return;
-	if (rcu_expedited)
+	if (rcu_gp_is_expedited())
 		synchronize_rcu_expedited();
 	else
 		wait_rcu_gp(call_rcu);
@@ -1940,7 +1941,8 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu)
 	rhp = ACCESS_ONCE(rdp->nocb_follower_head);
 
 	/* Having no rcuo kthread but CBs after scheduler starts is bad! */
-	if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) {
+	if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp &&
+	    rcu_scheduler_fully_active) {
 		/* RCU callback enqueued before CPU first came online??? */
 		pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n",
 		       cpu, rhp->func);
@@ -2387,18 +2389,8 @@ void __init rcu_init_nohz(void)
 		pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
 
 	for_each_rcu_flavor(rsp) {
-		for_each_cpu(cpu, rcu_nocb_mask) {
-			struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-
-			/*
-			 * If there are early callbacks, they will need
-			 * to be moved to the nocb lists.
-			 */
-			WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
-				     &rdp->nxtlist &&
-				     rdp->nxttail[RCU_NEXT_TAIL] != NULL);
-			init_nocb_callback_list(rdp);
-		}
+		for_each_cpu(cpu, rcu_nocb_mask)
+			init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu));
 		rcu_organize_nocb_kthreads(rsp);
 	}
 }
@@ -2535,6 +2527,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp)
 	if (!rcu_is_nocb_cpu(rdp->cpu))
 		return false;
 
+	/* If there are early-boot callbacks, move them to nocb lists. */
+	if (rdp->nxtlist) {
+		rdp->nocb_head = rdp->nxtlist;
+		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL];
+		atomic_long_set(&rdp->nocb_q_count, rdp->qlen);
+		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy);
+		rdp->nxtlist = NULL;
+		rdp->qlen = 0;
+		rdp->qlen_lazy = 0;
+	}
 	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
 	return true;
 }
@@ -2758,7 +2760,8 @@ static void rcu_sysidle_exit(int irq)
 
 /*
  * Check to see if the current CPU is idle.  Note that usermode execution
- * does not count as idle.  The caller must have disabled interrupts.
+ * does not count as idle.  The caller must have disabled interrupts,
+ * and must be running on tick_do_timer_cpu.
  */
 static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
 				  unsigned long *maxj)
@@ -2779,8 +2782,8 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
 	if (!*isidle || rdp->rsp != rcu_state_p ||
 	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
 		return;
-	if (rcu_gp_in_progress(rdp->rsp))
-		WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
+	/* Verify affinity of current kthread. */
+	WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
 
 	/* Pick up current idle and NMI-nesting counter and check. */
 	cur = atomic_read(&rdtp->dynticks_idle);
@@ -3063,11 +3066,10 @@ static void rcu_bind_gp_kthread(void)
 		return;
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	cpu = tick_do_timer_cpu;
-	if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu)
+	if (cpu >= 0 && cpu < nr_cpu_ids)
 		set_cpus_allowed_ptr(current, cpumask_of(cpu));
#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
-	if (!is_housekeeping_cpu(raw_smp_processor_id()))
-		housekeeping_affine(current);
+	housekeeping_affine(current);
 #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
 }
 
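
The rcu_bootup_announce_oddness() rewrite above trades #ifdef blocks for
IS_ENABLED(), so every branch is always compiled and type-checked while the
dead branch is discarded by constant folding.  A generic sketch of the
pattern (announce_fanout_choice() is a hypothetical function, not part of
this patch):

	static void __init announce_fanout_choice(void)
	{
		if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT))
			pr_info("Using the exact, unbalanced fanout.\n");
		else
			pr_info("Balancing the rcu_node tree.\n");
	}
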
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index e0d31a345ee6..1f133350da01 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,6 +62,63 @@ MODULE_ALIAS("rcupdate");
 
 module_param(rcu_expedited, int, 0);
 
+#ifndef CONFIG_TINY_RCU
+
+static atomic_t rcu_expedited_nesting =
+	ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0);
+
+/*
+ * Should normal grace-period primitives be expedited?  Intended for
+ * use within RCU.  Note that this function takes the rcu_expedited
+ * sysfs/boot variable into account as well as the rcu_expedite_gp()
+ * nesting.  So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited()
+ * returns false is a -really- bad idea.
+ */
+bool rcu_gp_is_expedited(void)
+{
+	return rcu_expedited || atomic_read(&rcu_expedited_nesting);
+}
+EXPORT_SYMBOL_GPL(rcu_gp_is_expedited);
+
+/**
+ * rcu_expedite_gp - Expedite future RCU grace periods
+ *
+ * After a call to this function, future calls to synchronize_rcu() and
+ * friends act as the corresponding synchronize_rcu_expedited() function
+ * had instead been called.
+ */
+void rcu_expedite_gp(void)
+{
+	atomic_inc(&rcu_expedited_nesting);
+}
+EXPORT_SYMBOL_GPL(rcu_expedite_gp);
+
+/**
+ * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation
+ *
+ * Undo a prior call to rcu_expedite_gp().  If all prior calls to
+ * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(),
+ * and if the rcu_expedited sysfs/boot parameter is not set, then all
+ * subsequent calls to synchronize_rcu() and friends will return to
+ * their normal non-expedited behavior.
+ */
+void rcu_unexpedite_gp(void)
+{
+	atomic_dec(&rcu_expedited_nesting);
+}
+EXPORT_SYMBOL_GPL(rcu_unexpedite_gp);
+
+#endif /* #ifndef CONFIG_TINY_RCU */
+
+/*
+ * Inform RCU of the end of the in-kernel boot sequence.
+ */
+void rcu_end_inkernel_boot(void)
+{
+	if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT))
+		rcu_unexpedite_gp();
+}
+
 #ifdef CONFIG_PREEMPT_RCU
 
 /*
@@ -199,16 +256,13 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-struct rcu_synchronize {
-	struct rcu_head head;
-	struct completion completion;
-};
-
-/*
- * Awaken the corresponding synchronize_rcu() instance now that a
- * grace period has elapsed.
+/**
+ * wakeme_after_rcu() - Callback function to awaken a task after grace period
+ * @head: Pointer to rcu_head member within rcu_synchronize structure
+ *
+ * Awaken the corresponding task now that a grace period has elapsed.
  */
-static void wakeme_after_rcu(struct rcu_head *head)
+void wakeme_after_rcu(struct rcu_head *head)
 {
 	struct rcu_synchronize *rcu;
 
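
A hedged usage sketch of the new expediting API (not from this patch; it
mirrors how rcu_pm_notify() uses it around suspend): calls nest, so every
rcu_expedite_gp() must eventually be balanced by a matching
rcu_unexpedite_gp(), after which synchronize_rcu() and friends revert to
normal grace periods unless the rcu_expedited boot/sysfs knob is set.

	void enter_latency_critical_window(void)	/* hypothetical */
	{
		rcu_expedite_gp();	/* synchronize_rcu() now expedited */
	}

	void exit_latency_critical_window(void)		/* hypothetical */
	{
		rcu_unexpedite_gp();	/* back to normal once nesting hits 0 */
	}
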
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1173afc308ad..1ad74c0df01f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1180,16 +1180,7 @@ config DEBUG_CREDENTIALS
 menu "RCU Debugging"
 
 config PROVE_RCU
-	bool "RCU debugging: prove RCU correctness"
-	depends on PROVE_LOCKING
-	default n
-	help
-	  This feature enables lockdep extensions that check for correct
-	  use of RCU APIs.  This is currently under development.  Say Y
-	  if you want to debug RCU usage or help work on the PROVE_RCU
-	  feature.
-
-	  Say N if you are unsure.
+	def_bool PROVE_LOCKING
 
 config PROVE_RCU_REPEATEDLY
 	bool "RCU debugging: don't disable PROVE_RCU on first splat"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 368d64ac779e..dd2812ceb0ba 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -310,7 +310,7 @@ function dump(first, pastlast)
 			cfr[jn] = cf[j] "." cfrep[cf[j]];
 		}
 		if (cpusr[jn] > ncpus && ncpus != 0)
-			ovf = "(!)";
+			ovf = "-ovf";
 		else
 			ovf = "";
 		print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`";