author    Ingo Molnar <mingo@kernel.org>  2017-06-13 02:40:03 -0400
committer Ingo Molnar <mingo@kernel.org>  2017-06-13 02:40:03 -0400
commit    567b64aaefc4ef9ae3af124ae0b13dc13a6804a8
tree      0b2edea83c359a19e00d529a41c3ced5396933df
parent    32c1431eea4881a6b17bd7c639315010aeefa452
parent    6d48152eafde1f0d0a4a9e0584fa7d9ff4fbfdac
Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney:

 "The largest feature of this series is shrinking and simplification, with
  the following diffstat summary:

      79 files changed, 1496 insertions(+), 4211 deletions(-)

  In other words, this series represents a net reduction of more than 2700
  lines of code."

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  Documentation/RCU/00-INDEX | 2
-rw-r--r--  Documentation/RCU/Design/Requirements/Requirements.html | 34
-rw-r--r--  Documentation/RCU/checklist.txt | 8
-rw-r--r--  Documentation/RCU/trace.txt | 535
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 41
-rw-r--r--  Documentation/core-api/atomic_ops.rst | 5
-rw-r--r--  Documentation/dev-tools/sparse.rst | 6
-rw-r--r--  Documentation/kernel-per-CPU-kthreads.txt | 31
-rw-r--r--  Documentation/memory-barriers.txt | 2
-rw-r--r--  Documentation/timers/NO_HZ.txt | 29
-rw-r--r--  include/linux/bcm47xx_nvram.h | 1
-rw-r--r--  include/linux/compiler.h | 4
-rw-r--r--  include/linux/rcu_node_tree.h | 4
-rw-r--r--  include/linux/rcu_segcblist.h | 4
-rw-r--r--  include/linux/rcupdate.h | 318
-rw-r--r--  include/linux/rcutiny.h | 167
-rw-r--r--  include/linux/rcutree.h | 21
-rw-r--r--  include/linux/spinlock.h | 20
-rw-r--r--  include/linux/srcu.h | 25
-rw-r--r--  include/linux/srcuclassic.h | 115
-rw-r--r--  include/linux/srcutiny.h | 47
-rw-r--r--  include/linux/srcutree.h | 13
-rw-r--r--  include/trace/events/rcu.h | 1
-rw-r--r--  init/Kconfig | 349
-rw-r--r--  kernel/locking/lockdep.c | 176
-rw-r--r--  kernel/rcu/Kconfig | 242
-rw-r--r--  kernel/rcu/Kconfig.debug | 82
-rw-r--r--  kernel/rcu/Makefile | 2
-rw-r--r--  kernel/rcu/rcu.h | 277
-rw-r--r--  kernel/rcu/rcuperf.c | 129
-rw-r--r--  kernel/rcu/rcutorture.c | 21
-rw-r--r--  kernel/rcu/srcu.c | 661
-rw-r--r--  kernel/rcu/srcutiny.c | 86
-rw-r--r--  kernel/rcu/srcutree.c | 187
-rw-r--r--  kernel/rcu/tiny.c | 54
-rw-r--r--  kernel/rcu/tiny_plugin.h | 123
-rw-r--r--  kernel/rcu/tree.c | 195
-rw-r--r--  kernel/rcu/tree.h | 109
-rw-r--r--  kernel/rcu/tree_exp.h | 2
-rw-r--r--  kernel/rcu/tree_plugin.h | 573
-rw-r--r--  kernel/rcu/tree_trace.c | 494
-rw-r--r--  kernel/rcu/update.c | 77
-rw-r--r--  kernel/sched/core.c | 8
-rw-r--r--  kernel/time/Kconfig | 50
-rw-r--r--  lib/Kconfig.debug | 184
-rw-r--r--  lib/Makefile | 3
-rwxr-xr-x  scripts/checkpatch.pl | 17
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/configcheck.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-build.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/CFLIST | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-N | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-P | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-t | 10
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-u | 9
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TINY02 | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE02 | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE03 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE04 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE05 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot | 3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE06 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot | 3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE07 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08-T | 21
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuperf/TINY (renamed from tools/testing/selftests/rcutorture/configs/rcu/TREE02-T) | 19
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuperf/TREE | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 | 1
-rw-r--r--  tools/testing/selftests/rcutorture/doc/TINY_RCU.txt | 1
-rw-r--r--  tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt | 34
-rwxr-xr-x  tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk | 2
79 files changed, 1496 insertions(+), 4211 deletions(-)
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX
index 1672573b037a..f46980c060aa 100644
--- a/Documentation/RCU/00-INDEX
+++ b/Documentation/RCU/00-INDEX
@@ -28,8 +28,6 @@ stallwarn.txt
 	- RCU CPU stall warnings (module parameter rcu_cpu_stall_suppress)
 torture.txt
 	- RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST)
-trace.txt
-	- CONFIG_RCU_TRACE debugfs files and formats
 UP.txt
 	- RCU on Uniprocessor Systems
 whatisRCU.txt
diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html
index f60adf112663..95b30fa25d56 100644
--- a/Documentation/RCU/Design/Requirements/Requirements.html
+++ b/Documentation/RCU/Design/Requirements/Requirements.html
@@ -559,9 +559,7 @@ The <tt>rcu_access_pointer()</tt> on line&nbsp;6 is similar to
 For <tt>remove_gp_synchronous()</tt>, as long as all modifications
 to <tt>gp</tt> are carried out while holding <tt>gp_lock</tt>,
 the above optimizations are harmless.
-However,
-with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt>,
-<tt>sparse</tt> will complain if you
+However, <tt>sparse</tt> will complain if you
 define <tt>gp</tt> with <tt>__rcu</tt> and then
 access it without using
 either <tt>rcu_access_pointer()</tt> or <tt>rcu_dereference()</tt>.
@@ -1849,7 +1847,8 @@ mass storage, or user patience, whichever comes first.
 If the nesting is not visible to the compiler, as is the case with
 mutually recursive functions each in its own translation unit,
 stack overflow will result.
-If the nesting takes the form of loops, either the control variable
+If the nesting takes the form of loops, perhaps in the guise of tail
+recursion, either the control variable
 will overflow or (in the Linux kernel) you will get an RCU CPU stall warning.
 Nevertheless, this class of RCU implementations is one
 of the most composable constructs in existence.
@@ -1977,9 +1976,8 @@ guard against mishaps and misuse:
 	and <tt>rcu_dereference()</tt>, perhaps (incorrectly)
 	substituting a simple assignment.
 	To catch this sort of error, a given RCU-protected pointer may be
-	tagged with <tt>__rcu</tt>, after which running sparse
-	with <tt>CONFIG_SPARSE_RCU_POINTER=y</tt> will complain
-	about simple-assignment accesses to that pointer.
+	tagged with <tt>__rcu</tt>, after which sparse
+	will complain about simple-assignment accesses to that pointer.
 	Arnd Bergmann made me aware of this requirement, and also
 	supplied the needed
 	<a href="https://lwn.net/Articles/376011/">patch series</a>.
@@ -2036,7 +2034,7 @@ guard against mishaps and misuse:
 	some other synchronization mechanism, for example, reference
 	counting.
 <li>	In kernels built with <tt>CONFIG_RCU_TRACE=y</tt>, RCU-related
-	information is provided via both debugfs and event tracing.
+	information is provided via event tracing.
 <li>	Open-coded use of <tt>rcu_assign_pointer()</tt> and
 	<tt>rcu_dereference()</tt> to create typical linked
 	data structures can be surprisingly error-prone.
@@ -2519,11 +2517,7 @@ It is similarly socially unacceptable to interrupt an
 <tt>nohz_full</tt> CPU running in userspace.
 RCU must therefore track <tt>nohz_full</tt> userspace
 execution.
-And in
-<a href="https://lwn.net/Articles/558284/"><tt>CONFIG_NO_HZ_FULL_SYSIDLE=y</tt></a>
-kernels, RCU must separately track idle CPUs on the one hand and
-CPUs that are either idle or executing in userspace on the other.
-In both cases, RCU must be able to sample state at two points in
+RCU must therefore be able to sample state at two points in
 time, and be able to determine whether or not some other CPU spent
 any time idle and/or executing in userspace.
 
@@ -2936,6 +2930,20 @@ to whether or not a CPU is online, which means that <tt>srcu_barrier()</tt>
 need not exclude CPU-hotplug operations.
 
 <p>
+SRCU also differs from other RCU flavors in that SRCU's expedited and
+non-expedited grace periods are implemented by the same mechanism.
+This means that in the current SRCU implementation, expediting a
+future grace period has the side effect of expediting all prior
+grace periods that have not yet completed.
+(But please note that this is a property of the current implementation,
+not necessarily of future implementations.)
+In addition, if SRCU has been idle for longer than the interval
+specified by the <tt>srcutree.exp_holdoff</tt> kernel boot parameter
+(25&nbsp;microseconds by default),
+and if a <tt>synchronize_srcu()</tt> invocation ends this idle period,
+that invocation will be automatically expedited.
+
+<p>
 As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating
 a locking bottleneck present in prior kernel versions.
 Although this will allow users to put much heavier stress on
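The hunks above describe how a pointer tagged with __rcu is checked by sparse and how it must be accessed through rcu_access_pointer() or rcu_dereference(). A minimal sketch of that access pattern follows; gp and gp_lock echo the document's own running example, while struct foo and the two helper functions are hypothetical names introduced only for illustration.

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct foo {
	int a;
};

static DEFINE_SPINLOCK(gp_lock);
static struct foo __rcu *gp;	/* sparse flags any plain access to this pointer */

/* Reader: accesses gp only via rcu_dereference() under rcu_read_lock(). */
static int read_gp_a(void)
{
	struct foo *p;
	int ret = -1;

	rcu_read_lock();
	p = rcu_dereference(gp);	/* sparse-clean access */
	if (p)
		ret = p->a;
	rcu_read_unlock();
	return ret;
}

/* Updater: publishes a new structure while holding gp_lock. */
static void update_gp(struct foo *newp)
{
	spin_lock(&gp_lock);
	rcu_assign_pointer(gp, newp);	/* sparse-clean publication */
	spin_unlock(&gp_lock);
}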
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 877947130ebe..6beda556faf3 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -413,11 +413,11 @@ over a rather long period of time, but improvements are always welcome!
 	read-side critical sections.  It is the responsibility of the
 	RCU update-side primitives to deal with this.
 
-17.	Use CONFIG_PROVE_RCU, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
-	__rcu sparse checks (enabled by CONFIG_SPARSE_RCU_POINTER) to
-	validate your RCU code.  These can help find problems as follows:
+17.	Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
+	__rcu sparse checks to validate your RCU code.  These can help
+	find problems as follows:
 
-	CONFIG_PROVE_RCU: check that accesses to RCU-protected data
+	CONFIG_PROVE_LOCKING: check that accesses to RCU-protected data
 		structures are carried out under the proper RCU
 		read-side critical section, while holding the right
 		combination of locks, or whatever other conditions
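The checklist item above pairs CONFIG_PROVE_LOCKING (lockdep) with the __rcu sparse checks. As a rough sketch of how the two aids fit together (foo_ptr, foo_lock, and foo_replace() are hypothetical names, not part of the patch): lockdep verifies at runtime that the lock named in the rcu_dereference_protected() condition is really held, and sparse verifies that the __rcu-tagged pointer is never accessed by plain assignment.

#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct foo {
	int a;
};

static DEFINE_SPINLOCK(foo_lock);
static struct foo __rcu *foo_ptr;

static void foo_replace(struct foo *newp)
{
	struct foo *oldp;

	spin_lock(&foo_lock);
	/* lockdep checks that foo_lock is actually held here. */
	oldp = rcu_dereference_protected(foo_ptr,
					 lockdep_is_held(&foo_lock));
	rcu_assign_pointer(foo_ptr, newp);
	spin_unlock(&foo_lock);

	if (oldp) {
		synchronize_rcu();	/* wait for pre-existing readers */
		kfree(oldp);
	}
}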
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
deleted file mode 100644
index 6549012033f9..000000000000
--- a/Documentation/RCU/trace.txt
+++ /dev/null
@@ -1,535 +0,0 @@
1CONFIG_RCU_TRACE debugfs Files and Formats
2
3
4The rcutree and rcutiny implementations of RCU provide debugfs trace
5output that summarizes counters and state. This information is useful for
6debugging RCU itself, and can sometimes also help to debug abuses of RCU.
7The following sections describe the debugfs files and formats, first
8for rcutree and next for rcutiny.
9
10
11CONFIG_TREE_RCU and CONFIG_PREEMPT_RCU debugfs Files and Formats
12
13These implementations of RCU provide several debugfs directories under the
14top-level directory "rcu":
15
16rcu/rcu_bh
17rcu/rcu_preempt
18rcu/rcu_sched
19
20Each directory contains files for the corresponding flavor of RCU.
21Note that rcu/rcu_preempt is only present for CONFIG_PREEMPT_RCU.
22For CONFIG_TREE_RCU, the RCU flavor maps onto the RCU-sched flavor,
23so that activity for both appears in rcu/rcu_sched.
24
25In addition, the following file appears in the top-level directory:
26rcu/rcutorture. This file displays rcutorture test progress. The output
27of "cat rcu/rcutorture" looks as follows:
28
29rcutorture test sequence: 0 (test in progress)
30rcutorture update version number: 615
31
32The first line shows the number of rcutorture tests that have completed
33since boot. If a test is currently running, the "(test in progress)"
34string will appear as shown above. The second line shows the number of
35update cycles that the current test has started, or zero if there is
36no test in progress.
37
38
39Within each flavor directory (rcu/rcu_bh, rcu/rcu_sched, and possibly
40also rcu/rcu_preempt) the following files will be present:
41
42rcudata:
43 Displays fields in struct rcu_data.
44rcuexp:
45 Displays statistics for expedited grace periods.
46rcugp:
47 Displays grace-period counters.
48rcuhier:
49 Displays the struct rcu_node hierarchy.
50rcu_pending:
51 Displays counts of the reasons rcu_pending() decided that RCU had
52 work to do.
53rcuboost:
54 Displays RCU boosting statistics. Only present if
55 CONFIG_RCU_BOOST=y.
56
57The output of "cat rcu/rcu_preempt/rcudata" looks as follows:
58
59 0!c=30455 g=30456 cnq=1/0:1 dt=126535/140000000000000/0 df=2002 of=4 ql=0/0 qs=N... b=10 ci=74572 nci=0 co=1131 ca=716
60 1!c=30719 g=30720 cnq=1/0:0 dt=132007/140000000000000/0 df=1874 of=10 ql=0/0 qs=N... b=10 ci=123209 nci=0 co=685 ca=982
61 2!c=30150 g=30151 cnq=1/1:1 dt=138537/140000000000000/0 df=1707 of=8 ql=0/0 qs=N... b=10 ci=80132 nci=0 co=1328 ca=1458
62 3 c=31249 g=31250 cnq=1/1:0 dt=107255/140000000000000/0 df=1749 of=6 ql=0/450 qs=NRW. b=10 ci=151700 nci=0 co=509 ca=622
63 4!c=29502 g=29503 cnq=1/0:1 dt=83647/140000000000000/0 df=965 of=5 ql=0/0 qs=N... b=10 ci=65643 nci=0 co=1373 ca=1521
64 5 c=31201 g=31202 cnq=1/0:1 dt=70422/0/0 df=535 of=7 ql=0/0 qs=.... b=10 ci=58500 nci=0 co=764 ca=698
65 6!c=30253 g=30254 cnq=1/0:1 dt=95363/140000000000000/0 df=780 of=5 ql=0/0 qs=N... b=10 ci=100607 nci=0 co=1414 ca=1353
66 7 c=31178 g=31178 cnq=1/0:0 dt=91536/0/0 df=547 of=4 ql=0/0 qs=.... b=10 ci=109819 nci=0 co=1115 ca=969
67
68This file has one line per CPU, or eight for this 8-CPU system.
69The fields are as follows:
70
71o The number at the beginning of each line is the CPU number.
72 CPUs numbers followed by an exclamation mark are offline,
73 but have been online at least once since boot. There will be
74 no output for CPUs that have never been online, which can be
75 a good thing in the surprisingly common case where NR_CPUS is
76 substantially larger than the number of actual CPUs.
77
78o "c" is the count of grace periods that this CPU believes have
79 completed. Offlined CPUs and CPUs in dynticks idle mode may lag
80 quite a ways behind, for example, CPU 4 under "rcu_sched" above,
81 which has been offline through 16 RCU grace periods. It is not
82 unusual to see offline CPUs lagging by thousands of grace periods.
83 Note that although the grace-period number is an unsigned long,
84 it is printed out as a signed long to allow more human-friendly
85 representation near boot time.
86
87o "g" is the count of grace periods that this CPU believes have
88 started. Again, offlined CPUs and CPUs in dynticks idle mode
89 may lag behind. If the "c" and "g" values are equal, this CPU
90 has already reported a quiescent state for the last RCU grace
91 period that it is aware of, otherwise, the CPU believes that it
92 owes RCU a quiescent state.
93
94o "pq" indicates that this CPU has passed through a quiescent state
95 for the current grace period. It is possible for "pq" to be
96 "1" and "c" different than "g", which indicates that although
97 the CPU has passed through a quiescent state, either (1) this
98 CPU has not yet reported that fact, (2) some other CPU has not
99 yet reported for this grace period, or (3) both.
100
101o "qp" indicates that RCU still expects a quiescent state from
102 this CPU. Offlined CPUs and CPUs in dyntick idle mode might
103 well have qp=1, which is OK: RCU is still ignoring them.
104
105o "dt" is the current value of the dyntick counter that is incremented
106 when entering or leaving idle, either due to a context switch or
107 due to an interrupt. This number is even if the CPU is in idle
108 from RCU's viewpoint and odd otherwise. The number after the
109 first "/" is the interrupt nesting depth when in idle state,
110 or a large number added to the interrupt-nesting depth when
111 running a non-idle task. Some architectures do not accurately
112 count interrupt nesting when running in non-idle kernel context,
113 which can result in interesting anomalies such as negative
114 interrupt-nesting levels. The number after the second "/"
115 is the NMI nesting depth.
116
117o "df" is the number of times that some other CPU has forced a
118 quiescent state on behalf of this CPU due to this CPU being in
119 idle state.
120
121o "of" is the number of times that some other CPU has forced a
122 quiescent state on behalf of this CPU due to this CPU being
123 offline. In a perfect world, this might never happen, but it
124 turns out that offlining and onlining a CPU can take several grace
125 periods, and so there is likely to be an extended period of time
126 when RCU believes that the CPU is online when it really is not.
127 Please note that erring in the other direction (RCU believing a
128 CPU is offline when it is really alive and kicking) is a fatal
129 error, so it makes sense to err conservatively.
130
131o "ql" is the number of RCU callbacks currently residing on
132 this CPU. The first number is the number of "lazy" callbacks
133 that are known to RCU to only be freeing memory, and the number
134 after the "/" is the total number of callbacks, lazy or not.
135 These counters count callbacks regardless of what phase of
136 grace-period processing that they are in (new, waiting for
137 grace period to start, waiting for grace period to end, ready
138 to invoke).
139
140o "qs" gives an indication of the state of the callback queue
141 with four characters:
142
143 "N" Indicates that there are callbacks queued that are not
144 ready to be handled by the next grace period, and thus
145 will be handled by the grace period following the next
146 one.
147
148 "R" Indicates that there are callbacks queued that are
149 ready to be handled by the next grace period.
150
151 "W" Indicates that there are callbacks queued that are
152 waiting on the current grace period.
153
154 "D" Indicates that there are callbacks queued that have
155 already been handled by a prior grace period, and are
156 thus waiting to be invoked. Note that callbacks in
157 the process of being invoked are not counted here.
158 Callbacks in the process of being invoked are those
159 that have been removed from the rcu_data structures
160 queues by rcu_do_batch(), but which have not yet been
161 invoked.
162
163 If there are no callbacks in a given one of the above states,
164 the corresponding character is replaced by ".".
165
166o "b" is the batch limit for this CPU. If more than this number
167 of RCU callbacks is ready to invoke, then the remainder will
168 be deferred.
169
170o "ci" is the number of RCU callbacks that have been invoked for
171 this CPU. Note that ci+nci+ql is the number of callbacks that have
172 been registered in absence of CPU-hotplug activity.
173
174o "nci" is the number of RCU callbacks that have been offloaded from
175 this CPU. This will always be zero unless the kernel was built
176 with CONFIG_RCU_NOCB_CPU=y and the "rcu_nocbs=" kernel boot
177 parameter was specified.
178
179o "co" is the number of RCU callbacks that have been orphaned due to
180 this CPU going offline. These orphaned callbacks have been moved
181 to an arbitrarily chosen online CPU.
182
183o "ca" is the number of RCU callbacks that have been adopted by this
184 CPU due to other CPUs going offline. Note that ci+co-ca+ql is
185 the number of RCU callbacks registered on this CPU.
186
187
188Kernels compiled with CONFIG_RCU_BOOST=y display the following from
189/debug/rcu/rcu_preempt/rcudata:
190
191 0!c=12865 g=12866 cnq=1/0:1 dt=83113/140000000000000/0 df=288 of=11 ql=0/0 qs=N... kt=0/O ktl=944 b=10 ci=60709 nci=0 co=748 ca=871
192 1 c=14407 g=14408 cnq=1/0:0 dt=100679/140000000000000/0 df=378 of=7 ql=0/119 qs=NRW. kt=0/W ktl=9b6 b=10 ci=109740 nci=0 co=589 ca=485
193 2 c=14407 g=14408 cnq=1/0:0 dt=105486/0/0 df=90 of=9 ql=0/89 qs=NRW. kt=0/W ktl=c0c b=10 ci=83113 nci=0 co=533 ca=490
194 3 c=14407 g=14408 cnq=1/0:0 dt=107138/0/0 df=142 of=8 ql=0/188 qs=NRW. kt=0/W ktl=b96 b=10 ci=121114 nci=0 co=426 ca=290
195 4 c=14405 g=14406 cnq=1/0:1 dt=50238/0/0 df=706 of=7 ql=0/0 qs=.... kt=0/W ktl=812 b=10 ci=34929 nci=0 co=643 ca=114
196 5!c=14168 g=14169 cnq=1/0:0 dt=45465/140000000000000/0 df=161 of=11 ql=0/0 qs=N... kt=0/O ktl=b4d b=10 ci=47712 nci=0 co=677 ca=722
197 6 c=14404 g=14405 cnq=1/0:0 dt=59454/0/0 df=94 of=6 ql=0/0 qs=.... kt=0/W ktl=e57 b=10 ci=55597 nci=0 co=701 ca=811
198 7 c=14407 g=14408 cnq=1/0:1 dt=68850/0/0 df=31 of=8 ql=0/0 qs=.... kt=0/W ktl=14bd b=10 ci=77475 nci=0 co=508 ca=1042
199
200This is similar to the output discussed above, but contains the following
201additional fields:
202
203o "kt" is the per-CPU kernel-thread state. The digit preceding
204 the first slash is zero if there is no work pending and 1
205 otherwise. The character between the first pair of slashes is
206 as follows:
207
208 "S" The kernel thread is stopped, in other words, all
209 CPUs corresponding to this rcu_node structure are
210 offline.
211
212 "R" The kernel thread is running.
213
214 "W" The kernel thread is waiting because there is no work
215 for it to do.
216
217 "O" The kernel thread is waiting because it has been
218 forced off of its designated CPU or because its
219 ->cpus_allowed mask permits it to run on other than
220 its designated CPU.
221
222 "Y" The kernel thread is yielding to avoid hogging CPU.
223
224 "?" Unknown value, indicates a bug.
225
226 The number after the final slash is the CPU that the kthread
227 is actually running on.
228
229 This field is displayed only for CONFIG_RCU_BOOST kernels.
230
231o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
232 the number of times that this CPU's per-CPU kthread has gone
233 through its loop servicing invoke_rcu_cpu_kthread() requests.
234
235 This field is displayed only for CONFIG_RCU_BOOST kernels.
236
237
238The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
239
240s=21872 wd1=0 wd2=0 wd3=5 enq=0 sc=21872
241
242These fields are as follows:
243
244o "s" is the sequence number, with an odd number indicating that
245 an expedited grace period is in progress.
246
247o "wd1", "wd2", and "wd3" are the number of times that an attempt
248 to start an expedited grace period found that someone else had
249 completed an expedited grace period that satisfies the attempted
250 request. "Our work is done."
251
252o "enq" is the number of quiescent states still outstanding.
253
254o "sc" is the number of times that the attempt to start a
255 new expedited grace period succeeded.
256
257
258The output of "cat rcu/rcu_preempt/rcugp" looks as follows:
259
260completed=31249 gpnum=31250 age=1 max=18
261
262These fields are taken from the rcu_state structure, and are as follows:
263
264o "completed" is the number of grace periods that have completed.
265 It is comparable to the "c" field from rcu/rcudata in that a
266 CPU whose "c" field matches the value of "completed" is aware
267 that the corresponding RCU grace period has completed.
268
269o "gpnum" is the number of grace periods that have started. It is
270 similarly comparable to the "g" field from rcu/rcudata in that
271 a CPU whose "g" field matches the value of "gpnum" is aware that
272 the corresponding RCU grace period has started.
273
274 If these two fields are equal, then there is no grace period
275 in progress, in other words, RCU is idle. On the other hand,
276 if the two fields differ (as they are above), then an RCU grace
277 period is in progress.
278
279o "age" is the number of jiffies that the current grace period
280 has extended for, or zero if there is no grace period currently
281 in effect.
282
283o "max" is the age in jiffies of the longest-duration grace period
284 thus far.
285
286The output of "cat rcu/rcu_preempt/rcuhier" looks as follows:
287
288c=14407 g=14408 s=0 jfq=2 j=c863 nfqs=12040/nfqsng=0(12040) fqlh=1051 oqlen=0/0
2893/3 ..>. 0:7 ^0
290e/e ..>. 0:3 ^0 d/d ..>. 4:7 ^1
291
292The fields are as follows:
293
294o "c" is exactly the same as "completed" under rcu/rcu_preempt/rcugp.
295
296o "g" is exactly the same as "gpnum" under rcu/rcu_preempt/rcugp.
297
298o "s" is the current state of the force_quiescent_state()
299 state machine.
300
301o "jfq" is the number of jiffies remaining for this grace period
302 before force_quiescent_state() is invoked to help push things
303 along. Note that CPUs in idle mode throughout the grace period
304 will not report on their own, but rather must be check by some
305 other CPU via force_quiescent_state().
306
307o "j" is the low-order four hex digits of the jiffies counter.
308 Yes, Paul did run into a number of problems that turned out to
309 be due to the jiffies counter no longer counting. Why do you ask?
310
311o "nfqs" is the number of calls to force_quiescent_state() since
312 boot.
313
314o "nfqsng" is the number of useless calls to force_quiescent_state(),
315 where there wasn't actually a grace period active. This can
316 no longer happen due to grace-period processing being pushed
317 into a kthread. The number in parentheses is the difference
318 between "nfqs" and "nfqsng", or the number of times that
319 force_quiescent_state() actually did some real work.
320
321o "fqlh" is the number of calls to force_quiescent_state() that
322 exited immediately (without even being counted in nfqs above)
323 due to contention on ->fqslock.
324
325o Each element of the form "3/3 ..>. 0:7 ^0" represents one rcu_node
326 structure. Each line represents one level of the hierarchy,
327 from root to leaves. It is best to think of the rcu_data
328 structures as forming yet another level after the leaves.
329 Note that there might be either one, two, three, or even four
330 levels of rcu_node structures, depending on the relationship
331 between CONFIG_RCU_FANOUT, CONFIG_RCU_FANOUT_LEAF (possibly
332 adjusted using the rcu_fanout_leaf kernel boot parameter), and
333 CONFIG_NR_CPUS (possibly adjusted using the nr_cpu_ids count of
334 possible CPUs for the booting hardware).
335
336 o The numbers separated by the "/" are the qsmask followed
337 by the qsmaskinit. The qsmask will have one bit
338 set for each entity in the next lower level that has
339 not yet checked in for the current grace period ("e"
340 indicating CPUs 5, 6, and 7 in the example above).
341 The qsmaskinit will have one bit for each entity that is
342 currently expected to check in during each grace period.
343 The value of qsmaskinit is assigned to that of qsmask
344 at the beginning of each grace period.
345
346 o The characters separated by the ">" indicate the state
347 of the blocked-tasks lists. A "G" preceding the ">"
348 indicates that at least one task blocked in an RCU
349 read-side critical section blocks the current grace
350 period, while a "E" preceding the ">" indicates that
351 at least one task blocked in an RCU read-side critical
352 section blocks the current expedited grace period.
353 A "T" character following the ">" indicates that at
354 least one task is blocked within an RCU read-side
355 critical section, regardless of whether any current
356 grace period (expedited or normal) is inconvenienced.
357 A "." character appears if the corresponding condition
358 does not hold, so that "..>." indicates that no tasks
359 are blocked. In contrast, "GE>T" indicates maximal
360 inconvenience from blocked tasks. CONFIG_TREE_RCU
361 builds of the kernel will always show "..>.".
362
363 o The numbers separated by the ":" are the range of CPUs
364 served by this struct rcu_node. This can be helpful
365 in working out how the hierarchy is wired together.
366
367 For example, the example rcu_node structure shown above
368 has "0:7", indicating that it covers CPUs 0 through 7.
369
370 o The number after the "^" indicates the bit in the
371 next higher level rcu_node structure that this rcu_node
372 structure corresponds to. For example, the "d/d ..>. 4:7
373 ^1" has a "1" in this position, indicating that it
374 corresponds to the "1" bit in the "3" shown in the
375 "3/3 ..>. 0:7 ^0" entry on the next level up.
376
377
378The output of "cat rcu/rcu_sched/rcu_pending" looks as follows:
379
380 0!np=26111 qsp=29 rpq=5386 cbr=1 cng=570 gpc=3674 gps=577 nn=15903 ndw=0
381 1!np=28913 qsp=35 rpq=6097 cbr=1 cng=448 gpc=3700 gps=554 nn=18113 ndw=0
382 2!np=32740 qsp=37 rpq=6202 cbr=0 cng=476 gpc=4627 gps=546 nn=20889 ndw=0
383 3 np=23679 qsp=22 rpq=5044 cbr=1 cng=415 gpc=3403 gps=347 nn=14469 ndw=0
384 4!np=30714 qsp=4 rpq=5574 cbr=0 cng=528 gpc=3931 gps=639 nn=20042 ndw=0
385 5 np=28910 qsp=2 rpq=5246 cbr=0 cng=428 gpc=4105 gps=709 nn=18422 ndw=0
386 6!np=38648 qsp=5 rpq=7076 cbr=0 cng=840 gpc=4072 gps=961 nn=25699 ndw=0
387 7 np=37275 qsp=2 rpq=6873 cbr=0 cng=868 gpc=3416 gps=971 nn=25147 ndw=0
388
389The fields are as follows:
390
391o The leading number is the CPU number, with "!" indicating
392 an offline CPU.
393
394o "np" is the number of times that __rcu_pending() has been invoked
395 for the corresponding flavor of RCU.
396
397o "qsp" is the number of times that the RCU was waiting for a
398 quiescent state from this CPU.
399
400o "rpq" is the number of times that the CPU had passed through
401 a quiescent state, but not yet reported it to RCU.
402
403o "cbr" is the number of times that this CPU had RCU callbacks
404 that had passed through a grace period, and were thus ready
405 to be invoked.
406
407o "cng" is the number of times that this CPU needed another
408 grace period while RCU was idle.
409
410o "gpc" is the number of times that an old grace period had
411 completed, but this CPU was not yet aware of it.
412
413o "gps" is the number of times that a new grace period had started,
414 but this CPU was not yet aware of it.
415
416o "ndw" is the number of times that a wakeup of an rcuo
417 callback-offload kthread had to be deferred in order to avoid
418 deadlock.
419
420o "nn" is the number of times that this CPU needed nothing.
421
422
423The output of "cat rcu/rcuboost" looks as follows:
424
4250:3 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
426 balk: nt=0 egt=4695 bt=0 nb=0 ny=56 nos=0
4274:7 tasks=.... kt=W ntb=0 neb=0 nnb=0 j=c864 bt=c894
428 balk: nt=0 egt=6541 bt=0 nb=0 ny=126 nos=0
429
430This information is output only for rcu_preempt. Each two-line entry
431corresponds to a leaf rcu_node structure. The fields are as follows:
432
433o "n:m" is the CPU-number range for the corresponding two-line
434 entry. In the sample output above, the first entry covers
435 CPUs zero through three and the second entry covers CPUs four
436 through seven.
437
438o "tasks=TNEB" gives the state of the various segments of the
439 rnp->blocked_tasks list:
440
441 "T" This indicates that there are some tasks that blocked
442 while running on one of the corresponding CPUs while
443 in an RCU read-side critical section.
444
445 "N" This indicates that some of the blocked tasks are preventing
446 the current normal (non-expedited) grace period from
447 completing.
448
449 "E" This indicates that some of the blocked tasks are preventing
450 the current expedited grace period from completing.
451
452 "B" This indicates that some of the blocked tasks are in
453 need of RCU priority boosting.
454
455 Each character is replaced with "." if the corresponding
456 condition does not hold.
457
458o "kt" is the state of the RCU priority-boosting kernel
459 thread associated with the corresponding rcu_node structure.
460 The state can be one of the following:
461
462 "S" The kernel thread is stopped, in other words, all
463 CPUs corresponding to this rcu_node structure are
464 offline.
465
466 "R" The kernel thread is running.
467
468 "W" The kernel thread is waiting because there is no work
469 for it to do.
470
471 "Y" The kernel thread is yielding to avoid hogging CPU.
472
473 "?" Unknown value, indicates a bug.
474
475o "ntb" is the number of tasks boosted.
476
477o "neb" is the number of tasks boosted in order to complete an
478 expedited grace period.
479
480o "nnb" is the number of tasks boosted in order to complete a
481 normal (non-expedited) grace period. When boosting a task
482 that was blocking both an expedited and a normal grace period,
483 it is counted against the expedited total above.
484
485o "j" is the low-order 16 bits of the jiffies counter in
486 hexadecimal.
487
488o "bt" is the low-order 16 bits of the value that the jiffies
489 counter will have when we next start boosting, assuming that
490 the current grace period does not end beforehand. This is
491 also in hexadecimal.
492
493o "balk: nt" counts the number of times we didn't boost (in
494 other words, we balked) even though it was time to boost because
495 there were no blocked tasks to boost. This situation occurs
496 when there is one blocked task on one rcu_node structure and
497 none on some other rcu_node structure.
498
499o "egt" counts the number of times we balked because although
500 there were blocked tasks, none of them were blocking the
501 current grace period, whether expedited or otherwise.
502
503o "bt" counts the number of times we balked because boosting
504 had already been initiated for the current grace period.
505
506o "nb" counts the number of times we balked because there
507 was at least one task blocking the current non-expedited grace
508 period that never had blocked. If it is already running, it
509 just won't help to boost its priority!
510
511o "ny" counts the number of times we balked because it was
512 not yet time to start boosting.
513
514o "nos" counts the number of times we balked for other
515 reasons, e.g., the grace period ended first.
516
517
518CONFIG_TINY_RCU debugfs Files and Formats
519
520These implementations of RCU provides a single debugfs file under the
521top-level directory RCU, namely rcu/rcudata, which displays fields in
522rcu_bh_ctrlblk and rcu_sched_ctrlblk.
523
524The output of "cat rcu/rcudata" is as follows:
525
526rcu_sched: qlen: 0
527rcu_bh: qlen: 0
528
529This is split into rcu_sched and rcu_bh sections. The field is as
530follows:
531
532o "qlen" is the number of RCU callbacks currently waiting either
533 for an RCU grace period or waiting to be invoked. This is the
534 only field present for rcu_sched and rcu_bh, due to the
535 short-circuiting of grace period in those two cases.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0f5c3b4347c6..05f26c8df89e 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3238,21 +3238,17 @@
 
 	rcutree.gp_cleanup_delay=	[KNL]
 			Set the number of jiffies to delay each step of
-			RCU grace-period cleanup.  This only has effect
-			when CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP is set.
+			RCU grace-period cleanup.
 
 	rcutree.gp_init_delay=	[KNL]
 			Set the number of jiffies to delay each step of
-			RCU grace-period initialization.  This only has
-			effect when CONFIG_RCU_TORTURE_TEST_SLOW_INIT
-			is set.
+			RCU grace-period initialization.
 
 	rcutree.gp_preinit_delay=	[KNL]
 			Set the number of jiffies to delay each step of
 			RCU grace-period pre-initialization, that is,
 			the propagation of recent CPU-hotplug changes up
-			the rcu_node combining tree.  This only has effect
-			when CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT is set.
+			the rcu_node combining tree.
 
 	rcutree.rcu_fanout_exact= [KNL]
 			Disable autobalancing of the rcu_node combining
@@ -3328,6 +3324,17 @@
 			This wake_up() will be accompanied by a
 			WARN_ONCE() splat and an ftrace_dump().
 
+	rcuperf.gp_async= [KNL]
+			Measure performance of asynchronous
+			grace-period primitives such as call_rcu().
+
+	rcuperf.gp_async_max= [KNL]
+			Specify the maximum number of outstanding
+			callbacks per writer thread.  When a writer
+			thread exceeds this limit, it invokes the
+			corresponding flavor of rcu_barrier() to allow
+			previously posted callbacks to drain.
+
 	rcuperf.gp_exp= [KNL]
 			Measure performance of expedited synchronous
 			grace-period primitives.
@@ -3355,17 +3362,22 @@
 	rcuperf.perf_runnable= [BOOT]
 			Start rcuperf running at boot time.
 
+	rcuperf.perf_type= [KNL]
+			Specify the RCU implementation to test.
+
 	rcuperf.shutdown= [KNL]
 			Shut the system down after performance tests
 			complete.  This is useful for hands-off automated
 			testing.
 
-	rcuperf.perf_type= [KNL]
-			Specify the RCU implementation to test.
-
 	rcuperf.verbose= [KNL]
 			Enable additional printk() statements.
 
+	rcuperf.writer_holdoff= [KNL]
+			Write-side holdoff between grace periods,
+			in microseconds.  The default of zero says
+			no holdoff.
+
 	rcutorture.cbflood_inter_holdoff= [KNL]
 			Set holdoff time (jiffies) between successive
 			callback-flood tests.
@@ -3803,6 +3815,15 @@
 	spia_pedr=
 	spia_peddr=
 
+	srcutree.counter_wrap_check [KNL]
+			Specifies how frequently to check for
+			grace-period sequence counter wrap for the
+			srcu_data structure's ->srcu_gp_seq_needed field.
+			The greater the number of bits set in this kernel
+			parameter, the less frequently counter wrap will
+			be checked for.  Note that the bottom two bits
+			are ignored.
+
 	srcutree.exp_holdoff [KNL]
 			Specifies how many nanoseconds must elapse
 			since the end of the last SRCU grace period for
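The rcuperf.gp_async_max description above amounts to the following pattern: post callbacks with call_rcu() and, once too many are outstanding, drain them with rcu_barrier(). The sketch below only illustrates that pattern (the real logic lives in kernel/rcu/rcuperf.c); struct perf_cb and the function names are hypothetical.

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct perf_cb {
	struct rcu_head rh;
};

static void perf_cb_func(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct perf_cb, rh));
}

/* Post nposts callbacks, draining whenever 'max' are outstanding. */
static void post_async_callbacks(int nposts, int max)
{
	int outstanding = 0;
	int i;

	for (i = 0; i < nposts; i++) {
		struct perf_cb *cb = kzalloc(sizeof(*cb), GFP_KERNEL);

		if (!cb)
			break;
		call_rcu(&cb->rh, perf_cb_func);
		if (++outstanding >= max) {
			rcu_barrier();	/* wait for all posted callbacks */
			outstanding = 0;
		}
	}
	rcu_barrier();	/* drain any remainder before returning */
}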
diff --git a/Documentation/core-api/atomic_ops.rst b/Documentation/core-api/atomic_ops.rst
index 55e43f1c80de..fce929144ccd 100644
--- a/Documentation/core-api/atomic_ops.rst
+++ b/Documentation/core-api/atomic_ops.rst
@@ -303,6 +303,11 @@ defined which accomplish this::
 	void smp_mb__before_atomic(void);
 	void smp_mb__after_atomic(void);
 
+Preceding a non-value-returning read-modify-write atomic operation with
+smp_mb__before_atomic() and following it with smp_mb__after_atomic()
+provides the same full ordering that is provided by value-returning
+read-modify-write atomic operations.
+
 For example, smp_mb__before_atomic() can be used like so::
 
 	obj->dead = 1;
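The paragraph added above can be illustrated with a short, hypothetical sketch: wrapping a non-value-returning atomic such as atomic_inc() with the barrier pair gives it the same full ordering that a value-returning operation such as atomic_inc_return() already provides. The variable names below are illustrative only.

#include <linux/atomic.h>

static atomic_t nr_pending = ATOMIC_INIT(0);
static int data_ready;

static void publish_and_count(void)
{
	data_ready = 1;			/* plain store to be ordered */
	smp_mb__before_atomic();	/* order the store before the atomic RMW */
	atomic_inc(&nr_pending);	/* non-value-returning read-modify-write */
	smp_mb__after_atomic();		/* order the RMW before later accesses */
}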
diff --git a/Documentation/dev-tools/sparse.rst b/Documentation/dev-tools/sparse.rst
index ffdcc97f6f5a..78aa00a604a0 100644
--- a/Documentation/dev-tools/sparse.rst
+++ b/Documentation/dev-tools/sparse.rst
@@ -103,9 +103,3 @@ have already built it.
 
 The optional make variable CF can be used to pass arguments to sparse.  The
 build system passes -Wbitwise to sparse automatically.
-
-Checking RCU annotations
-~~~~~~~~~~~~~~~~~~~~~~~~
-
-RCU annotations are not checked by default.  To enable RCU annotation
-checks, include -DCONFIG_SPARSE_RCU_POINTER in your CF flags.
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index df31e30b6a02..2cb7dc5c0e0d 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -109,13 +109,12 @@ SCHED_SOFTIRQ: Do all of the following:
 	on that CPU.  If a thread that expects to run on the de-jittered
 	CPU awakens, the scheduler will send an IPI that can result in
 	a subsequent SCHED_SOFTIRQ.
-2.	Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
-	CONFIG_NO_HZ_FULL=y, and, in addition, ensure that the CPU
-	to be de-jittered is marked as an adaptive-ticks CPU using the
-	"nohz_full=" boot parameter.  This reduces the number of
-	scheduler-clock interrupts that the de-jittered CPU receives,
-	minimizing its chances of being selected to do the load balancing
-	work that runs in SCHED_SOFTIRQ context.
+2.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be de-jittered
+	is marked as an adaptive-ticks CPU using the "nohz_full="
+	boot parameter.  This reduces the number of scheduler-clock
+	interrupts that the de-jittered CPU receives, minimizing its
+	chances of being selected to do the load balancing work that
+	runs in SCHED_SOFTIRQ context.
 3.	To the extent possible, keep the CPU out of the kernel when it
 	is non-idle, for example, by avoiding system calls and by
 	forcing both kernel threads and interrupts to execute elsewhere.
@@ -135,11 +134,10 @@ HRTIMER_SOFTIRQ: Do all of the following:
 RCU_SOFTIRQ:  Do at least one of the following:
 1.	Offload callbacks and keep the CPU in either dyntick-idle or
 	adaptive-ticks state by doing all of the following:
-	a.	Build with CONFIG_RCU_NOCB_CPU=y, CONFIG_RCU_NOCB_CPU_ALL=y,
-		CONFIG_NO_HZ_FULL=y, and, in addition ensure that the CPU
-		to be de-jittered is marked as an adaptive-ticks CPU using
-		the "nohz_full=" boot parameter.  Bind the rcuo kthreads
-		to housekeeping CPUs, which can tolerate OS jitter.
+	a.	CONFIG_NO_HZ_FULL=y and ensure that the CPU to be
+		de-jittered is marked as an adaptive-ticks CPU using the
+		"nohz_full=" boot parameter.  Bind the rcuo kthreads to
+		housekeeping CPUs, which can tolerate OS jitter.
 	b.	To the extent possible, keep the CPU out of the kernel
 		when it is non-idle, for example, by avoiding system
 		calls and by forcing both kernel threads and interrupts
@@ -236,11 +234,10 @@ To reduce its OS jitter, do at least one of the following:
 	is feasible only if your workload never requires RCU priority
 	boosting, for example, if you ensure frequent idle time on all
 	CPUs that might execute within the kernel.
-3.	Build with CONFIG_RCU_NOCB_CPU=y and CONFIG_RCU_NOCB_CPU_ALL=y,
-	which offloads all RCU callbacks to kthreads that can be moved
-	off of CPUs susceptible to OS jitter.  This approach prevents the
-	rcuc/%u kthreads from having any work to do, so that they are
-	never awakened.
+3.	Build with CONFIG_RCU_NOCB_CPU=y and boot with the rcu_nocbs=
+	boot parameter offloading RCU callbacks from all CPUs susceptible
+	to OS jitter.  This approach prevents the rcuc/%u kthreads from
+	having any work to do, so that they are never awakened.
 4.	Ensure that the CPU never enters the kernel, and, in particular,
 	avoid initiating any CPU hotplug operations on this CPU.  This is
 	another way of preventing any callbacks from being queued on the
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 732f10ea382e..9d5e0f853f08 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -27,7 +27,7 @@ The purpose of this document is twofold:
 (2) to provide a guide as to how to use the barriers that are available.
 
 Note that an architecture can provide more than the minimum requirement
-for any particular barrier, but if the architecure provides less than
+for any particular barrier, but if the architecture provides less than
 that, that architecture is incorrect.
 
 Note also that it is possible that a barrier may be a no-op for an
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index 6eaf576294f3..2dcaf9adb7a7 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -194,32 +194,9 @@ that the RCU callbacks are processed in a timely fashion.
 
 Another approach is to offload RCU callback processing to "rcuo" kthreads
 using the CONFIG_RCU_NOCB_CPU=y Kconfig option.  The specific CPUs to
-offload may be selected via several methods:
-
-1.	One of three mutually exclusive Kconfig options specify a
-	build-time default for the CPUs to offload:
-
-	a.	The CONFIG_RCU_NOCB_CPU_NONE=y Kconfig option results in
-		no CPUs being offloaded.
-
-	b.	The CONFIG_RCU_NOCB_CPU_ZERO=y Kconfig option causes
-		CPU 0 to be offloaded.
-
-	c.	The CONFIG_RCU_NOCB_CPU_ALL=y Kconfig option causes all
-		CPUs to be offloaded.  Note that the callbacks will be
-		offloaded to "rcuo" kthreads, and that those kthreads
-		will in fact run on some CPU.  However, this approach
-		gives fine-grained control on exactly which CPUs the
-		callbacks run on, along with their scheduling priority
-		(including the default of SCHED_OTHER), and it further
-		allows this control to be varied dynamically at runtime.
-
-2.	The "rcu_nocbs=" kernel boot parameter, which takes a comma-separated
-	list of CPUs and CPU ranges, for example, "1,3-5" selects CPUs 1,
-	3, 4, and 5.  The specified CPUs will be offloaded in addition to
-	any CPUs specified as offloaded by CONFIG_RCU_NOCB_CPU_ZERO=y or
-	CONFIG_RCU_NOCB_CPU_ALL=y.  This means that the "rcu_nocbs=" boot
-	parameter has no effect for kernels built with RCU_NOCB_CPU_ALL=y.
+offload may be selected using The "rcu_nocbs=" kernel boot parameter,
+which takes a comma-separated list of CPUs and CPU ranges, for example,
+"1,3-5" selects CPUs 1, 3, 4, and 5.
 
 The offloaded CPUs will never queue RCU callbacks, and therefore RCU
 never prevents offloaded CPUs from entering either dyntick-idle mode
diff --git a/include/linux/bcm47xx_nvram.h b/include/linux/bcm47xx_nvram.h
index 2793652fbf66..a414a2b53e41 100644
--- a/include/linux/bcm47xx_nvram.h
+++ b/include/linux/bcm47xx_nvram.h
@@ -8,6 +8,7 @@
 #ifndef __BCM47XX_NVRAM_H
 #define __BCM47XX_NVRAM_H
 
+#include <linux/errno.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/vmalloc.h>
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index f8110051188f..707242fdbb89 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -17,11 +17,7 @@
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
 # define __percpu	__attribute__((noderef, address_space(3)))
-#ifdef CONFIG_SPARSE_RCU_POINTER
 # define __rcu		__attribute__((noderef, address_space(4)))
-#else /* CONFIG_SPARSE_RCU_POINTER */
-# define __rcu
-#endif /* CONFIG_SPARSE_RCU_POINTER */
 # define __private	__attribute__((noderef))
 extern void __chk_user_ptr(const volatile void __user *);
 extern void __chk_io_ptr(const volatile void __iomem *);
diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h
index 4b766b61e1a0..426cee67f0e2 100644
--- a/include/linux/rcu_node_tree.h
+++ b/include/linux/rcu_node_tree.h
@@ -7,6 +7,10 @@
  * unlimited scalability while maintaining a constant level of contention
  * on the root node.
  *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h
index ba4d2621d9ca..c3ad00e63556 100644
--- a/include/linux/rcu_segcblist.h
+++ b/include/linux/rcu_segcblist.h
@@ -1,6 +1,10 @@
 /*
  * RCU segmented callback lists
  *
+ * This seemingly RCU-private file must be available to SRCU users
+ * because the size of the TREE SRCU srcu_struct structure depends
+ * on these definitions.
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index e1e5d002fdb9..f816fc72b51e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -34,104 +34,15 @@
 #define __LINUX_RCUPDATE_H
 
 #include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/spinlock.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <linux/seqlock.h>
-#include <linux/lockdep.h>
-#include <linux/debugobjects.h>
-#include <linux/bug.h>
 #include <linux/compiler.h>
-#include <linux/ktime.h>
+#include <linux/atomic.h>
 #include <linux/irqflags.h>
+#include <linux/preempt.h>
+#include <linux/bottom_half.h>
+#include <linux/lockdep.h>
+#include <asm/processor.h>
+#include <linux/cpumask.h>
 
-#include <asm/barrier.h>
-
-#ifndef CONFIG_TINY_RCU
-extern int rcu_expedited; /* for sysctl */
-extern int rcu_normal;    /* also for sysctl */
-#endif /* #ifndef CONFIG_TINY_RCU */
-
-#ifdef CONFIG_TINY_RCU
-/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
-static inline bool rcu_gp_is_normal(void)  /* Internal RCU use. */
-{
-	return true;
-}
-static inline bool rcu_gp_is_expedited(void)  /* Internal RCU use. */
-{
-	return false;
-}
-
-static inline void rcu_expedite_gp(void)
-{
-}
-
-static inline void rcu_unexpedite_gp(void)
-{
-}
-#else /* #ifdef CONFIG_TINY_RCU */
-bool rcu_gp_is_normal(void);     /* Internal RCU use. */
-bool rcu_gp_is_expedited(void);  /* Internal RCU use. */
-void rcu_expedite_gp(void);
-void rcu_unexpedite_gp(void);
-#endif /* #else #ifdef CONFIG_TINY_RCU */
-
-enum rcutorture_type {
-	RCU_FLAVOR,
-	RCU_BH_FLAVOR,
-	RCU_SCHED_FLAVOR,
-	RCU_TASKS_FLAVOR,
-	SRCU_FLAVOR,
-	INVALID_RCU_FLAVOR
-};
-
-#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
-void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
-			    unsigned long *gpnum, unsigned long *completed);
-void rcutorture_record_test_transition(void);
-void rcutorture_record_progress(unsigned long vernum);
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-bool rcu_irq_enter_disabled(void);
-#else
-static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
-					  int *flags,
-					  unsigned long *gpnum,
-					  unsigned long *completed)
-{
-	*flags = 0;
-	*gpnum = 0;
-	*completed = 0;
-}
-static inline void rcutorture_record_test_transition(void)
-{
-}
-static inline void rcutorture_record_progress(unsigned long vernum)
-{
-}
-static inline bool rcu_irq_enter_disabled(void)
-{
-	return false;
-}
-#ifdef CONFIG_RCU_TRACE
-void do_trace_rcu_torture_read(const char *rcutorturename,
-			       struct rcu_head *rhp,
-			       unsigned long secs,
-			       unsigned long c_old,
-			       unsigned long c);
-#else
-#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
-	do { } while (0)
-#endif
-#endif
-
-#define UINT_CMP_GE(a, b)	(UINT_MAX / 2 >= (a) - (b))
-#define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
 #define ulong2long(a)		(*(long *)(&(a)))
@@ -139,115 +50,14 @@ void do_trace_rcu_torture_read(const char *rcutorturename,
139/* Exported common interfaces */ 50/* Exported common interfaces */
140 51
141#ifdef CONFIG_PREEMPT_RCU 52#ifdef CONFIG_PREEMPT_RCU
142 53void call_rcu(struct rcu_head *head, rcu_callback_t func);
143/**
144 * call_rcu() - Queue an RCU callback for invocation after a grace period.
145 * @head: structure to be used for queueing the RCU updates.
146 * @func: actual callback function to be invoked after the grace period
147 *
148 * The callback function will be invoked some time after a full grace
149 * period elapses, in other words after all pre-existing RCU read-side
150 * critical sections have completed. However, the callback function
151 * might well execute concurrently with RCU read-side critical sections
152 * that started after call_rcu() was invoked. RCU read-side critical
153 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
154 * and may be nested.
155 *
156 * Note that all CPUs must agree that the grace period extended beyond
157 * all pre-existing RCU read-side critical sections. On systems with more
158 * than one CPU, this means that when "func()" is invoked, each CPU is
159 * guaranteed to have executed a full memory barrier since the end of its
160 * last RCU read-side critical section whose beginning preceded the call
161 * to call_rcu(). It also means that each CPU executing an RCU read-side
162 * critical section that continues beyond the start of "func()" must have
163 * executed a memory barrier after the call_rcu() but before the beginning
164 * of that RCU read-side critical section. Note that these guarantees
165 * include CPUs that are offline, idle, or executing in user mode, as
166 * well as CPUs that are executing in the kernel.
167 *
168 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
169 * resulting RCU callback function "func()", then both CPU A and CPU B are
170 * guaranteed to execute a full memory barrier during the time interval
171 * between the call to call_rcu() and the invocation of "func()" -- even
172 * if CPU A and CPU B are the same CPU (but again only if the system has
173 * more than one CPU).
174 */
175void call_rcu(struct rcu_head *head,
176 rcu_callback_t func);
177
178#else /* #ifdef CONFIG_PREEMPT_RCU */ 54#else /* #ifdef CONFIG_PREEMPT_RCU */
179
180/* In classic RCU, call_rcu() is just call_rcu_sched(). */
181#define call_rcu call_rcu_sched 55#define call_rcu call_rcu_sched
182
183#endif /* #else #ifdef CONFIG_PREEMPT_RCU */ 56#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
184 57
185/** 58void call_rcu_bh(struct rcu_head *head, rcu_callback_t func);
186 * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period. 59void call_rcu_sched(struct rcu_head *head, rcu_callback_t func);
187 * @head: structure to be used for queueing the RCU updates.
188 * @func: actual callback function to be invoked after the grace period
189 *
190 * The callback function will be invoked some time after a full grace
191 * period elapses, in other words after all currently executing RCU
192 * read-side critical sections have completed. call_rcu_bh() assumes
193 * that the read-side critical sections end on completion of a softirq
194 * handler. This means that read-side critical sections in process
195 * context must not be interrupted by softirqs. This interface is to be
196 * used when most of the read-side critical sections are in softirq context.
197 * RCU read-side critical sections are delimited by :
198 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
199 * OR
200 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
201 * These may be nested.
202 *
203 * See the description of call_rcu() for more detailed information on
204 * memory ordering guarantees.
205 */
206void call_rcu_bh(struct rcu_head *head,
207 rcu_callback_t func);
208
209/**
210 * call_rcu_sched() - Queue an RCU callback for invocation after a sched grace period.
211 * @head: structure to be used for queueing the RCU updates.
212 * @func: actual callback function to be invoked after the grace period
213 *
214 * The callback function will be invoked some time after a full grace
215 * period elapses, in other words after all currently executing RCU
216 * read-side critical sections have completed. call_rcu_sched() assumes
217 * that the read-side critical sections end on enabling of preemption
218 * or on voluntary preemption.
219 * RCU read-side critical sections are delimited by :
220 * - rcu_read_lock_sched() and rcu_read_unlock_sched(),
221 * OR
222 * anything that disables preemption.
223 * These may be nested.
224 *
225 * See the description of call_rcu() for more detailed information on
226 * memory ordering guarantees.
227 */
228void call_rcu_sched(struct rcu_head *head,
229 rcu_callback_t func);
230
231void synchronize_sched(void); 60void synchronize_sched(void);
232
233/**
234 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
235 * @head: structure to be used for queueing the RCU updates.
236 * @func: actual callback function to be invoked after the grace period
237 *
238 * The callback function will be invoked some time after a full grace
239 * period elapses, in other words after all currently executing RCU
240 * read-side critical sections have completed. call_rcu_tasks() assumes
241 * that the read-side critical sections end at a voluntary context
242 * switch (not a preemption!), entry into idle, or transition to usermode
243 * execution. As such, there are no read-side primitives analogous to
244 * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
245 * to determine that all tasks have passed through a safe state, not so
246 * much for data-structure synchronization.
247 *
248 * See the description of call_rcu() for more detailed information on
249 * memory ordering guarantees.
250 */
251void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); 61void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func);
252void synchronize_rcu_tasks(void); 62void synchronize_rcu_tasks(void);
253void rcu_barrier_tasks(void); 63void rcu_barrier_tasks(void);
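The call_rcu() kernel-doc consolidated above spells out the grace-period and memory-ordering guarantees. As an illustration only (struct foo, foo_reclaim() and foo_remove() are invented names, not part of this patch), the usual pattern embeds an rcu_head in the protected object and defers its freeing past a grace period:

#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct list_head list;
	int data;
	struct rcu_head rcu;		/* Embedded so call_rcu() can track this object. */
};

static void foo_reclaim(struct rcu_head *rhp)
{
	/* Runs only after all pre-existing RCU readers have finished. */
	kfree(container_of(rhp, struct foo, rcu));
}

static void foo_remove(struct foo *fp)
{
	list_del_rcu(&fp->list);		/* Unpublish; existing readers may still see it. */
	call_rcu(&fp->rcu, foo_reclaim);	/* Free only after a full grace period. */
}

The same ordering guarantees apply whether the callback is queued with call_rcu(), call_rcu_bh(), or call_rcu_sched(); only the definition of a read-side critical section differs.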
@@ -301,22 +111,12 @@ void rcu_check_callbacks(int user);
301void rcu_report_dead(unsigned int cpu); 111void rcu_report_dead(unsigned int cpu);
302void rcu_cpu_starting(unsigned int cpu); 112void rcu_cpu_starting(unsigned int cpu);
303 113
304#ifndef CONFIG_TINY_RCU
305void rcu_end_inkernel_boot(void);
306#else /* #ifndef CONFIG_TINY_RCU */
307static inline void rcu_end_inkernel_boot(void) { }
308#endif /* #ifndef CONFIG_TINY_RCU */
309
310#ifdef CONFIG_RCU_STALL_COMMON 114#ifdef CONFIG_RCU_STALL_COMMON
311void rcu_sysrq_start(void); 115void rcu_sysrq_start(void);
312void rcu_sysrq_end(void); 116void rcu_sysrq_end(void);
313#else /* #ifdef CONFIG_RCU_STALL_COMMON */ 117#else /* #ifdef CONFIG_RCU_STALL_COMMON */
314static inline void rcu_sysrq_start(void) 118static inline void rcu_sysrq_start(void) { }
315{ 119static inline void rcu_sysrq_end(void) { }
316}
317static inline void rcu_sysrq_end(void)
318{
319}
320#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */ 120#endif /* #else #ifdef CONFIG_RCU_STALL_COMMON */
321 121
322#ifdef CONFIG_NO_HZ_FULL 122#ifdef CONFIG_NO_HZ_FULL
@@ -330,9 +130,7 @@ static inline void rcu_user_exit(void) { }
330#ifdef CONFIG_RCU_NOCB_CPU 130#ifdef CONFIG_RCU_NOCB_CPU
331void rcu_init_nohz(void); 131void rcu_init_nohz(void);
332#else /* #ifdef CONFIG_RCU_NOCB_CPU */ 132#else /* #ifdef CONFIG_RCU_NOCB_CPU */
333static inline void rcu_init_nohz(void) 133static inline void rcu_init_nohz(void) { }
334{
335}
336#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ 134#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
337 135
338/** 136/**
@@ -397,10 +195,6 @@ do { \
397 rcu_note_voluntary_context_switch(current); \ 195 rcu_note_voluntary_context_switch(current); \
398} while (0) 196} while (0)
399 197
400#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
401bool __rcu_is_watching(void);
402#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
403
404/* 198/*
405 * Infrastructure to implement the synchronize_() primitives in 199 * Infrastructure to implement the synchronize_() primitives in
406 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU. 200 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
@@ -414,10 +208,6 @@ bool __rcu_is_watching(void);
414#error "Unknown RCU implementation specified to kernel configuration" 208#error "Unknown RCU implementation specified to kernel configuration"
415#endif 209#endif
416 210
417#define RCU_SCHEDULER_INACTIVE 0
418#define RCU_SCHEDULER_INIT 1
419#define RCU_SCHEDULER_RUNNING 2
420
421/* 211/*
422 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic 212 * init_rcu_head_on_stack()/destroy_rcu_head_on_stack() are needed for dynamic
423 * initialization and destruction of rcu_head on the stack. rcu_head structures 213 * initialization and destruction of rcu_head on the stack. rcu_head structures
@@ -430,30 +220,16 @@ void destroy_rcu_head(struct rcu_head *head);
430void init_rcu_head_on_stack(struct rcu_head *head); 220void init_rcu_head_on_stack(struct rcu_head *head);
431void destroy_rcu_head_on_stack(struct rcu_head *head); 221void destroy_rcu_head_on_stack(struct rcu_head *head);
432#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 222#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
433static inline void init_rcu_head(struct rcu_head *head) 223static inline void init_rcu_head(struct rcu_head *head) { }
434{ 224static inline void destroy_rcu_head(struct rcu_head *head) { }
435} 225static inline void init_rcu_head_on_stack(struct rcu_head *head) { }
436 226static inline void destroy_rcu_head_on_stack(struct rcu_head *head) { }
437static inline void destroy_rcu_head(struct rcu_head *head)
438{
439}
440
441static inline void init_rcu_head_on_stack(struct rcu_head *head)
442{
443}
444
445static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
446{
447}
448#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */ 227#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
449 228
450#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) 229#if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU)
451bool rcu_lockdep_current_cpu_online(void); 230bool rcu_lockdep_current_cpu_online(void);
452#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 231#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
453static inline bool rcu_lockdep_current_cpu_online(void) 232static inline bool rcu_lockdep_current_cpu_online(void) { return true; }
454{
455 return true;
456}
457#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 233#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
458 234
459#ifdef CONFIG_DEBUG_LOCK_ALLOC 235#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -473,18 +249,8 @@ extern struct lockdep_map rcu_bh_lock_map;
473extern struct lockdep_map rcu_sched_lock_map; 249extern struct lockdep_map rcu_sched_lock_map;
474extern struct lockdep_map rcu_callback_map; 250extern struct lockdep_map rcu_callback_map;
475int debug_lockdep_rcu_enabled(void); 251int debug_lockdep_rcu_enabled(void);
476
477int rcu_read_lock_held(void); 252int rcu_read_lock_held(void);
478int rcu_read_lock_bh_held(void); 253int rcu_read_lock_bh_held(void);
479
480/**
481 * rcu_read_lock_sched_held() - might we be in RCU-sched read-side critical section?
482 *
483 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
484 * RCU-sched read-side critical section. In absence of
485 * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
486 * critical section unless it can prove otherwise.
487 */
488int rcu_read_lock_sched_held(void); 254int rcu_read_lock_sched_held(void);
489 255
490#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 256#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
@@ -531,9 +297,7 @@ static inline void rcu_preempt_sleep_check(void)
531 "Illegal context switch in RCU read-side critical section"); 297 "Illegal context switch in RCU read-side critical section");
532} 298}
533#else /* #ifdef CONFIG_PROVE_RCU */ 299#else /* #ifdef CONFIG_PROVE_RCU */
534static inline void rcu_preempt_sleep_check(void) 300static inline void rcu_preempt_sleep_check(void) { }
535{
536}
537#endif /* #else #ifdef CONFIG_PROVE_RCU */ 301#endif /* #else #ifdef CONFIG_PROVE_RCU */
538 302
539#define rcu_sleep_check() \ 303#define rcu_sleep_check() \
@@ -1084,52 +848,6 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
1084#define kfree_rcu(ptr, rcu_head) \ 848#define kfree_rcu(ptr, rcu_head) \
1085 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head)) 849 __kfree_rcu(&((ptr)->rcu_head), offsetof(typeof(*(ptr)), rcu_head))
1086 850
1087#ifdef CONFIG_TINY_RCU
1088static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1089{
1090 *nextevt = KTIME_MAX;
1091 return 0;
1092}
1093#endif /* #ifdef CONFIG_TINY_RCU */
1094
1095#if defined(CONFIG_RCU_NOCB_CPU_ALL)
1096static inline bool rcu_is_nocb_cpu(int cpu) { return true; }
1097#elif defined(CONFIG_RCU_NOCB_CPU)
1098bool rcu_is_nocb_cpu(int cpu);
1099#else
1100static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
1101#endif
1102
1103
1104/* Only for use by adaptive-ticks code. */
1105#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
1106bool rcu_sys_is_idle(void);
1107void rcu_sysidle_force_exit(void);
1108#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
1109
1110static inline bool rcu_sys_is_idle(void)
1111{
1112 return false;
1113}
1114
1115static inline void rcu_sysidle_force_exit(void)
1116{
1117}
1118
1119#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
1120
1121
1122/*
1123 * Dump the ftrace buffer, but only one time per callsite per boot.
1124 */
1125#define rcu_ftrace_dump(oops_dump_mode) \
1126do { \
1127 static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
1128 \
1129 if (!atomic_read(&___rfd_beenhere) && \
1130 !atomic_xchg(&___rfd_beenhere, 1)) \
1131 ftrace_dump(oops_dump_mode); \
1132} while (0)
1133 851
1134/* 852/*
1135 * Place this after a lock-acquisition primitive to guarantee that 853 * Place this after a lock-acquisition primitive to guarantee that
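For the common case where the callback does nothing but kfree() the enclosing object, the kfree_rcu() macro kept above avoids writing a callback at all. A minimal sketch, reusing the hypothetical struct foo from the earlier example:

static void foo_remove_kfree(struct foo *fp)
{
	list_del_rcu(&fp->list);
	kfree_rcu(fp, rcu);	/* Second argument names the rcu_head member of *fp. */
}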
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 74d9c3a1feee..5becbbccb998 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -25,7 +25,7 @@
25#ifndef __LINUX_TINY_H 25#ifndef __LINUX_TINY_H
26#define __LINUX_TINY_H 26#define __LINUX_TINY_H
27 27
28#include <linux/cache.h> 28#include <linux/ktime.h>
29 29
30struct rcu_dynticks; 30struct rcu_dynticks;
31static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) 31static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
@@ -33,10 +33,8 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
33 return 0; 33 return 0;
34} 34}
35 35
36static inline bool rcu_eqs_special_set(int cpu) 36/* Never flag non-existent other CPUs! */
37{ 37static inline bool rcu_eqs_special_set(int cpu) { return false; }
38 return false; /* Never flag non-existent other CPUs! */
39}
40 38
41static inline unsigned long get_state_synchronize_rcu(void) 39static inline unsigned long get_state_synchronize_rcu(void)
42{ 40{
@@ -98,159 +96,38 @@ static inline void kfree_call_rcu(struct rcu_head *head,
98 rcu_note_voluntary_context_switch_lite(current); \ 96 rcu_note_voluntary_context_switch_lite(current); \
99 } while (0) 97 } while (0)
100 98
101/* 99static inline int rcu_needs_cpu(u64 basemono, u64 *nextevt)
102 * Take advantage of the fact that there is only one CPU, which
103 * allows us to ignore virtualization-based context switches.
104 */
105static inline void rcu_virt_note_context_switch(int cpu)
106{
107}
108
109/*
110 * Return the number of grace periods started.
111 */
112static inline unsigned long rcu_batches_started(void)
113{
114 return 0;
115}
116
117/*
118 * Return the number of bottom-half grace periods started.
119 */
120static inline unsigned long rcu_batches_started_bh(void)
121{
122 return 0;
123}
124
125/*
126 * Return the number of sched grace periods started.
127 */
128static inline unsigned long rcu_batches_started_sched(void)
129{
130 return 0;
131}
132
133/*
134 * Return the number of grace periods completed.
135 */
136static inline unsigned long rcu_batches_completed(void)
137{
138 return 0;
139}
140
141/*
142 * Return the number of bottom-half grace periods completed.
143 */
144static inline unsigned long rcu_batches_completed_bh(void)
145{
146 return 0;
147}
148
149/*
150 * Return the number of sched grace periods completed.
151 */
152static inline unsigned long rcu_batches_completed_sched(void)
153{ 100{
101 *nextevt = KTIME_MAX;
154 return 0; 102 return 0;
155} 103}
156 104
157/* 105/*
158 * Return the number of expedited grace periods completed. 106 * Take advantage of the fact that there is only one CPU, which
159 */ 107 * allows us to ignore virtualization-based context switches.
160static inline unsigned long rcu_exp_batches_completed(void)
161{
162 return 0;
163}
164
165/*
166 * Return the number of expedited sched grace periods completed.
167 */ 108 */
168static inline unsigned long rcu_exp_batches_completed_sched(void) 109static inline void rcu_virt_note_context_switch(int cpu) { }
169{ 110static inline void rcu_cpu_stall_reset(void) { }
170 return 0; 111static inline void rcu_idle_enter(void) { }
171} 112static inline void rcu_idle_exit(void) { }
172 113static inline void rcu_irq_enter(void) { }
173static inline void rcu_force_quiescent_state(void) 114static inline bool rcu_irq_enter_disabled(void) { return false; }
174{ 115static inline void rcu_irq_exit_irqson(void) { }
175} 116static inline void rcu_irq_enter_irqson(void) { }
176 117static inline void rcu_irq_exit(void) { }
177static inline void rcu_bh_force_quiescent_state(void) 118static inline void exit_rcu(void) { }
178{
179}
180
181static inline void rcu_sched_force_quiescent_state(void)
182{
183}
184
185static inline void show_rcu_gp_kthreads(void)
186{
187}
188
189static inline void rcu_cpu_stall_reset(void)
190{
191}
192
193static inline void rcu_idle_enter(void)
194{
195}
196
197static inline void rcu_idle_exit(void)
198{
199}
200
201static inline void rcu_irq_enter(void)
202{
203}
204
205static inline void rcu_irq_exit_irqson(void)
206{
207}
208
209static inline void rcu_irq_enter_irqson(void)
210{
211}
212
213static inline void rcu_irq_exit(void)
214{
215}
216
217static inline void exit_rcu(void)
218{
219}
220 119
221#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) 120#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
222extern int rcu_scheduler_active __read_mostly; 121extern int rcu_scheduler_active __read_mostly;
223void rcu_scheduler_starting(void); 122void rcu_scheduler_starting(void);
224#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ 123#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
225static inline void rcu_scheduler_starting(void) 124static inline void rcu_scheduler_starting(void) { }
226{
227}
228#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ 125#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
126static inline void rcu_end_inkernel_boot(void) { }
127static inline bool rcu_is_watching(void) { return true; }
229 128
230#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) 129/* Avoid RCU read-side critical sections leaking across. */
231 130static inline void rcu_all_qs(void) { barrier(); }
232static inline bool rcu_is_watching(void)
233{
234 return __rcu_is_watching();
235}
236
237#else /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
238
239static inline bool rcu_is_watching(void)
240{
241 return true;
242}
243
244#endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
245
246static inline void rcu_request_urgent_qs_task(struct task_struct *t)
247{
248}
249
250static inline void rcu_all_qs(void)
251{
252 barrier(); /* Avoid RCU read-side critical sections leaking across. */
253}
254 131
255/* RCUtree hotplug events */ 132/* RCUtree hotplug events */
256#define rcutree_prepare_cpu NULL 133#define rcutree_prepare_cpu NULL
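Tiny RCU's rcu_needs_cpu() stub above always answers "no" and reports no future event, so the tick can be stopped indefinitely as far as RCU is concerned. A simplified sketch of how a tick-stop path consults it (can_stop_tick() is a hypothetical wrapper; the real logic lives in kernel/time/tick-sched.c):

static bool can_stop_tick(u64 basemono)
{
	u64 next_rcu;

	if (rcu_needs_cpu(basemono, &next_rcu))
		return false;	/* RCU still has callbacks that need the tick. */

	/* With Tiny RCU the stub returns 0 and sets next_rcu to KTIME_MAX,
	 * so RCU never vetoes stopping the tick. */
	return true;
}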
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0bacb6b2af69..37d6fd3b7ff8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -79,37 +79,20 @@ void cond_synchronize_rcu(unsigned long oldstate);
79unsigned long get_state_synchronize_sched(void); 79unsigned long get_state_synchronize_sched(void);
80void cond_synchronize_sched(unsigned long oldstate); 80void cond_synchronize_sched(unsigned long oldstate);
81 81
82extern unsigned long rcutorture_testseq;
83extern unsigned long rcutorture_vernum;
84unsigned long rcu_batches_started(void);
85unsigned long rcu_batches_started_bh(void);
86unsigned long rcu_batches_started_sched(void);
87unsigned long rcu_batches_completed(void);
88unsigned long rcu_batches_completed_bh(void);
89unsigned long rcu_batches_completed_sched(void);
90unsigned long rcu_exp_batches_completed(void);
91unsigned long rcu_exp_batches_completed_sched(void);
92void show_rcu_gp_kthreads(void);
93
94void rcu_force_quiescent_state(void);
95void rcu_bh_force_quiescent_state(void);
96void rcu_sched_force_quiescent_state(void);
97
98void rcu_idle_enter(void); 82void rcu_idle_enter(void);
99void rcu_idle_exit(void); 83void rcu_idle_exit(void);
100void rcu_irq_enter(void); 84void rcu_irq_enter(void);
101void rcu_irq_exit(void); 85void rcu_irq_exit(void);
102void rcu_irq_enter_irqson(void); 86void rcu_irq_enter_irqson(void);
103void rcu_irq_exit_irqson(void); 87void rcu_irq_exit_irqson(void);
88bool rcu_irq_enter_disabled(void);
104 89
105void exit_rcu(void); 90void exit_rcu(void);
106 91
107void rcu_scheduler_starting(void); 92void rcu_scheduler_starting(void);
108extern int rcu_scheduler_active __read_mostly; 93extern int rcu_scheduler_active __read_mostly;
109 94void rcu_end_inkernel_boot(void);
110bool rcu_is_watching(void); 95bool rcu_is_watching(void);
111void rcu_request_urgent_qs_task(struct task_struct *t);
112
113void rcu_all_qs(void); 96void rcu_all_qs(void);
114 97
115/* RCUtree hotplug events */ 98/* RCUtree hotplug events */
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 59248dcc6ef3..d9510e8522d4 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -369,6 +369,26 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock)
369 raw_spin_trylock_irqsave(spinlock_check(lock), flags); \ 369 raw_spin_trylock_irqsave(spinlock_check(lock), flags); \
370}) 370})
371 371
372/**
373 * spin_unlock_wait - Interpose between successive critical sections
374 * @lock: the spinlock whose critical sections are to be interposed.
375 *
376 * Semantically this is equivalent to a spin_lock() immediately
377 * followed by a spin_unlock(). However, most architectures have
378 * more efficient implementations in which the spin_unlock_wait()
379 * cannot block concurrent lock acquisition, and in some cases
380 * where spin_unlock_wait() does not write to the lock variable.
381 * Nevertheless, spin_unlock_wait() can have high overhead, so if
382 * you feel the need to use it, please check to see if there is
383 * a better way to get your job done.
384 *
385 * The ordering guarantees provided by spin_unlock_wait() are:
386 *
387 * 1. All accesses preceding the spin_unlock_wait() happen before
388 * any accesses in later critical sections for this same lock.
389 * 2. All accesses following the spin_unlock_wait() happen after
390 * any accesses in earlier critical sections for this same lock.
391 */
372static __always_inline void spin_unlock_wait(spinlock_t *lock) 392static __always_inline void spin_unlock_wait(spinlock_t *lock)
373{ 393{
374 raw_spin_unlock_wait(&lock->rlock); 394 raw_spin_unlock_wait(&lock->rlock);
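The spin_unlock_wait() kernel-doc added above lists two ordering guarantees. One hedged sketch of the intended usage pattern (struct gadget and its ->dying flag are invented for illustration): publish a state change, then wait out any critical section that may have started before the change was visible.

struct gadget {
	spinlock_t lock;
	bool dying;
};

static void gadget_start_teardown(struct gadget *g)
{
	WRITE_ONCE(g->dying, true);	/* Ordered before accesses in later critical sections. */
	spin_unlock_wait(&g->lock);	/* Also ordered after accesses in earlier ones. */

	/* Any critical section entered after this point is guaranteed to
	 * observe ->dying, without this code ever taking the lock. */
}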
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4c1d5f7e62c4..39af9bc0f653 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -60,32 +60,15 @@ int init_srcu_struct(struct srcu_struct *sp);
60#include <linux/srcutiny.h> 60#include <linux/srcutiny.h>
61#elif defined(CONFIG_TREE_SRCU) 61#elif defined(CONFIG_TREE_SRCU)
62#include <linux/srcutree.h> 62#include <linux/srcutree.h>
63#elif defined(CONFIG_CLASSIC_SRCU) 63#elif defined(CONFIG_SRCU)
64#include <linux/srcuclassic.h>
65#else
66#error "Unknown SRCU implementation specified to kernel configuration" 64#error "Unknown SRCU implementation specified to kernel configuration"
65#else
66/* Dummy definition for things like notifiers. Actual use gets link error. */
67struct srcu_struct { };
67#endif 68#endif
68 69
69/**
70 * call_srcu() - Queue a callback for invocation after an SRCU grace period
71 * @sp: srcu_struct in queue the callback
72 * @head: structure to be used for queueing the SRCU callback.
73 * @func: function to be invoked after the SRCU grace period
74 *
75 * The callback function will be invoked some time after a full SRCU
76 * grace period elapses, in other words after all pre-existing SRCU
77 * read-side critical sections have completed. However, the callback
78 * function might well execute concurrently with other SRCU read-side
79 * critical sections that started after call_srcu() was invoked. SRCU
80 * read-side critical sections are delimited by srcu_read_lock() and
81 * srcu_read_unlock(), and may be nested.
82 *
83 * The callback will be invoked from process context, but must nevertheless
84 * be fast and must not block.
85 */
86void call_srcu(struct srcu_struct *sp, struct rcu_head *head, 70void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
87 void (*func)(struct rcu_head *head)); 71 void (*func)(struct rcu_head *head));
88
89void cleanup_srcu_struct(struct srcu_struct *sp); 72void cleanup_srcu_struct(struct srcu_struct *sp);
90int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); 73int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
91void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); 74void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
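The call_srcu() kernel-doc trimmed above still governs its use: the callback runs in process context after an SRCU grace period and must not block. As illustration only (my_srcu, struct gizmo, and the helpers below are hypothetical), a typical reader/updater pairing looks like this:

#include <linux/slab.h>
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(my_srcu);

struct gizmo {
	int value;
	struct rcu_head rcu;
};

static struct gizmo __rcu *cur_gizmo;

static void gizmo_free(struct rcu_head *rhp)
{
	kfree(container_of(rhp, struct gizmo, rcu));	/* Process context; must not block. */
}

static int gizmo_read(void)
{
	struct gizmo *g;
	int idx, val;

	idx = srcu_read_lock(&my_srcu);			/* Readers may sleep in here. */
	g = srcu_dereference(cur_gizmo, &my_srcu);
	val = g ? g->value : -1;
	srcu_read_unlock(&my_srcu, idx);		/* Must pass back the same index. */
	return val;
}

static void gizmo_replace(struct gizmo *newg)
{
	struct gizmo *old;

	old = rcu_dereference_protected(cur_gizmo, 1);	/* Caller provides update-side exclusion. */
	rcu_assign_pointer(cur_gizmo, newg);
	if (old)
		call_srcu(&my_srcu, &old->rcu, gizmo_free);	/* Freed after an SRCU grace period. */
}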
diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h
deleted file mode 100644
index 5753f7322262..000000000000
--- a/include/linux/srcuclassic.h
+++ /dev/null
@@ -1,115 +0,0 @@
1/*
2 * Sleepable Read-Copy Update mechanism for mutual exclusion,
3 * classic v4.11 variant.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, you can access it online at
17 * http://www.gnu.org/licenses/gpl-2.0.html.
18 *
19 * Copyright (C) IBM Corporation, 2017
20 *
21 * Author: Paul McKenney <paulmck@us.ibm.com>
22 */
23
24#ifndef _LINUX_SRCU_CLASSIC_H
25#define _LINUX_SRCU_CLASSIC_H
26
27struct srcu_array {
28 unsigned long lock_count[2];
29 unsigned long unlock_count[2];
30};
31
32struct rcu_batch {
33 struct rcu_head *head, **tail;
34};
35
36#define RCU_BATCH_INIT(name) { NULL, &(name.head) }
37
38struct srcu_struct {
39 unsigned long completed;
40 struct srcu_array __percpu *per_cpu_ref;
41 spinlock_t queue_lock; /* protect ->batch_queue, ->running */
42 bool running;
43 /* callbacks just queued */
44 struct rcu_batch batch_queue;
45 /* callbacks try to do the first check_zero */
46 struct rcu_batch batch_check0;
47 /* callbacks done with the first check_zero and the flip */
48 struct rcu_batch batch_check1;
49 struct rcu_batch batch_done;
50 struct delayed_work work;
51#ifdef CONFIG_DEBUG_LOCK_ALLOC
52 struct lockdep_map dep_map;
53#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
54};
55
56void process_srcu(struct work_struct *work);
57
58#define __SRCU_STRUCT_INIT(name) \
59 { \
60 .completed = -300, \
61 .per_cpu_ref = &name##_srcu_array, \
62 .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \
63 .running = false, \
64 .batch_queue = RCU_BATCH_INIT(name.batch_queue), \
65 .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \
66 .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \
67 .batch_done = RCU_BATCH_INIT(name.batch_done), \
68 .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\
69 __SRCU_DEP_MAP_INIT(name) \
70 }
71
72/*
73 * Define and initialize a srcu struct at build time.
74 * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it.
75 *
76 * Note that although DEFINE_STATIC_SRCU() hides the name from other
77 * files, the per-CPU variable rules nevertheless require that the
78 * chosen name be globally unique. These rules also prohibit use of
79 * DEFINE_STATIC_SRCU() within a function. If these rules are too
80 * restrictive, declare the srcu_struct manually. For example, in
81 * each file:
82 *
83 * static struct srcu_struct my_srcu;
84 *
85 * Then, before the first use of each my_srcu, manually initialize it:
86 *
87 * init_srcu_struct(&my_srcu);
88 *
89 * See include/linux/percpu-defs.h for the rules on per-CPU variables.
90 */
91#define __DEFINE_SRCU(name, is_static) \
92 static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\
93 is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name)
94#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */)
95#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static)
96
97void synchronize_srcu_expedited(struct srcu_struct *sp);
98void srcu_barrier(struct srcu_struct *sp);
99unsigned long srcu_batches_completed(struct srcu_struct *sp);
100
101static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
102 struct srcu_struct *sp, int *flags,
103 unsigned long *gpnum,
104 unsigned long *completed)
105{
106 if (test_type != SRCU_FLAVOR)
107 return;
108 *flags = 0;
109 *completed = sp->completed;
110 *gpnum = *completed;
111 if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check1.head)
112 (*gpnum)++;
113}
114
115#endif
diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h
index 42311ee0334f..cfbfc540cafc 100644
--- a/include/linux/srcutiny.h
+++ b/include/linux/srcutiny.h
@@ -27,15 +27,14 @@
27#include <linux/swait.h> 27#include <linux/swait.h>
28 28
29struct srcu_struct { 29struct srcu_struct {
30 int srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ 30 short srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */
31 short srcu_idx; /* Current reader array element. */
32 u8 srcu_gp_running; /* GP workqueue running? */
33 u8 srcu_gp_waiting; /* GP waiting for readers? */
31 struct swait_queue_head srcu_wq; 34 struct swait_queue_head srcu_wq;
32 /* Last srcu_read_unlock() wakes GP. */ 35 /* Last srcu_read_unlock() wakes GP. */
33 unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ 36 struct rcu_head *srcu_cb_head; /* Pending callbacks: Head. */
34 struct rcu_segcblist srcu_cblist; 37 struct rcu_head **srcu_cb_tail; /* Pending callbacks: Tail. */
35 /* Pending SRCU callbacks. */
36 int srcu_idx; /* Current reader array element. */
37 bool srcu_gp_running; /* GP workqueue running? */
38 bool srcu_gp_waiting; /* GP waiting for readers? */
39 struct work_struct srcu_work; /* For driving grace periods. */ 38 struct work_struct srcu_work; /* For driving grace periods. */
40#ifdef CONFIG_DEBUG_LOCK_ALLOC 39#ifdef CONFIG_DEBUG_LOCK_ALLOC
41 struct lockdep_map dep_map; 40 struct lockdep_map dep_map;
@@ -47,7 +46,7 @@ void srcu_drive_gp(struct work_struct *wp);
47#define __SRCU_STRUCT_INIT(name) \ 46#define __SRCU_STRUCT_INIT(name) \
48{ \ 47{ \
49 .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ 48 .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \
50 .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist), \ 49 .srcu_cb_tail = &name.srcu_cb_head, \
51 .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ 50 .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \
52 __SRCU_DEP_MAP_INIT(name) \ 51 __SRCU_DEP_MAP_INIT(name) \
53} 52}
@@ -63,31 +62,29 @@ void srcu_drive_gp(struct work_struct *wp);
63 62
64void synchronize_srcu(struct srcu_struct *sp); 63void synchronize_srcu(struct srcu_struct *sp);
65 64
66static inline void synchronize_srcu_expedited(struct srcu_struct *sp) 65/*
66 * Counts the new reader in the appropriate per-CPU element of the
67 * srcu_struct. Can be invoked from irq/bh handlers, but the matching
68 * __srcu_read_unlock() must be in the same handler instance. Returns an
69 * index that must be passed to the matching srcu_read_unlock().
70 */
71static inline int __srcu_read_lock(struct srcu_struct *sp)
67{ 72{
68 synchronize_srcu(sp); 73 int idx;
69}
70 74
71static inline void srcu_barrier(struct srcu_struct *sp) 75 idx = READ_ONCE(sp->srcu_idx);
72{ 76 WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
73 synchronize_srcu(sp); 77 return idx;
74} 78}
75 79
76static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) 80static inline void synchronize_srcu_expedited(struct srcu_struct *sp)
77{ 81{
78 return 0; 82 synchronize_srcu(sp);
79} 83}
80 84
81static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, 85static inline void srcu_barrier(struct srcu_struct *sp)
82 struct srcu_struct *sp, int *flags,
83 unsigned long *gpnum,
84 unsigned long *completed)
85{ 86{
86 if (test_type != SRCU_FLAVOR) 87 synchronize_srcu(sp);
87 return;
88 *flags = 0;
89 *completed = sp->srcu_gp_seq;
90 *gpnum = *completed;
91} 88}
92 89
93#endif 90#endif
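Tiny SRCU above drops the rcu_segcblist in favour of a bare ->srcu_cb_head/->srcu_cb_tail pair, with the initializer pointing the tail at the head. A stand-alone sketch of why that shape gives O(1) append (this is not the actual srcutiny.c code, just the tail-pointer idiom it relies on):

static void cb_enqueue(struct rcu_head ***tailp, struct rcu_head *rhp)
{
	rhp->next = NULL;	/* New element becomes the last one. */
	**tailp = rhp;		/* Works for the empty list too, since the tail
				 * initially points at the head pointer itself. */
	*tailp = &rhp->next;	/* Tail now addresses the new element's ->next. */
}

With the __SRCU_STRUCT_INIT() shown above, .srcu_cb_tail = &name.srcu_cb_head is exactly that empty-list starting state, so an updater would enqueue with something like cb_enqueue(&sp->srcu_cb_tail, rhp).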
diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h
index 32e86d85fd11..42973f787e7e 100644
--- a/include/linux/srcutree.h
+++ b/include/linux/srcutree.h
@@ -40,7 +40,7 @@ struct srcu_data {
40 unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ 40 unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */
41 41
42 /* Update-side state. */ 42 /* Update-side state. */
43 spinlock_t lock ____cacheline_internodealigned_in_smp; 43 raw_spinlock_t __private lock ____cacheline_internodealigned_in_smp;
44 struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ 44 struct rcu_segcblist srcu_cblist; /* List of callbacks.*/
45 unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ 45 unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */
46 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ 46 unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */
@@ -58,7 +58,7 @@ struct srcu_data {
58 * Node in SRCU combining tree, similar in function to rcu_data. 58 * Node in SRCU combining tree, similar in function to rcu_data.
59 */ 59 */
60struct srcu_node { 60struct srcu_node {
61 spinlock_t lock; 61 raw_spinlock_t __private lock;
62 unsigned long srcu_have_cbs[4]; /* GP seq for children */ 62 unsigned long srcu_have_cbs[4]; /* GP seq for children */
63 /* having CBs, but only */ 63 /* having CBs, but only */
64 /* is > ->srcu_gp_seq. */ 64 /* is > ->srcu_gp_seq. */
@@ -78,7 +78,7 @@ struct srcu_struct {
78 struct srcu_node *level[RCU_NUM_LVLS + 1]; 78 struct srcu_node *level[RCU_NUM_LVLS + 1];
79 /* First node at each level. */ 79 /* First node at each level. */
80 struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ 80 struct mutex srcu_cb_mutex; /* Serialize CB preparation. */
81 spinlock_t gp_lock; /* protect ->srcu_cblist */ 81 raw_spinlock_t __private lock; /* Protect counters */
82 struct mutex srcu_gp_mutex; /* Serialize GP work. */ 82 struct mutex srcu_gp_mutex; /* Serialize GP work. */
83 unsigned int srcu_idx; /* Current rdr array element. */ 83 unsigned int srcu_idx; /* Current rdr array element. */
84 unsigned long srcu_gp_seq; /* Grace-period seq #. */ 84 unsigned long srcu_gp_seq; /* Grace-period seq #. */
@@ -109,7 +109,7 @@ void process_srcu(struct work_struct *work);
109#define __SRCU_STRUCT_INIT(name) \ 109#define __SRCU_STRUCT_INIT(name) \
110 { \ 110 { \
111 .sda = &name##_srcu_data, \ 111 .sda = &name##_srcu_data, \
112 .gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock), \ 112 .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
113 .srcu_gp_seq_needed = 0 - 1, \ 113 .srcu_gp_seq_needed = 0 - 1, \
114 __SRCU_DEP_MAP_INIT(name) \ 114 __SRCU_DEP_MAP_INIT(name) \
115 } 115 }
@@ -141,10 +141,5 @@ void process_srcu(struct work_struct *work);
141 141
142void synchronize_srcu_expedited(struct srcu_struct *sp); 142void synchronize_srcu_expedited(struct srcu_struct *sp);
143void srcu_barrier(struct srcu_struct *sp); 143void srcu_barrier(struct srcu_struct *sp);
144unsigned long srcu_batches_completed(struct srcu_struct *sp);
145
146void srcutorture_get_gp_data(enum rcutorture_type test_type,
147 struct srcu_struct *sp, int *flags,
148 unsigned long *gpnum, unsigned long *completed);
149 144
150#endif 145#endif
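The locks above also pick up the __private annotation, so sparse flags any direct ->lock access; legitimate users go through ACCESS_PRIVATE(). A hedged sketch of the kind of wrapper this implies (the helper names here are invented, not necessarily what kernel/rcu provides):

static inline void srcu_data_lock_irq(struct srcu_data *sdp)
{
	raw_spin_lock_irq(&ACCESS_PRIVATE(sdp, lock));
}

static inline void srcu_data_unlock_irq(struct srcu_data *sdp)
{
	raw_spin_unlock_irq(&ACCESS_PRIVATE(sdp, lock));
}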
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index e3facb356838..91dc089d65b7 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -742,6 +742,7 @@ TRACE_EVENT(rcu_torture_read,
742 * "OnlineQ": _rcu_barrier() found online CPU with callbacks. 742 * "OnlineQ": _rcu_barrier() found online CPU with callbacks.
743 * "OnlineNQ": _rcu_barrier() found online CPU, no callbacks. 743 * "OnlineNQ": _rcu_barrier() found online CPU, no callbacks.
744 * "IRQ": An rcu_barrier_callback() callback posted on remote CPU. 744 * "IRQ": An rcu_barrier_callback() callback posted on remote CPU.
745 * "IRQNQ": An rcu_barrier_callback() callback found no callbacks.
745 * "CB": An rcu_barrier_callback() invoked a callback, not the last. 746 * "CB": An rcu_barrier_callback() invoked a callback, not the last.
746 * "LastCB": An rcu_barrier_callback() invoked the last callback. 747 * "LastCB": An rcu_barrier_callback() invoked the last callback.
747 * "Inc2": _rcu_barrier() piggyback check counter incremented. 748 * "Inc2": _rcu_barrier() piggyback check counter incremented.
diff --git a/init/Kconfig b/init/Kconfig
index 1d3475fc9496..bc4c180c66a5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -472,354 +472,7 @@ config TASK_IO_ACCOUNTING
472 472
473endmenu # "CPU/Task time and stats accounting" 473endmenu # "CPU/Task time and stats accounting"
474 474
475menu "RCU Subsystem" 475source "kernel/rcu/Kconfig"
476
477config TREE_RCU
478 bool
479 default y if !PREEMPT && SMP
480 help
481 This option selects the RCU implementation that is
482	  designed for very large SMP systems with hundreds or
483 thousands of CPUs. It also scales down nicely to
484 smaller systems.
485
486config PREEMPT_RCU
487 bool
488 default y if PREEMPT
489 help
490 This option selects the RCU implementation that is
491 designed for very large SMP systems with hundreds or
492 thousands of CPUs, but for which real-time response
493 is also required. It also scales down nicely to
494 smaller systems.
495
496 Select this option if you are unsure.
497
498config TINY_RCU
499 bool
500 default y if !PREEMPT && !SMP
501 help
502 This option selects the RCU implementation that is
503	  designed for UP systems for which real-time response
504 is not required. This option greatly reduces the
505 memory footprint of RCU.
506
507config RCU_EXPERT
508 bool "Make expert-level adjustments to RCU configuration"
509 default n
510 help
511 This option needs to be enabled if you wish to make
512 expert-level adjustments to RCU configuration. By default,
513 no such adjustments can be made, which has the often-beneficial
514 side-effect of preventing "make oldconfig" from asking you all
515 sorts of detailed questions about how you would like numerous
516 obscure RCU options to be set up.
517
518 Say Y if you need to make expert-level adjustments to RCU.
519
520 Say N if you are unsure.
521
522config SRCU
523 bool
524 default y
525 help
526 This option selects the sleepable version of RCU. This version
527 permits arbitrary sleeping or blocking within RCU read-side critical
528 sections.
529
530config CLASSIC_SRCU
531 bool "Use v4.11 classic SRCU implementation"
532 default n
533 depends on RCU_EXPERT && SRCU
534 help
535 This option selects the traditional well-tested classic SRCU
536 implementation from v4.11, as might be desired for enterprise
537 Linux distributions. Without this option, the shiny new
538 Tiny SRCU and Tree SRCU implementations are used instead.
539 At some point, it is hoped that Tiny SRCU and Tree SRCU
540 will accumulate enough test time and confidence to allow
541 Classic SRCU to be dropped entirely.
542
543 Say Y if you need a rock-solid SRCU.
544
545	  Say N if you would like to help test Tree SRCU.
546
547config TINY_SRCU
548 bool
549 default y if SRCU && TINY_RCU && !CLASSIC_SRCU
550 help
551 This option selects the single-CPU non-preemptible version of SRCU.
552
553config TREE_SRCU
554 bool
555 default y if SRCU && !TINY_RCU && !CLASSIC_SRCU
556 help
557 This option selects the full-fledged version of SRCU.
558
559config TASKS_RCU
560 bool
561 default n
562 select SRCU
563 help
564 This option enables a task-based RCU implementation that uses
565 only voluntary context switch (not preemption!), idle, and
566 user-mode execution as quiescent states.
567
568config RCU_STALL_COMMON
569 def_bool ( TREE_RCU || PREEMPT_RCU || RCU_TRACE )
570 help
571 This option enables RCU CPU stall code that is common between
572 the TINY and TREE variants of RCU. The purpose is to allow
573 the tiny variants to disable RCU CPU stall warnings, while
574 making these warnings mandatory for the tree variants.
575
576config RCU_NEED_SEGCBLIST
577 def_bool ( TREE_RCU || PREEMPT_RCU || TINY_SRCU || TREE_SRCU )
578
579config CONTEXT_TRACKING
580 bool
581
582config CONTEXT_TRACKING_FORCE
583 bool "Force context tracking"
584 depends on CONTEXT_TRACKING
585 default y if !NO_HZ_FULL
586 help
587	  The major prerequisite for full dynticks to work is to
588	  support the context tracking subsystem. But there are also
589	  other dependencies to provide in order to make full
590	  dynticks work.
591
592 This option stands for testing when an arch implements the
593	  context tracking backend but doesn't yet fulfill all the
594	  requirements to make the full dynticks feature work.
595 Without the full dynticks, there is no way to test the support
596 for context tracking and the subsystems that rely on it: RCU
597 userspace extended quiescent state and tickless cputime
598 accounting. This option copes with the absence of the full
599 dynticks subsystem by forcing the context tracking on all
600 CPUs in the system.
601
602 Say Y only if you're working on the development of an
603 architecture backend for the context tracking.
604
605 Say N otherwise, this option brings an overhead that you
606 don't want in production.
607
608
609config RCU_FANOUT
610 int "Tree-based hierarchical RCU fanout value"
611 range 2 64 if 64BIT
612 range 2 32 if !64BIT
613 depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
614 default 64 if 64BIT
615 default 32 if !64BIT
616 help
617 This option controls the fanout of hierarchical implementations
618 of RCU, allowing RCU to work efficiently on machines with
619 large numbers of CPUs. This value must be at least the fourth
620 root of NR_CPUS, which allows NR_CPUS to be insanely large.
621 The default value of RCU_FANOUT should be used for production
622 systems, but if you are stress-testing the RCU implementation
623 itself, small RCU_FANOUT values allow you to test large-system
624 code paths on small(er) systems.
625
626 Select a specific number if testing RCU itself.
627 Take the default if unsure.
628
629config RCU_FANOUT_LEAF
630 int "Tree-based hierarchical RCU leaf-level fanout value"
631 range 2 64 if 64BIT
632 range 2 32 if !64BIT
633 depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
634 default 16
635 help
636 This option controls the leaf-level fanout of hierarchical
637 implementations of RCU, and allows trading off cache misses
638 against lock contention. Systems that synchronize their
639 scheduling-clock interrupts for energy-efficiency reasons will
640 want the default because the smaller leaf-level fanout keeps
641 lock contention levels acceptably low. Very large systems
642 (hundreds or thousands of CPUs) will instead want to set this
643 value to the maximum value possible in order to reduce the
644 number of cache misses incurred during RCU's grace-period
645 initialization. These systems tend to run CPU-bound, and thus
646 are not helped by synchronized interrupts, and thus tend to
647 skew them, which reduces lock contention enough that large
648 leaf-level fanouts work well. That said, setting leaf-level
649 fanout to a large number will likely cause problematic
650 lock contention on the leaf-level rcu_node structures unless
651 you boot with the skew_tick kernel parameter.
652
653 Select a specific number if testing RCU itself.
654
655 Select the maximum permissible value for large systems, but
656 please understand that you may also need to set the skew_tick
657 kernel boot parameter to avoid contention on the rcu_node
658 structure's locks.
659
660 Take the default if unsure.
661
662config RCU_FAST_NO_HZ
663 bool "Accelerate last non-dyntick-idle CPU's grace periods"
664 depends on NO_HZ_COMMON && SMP && RCU_EXPERT
665 default n
666 help
667 This option permits CPUs to enter dynticks-idle state even if
668 they have RCU callbacks queued, and prevents RCU from waking
669 these CPUs up more than roughly once every four jiffies (by
670 default, you can adjust this using the rcutree.rcu_idle_gp_delay
671 parameter), thus improving energy efficiency. On the other
672 hand, this option increases the duration of RCU grace periods,
673 for example, slowing down synchronize_rcu().
674
675 Say Y if energy efficiency is critically important, and you
676 don't care about increased grace-period durations.
677
678 Say N if you are unsure.
679
680config TREE_RCU_TRACE
681 def_bool RCU_TRACE && ( TREE_RCU || PREEMPT_RCU )
682 select DEBUG_FS
683 help
684 This option provides tracing for the TREE_RCU and
685 PREEMPT_RCU implementations, permitting Makefile to
686 trivially select kernel/rcutree_trace.c.
687
688config RCU_BOOST
689 bool "Enable RCU priority boosting"
690 depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
691 default n
692 help
693 This option boosts the priority of preempted RCU readers that
694 block the current preemptible RCU grace period for too long.
695 This option also prevents heavy loads from blocking RCU
696 callback invocation for all flavors of RCU.
697
698 Say Y here if you are working with real-time apps or heavy loads
699 Say N here if you are unsure.
700
701config RCU_KTHREAD_PRIO
702 int "Real-time priority to use for RCU worker threads"
703 range 1 99 if RCU_BOOST
704 range 0 99 if !RCU_BOOST
705 default 1 if RCU_BOOST
706 default 0 if !RCU_BOOST
707 depends on RCU_EXPERT
708 help
709 This option specifies the SCHED_FIFO priority value that will be
710 assigned to the rcuc/n and rcub/n threads and is also the value
711 used for RCU_BOOST (if enabled). If you are working with a
712 real-time application that has one or more CPU-bound threads
713 running at a real-time priority level, you should set
714 RCU_KTHREAD_PRIO to a priority higher than the highest-priority
715 real-time CPU-bound application thread. The default RCU_KTHREAD_PRIO
716 value of 1 is appropriate in the common case, which is real-time
717 applications that do not have any CPU-bound threads.
718
719 Some real-time applications might not have a single real-time
720 thread that saturates a given CPU, but instead might have
721 multiple real-time threads that, taken together, fully utilize
722 that CPU. In this case, you should set RCU_KTHREAD_PRIO to
723 a priority higher than the lowest-priority thread that is
724 conspiring to prevent the CPU from running any non-real-time
725 tasks. For example, if one thread at priority 10 and another
726 thread at priority 5 are between themselves fully consuming
727 the CPU time on a given CPU, then RCU_KTHREAD_PRIO should be
728 set to priority 6 or higher.
729
730 Specify the real-time priority, or take the default if unsure.
731
732config RCU_BOOST_DELAY
733 int "Milliseconds to delay boosting after RCU grace-period start"
734 range 0 3000
735 depends on RCU_BOOST
736 default 500
737 help
738 This option specifies the time to wait after the beginning of
739 a given grace period before priority-boosting preempted RCU
740 readers blocking that grace period. Note that any RCU reader
741 blocking an expedited RCU grace period is boosted immediately.
742
743 Accept the default if unsure.
744
745config RCU_NOCB_CPU
746 bool "Offload RCU callback processing from boot-selected CPUs"
747 depends on TREE_RCU || PREEMPT_RCU
748 depends on RCU_EXPERT || NO_HZ_FULL
749 default n
750 help
751 Use this option to reduce OS jitter for aggressive HPC or
752 real-time workloads. It can also be used to offload RCU
753 callback invocation to energy-efficient CPUs in battery-powered
754 asymmetric multiprocessors.
755
756 This option offloads callback invocation from the set of
757 CPUs specified at boot time by the rcu_nocbs parameter.
758 For each such CPU, a kthread ("rcuox/N") will be created to
759 invoke callbacks, where the "N" is the CPU being offloaded,
760 and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
761 "s" for RCU-sched. Nothing prevents this kthread from running
762 on the specified CPUs, but (1) the kthreads may be preempted
763 between each callback, and (2) affinity or cgroups can be used
764 to force the kthreads to run on whatever set of CPUs is desired.
765
766 Say Y here if you want to help to debug reduced OS jitter.
767 Say N here if you are unsure.
768
769choice
770 prompt "Build-forced no-CBs CPUs"
771 default RCU_NOCB_CPU_NONE
772 depends on RCU_NOCB_CPU
773 help
774 This option allows no-CBs CPUs (whose RCU callbacks are invoked
775 from kthreads rather than from softirq context) to be specified
776 at build time. Additional no-CBs CPUs may be specified by
777 the rcu_nocbs= boot parameter.
778
779config RCU_NOCB_CPU_NONE
780 bool "No build_forced no-CBs CPUs"
781 help
782 This option does not force any of the CPUs to be no-CBs CPUs.
783 Only CPUs designated by the rcu_nocbs= boot parameter will be
784 no-CBs CPUs, whose RCU callbacks will be invoked by per-CPU
785 kthreads whose names begin with "rcuo". All other CPUs will
786 invoke their own RCU callbacks in softirq context.
787
788 Select this option if you want to choose no-CBs CPUs at
789 boot time, for example, to allow testing of different no-CBs
790 configurations without having to rebuild the kernel each time.
791
792config RCU_NOCB_CPU_ZERO
793 bool "CPU 0 is a build_forced no-CBs CPU"
794 help
795 This option forces CPU 0 to be a no-CBs CPU, so that its RCU
796 callbacks are invoked by a per-CPU kthread whose name begins
797 with "rcuo". Additional CPUs may be designated as no-CBs
798	  CPUs using the rcu_nocbs= boot parameter.
799 All other CPUs will invoke their own RCU callbacks in softirq
800 context.
801
802 Select this if CPU 0 needs to be a no-CBs CPU for real-time
803 or energy-efficiency reasons, but the real reason it exists
804 is to ensure that randconfig testing covers mixed systems.
805
806config RCU_NOCB_CPU_ALL
807 bool "All CPUs are build_forced no-CBs CPUs"
808 help
809 This option forces all CPUs to be no-CBs CPUs. The rcu_nocbs=
810 boot parameter will be ignored. All CPUs' RCU callbacks will
811 be executed in the context of per-CPU rcuo kthreads created for
812 this purpose. Assuming that the kthreads whose names start with
813 "rcuo" are bound to "housekeeping" CPUs, this reduces OS jitter
814 on the remaining CPUs, but might decrease memory locality during
815 RCU-callback invocation, thus potentially degrading throughput.
816
817 Select this if all CPUs need to be no-CBs CPUs for real-time
818 or energy-efficiency reasons.
819
820endchoice
821
822endmenu # "RCU Subsystem"
823 476
824config BUILD_BIN2C 477config BUILD_BIN2C
825 bool 478 bool
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index c0e31bfee25c..7d2499bec5fe 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -1157,18 +1157,18 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1157 if (debug_locks_silent) 1157 if (debug_locks_silent)
1158 return 0; 1158 return 0;
1159 1159
1160 printk("\n"); 1160 pr_warn("\n");
1161 pr_warn("======================================================\n"); 1161 pr_warn("======================================================\n");
1162 pr_warn("WARNING: possible circular locking dependency detected\n"); 1162 pr_warn("WARNING: possible circular locking dependency detected\n");
1163 print_kernel_ident(); 1163 print_kernel_ident();
1164 pr_warn("------------------------------------------------------\n"); 1164 pr_warn("------------------------------------------------------\n");
1165 printk("%s/%d is trying to acquire lock:\n", 1165 pr_warn("%s/%d is trying to acquire lock:\n",
1166 curr->comm, task_pid_nr(curr)); 1166 curr->comm, task_pid_nr(curr));
1167 print_lock(check_src); 1167 print_lock(check_src);
1168 printk("\nbut task is already holding lock:\n"); 1168 pr_warn("\nbut task is already holding lock:\n");
1169 print_lock(check_tgt); 1169 print_lock(check_tgt);
1170 printk("\nwhich lock already depends on the new lock.\n\n"); 1170 pr_warn("\nwhich lock already depends on the new lock.\n\n");
1171 printk("\nthe existing dependency chain (in reverse order) is:\n"); 1171 pr_warn("\nthe existing dependency chain (in reverse order) is:\n");
1172 1172
1173 print_circular_bug_entry(entry, depth); 1173 print_circular_bug_entry(entry, depth);
1174 1174
@@ -1495,13 +1495,13 @@ print_bad_irq_dependency(struct task_struct *curr,
1495 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1495 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1496 return 0; 1496 return 0;
1497 1497
1498 printk("\n"); 1498 pr_warn("\n");
1499 pr_warn("=====================================================\n"); 1499 pr_warn("=====================================================\n");
1500 pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", 1500 pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n",
1501 irqclass, irqclass); 1501 irqclass, irqclass);
1502 print_kernel_ident(); 1502 print_kernel_ident();
1503 pr_warn("-----------------------------------------------------\n"); 1503 pr_warn("-----------------------------------------------------\n");
1504 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", 1504 pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
1505 curr->comm, task_pid_nr(curr), 1505 curr->comm, task_pid_nr(curr),
1506 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, 1506 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
1507 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT, 1507 curr->softirq_context, softirq_count() >> SOFTIRQ_SHIFT,
@@ -1509,46 +1509,46 @@ print_bad_irq_dependency(struct task_struct *curr,
1509 curr->softirqs_enabled); 1509 curr->softirqs_enabled);
1510 print_lock(next); 1510 print_lock(next);
1511 1511
1512 printk("\nand this task is already holding:\n"); 1512 pr_warn("\nand this task is already holding:\n");
1513 print_lock(prev); 1513 print_lock(prev);
1514 printk("which would create a new lock dependency:\n"); 1514 pr_warn("which would create a new lock dependency:\n");
1515 print_lock_name(hlock_class(prev)); 1515 print_lock_name(hlock_class(prev));
1516 printk(KERN_CONT " ->"); 1516 pr_cont(" ->");
1517 print_lock_name(hlock_class(next)); 1517 print_lock_name(hlock_class(next));
1518 printk(KERN_CONT "\n"); 1518 pr_cont("\n");
1519 1519
1520 printk("\nbut this new dependency connects a %s-irq-safe lock:\n", 1520 pr_warn("\nbut this new dependency connects a %s-irq-safe lock:\n",
1521 irqclass); 1521 irqclass);
1522 print_lock_name(backwards_entry->class); 1522 print_lock_name(backwards_entry->class);
1523 printk("\n... which became %s-irq-safe at:\n", irqclass); 1523 pr_warn("\n... which became %s-irq-safe at:\n", irqclass);
1524 1524
1525 print_stack_trace(backwards_entry->class->usage_traces + bit1, 1); 1525 print_stack_trace(backwards_entry->class->usage_traces + bit1, 1);
1526 1526
1527 printk("\nto a %s-irq-unsafe lock:\n", irqclass); 1527 pr_warn("\nto a %s-irq-unsafe lock:\n", irqclass);
1528 print_lock_name(forwards_entry->class); 1528 print_lock_name(forwards_entry->class);
1529 printk("\n... which became %s-irq-unsafe at:\n", irqclass); 1529 pr_warn("\n... which became %s-irq-unsafe at:\n", irqclass);
1530 printk("..."); 1530 pr_warn("...");
1531 1531
1532 print_stack_trace(forwards_entry->class->usage_traces + bit2, 1); 1532 print_stack_trace(forwards_entry->class->usage_traces + bit2, 1);
1533 1533
1534 printk("\nother info that might help us debug this:\n\n"); 1534 pr_warn("\nother info that might help us debug this:\n\n");
1535 print_irq_lock_scenario(backwards_entry, forwards_entry, 1535 print_irq_lock_scenario(backwards_entry, forwards_entry,
1536 hlock_class(prev), hlock_class(next)); 1536 hlock_class(prev), hlock_class(next));
1537 1537
1538 lockdep_print_held_locks(curr); 1538 lockdep_print_held_locks(curr);
1539 1539
1540 printk("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass); 1540 pr_warn("\nthe dependencies between %s-irq-safe lock and the holding lock:\n", irqclass);
1541 if (!save_trace(&prev_root->trace)) 1541 if (!save_trace(&prev_root->trace))
1542 return 0; 1542 return 0;
1543 print_shortest_lock_dependencies(backwards_entry, prev_root); 1543 print_shortest_lock_dependencies(backwards_entry, prev_root);
1544 1544
1545 printk("\nthe dependencies between the lock to be acquired"); 1545 pr_warn("\nthe dependencies between the lock to be acquired");
1546 printk(" and %s-irq-unsafe lock:\n", irqclass); 1546 pr_warn(" and %s-irq-unsafe lock:\n", irqclass);
1547 if (!save_trace(&next_root->trace)) 1547 if (!save_trace(&next_root->trace))
1548 return 0; 1548 return 0;
1549 print_shortest_lock_dependencies(forwards_entry, next_root); 1549 print_shortest_lock_dependencies(forwards_entry, next_root);
1550 1550
1551 printk("\nstack backtrace:\n"); 1551 pr_warn("\nstack backtrace:\n");
1552 dump_stack(); 1552 dump_stack();
1553 1553
1554 return 0; 1554 return 0;
@@ -1724,22 +1724,22 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
1724 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1724 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1725 return 0; 1725 return 0;
1726 1726
1727 printk("\n"); 1727 pr_warn("\n");
1728 pr_warn("============================================\n"); 1728 pr_warn("============================================\n");
1729 pr_warn("WARNING: possible recursive locking detected\n"); 1729 pr_warn("WARNING: possible recursive locking detected\n");
1730 print_kernel_ident(); 1730 print_kernel_ident();
1731 pr_warn("--------------------------------------------\n"); 1731 pr_warn("--------------------------------------------\n");
1732 printk("%s/%d is trying to acquire lock:\n", 1732 pr_warn("%s/%d is trying to acquire lock:\n",
1733 curr->comm, task_pid_nr(curr)); 1733 curr->comm, task_pid_nr(curr));
1734 print_lock(next); 1734 print_lock(next);
1735 printk("\nbut task is already holding lock:\n"); 1735 pr_warn("\nbut task is already holding lock:\n");
1736 print_lock(prev); 1736 print_lock(prev);
1737 1737
1738 printk("\nother info that might help us debug this:\n"); 1738 pr_warn("\nother info that might help us debug this:\n");
1739 print_deadlock_scenario(next, prev); 1739 print_deadlock_scenario(next, prev);
1740 lockdep_print_held_locks(curr); 1740 lockdep_print_held_locks(curr);
1741 1741
1742 printk("\nstack backtrace:\n"); 1742 pr_warn("\nstack backtrace:\n");
1743 dump_stack(); 1743 dump_stack();
1744 1744
1745 return 0; 1745 return 0;
@@ -2074,21 +2074,21 @@ static void print_collision(struct task_struct *curr,
2074 struct held_lock *hlock_next, 2074 struct held_lock *hlock_next,
2075 struct lock_chain *chain) 2075 struct lock_chain *chain)
2076{ 2076{
2077 printk("\n"); 2077 pr_warn("\n");
2078 pr_warn("============================\n"); 2078 pr_warn("============================\n");
2079 pr_warn("WARNING: chain_key collision\n"); 2079 pr_warn("WARNING: chain_key collision\n");
2080 print_kernel_ident(); 2080 print_kernel_ident();
2081 pr_warn("----------------------------\n"); 2081 pr_warn("----------------------------\n");
2082 printk("%s/%d: ", current->comm, task_pid_nr(current)); 2082 pr_warn("%s/%d: ", current->comm, task_pid_nr(current));
2083 printk("Hash chain already cached but the contents don't match!\n"); 2083 pr_warn("Hash chain already cached but the contents don't match!\n");
2084 2084
2085 printk("Held locks:"); 2085 pr_warn("Held locks:");
2086 print_chain_keys_held_locks(curr, hlock_next); 2086 print_chain_keys_held_locks(curr, hlock_next);
2087 2087
2088 printk("Locks in cached chain:"); 2088 pr_warn("Locks in cached chain:");
2089 print_chain_keys_chain(chain); 2089 print_chain_keys_chain(chain);
2090 2090
2091 printk("\nstack backtrace:\n"); 2091 pr_warn("\nstack backtrace:\n");
2092 dump_stack(); 2092 dump_stack();
2093} 2093}
2094#endif 2094#endif
@@ -2373,16 +2373,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
2373 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2373 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2374 return 0; 2374 return 0;
2375 2375
2376 printk("\n"); 2376 pr_warn("\n");
2377 pr_warn("================================\n"); 2377 pr_warn("================================\n");
2378 pr_warn("WARNING: inconsistent lock state\n"); 2378 pr_warn("WARNING: inconsistent lock state\n");
2379 print_kernel_ident(); 2379 print_kernel_ident();
2380 pr_warn("--------------------------------\n"); 2380 pr_warn("--------------------------------\n");
2381 2381
2382 printk("inconsistent {%s} -> {%s} usage.\n", 2382 pr_warn("inconsistent {%s} -> {%s} usage.\n",
2383 usage_str[prev_bit], usage_str[new_bit]); 2383 usage_str[prev_bit], usage_str[new_bit]);
2384 2384
2385 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n", 2385 pr_warn("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] takes:\n",
2386 curr->comm, task_pid_nr(curr), 2386 curr->comm, task_pid_nr(curr),
2387 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT, 2387 trace_hardirq_context(curr), hardirq_count() >> HARDIRQ_SHIFT,
2388 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT, 2388 trace_softirq_context(curr), softirq_count() >> SOFTIRQ_SHIFT,
@@ -2390,16 +2390,16 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
2390 trace_softirqs_enabled(curr)); 2390 trace_softirqs_enabled(curr));
2391 print_lock(this); 2391 print_lock(this);
2392 2392
2393 printk("{%s} state was registered at:\n", usage_str[prev_bit]); 2393 pr_warn("{%s} state was registered at:\n", usage_str[prev_bit]);
2394 print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1); 2394 print_stack_trace(hlock_class(this)->usage_traces + prev_bit, 1);
2395 2395
2396 print_irqtrace_events(curr); 2396 print_irqtrace_events(curr);
2397 printk("\nother info that might help us debug this:\n"); 2397 pr_warn("\nother info that might help us debug this:\n");
2398 print_usage_bug_scenario(this); 2398 print_usage_bug_scenario(this);
2399 2399
2400 lockdep_print_held_locks(curr); 2400 lockdep_print_held_locks(curr);
2401 2401
2402 printk("\nstack backtrace:\n"); 2402 pr_warn("\nstack backtrace:\n");
2403 dump_stack(); 2403 dump_stack();
2404 2404
2405 return 0; 2405 return 0;
@@ -2438,28 +2438,28 @@ print_irq_inversion_bug(struct task_struct *curr,
2438 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2438 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2439 return 0; 2439 return 0;
2440 2440
2441 printk("\n"); 2441 pr_warn("\n");
2442 pr_warn("========================================================\n"); 2442 pr_warn("========================================================\n");
2443 pr_warn("WARNING: possible irq lock inversion dependency detected\n"); 2443 pr_warn("WARNING: possible irq lock inversion dependency detected\n");
2444 print_kernel_ident(); 2444 print_kernel_ident();
2445 pr_warn("--------------------------------------------------------\n"); 2445 pr_warn("--------------------------------------------------------\n");
2446 printk("%s/%d just changed the state of lock:\n", 2446 pr_warn("%s/%d just changed the state of lock:\n",
2447 curr->comm, task_pid_nr(curr)); 2447 curr->comm, task_pid_nr(curr));
2448 print_lock(this); 2448 print_lock(this);
2449 if (forwards) 2449 if (forwards)
2450 printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); 2450 pr_warn("but this lock took another, %s-unsafe lock in the past:\n", irqclass);
2451 else 2451 else
2452 printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); 2452 pr_warn("but this lock was taken by another, %s-safe lock in the past:\n", irqclass);
2453 print_lock_name(other->class); 2453 print_lock_name(other->class);
2454 printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); 2454 pr_warn("\n\nand interrupts could create inverse lock ordering between them.\n\n");
2455 2455
2456 printk("\nother info that might help us debug this:\n"); 2456 pr_warn("\nother info that might help us debug this:\n");
2457 2457
2458 /* Find a middle lock (if one exists) */ 2458 /* Find a middle lock (if one exists) */
2459 depth = get_lock_depth(other); 2459 depth = get_lock_depth(other);
2460 do { 2460 do {
2461 if (depth == 0 && (entry != root)) { 2461 if (depth == 0 && (entry != root)) {
2462 printk("lockdep:%s bad path found in chain graph\n", __func__); 2462 pr_warn("lockdep:%s bad path found in chain graph\n", __func__);
2463 break; 2463 break;
2464 } 2464 }
2465 middle = entry; 2465 middle = entry;
@@ -2475,12 +2475,12 @@ print_irq_inversion_bug(struct task_struct *curr,
2475 2475
2476 lockdep_print_held_locks(curr); 2476 lockdep_print_held_locks(curr);
2477 2477
2478 printk("\nthe shortest dependencies between 2nd lock and 1st lock:\n"); 2478 pr_warn("\nthe shortest dependencies between 2nd lock and 1st lock:\n");
2479 if (!save_trace(&root->trace)) 2479 if (!save_trace(&root->trace))
2480 return 0; 2480 return 0;
2481 print_shortest_lock_dependencies(other, root); 2481 print_shortest_lock_dependencies(other, root);
2482 2482
2483 printk("\nstack backtrace:\n"); 2483 pr_warn("\nstack backtrace:\n");
2484 dump_stack(); 2484 dump_stack();
2485 2485
2486 return 0; 2486 return 0;
@@ -3189,25 +3189,25 @@ print_lock_nested_lock_not_held(struct task_struct *curr,
3189 if (debug_locks_silent) 3189 if (debug_locks_silent)
3190 return 0; 3190 return 0;
3191 3191
3192 printk("\n"); 3192 pr_warn("\n");
3193 pr_warn("==================================\n"); 3193 pr_warn("==================================\n");
3194 pr_warn("WARNING: Nested lock was not taken\n"); 3194 pr_warn("WARNING: Nested lock was not taken\n");
3195 print_kernel_ident(); 3195 print_kernel_ident();
3196 pr_warn("----------------------------------\n"); 3196 pr_warn("----------------------------------\n");
3197 3197
3198 printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); 3198 pr_warn("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr));
3199 print_lock(hlock); 3199 print_lock(hlock);
3200 3200
3201 printk("\nbut this task is not holding:\n"); 3201 pr_warn("\nbut this task is not holding:\n");
3202 printk("%s\n", hlock->nest_lock->name); 3202 pr_warn("%s\n", hlock->nest_lock->name);
3203 3203
3204 printk("\nstack backtrace:\n"); 3204 pr_warn("\nstack backtrace:\n");
3205 dump_stack(); 3205 dump_stack();
3206 3206
3207 printk("\nother info that might help us debug this:\n"); 3207 pr_warn("\nother info that might help us debug this:\n");
3208 lockdep_print_held_locks(curr); 3208 lockdep_print_held_locks(curr);
3209 3209
3210 printk("\nstack backtrace:\n"); 3210 pr_warn("\nstack backtrace:\n");
3211 dump_stack(); 3211 dump_stack();
3212 3212
3213 return 0; 3213 return 0;
@@ -3402,21 +3402,21 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3402 if (debug_locks_silent) 3402 if (debug_locks_silent)
3403 return 0; 3403 return 0;
3404 3404
3405 printk("\n"); 3405 pr_warn("\n");
3406 pr_warn("=====================================\n"); 3406 pr_warn("=====================================\n");
3407 pr_warn("WARNING: bad unlock balance detected!\n"); 3407 pr_warn("WARNING: bad unlock balance detected!\n");
3408 print_kernel_ident(); 3408 print_kernel_ident();
3409 pr_warn("-------------------------------------\n"); 3409 pr_warn("-------------------------------------\n");
3410 printk("%s/%d is trying to release lock (", 3410 pr_warn("%s/%d is trying to release lock (",
3411 curr->comm, task_pid_nr(curr)); 3411 curr->comm, task_pid_nr(curr));
3412 print_lockdep_cache(lock); 3412 print_lockdep_cache(lock);
3413 printk(KERN_CONT ") at:\n"); 3413 pr_cont(") at:\n");
3414 print_ip_sym(ip); 3414 print_ip_sym(ip);
3415 printk("but there are no more locks to release!\n"); 3415 pr_warn("but there are no more locks to release!\n");
3416 printk("\nother info that might help us debug this:\n"); 3416 pr_warn("\nother info that might help us debug this:\n");
3417 lockdep_print_held_locks(curr); 3417 lockdep_print_held_locks(curr);
3418 3418
3419 printk("\nstack backtrace:\n"); 3419 pr_warn("\nstack backtrace:\n");
3420 dump_stack(); 3420 dump_stack();
3421 3421
3422 return 0; 3422 return 0;
@@ -3974,21 +3974,21 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
3974 if (debug_locks_silent) 3974 if (debug_locks_silent)
3975 return 0; 3975 return 0;
3976 3976
3977 printk("\n"); 3977 pr_warn("\n");
3978 pr_warn("=================================\n"); 3978 pr_warn("=================================\n");
3979 pr_warn("WARNING: bad contention detected!\n"); 3979 pr_warn("WARNING: bad contention detected!\n");
3980 print_kernel_ident(); 3980 print_kernel_ident();
3981 pr_warn("---------------------------------\n"); 3981 pr_warn("---------------------------------\n");
3982 printk("%s/%d is trying to contend lock (", 3982 pr_warn("%s/%d is trying to contend lock (",
3983 curr->comm, task_pid_nr(curr)); 3983 curr->comm, task_pid_nr(curr));
3984 print_lockdep_cache(lock); 3984 print_lockdep_cache(lock);
3985 printk(KERN_CONT ") at:\n"); 3985 pr_cont(") at:\n");
3986 print_ip_sym(ip); 3986 print_ip_sym(ip);
3987 printk("but there are no locks held!\n"); 3987 pr_warn("but there are no locks held!\n");
3988 printk("\nother info that might help us debug this:\n"); 3988 pr_warn("\nother info that might help us debug this:\n");
3989 lockdep_print_held_locks(curr); 3989 lockdep_print_held_locks(curr);
3990 3990
3991 printk("\nstack backtrace:\n"); 3991 pr_warn("\nstack backtrace:\n");
3992 dump_stack(); 3992 dump_stack();
3993 3993
3994 return 0; 3994 return 0;
@@ -4318,17 +4318,17 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
4318 if (debug_locks_silent) 4318 if (debug_locks_silent)
4319 return; 4319 return;
4320 4320
4321 printk("\n"); 4321 pr_warn("\n");
4322 pr_warn("=========================\n"); 4322 pr_warn("=========================\n");
4323 pr_warn("WARNING: held lock freed!\n"); 4323 pr_warn("WARNING: held lock freed!\n");
4324 print_kernel_ident(); 4324 print_kernel_ident();
4325 pr_warn("-------------------------\n"); 4325 pr_warn("-------------------------\n");
4326 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", 4326 pr_warn("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
4327 curr->comm, task_pid_nr(curr), mem_from, mem_to-1); 4327 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
4328 print_lock(hlock); 4328 print_lock(hlock);
4329 lockdep_print_held_locks(curr); 4329 lockdep_print_held_locks(curr);
4330 4330
4331 printk("\nstack backtrace:\n"); 4331 pr_warn("\nstack backtrace:\n");
4332 dump_stack(); 4332 dump_stack();
4333} 4333}
4334 4334
@@ -4376,14 +4376,14 @@ static void print_held_locks_bug(void)
4376 if (debug_locks_silent) 4376 if (debug_locks_silent)
4377 return; 4377 return;
4378 4378
4379 printk("\n"); 4379 pr_warn("\n");
4380 pr_warn("====================================\n"); 4380 pr_warn("====================================\n");
4381 pr_warn("WARNING: %s/%d still has locks held!\n", 4381 pr_warn("WARNING: %s/%d still has locks held!\n",
4382 current->comm, task_pid_nr(current)); 4382 current->comm, task_pid_nr(current));
4383 print_kernel_ident(); 4383 print_kernel_ident();
4384 pr_warn("------------------------------------\n"); 4384 pr_warn("------------------------------------\n");
4385 lockdep_print_held_locks(current); 4385 lockdep_print_held_locks(current);
4386 printk("\nstack backtrace:\n"); 4386 pr_warn("\nstack backtrace:\n");
4387 dump_stack(); 4387 dump_stack();
4388} 4388}
4389 4389
@@ -4402,10 +4402,10 @@ void debug_show_all_locks(void)
4402 int unlock = 1; 4402 int unlock = 1;
4403 4403
4404 if (unlikely(!debug_locks)) { 4404 if (unlikely(!debug_locks)) {
4405 printk("INFO: lockdep is turned off.\n"); 4405 pr_warn("INFO: lockdep is turned off.\n");
4406 return; 4406 return;
4407 } 4407 }
4408 printk("\nShowing all locks held in the system:\n"); 4408 pr_warn("\nShowing all locks held in the system:\n");
4409 4409
4410 /* 4410 /*
4411 * Here we try to get the tasklist_lock as hard as possible, 4411 * Here we try to get the tasklist_lock as hard as possible,
@@ -4416,18 +4416,18 @@ void debug_show_all_locks(void)
4416retry: 4416retry:
4417 if (!read_trylock(&tasklist_lock)) { 4417 if (!read_trylock(&tasklist_lock)) {
4418 if (count == 10) 4418 if (count == 10)
4419 printk("hm, tasklist_lock locked, retrying... "); 4419 pr_warn("hm, tasklist_lock locked, retrying... ");
4420 if (count) { 4420 if (count) {
4421 count--; 4421 count--;
4422 printk(" #%d", 10-count); 4422 pr_cont(" #%d", 10-count);
4423 mdelay(200); 4423 mdelay(200);
4424 goto retry; 4424 goto retry;
4425 } 4425 }
4426 printk(" ignoring it.\n"); 4426 pr_cont(" ignoring it.\n");
4427 unlock = 0; 4427 unlock = 0;
4428 } else { 4428 } else {
4429 if (count != 10) 4429 if (count != 10)
4430 printk(KERN_CONT " locked it.\n"); 4430 pr_cont(" locked it.\n");
4431 } 4431 }
4432 4432
4433 do_each_thread(g, p) { 4433 do_each_thread(g, p) {
@@ -4445,7 +4445,7 @@ retry:
4445 unlock = 1; 4445 unlock = 1;
4446 } while_each_thread(g, p); 4446 } while_each_thread(g, p);
4447 4447
4448 printk("\n"); 4448 pr_warn("\n");
4449 pr_warn("=============================================\n\n"); 4449 pr_warn("=============================================\n\n");
4450 4450
4451 if (unlock) 4451 if (unlock)
@@ -4475,12 +4475,12 @@ asmlinkage __visible void lockdep_sys_exit(void)
4475 if (unlikely(curr->lockdep_depth)) { 4475 if (unlikely(curr->lockdep_depth)) {
4476 if (!debug_locks_off()) 4476 if (!debug_locks_off())
4477 return; 4477 return;
4478 printk("\n"); 4478 pr_warn("\n");
4479 pr_warn("================================================\n"); 4479 pr_warn("================================================\n");
4480 pr_warn("WARNING: lock held when returning to user space!\n"); 4480 pr_warn("WARNING: lock held when returning to user space!\n");
4481 print_kernel_ident(); 4481 print_kernel_ident();
4482 pr_warn("------------------------------------------------\n"); 4482 pr_warn("------------------------------------------------\n");
4483 printk("%s/%d is leaving the kernel with locks still held!\n", 4483 pr_warn("%s/%d is leaving the kernel with locks still held!\n",
4484 curr->comm, curr->pid); 4484 curr->comm, curr->pid);
4485 lockdep_print_held_locks(curr); 4485 lockdep_print_held_locks(curr);
4486 } 4486 }
@@ -4490,19 +4490,15 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4490{ 4490{
4491 struct task_struct *curr = current; 4491 struct task_struct *curr = current;
4492 4492
4493#ifndef CONFIG_PROVE_RCU_REPEATEDLY
4494 if (!debug_locks_off())
4495 return;
4496#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
4497 /* Note: the following can be executed concurrently, so be careful. */ 4493 /* Note: the following can be executed concurrently, so be careful. */
4498 printk("\n"); 4494 pr_warn("\n");
4499 pr_warn("=============================\n"); 4495 pr_warn("=============================\n");
4500 pr_warn("WARNING: suspicious RCU usage\n"); 4496 pr_warn("WARNING: suspicious RCU usage\n");
4501 print_kernel_ident(); 4497 print_kernel_ident();
4502 pr_warn("-----------------------------\n"); 4498 pr_warn("-----------------------------\n");
4503 printk("%s:%d %s!\n", file, line, s); 4499 pr_warn("%s:%d %s!\n", file, line, s);
4504 printk("\nother info that might help us debug this:\n\n"); 4500 pr_warn("\nother info that might help us debug this:\n\n");
4505 printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", 4501 pr_warn("\n%srcu_scheduler_active = %d, debug_locks = %d\n",
4506 !rcu_lockdep_current_cpu_online() 4502 !rcu_lockdep_current_cpu_online()
4507 ? "RCU used illegally from offline CPU!\n" 4503 ? "RCU used illegally from offline CPU!\n"
4508 : !rcu_is_watching() 4504 : !rcu_is_watching()
@@ -4529,10 +4525,10 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4529 * rcu_read_lock_bh() and so on from extended quiescent states. 4525 * rcu_read_lock_bh() and so on from extended quiescent states.
4530 */ 4526 */
4531 if (!rcu_is_watching()) 4527 if (!rcu_is_watching())
4532 printk("RCU used illegally from extended quiescent state!\n"); 4528 pr_warn("RCU used illegally from extended quiescent state!\n");
4533 4529
4534 lockdep_print_held_locks(curr); 4530 lockdep_print_held_locks(curr);
4535 printk("\nstack backtrace:\n"); 4531 pr_warn("\nstack backtrace:\n");
4536 dump_stack(); 4532 dump_stack();
4537} 4533}
4538EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); 4534EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
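The lockdep hunks above convert the splat-reporting printk() calls to pr_warn() and the printk(KERN_CONT ...) continuations to pr_cont(), so every piece of a report goes out at warning loglevel while multi-part lines still stay glued together. A minimal sketch of the resulting pattern; the function and lock name are made up for illustration only:

#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/sched.h>

/*
 * Illustration only: a lockdep-style report emitted entirely via
 * pr_warn(), with pr_cont() used to continue a line that is built
 * from several pieces (previously printk(KERN_CONT ...)).
 */
static void example_report(struct task_struct *curr)
{
	pr_warn("\n");
	pr_warn("=====================================\n");
	pr_warn("WARNING: example consistency problem\n");
	pr_warn("-------------------------------------\n");
	pr_warn("%s/%d is trying to release lock (", curr->comm, task_pid_nr(curr));
	pr_cont("example_lock");
	pr_cont(") at:\n");
	pr_warn("\nstack backtrace:\n");
	dump_stack();
}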
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
new file mode 100644
index 000000000000..be90c945063f
--- /dev/null
+++ b/kernel/rcu/Kconfig
@@ -0,0 +1,242 @@
1#
2# RCU-related configuration options
3#
4
5menu "RCU Subsystem"
6
7config TREE_RCU
8 bool
9 default y if !PREEMPT && SMP
10 help
11 This option selects the RCU implementation that is
 12	  designed for very large SMP systems with hundreds or
13 thousands of CPUs. It also scales down nicely to
14 smaller systems.
15
16config PREEMPT_RCU
17 bool
18 default y if PREEMPT
19 help
20 This option selects the RCU implementation that is
21 designed for very large SMP systems with hundreds or
22 thousands of CPUs, but for which real-time response
23 is also required. It also scales down nicely to
24 smaller systems.
25
26 Select this option if you are unsure.
27
28config TINY_RCU
29 bool
30 default y if !PREEMPT && !SMP
31 help
32 This option selects the RCU implementation that is
 33	  designed for UP systems for which real-time response
34 is not required. This option greatly reduces the
35 memory footprint of RCU.
36
37config RCU_EXPERT
38 bool "Make expert-level adjustments to RCU configuration"
39 default n
40 help
41 This option needs to be enabled if you wish to make
42 expert-level adjustments to RCU configuration. By default,
43 no such adjustments can be made, which has the often-beneficial
44 side-effect of preventing "make oldconfig" from asking you all
45 sorts of detailed questions about how you would like numerous
46 obscure RCU options to be set up.
47
48 Say Y if you need to make expert-level adjustments to RCU.
49
50 Say N if you are unsure.
51
52config SRCU
53 bool
54 help
55 This option selects the sleepable version of RCU. This version
56 permits arbitrary sleeping or blocking within RCU read-side critical
57 sections.
58
59config TINY_SRCU
60 bool
61 default y if SRCU && TINY_RCU
62 help
63 This option selects the single-CPU non-preemptible version of SRCU.
64
65config TREE_SRCU
66 bool
67 default y if SRCU && !TINY_RCU
68 help
69 This option selects the full-fledged version of SRCU.
70
71config TASKS_RCU
72 bool
73 default n
74 select SRCU
75 help
76 This option enables a task-based RCU implementation that uses
 77	  only voluntary context switches (not preemption!), idle, and
78 user-mode execution as quiescent states.
79
80config RCU_STALL_COMMON
81 def_bool ( TREE_RCU || PREEMPT_RCU )
82 help
83 This option enables RCU CPU stall code that is common between
84 the TINY and TREE variants of RCU. The purpose is to allow
85 the tiny variants to disable RCU CPU stall warnings, while
86 making these warnings mandatory for the tree variants.
87
88config RCU_NEED_SEGCBLIST
89 def_bool ( TREE_RCU || PREEMPT_RCU || TREE_SRCU )
90
91config CONTEXT_TRACKING
92 bool
93
94config CONTEXT_TRACKING_FORCE
95 bool "Force context tracking"
96 depends on CONTEXT_TRACKING
97 default y if !NO_HZ_FULL
98 help
 99	  The major prerequisite for full dynticks to work is
 100	  support for the context tracking subsystem. But there are also
 101	  other dependencies that must be met in order to make the full
 102	  dynticks feature work.
 103
 104	  This option is intended for testing when an arch implements the
 105	  context tracking backend but doesn't yet fulfill all the
 106	  requirements to make the full dynticks feature work.
107 Without the full dynticks, there is no way to test the support
108 for context tracking and the subsystems that rely on it: RCU
109 userspace extended quiescent state and tickless cputime
110 accounting. This option copes with the absence of the full
111 dynticks subsystem by forcing the context tracking on all
112 CPUs in the system.
113
114 Say Y only if you're working on the development of an
115 architecture backend for the context tracking.
116
117 Say N otherwise, this option brings an overhead that you
118 don't want in production.
119
120
121config RCU_FANOUT
122 int "Tree-based hierarchical RCU fanout value"
123 range 2 64 if 64BIT
124 range 2 32 if !64BIT
125 depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
126 default 64 if 64BIT
127 default 32 if !64BIT
128 help
129 This option controls the fanout of hierarchical implementations
130 of RCU, allowing RCU to work efficiently on machines with
131 large numbers of CPUs. This value must be at least the fourth
132 root of NR_CPUS, which allows NR_CPUS to be insanely large.
133 The default value of RCU_FANOUT should be used for production
134 systems, but if you are stress-testing the RCU implementation
135 itself, small RCU_FANOUT values allow you to test large-system
136 code paths on small(er) systems.
137
138 Select a specific number if testing RCU itself.
139 Take the default if unsure.
140
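To make the fourth-root requirement above concrete, given the four-level limit on the rcu_node tree: the default RCU_FANOUT of 64 on 64-bit builds lets a full-depth tree cover up to 64^4 = 16,777,216 CPUs, and even the minimum value of 2 still covers 2^4 = 16 CPUs, so NR_CPUS only constrains RCU_FANOUT to be at least its fourth root.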
141config RCU_FANOUT_LEAF
142 int "Tree-based hierarchical RCU leaf-level fanout value"
143 range 2 64 if 64BIT
144 range 2 32 if !64BIT
145 depends on (TREE_RCU || PREEMPT_RCU) && RCU_EXPERT
146 default 16
147 help
148 This option controls the leaf-level fanout of hierarchical
149 implementations of RCU, and allows trading off cache misses
150 against lock contention. Systems that synchronize their
151 scheduling-clock interrupts for energy-efficiency reasons will
152 want the default because the smaller leaf-level fanout keeps
153 lock contention levels acceptably low. Very large systems
154 (hundreds or thousands of CPUs) will instead want to set this
155 value to the maximum value possible in order to reduce the
156 number of cache misses incurred during RCU's grace-period
 157	  initialization.  These systems tend to run CPU-bound, so they
 158	  are not helped by synchronized interrupts and therefore tend to
159 skew them, which reduces lock contention enough that large
160 leaf-level fanouts work well. That said, setting leaf-level
161 fanout to a large number will likely cause problematic
162 lock contention on the leaf-level rcu_node structures unless
163 you boot with the skew_tick kernel parameter.
164
165 Select a specific number if testing RCU itself.
166
167 Select the maximum permissible value for large systems, but
168 please understand that you may also need to set the skew_tick
169 kernel boot parameter to avoid contention on the rcu_node
170 structure's locks.
171
172 Take the default if unsure.
173
174config RCU_FAST_NO_HZ
175 bool "Accelerate last non-dyntick-idle CPU's grace periods"
176 depends on NO_HZ_COMMON && SMP && RCU_EXPERT
177 default n
178 help
179 This option permits CPUs to enter dynticks-idle state even if
180 they have RCU callbacks queued, and prevents RCU from waking
181 these CPUs up more than roughly once every four jiffies (by
182 default, you can adjust this using the rcutree.rcu_idle_gp_delay
183 parameter), thus improving energy efficiency. On the other
184 hand, this option increases the duration of RCU grace periods,
185 for example, slowing down synchronize_rcu().
186
187 Say Y if energy efficiency is critically important, and you
188 don't care about increased grace-period durations.
189
190 Say N if you are unsure.
191
192config RCU_BOOST
193 bool "Enable RCU priority boosting"
194 depends on RT_MUTEXES && PREEMPT_RCU && RCU_EXPERT
195 default n
196 help
197 This option boosts the priority of preempted RCU readers that
198 block the current preemptible RCU grace period for too long.
199 This option also prevents heavy loads from blocking RCU
200 callback invocation for all flavors of RCU.
201
 202	  Say Y here if you are working with real-time apps or heavy loads.
203 Say N here if you are unsure.
204
205config RCU_BOOST_DELAY
206 int "Milliseconds to delay boosting after RCU grace-period start"
207 range 0 3000
208 depends on RCU_BOOST
209 default 500
210 help
211 This option specifies the time to wait after the beginning of
212 a given grace period before priority-boosting preempted RCU
213 readers blocking that grace period. Note that any RCU reader
214 blocking an expedited RCU grace period is boosted immediately.
215
216 Accept the default if unsure.
217
218config RCU_NOCB_CPU
219 bool "Offload RCU callback processing from boot-selected CPUs"
220 depends on TREE_RCU || PREEMPT_RCU
221 depends on RCU_EXPERT || NO_HZ_FULL
222 default n
223 help
224 Use this option to reduce OS jitter for aggressive HPC or
225 real-time workloads. It can also be used to offload RCU
226 callback invocation to energy-efficient CPUs in battery-powered
227 asymmetric multiprocessors.
228
229 This option offloads callback invocation from the set of
230 CPUs specified at boot time by the rcu_nocbs parameter.
231 For each such CPU, a kthread ("rcuox/N") will be created to
232 invoke callbacks, where the "N" is the CPU being offloaded,
233 and where the "x" is "b" for RCU-bh, "p" for RCU-preempt, and
234 "s" for RCU-sched. Nothing prevents this kthread from running
235 on the specified CPUs, but (1) the kthreads may be preempted
236 between each callback, and (2) affinity or cgroups can be used
237 to force the kthreads to run on whatever set of CPUs is desired.
238
239 Say Y here if you want to help to debug reduced OS jitter.
240 Say N here if you are unsure.
241
242endmenu # "RCU Subsystem"
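The SRCU entry in the menu above notes that sleeping is permitted inside an SRCU read-side critical section, which ordinary RCU read-side critical sections do not allow. A minimal sketch of that usage; the srcu_struct name and the blocking operation are chosen purely for illustration:

#include <linux/delay.h>
#include <linux/srcu.h>

DEFINE_STATIC_SRCU(example_srcu);	/* hypothetical SRCU domain */

static void example_srcu_reader(void)
{
	int idx;

	idx = srcu_read_lock(&example_srcu);
	/* Blocking is legal here; it only delays this SRCU domain. */
	msleep(10);
	srcu_read_unlock(&example_srcu, idx);
}

static void example_srcu_updater(void)
{
	/* Waits only for readers of example_srcu, not for all of RCU. */
	synchronize_srcu(&example_srcu);
}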
diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
new file mode 100644
index 000000000000..0ec7d1d33a14
--- /dev/null
+++ b/kernel/rcu/Kconfig.debug
@@ -0,0 +1,82 @@
1#
2# RCU-related debugging configuration options
3#
4
5menu "RCU Debugging"
6
7config PROVE_RCU
8 def_bool PROVE_LOCKING
9
10config TORTURE_TEST
11 tristate
12 default n
13
14config RCU_PERF_TEST
15 tristate "performance tests for RCU"
16 depends on DEBUG_KERNEL
17 select TORTURE_TEST
18 select SRCU
19 select TASKS_RCU
20 default n
21 help
22 This option provides a kernel module that runs performance
23 tests on the RCU infrastructure. The kernel module may be built
24 after the fact on the running kernel to be tested, if desired.
25
26 Say Y here if you want RCU performance tests to be built into
27 the kernel.
28 Say M if you want the RCU performance tests to build as a module.
29 Say N if you are unsure.
30
31config RCU_TORTURE_TEST
32 tristate "torture tests for RCU"
33 depends on DEBUG_KERNEL
34 select TORTURE_TEST
35 select SRCU
36 select TASKS_RCU
37 default n
38 help
39 This option provides a kernel module that runs torture tests
40 on the RCU infrastructure. The kernel module may be built
41 after the fact on the running kernel to be tested, if desired.
42
43 Say Y here if you want RCU torture tests to be built into
44 the kernel.
45 Say M if you want the RCU torture tests to build as a module.
46 Say N if you are unsure.
47
48config RCU_CPU_STALL_TIMEOUT
49 int "RCU CPU stall timeout in seconds"
50 depends on RCU_STALL_COMMON
51 range 3 300
52 default 21
53 help
 54	  If a given RCU grace period extends beyond the specified
55 number of seconds, a CPU stall warning is printed. If the
56 RCU grace period persists, additional CPU stall warnings are
57 printed at more widely spaced intervals.
58
59config RCU_TRACE
60 bool "Enable tracing for RCU"
61 depends on DEBUG_KERNEL
62 default y if TREE_RCU
63 select TRACE_CLOCK
64 help
65 This option enables additional tracepoints for ftrace-style
66 event tracing.
67
 68	  Say Y here if you want to enable RCU tracing.
69 Say N if you are unsure.
70
71config RCU_EQS_DEBUG
72 bool "Provide debugging asserts for adding NO_HZ support to an arch"
73 depends on DEBUG_KERNEL
74 help
75 This option provides consistency checks in RCU's handling of
76 NO_HZ. These checks have proven quite helpful in detecting
77 bugs in arch-specific NO_HZ code.
78
 79	  Say N here if you need the lowest possible kernel/user switch latencies.
 80	  Say Y if you are unsure.
81
82endmenu # "RCU Debugging"
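The RCU_CPU_STALL_TIMEOUT entry above is easiest to understand from the reader's side: if a CPU (or, for preemptible RCU, a task) stays inside a read-side critical section long enough that a grace period cannot complete within the configured number of seconds, a stall warning is printed and then repeated at longer intervals. A deliberately broken sketch of the kind of code that trips it, with a made-up function name; never do this in real code:

#include <linux/compiler.h>
#include <linux/rcupdate.h>

/* Deliberately buggy illustration: a reader that never completes. */
static void example_stall_a_grace_period(void)
{
	rcu_read_lock();
	for (;;)
		barrier();	/* spin forever inside the read-side critical section */
	rcu_read_unlock();	/* never reached */
}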
diff --git a/kernel/rcu/Makefile b/kernel/rcu/Makefile
index 23803c7d5180..13c0fc852767 100644
--- a/kernel/rcu/Makefile
+++ b/kernel/rcu/Makefile
@@ -3,13 +3,11 @@
3KCOV_INSTRUMENT := n 3KCOV_INSTRUMENT := n
4 4
5obj-y += update.o sync.o 5obj-y += update.o sync.o
6obj-$(CONFIG_CLASSIC_SRCU) += srcu.o
7obj-$(CONFIG_TREE_SRCU) += srcutree.o 6obj-$(CONFIG_TREE_SRCU) += srcutree.o
8obj-$(CONFIG_TINY_SRCU) += srcutiny.o 7obj-$(CONFIG_TINY_SRCU) += srcutiny.o
9obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o 8obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
10obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o 9obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o
11obj-$(CONFIG_TREE_RCU) += tree.o 10obj-$(CONFIG_TREE_RCU) += tree.o
12obj-$(CONFIG_PREEMPT_RCU) += tree.o 11obj-$(CONFIG_PREEMPT_RCU) += tree.o
13obj-$(CONFIG_TREE_RCU_TRACE) += tree_trace.o
14obj-$(CONFIG_TINY_RCU) += tiny.o 12obj-$(CONFIG_TINY_RCU) += tiny.o
15obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o 13obj-$(CONFIG_RCU_NEED_SEGCBLIST) += rcu_segcblist.o
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 73e16ec4054b..808b8c85f626 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -212,6 +212,18 @@ int rcu_jiffies_till_stall_check(void);
212 */ 212 */
213#define TPS(x) tracepoint_string(x) 213#define TPS(x) tracepoint_string(x)
214 214
215/*
216 * Dump the ftrace buffer, but only one time per callsite per boot.
217 */
218#define rcu_ftrace_dump(oops_dump_mode) \
219do { \
220 static atomic_t ___rfd_beenhere = ATOMIC_INIT(0); \
221 \
222 if (!atomic_read(&___rfd_beenhere) && \
223 !atomic_xchg(&___rfd_beenhere, 1)) \
224 ftrace_dump(oops_dump_mode); \
225} while (0)
226
215void rcu_early_boot_tests(void); 227void rcu_early_boot_tests(void);
216void rcu_test_sync_prims(void); 228void rcu_test_sync_prims(void);
217 229
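The rcu_ftrace_dump() macro added above is a one-shot guard: the initial atomic_read() keeps the common already-dumped case read-only, and atomic_xchg() resolves the race so that at most one caller per callsite actually dumps the trace buffer. The static variable inside the macro gives each callsite its own guard; written as a plain function with a single global guard, the same idea looks roughly like this (names are illustrative):

#include <linux/atomic.h>
#include <linux/kernel.h>

/* Dump the ftrace buffer at most once per boot, whoever gets there first. */
static void example_ftrace_dump_once(enum ftrace_dump_mode oops_dump_mode)
{
	static atomic_t beenhere = ATOMIC_INIT(0);

	/* Cheap read first so later callers never write the shared line. */
	if (!atomic_read(&beenhere) && !atomic_xchg(&beenhere, 1))
		ftrace_dump(oops_dump_mode);
}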
@@ -291,6 +303,271 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
291 cpu <= rnp->grphi; \ 303 cpu <= rnp->grphi; \
292 cpu = cpumask_next((cpu), cpu_possible_mask)) 304 cpu = cpumask_next((cpu), cpu_possible_mask))
293 305
306/*
307 * Wrappers for the rcu_node::lock acquire and release.
308 *
309 * Because the rcu_nodes form a tree, the tree traversal locking will observe
310 * different lock values, this in turn means that an UNLOCK of one level
311 * followed by a LOCK of another level does not imply a full memory barrier;
312 * and most importantly transitivity is lost.
313 *
314 * In order to restore full ordering between tree levels, augment the regular
315 * lock acquire functions with smp_mb__after_unlock_lock().
316 *
317 * As ->lock of struct rcu_node is a __private field, therefore one should use
318 * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
319 */
320#define raw_spin_lock_rcu_node(p) \
321do { \
322 raw_spin_lock(&ACCESS_PRIVATE(p, lock)); \
323 smp_mb__after_unlock_lock(); \
324} while (0)
325
326#define raw_spin_unlock_rcu_node(p) raw_spin_unlock(&ACCESS_PRIVATE(p, lock))
327
328#define raw_spin_lock_irq_rcu_node(p) \
329do { \
330 raw_spin_lock_irq(&ACCESS_PRIVATE(p, lock)); \
331 smp_mb__after_unlock_lock(); \
332} while (0)
333
334#define raw_spin_unlock_irq_rcu_node(p) \
335 raw_spin_unlock_irq(&ACCESS_PRIVATE(p, lock))
336
337#define raw_spin_lock_irqsave_rcu_node(p, flags) \
338do { \
339 raw_spin_lock_irqsave(&ACCESS_PRIVATE(p, lock), flags); \
340 smp_mb__after_unlock_lock(); \
341} while (0)
342
343#define raw_spin_unlock_irqrestore_rcu_node(p, flags) \
 344	raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(p, lock), flags)
345
346#define raw_spin_trylock_rcu_node(p) \
347({ \
348 bool ___locked = raw_spin_trylock(&ACCESS_PRIVATE(p, lock)); \
349 \
350 if (___locked) \
351 smp_mb__after_unlock_lock(); \
352 ___locked; \
353})
354
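The users of these wrappers live in tree.c and tree_plugin.h, so a short sketch may help; the function below is hypothetical and assumes the struct rcu_node definition from kernel/rcu/tree.h (the ->parent pointer is real). Every acquisition goes through a wrapper so that smp_mb__after_unlock_lock() restores full transitive ordering when one level's lock is dropped and another level's is taken:

/*
 * Illustration only: walk from a leaf rcu_node toward the root, taking
 * each level's ->lock through the wrappers so the unlock of the child
 * followed by the lock of the parent still acts as a full barrier.
 */
static void example_walk_to_root(struct rcu_node *rnp_leaf)
{
	struct rcu_node *rnp;
	unsigned long flags;

	for (rnp = rnp_leaf; rnp; rnp = rnp->parent) {
		raw_spin_lock_irqsave_rcu_node(rnp, flags);
		/* ... per-level bookkeeping would go here ... */
		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
	}
}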
294#endif /* #if defined(SRCU) || !defined(TINY_RCU) */ 355#endif /* #if defined(SRCU) || !defined(TINY_RCU) */
295 356
357#ifdef CONFIG_TINY_RCU
358/* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */
359static inline bool rcu_gp_is_normal(void) /* Internal RCU use. */
360{
361 return true;
362}
363static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */
364{
365 return false;
366}
367
368static inline void rcu_expedite_gp(void)
369{
370}
371
372static inline void rcu_unexpedite_gp(void)
373{
374}
375#else /* #ifdef CONFIG_TINY_RCU */
376bool rcu_gp_is_normal(void); /* Internal RCU use. */
377bool rcu_gp_is_expedited(void); /* Internal RCU use. */
378void rcu_expedite_gp(void);
379void rcu_unexpedite_gp(void);
380void rcupdate_announce_bootup_oddness(void);
381#endif /* #else #ifdef CONFIG_TINY_RCU */
382
383#define RCU_SCHEDULER_INACTIVE 0
384#define RCU_SCHEDULER_INIT 1
385#define RCU_SCHEDULER_RUNNING 2
386
387#ifdef CONFIG_TINY_RCU
388static inline void rcu_request_urgent_qs_task(struct task_struct *t) { }
389#else /* #ifdef CONFIG_TINY_RCU */
390void rcu_request_urgent_qs_task(struct task_struct *t);
391#endif /* #else #ifdef CONFIG_TINY_RCU */
392
393enum rcutorture_type {
394 RCU_FLAVOR,
395 RCU_BH_FLAVOR,
396 RCU_SCHED_FLAVOR,
397 RCU_TASKS_FLAVOR,
398 SRCU_FLAVOR,
399 INVALID_RCU_FLAVOR
400};
401
402#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
403void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags,
404 unsigned long *gpnum, unsigned long *completed);
405void rcutorture_record_test_transition(void);
406void rcutorture_record_progress(unsigned long vernum);
407void do_trace_rcu_torture_read(const char *rcutorturename,
408 struct rcu_head *rhp,
409 unsigned long secs,
410 unsigned long c_old,
411 unsigned long c);
412#else
413static inline void rcutorture_get_gp_data(enum rcutorture_type test_type,
414 int *flags,
415 unsigned long *gpnum,
416 unsigned long *completed)
417{
418 *flags = 0;
419 *gpnum = 0;
420 *completed = 0;
421}
422static inline void rcutorture_record_test_transition(void)
423{
424}
425static inline void rcutorture_record_progress(unsigned long vernum)
426{
427}
428#ifdef CONFIG_RCU_TRACE
429void do_trace_rcu_torture_read(const char *rcutorturename,
430 struct rcu_head *rhp,
431 unsigned long secs,
432 unsigned long c_old,
433 unsigned long c);
434#else
435#define do_trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
436 do { } while (0)
437#endif
438#endif
439
440#ifdef CONFIG_TINY_SRCU
441
442static inline void srcutorture_get_gp_data(enum rcutorture_type test_type,
443 struct srcu_struct *sp, int *flags,
444 unsigned long *gpnum,
445 unsigned long *completed)
446{
447 if (test_type != SRCU_FLAVOR)
448 return;
449 *flags = 0;
450 *completed = sp->srcu_idx;
451 *gpnum = *completed;
452}
453
454#elif defined(CONFIG_TREE_SRCU)
455
456void srcutorture_get_gp_data(enum rcutorture_type test_type,
457 struct srcu_struct *sp, int *flags,
458 unsigned long *gpnum, unsigned long *completed);
459
460#endif
461
462#ifdef CONFIG_TINY_RCU
463
464/*
465 * Return the number of grace periods started.
466 */
467static inline unsigned long rcu_batches_started(void)
468{
469 return 0;
470}
471
472/*
473 * Return the number of bottom-half grace periods started.
474 */
475static inline unsigned long rcu_batches_started_bh(void)
476{
477 return 0;
478}
479
480/*
481 * Return the number of sched grace periods started.
482 */
483static inline unsigned long rcu_batches_started_sched(void)
484{
485 return 0;
486}
487
488/*
489 * Return the number of grace periods completed.
490 */
491static inline unsigned long rcu_batches_completed(void)
492{
493 return 0;
494}
495
496/*
497 * Return the number of bottom-half grace periods completed.
498 */
499static inline unsigned long rcu_batches_completed_bh(void)
500{
501 return 0;
502}
503
504/*
505 * Return the number of sched grace periods completed.
506 */
507static inline unsigned long rcu_batches_completed_sched(void)
508{
509 return 0;
510}
511
512/*
513 * Return the number of expedited grace periods completed.
514 */
515static inline unsigned long rcu_exp_batches_completed(void)
516{
517 return 0;
518}
519
520/*
521 * Return the number of expedited sched grace periods completed.
522 */
523static inline unsigned long rcu_exp_batches_completed_sched(void)
524{
525 return 0;
526}
527
528static inline unsigned long srcu_batches_completed(struct srcu_struct *sp)
529{
530 return 0;
531}
532
533static inline void rcu_force_quiescent_state(void)
534{
535}
536
537static inline void rcu_bh_force_quiescent_state(void)
538{
539}
540
541static inline void rcu_sched_force_quiescent_state(void)
542{
543}
544
545static inline void show_rcu_gp_kthreads(void)
546{
547}
548
549#else /* #ifdef CONFIG_TINY_RCU */
550extern unsigned long rcutorture_testseq;
551extern unsigned long rcutorture_vernum;
552unsigned long rcu_batches_started(void);
553unsigned long rcu_batches_started_bh(void);
554unsigned long rcu_batches_started_sched(void);
555unsigned long rcu_batches_completed(void);
556unsigned long rcu_batches_completed_bh(void);
557unsigned long rcu_batches_completed_sched(void);
558unsigned long rcu_exp_batches_completed(void);
559unsigned long rcu_exp_batches_completed_sched(void);
560unsigned long srcu_batches_completed(struct srcu_struct *sp);
561void show_rcu_gp_kthreads(void);
562void rcu_force_quiescent_state(void);
563void rcu_bh_force_quiescent_state(void);
564void rcu_sched_force_quiescent_state(void);
565#endif /* #else #ifdef CONFIG_TINY_RCU */
566
567#ifdef CONFIG_RCU_NOCB_CPU
568bool rcu_is_nocb_cpu(int cpu);
569#else
570static inline bool rcu_is_nocb_cpu(int cpu) { return false; }
571#endif
572
296#endif /* __LINUX_RCU_H */ 573#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcu/rcuperf.c b/kernel/rcu/rcuperf.c
index a4a86fb47e4a..3cc18110b612 100644
--- a/kernel/rcu/rcuperf.c
+++ b/kernel/rcu/rcuperf.c
@@ -48,6 +48,8 @@
48#include <linux/torture.h> 48#include <linux/torture.h>
49#include <linux/vmalloc.h> 49#include <linux/vmalloc.h>
50 50
51#include "rcu.h"
52
51MODULE_LICENSE("GPL"); 53MODULE_LICENSE("GPL");
52MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>"); 54MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
53 55
@@ -59,12 +61,16 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.vnet.ibm.com>");
59#define VERBOSE_PERFOUT_ERRSTRING(s) \ 61#define VERBOSE_PERFOUT_ERRSTRING(s) \
60 do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0) 62 do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)
61 63
64torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
65torture_param(int, gp_async_max, 1000, "Max # outstanding waits per reader");
62torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); 66torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
63torture_param(int, holdoff, 10, "Holdoff time before test start (s)"); 67torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
64torture_param(int, nreaders, -1, "Number of RCU reader threads"); 68torture_param(int, nreaders, 0, "Number of RCU reader threads");
65torture_param(int, nwriters, -1, "Number of RCU updater threads"); 69torture_param(int, nwriters, -1, "Number of RCU updater threads");
66torture_param(bool, shutdown, false, "Shutdown at end of performance tests."); 70torture_param(bool, shutdown, !IS_ENABLED(MODULE),
71 "Shutdown at end of performance tests.");
67torture_param(bool, verbose, true, "Enable verbose debugging printk()s"); 72torture_param(bool, verbose, true, "Enable verbose debugging printk()s");
73torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
68 74
69static char *perf_type = "rcu"; 75static char *perf_type = "rcu";
70module_param(perf_type, charp, 0444); 76module_param(perf_type, charp, 0444);
@@ -86,13 +92,16 @@ static u64 t_rcu_perf_writer_started;
86static u64 t_rcu_perf_writer_finished; 92static u64 t_rcu_perf_writer_finished;
87static unsigned long b_rcu_perf_writer_started; 93static unsigned long b_rcu_perf_writer_started;
88static unsigned long b_rcu_perf_writer_finished; 94static unsigned long b_rcu_perf_writer_finished;
95static DEFINE_PER_CPU(atomic_t, n_async_inflight);
89 96
90static int rcu_perf_writer_state; 97static int rcu_perf_writer_state;
91#define RTWS_INIT 0 98#define RTWS_INIT 0
92#define RTWS_EXP_SYNC 1 99#define RTWS_ASYNC 1
93#define RTWS_SYNC 2 100#define RTWS_BARRIER 2
94#define RTWS_IDLE 2 101#define RTWS_EXP_SYNC 3
95#define RTWS_STOPPING 3 102#define RTWS_SYNC 4
103#define RTWS_IDLE 5
104#define RTWS_STOPPING 6
96 105
97#define MAX_MEAS 10000 106#define MAX_MEAS 10000
98#define MIN_MEAS 100 107#define MIN_MEAS 100
@@ -114,6 +123,8 @@ struct rcu_perf_ops {
114 unsigned long (*started)(void); 123 unsigned long (*started)(void);
115 unsigned long (*completed)(void); 124 unsigned long (*completed)(void);
116 unsigned long (*exp_completed)(void); 125 unsigned long (*exp_completed)(void);
126 void (*async)(struct rcu_head *head, rcu_callback_t func);
127 void (*gp_barrier)(void);
117 void (*sync)(void); 128 void (*sync)(void);
118 void (*exp_sync)(void); 129 void (*exp_sync)(void);
119 const char *name; 130 const char *name;
@@ -153,6 +164,8 @@ static struct rcu_perf_ops rcu_ops = {
153 .started = rcu_batches_started, 164 .started = rcu_batches_started,
154 .completed = rcu_batches_completed, 165 .completed = rcu_batches_completed,
155 .exp_completed = rcu_exp_batches_completed, 166 .exp_completed = rcu_exp_batches_completed,
167 .async = call_rcu,
168 .gp_barrier = rcu_barrier,
156 .sync = synchronize_rcu, 169 .sync = synchronize_rcu,
157 .exp_sync = synchronize_rcu_expedited, 170 .exp_sync = synchronize_rcu_expedited,
158 .name = "rcu" 171 .name = "rcu"
@@ -181,6 +194,8 @@ static struct rcu_perf_ops rcu_bh_ops = {
181 .started = rcu_batches_started_bh, 194 .started = rcu_batches_started_bh,
182 .completed = rcu_batches_completed_bh, 195 .completed = rcu_batches_completed_bh,
183 .exp_completed = rcu_exp_batches_completed_sched, 196 .exp_completed = rcu_exp_batches_completed_sched,
197 .async = call_rcu_bh,
198 .gp_barrier = rcu_barrier_bh,
184 .sync = synchronize_rcu_bh, 199 .sync = synchronize_rcu_bh,
185 .exp_sync = synchronize_rcu_bh_expedited, 200 .exp_sync = synchronize_rcu_bh_expedited,
186 .name = "rcu_bh" 201 .name = "rcu_bh"
@@ -208,6 +223,16 @@ static unsigned long srcu_perf_completed(void)
208 return srcu_batches_completed(srcu_ctlp); 223 return srcu_batches_completed(srcu_ctlp);
209} 224}
210 225
226static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
227{
228 call_srcu(srcu_ctlp, head, func);
229}
230
231static void srcu_rcu_barrier(void)
232{
233 srcu_barrier(srcu_ctlp);
234}
235
211static void srcu_perf_synchronize(void) 236static void srcu_perf_synchronize(void)
212{ 237{
213 synchronize_srcu(srcu_ctlp); 238 synchronize_srcu(srcu_ctlp);
@@ -226,11 +251,42 @@ static struct rcu_perf_ops srcu_ops = {
226 .started = NULL, 251 .started = NULL,
227 .completed = srcu_perf_completed, 252 .completed = srcu_perf_completed,
228 .exp_completed = srcu_perf_completed, 253 .exp_completed = srcu_perf_completed,
254 .async = srcu_call_rcu,
255 .gp_barrier = srcu_rcu_barrier,
229 .sync = srcu_perf_synchronize, 256 .sync = srcu_perf_synchronize,
230 .exp_sync = srcu_perf_synchronize_expedited, 257 .exp_sync = srcu_perf_synchronize_expedited,
231 .name = "srcu" 258 .name = "srcu"
232}; 259};
233 260
261static struct srcu_struct srcud;
262
263static void srcu_sync_perf_init(void)
264{
265 srcu_ctlp = &srcud;
266 init_srcu_struct(srcu_ctlp);
267}
268
269static void srcu_sync_perf_cleanup(void)
270{
271 cleanup_srcu_struct(srcu_ctlp);
272}
273
274static struct rcu_perf_ops srcud_ops = {
275 .ptype = SRCU_FLAVOR,
276 .init = srcu_sync_perf_init,
277 .cleanup = srcu_sync_perf_cleanup,
278 .readlock = srcu_perf_read_lock,
279 .readunlock = srcu_perf_read_unlock,
280 .started = NULL,
281 .completed = srcu_perf_completed,
282 .exp_completed = srcu_perf_completed,
283 .async = srcu_call_rcu,
284 .gp_barrier = srcu_rcu_barrier,
285 .sync = srcu_perf_synchronize,
286 .exp_sync = srcu_perf_synchronize_expedited,
287 .name = "srcud"
288};
289
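The srcud variant above differs from srcu only in using a dynamically initialized srcu_struct, which is why it carries .init and .cleanup hooks. The lifecycle it depends on looks roughly like the following sketch (names are illustrative); the srcu_barrier() matters because cleanup_srcu_struct() must not run while call_srcu() callbacks are still outstanding:

#include <linux/srcu.h>

static struct srcu_struct example_srcu;	/* illustration only */

static int example_setup(void)
{
	return init_srcu_struct(&example_srcu);
}

static void example_post_callback(struct rcu_head *rhp, rcu_callback_t func)
{
	call_srcu(&example_srcu, rhp, func);
}

static void example_teardown(void)
{
	srcu_barrier(&example_srcu);		/* wait for outstanding callbacks */
	cleanup_srcu_struct(&example_srcu);	/* then tear the structure down */
}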
234/* 290/*
235 * Definitions for sched perf testing. 291 * Definitions for sched perf testing.
236 */ 292 */
@@ -254,6 +310,8 @@ static struct rcu_perf_ops sched_ops = {
254 .started = rcu_batches_started_sched, 310 .started = rcu_batches_started_sched,
255 .completed = rcu_batches_completed_sched, 311 .completed = rcu_batches_completed_sched,
256 .exp_completed = rcu_exp_batches_completed_sched, 312 .exp_completed = rcu_exp_batches_completed_sched,
313 .async = call_rcu_sched,
314 .gp_barrier = rcu_barrier_sched,
257 .sync = synchronize_sched, 315 .sync = synchronize_sched,
258 .exp_sync = synchronize_sched_expedited, 316 .exp_sync = synchronize_sched_expedited,
259 .name = "sched" 317 .name = "sched"
@@ -281,6 +339,8 @@ static struct rcu_perf_ops tasks_ops = {
281 .readunlock = tasks_perf_read_unlock, 339 .readunlock = tasks_perf_read_unlock,
282 .started = rcu_no_completed, 340 .started = rcu_no_completed,
283 .completed = rcu_no_completed, 341 .completed = rcu_no_completed,
342 .async = call_rcu_tasks,
343 .gp_barrier = rcu_barrier_tasks,
284 .sync = synchronize_rcu_tasks, 344 .sync = synchronize_rcu_tasks,
285 .exp_sync = synchronize_rcu_tasks, 345 .exp_sync = synchronize_rcu_tasks,
286 .name = "tasks" 346 .name = "tasks"
@@ -344,6 +404,15 @@ rcu_perf_reader(void *arg)
344} 404}
345 405
346/* 406/*
407 * Callback function for asynchronous grace periods from rcu_perf_writer().
408 */
409static void rcu_perf_async_cb(struct rcu_head *rhp)
410{
411 atomic_dec(this_cpu_ptr(&n_async_inflight));
412 kfree(rhp);
413}
414
415/*
347 * RCU perf writer kthread. Repeatedly does a grace period. 416 * RCU perf writer kthread. Repeatedly does a grace period.
348 */ 417 */
349static int 418static int
@@ -352,6 +421,7 @@ rcu_perf_writer(void *arg)
352 int i = 0; 421 int i = 0;
353 int i_max; 422 int i_max;
354 long me = (long)arg; 423 long me = (long)arg;
424 struct rcu_head *rhp = NULL;
355 struct sched_param sp; 425 struct sched_param sp;
356 bool started = false, done = false, alldone = false; 426 bool started = false, done = false, alldone = false;
357 u64 t; 427 u64 t;
@@ -380,9 +450,27 @@ rcu_perf_writer(void *arg)
380 } 450 }
381 451
382 do { 452 do {
453 if (writer_holdoff)
454 udelay(writer_holdoff);
383 wdp = &wdpp[i]; 455 wdp = &wdpp[i];
384 *wdp = ktime_get_mono_fast_ns(); 456 *wdp = ktime_get_mono_fast_ns();
385 if (gp_exp) { 457 if (gp_async) {
458retry:
459 if (!rhp)
460 rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
461 if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
462 rcu_perf_writer_state = RTWS_ASYNC;
463 atomic_inc(this_cpu_ptr(&n_async_inflight));
464 cur_ops->async(rhp, rcu_perf_async_cb);
465 rhp = NULL;
466 } else if (!kthread_should_stop()) {
467 rcu_perf_writer_state = RTWS_BARRIER;
468 cur_ops->gp_barrier();
469 goto retry;
470 } else {
471 kfree(rhp); /* Because we are stopping. */
472 }
473 } else if (gp_exp) {
386 rcu_perf_writer_state = RTWS_EXP_SYNC; 474 rcu_perf_writer_state = RTWS_EXP_SYNC;
387 cur_ops->exp_sync(); 475 cur_ops->exp_sync();
388 } else { 476 } else {
@@ -429,6 +517,10 @@ rcu_perf_writer(void *arg)
429 i++; 517 i++;
430 rcu_perf_wait_shutdown(); 518 rcu_perf_wait_shutdown();
431 } while (!torture_must_stop()); 519 } while (!torture_must_stop());
520 if (gp_async) {
521 rcu_perf_writer_state = RTWS_BARRIER;
522 cur_ops->gp_barrier();
523 }
432 rcu_perf_writer_state = RTWS_STOPPING; 524 rcu_perf_writer_state = RTWS_STOPPING;
433 writer_n_durations[me] = i_max; 525 writer_n_durations[me] = i_max;
434 torture_kthread_stopping("rcu_perf_writer"); 526 torture_kthread_stopping("rcu_perf_writer");
@@ -452,6 +544,17 @@ rcu_perf_cleanup(void)
452 u64 *wdp; 544 u64 *wdp;
453 u64 *wdpp; 545 u64 *wdpp;
454 546
547 /*
548 * Would like warning at start, but everything is expedited
549 * during the mid-boot phase, so have to wait till the end.
550 */
551 if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
552 VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
553 if (rcu_gp_is_normal() && gp_exp)
554 VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
555 if (gp_exp && gp_async)
556 VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!");
557
455 if (torture_cleanup_begin()) 558 if (torture_cleanup_begin())
456 return; 559 return;
457 560
@@ -554,7 +657,7 @@ rcu_perf_init(void)
554 long i; 657 long i;
555 int firsterr = 0; 658 int firsterr = 0;
556 static struct rcu_perf_ops *perf_ops[] = { 659 static struct rcu_perf_ops *perf_ops[] = {
557 &rcu_ops, &rcu_bh_ops, &srcu_ops, &sched_ops, 660 &rcu_ops, &rcu_bh_ops, &srcu_ops, &srcud_ops, &sched_ops,
558 RCUPERF_TASKS_OPS 661 RCUPERF_TASKS_OPS
559 }; 662 };
560 663
@@ -624,16 +727,6 @@ rcu_perf_init(void)
624 firsterr = -ENOMEM; 727 firsterr = -ENOMEM;
625 goto unwind; 728 goto unwind;
626 } 729 }
627 if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp) {
628 VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
629 firsterr = -EINVAL;
630 goto unwind;
631 }
632 if (rcu_gp_is_normal() && gp_exp) {
633 VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
634 firsterr = -EINVAL;
635 goto unwind;
636 }
637 for (i = 0; i < nrealwriters; i++) { 730 for (i = 0; i < nrealwriters; i++) {
638 writer_durations[i] = 731 writer_durations[i] =
639 kcalloc(MAX_MEAS, sizeof(*writer_durations[i]), 732 kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index ae6e574d4cf5..b8f7f8ce8575 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -52,6 +52,8 @@
52#include <linux/torture.h> 52#include <linux/torture.h>
53#include <linux/vmalloc.h> 53#include <linux/vmalloc.h>
54 54
55#include "rcu.h"
56
55MODULE_LICENSE("GPL"); 57MODULE_LICENSE("GPL");
56MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); 58MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
57 59
@@ -562,31 +564,19 @@ static void srcu_torture_stats(void)
562 int __maybe_unused cpu; 564 int __maybe_unused cpu;
563 int idx; 565 int idx;
564 566
565#if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU)
566#ifdef CONFIG_TREE_SRCU 567#ifdef CONFIG_TREE_SRCU
567 idx = srcu_ctlp->srcu_idx & 0x1; 568 idx = srcu_ctlp->srcu_idx & 0x1;
568#else /* #ifdef CONFIG_TREE_SRCU */
569 idx = srcu_ctlp->completed & 0x1;
570#endif /* #else #ifdef CONFIG_TREE_SRCU */
571 pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", 569 pr_alert("%s%s Tree SRCU per-CPU(idx=%d):",
572 torture_type, TORTURE_FLAG, idx); 570 torture_type, TORTURE_FLAG, idx);
573 for_each_possible_cpu(cpu) { 571 for_each_possible_cpu(cpu) {
574 unsigned long l0, l1; 572 unsigned long l0, l1;
575 unsigned long u0, u1; 573 unsigned long u0, u1;
576 long c0, c1; 574 long c0, c1;
577#ifdef CONFIG_TREE_SRCU
578 struct srcu_data *counts; 575 struct srcu_data *counts;
579 576
580 counts = per_cpu_ptr(srcu_ctlp->sda, cpu); 577 counts = per_cpu_ptr(srcu_ctlp->sda, cpu);
581 u0 = counts->srcu_unlock_count[!idx]; 578 u0 = counts->srcu_unlock_count[!idx];
582 u1 = counts->srcu_unlock_count[idx]; 579 u1 = counts->srcu_unlock_count[idx];
583#else /* #ifdef CONFIG_TREE_SRCU */
584 struct srcu_array *counts;
585
586 counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu);
587 u0 = counts->unlock_count[!idx];
588 u1 = counts->unlock_count[idx];
589#endif /* #else #ifdef CONFIG_TREE_SRCU */
590 580
591 /* 581 /*
592 * Make sure that a lock is always counted if the corresponding 582 * Make sure that a lock is always counted if the corresponding
@@ -594,13 +584,8 @@ static void srcu_torture_stats(void)
594 */ 584 */
595 smp_rmb(); 585 smp_rmb();
596 586
597#ifdef CONFIG_TREE_SRCU
598 l0 = counts->srcu_lock_count[!idx]; 587 l0 = counts->srcu_lock_count[!idx];
599 l1 = counts->srcu_lock_count[idx]; 588 l1 = counts->srcu_lock_count[idx];
600#else /* #ifdef CONFIG_TREE_SRCU */
601 l0 = counts->lock_count[!idx];
602 l1 = counts->lock_count[idx];
603#endif /* #else #ifdef CONFIG_TREE_SRCU */
604 589
605 c0 = l0 - u0; 590 c0 = l0 - u0;
606 c1 = l1 - u1; 591 c1 = l1 - u1;
@@ -609,7 +594,7 @@ static void srcu_torture_stats(void)
609 pr_cont("\n"); 594 pr_cont("\n");
610#elif defined(CONFIG_TINY_SRCU) 595#elif defined(CONFIG_TINY_SRCU)
611 idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1; 596 idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1;
612 pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n", 597 pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n",
613 torture_type, TORTURE_FLAG, idx, 598 torture_type, TORTURE_FLAG, idx,
614 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]), 599 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]),
615 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx])); 600 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx]));
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
deleted file mode 100644
index dea03614263f..000000000000
--- a/kernel/rcu/srcu.c
+++ /dev/null
@@ -1,661 +0,0 @@
1/*
2 * Sleepable Read-Copy Update mechanism for mutual exclusion.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, you can access it online at
16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 *
18 * Copyright (C) IBM Corporation, 2006
19 * Copyright (C) Fujitsu, 2012
20 *
21 * Author: Paul McKenney <paulmck@us.ibm.com>
22 * Lai Jiangshan <laijs@cn.fujitsu.com>
23 *
24 * For detailed explanation of Read-Copy Update mechanism see -
25 * Documentation/RCU/ *.txt
26 *
27 */
28
29#include <linux/export.h>
30#include <linux/mutex.h>
31#include <linux/percpu.h>
32#include <linux/preempt.h>
33#include <linux/rcupdate_wait.h>
34#include <linux/sched.h>
35#include <linux/smp.h>
36#include <linux/delay.h>
37#include <linux/srcu.h>
38
39#include "rcu.h"
40
41/*
42 * Initialize an rcu_batch structure to empty.
43 */
44static inline void rcu_batch_init(struct rcu_batch *b)
45{
46 b->head = NULL;
47 b->tail = &b->head;
48}
49
50/*
51 * Enqueue a callback onto the tail of the specified rcu_batch structure.
52 */
53static inline void rcu_batch_queue(struct rcu_batch *b, struct rcu_head *head)
54{
55 *b->tail = head;
56 b->tail = &head->next;
57}
58
59/*
60 * Is the specified rcu_batch structure empty?
61 */
62static inline bool rcu_batch_empty(struct rcu_batch *b)
63{
64 return b->tail == &b->head;
65}
66
67/*
68 * Remove the callback at the head of the specified rcu_batch structure
69 * and return a pointer to it, or return NULL if the structure is empty.
70 */
71static inline struct rcu_head *rcu_batch_dequeue(struct rcu_batch *b)
72{
73 struct rcu_head *head;
74
75 if (rcu_batch_empty(b))
76 return NULL;
77
78 head = b->head;
79 b->head = head->next;
80 if (b->tail == &head->next)
81 rcu_batch_init(b);
82
83 return head;
84}
85
86/*
87 * Move all callbacks from the rcu_batch structure specified by "from" to
88 * the structure specified by "to".
89 */
90static inline void rcu_batch_move(struct rcu_batch *to, struct rcu_batch *from)
91{
92 if (!rcu_batch_empty(from)) {
93 *to->tail = from->head;
94 to->tail = from->tail;
95 rcu_batch_init(from);
96 }
97}
98
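
Editorial note for readers following the classic-SRCU removal: a minimal illustrative driver for the rcu_batch helpers above (demo_rcu_batch() is hypothetical, not part of the file). An empty batch has ->tail == &->head, so enqueue is a single store through the tail pointer, and rcu_batch_move() splices a whole list in O(1):

    static void demo_rcu_batch(void)
    {
            struct rcu_batch pend, ready;
            struct rcu_head h1 = { .next = NULL }, h2 = { .next = NULL };
            struct rcu_head *p;

            rcu_batch_init(&pend);
            rcu_batch_init(&ready);
            rcu_batch_queue(&pend, &h1);            /* pend: h1            */
            rcu_batch_queue(&pend, &h2);            /* pend: h1 -> h2      */
            rcu_batch_move(&ready, &pend);          /* pend empty; ready: h1 -> h2 */
            while ((p = rcu_batch_dequeue(&ready)) != NULL)
                    ;                               /* Visits h1, then h2 (FIFO). */
    }
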
99static int init_srcu_struct_fields(struct srcu_struct *sp)
100{
101 sp->completed = 0;
102 spin_lock_init(&sp->queue_lock);
103 sp->running = false;
104 rcu_batch_init(&sp->batch_queue);
105 rcu_batch_init(&sp->batch_check0);
106 rcu_batch_init(&sp->batch_check1);
107 rcu_batch_init(&sp->batch_done);
108 INIT_DELAYED_WORK(&sp->work, process_srcu);
109 sp->per_cpu_ref = alloc_percpu(struct srcu_array);
110 return sp->per_cpu_ref ? 0 : -ENOMEM;
111}
112
113#ifdef CONFIG_DEBUG_LOCK_ALLOC
114
115int __init_srcu_struct(struct srcu_struct *sp, const char *name,
116 struct lock_class_key *key)
117{
118 /* Don't re-initialize a lock while it is held. */
119 debug_check_no_locks_freed((void *)sp, sizeof(*sp));
120 lockdep_init_map(&sp->dep_map, name, key, 0);
121 return init_srcu_struct_fields(sp);
122}
123EXPORT_SYMBOL_GPL(__init_srcu_struct);
124
125#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
126
127/**
128 * init_srcu_struct - initialize a sleep-RCU structure
129 * @sp: structure to initialize.
130 *
131 * Must invoke this on a given srcu_struct before passing that srcu_struct
132 * to any other function. Each srcu_struct represents a separate domain
133 * of SRCU protection.
134 */
135int init_srcu_struct(struct srcu_struct *sp)
136{
137 return init_srcu_struct_fields(sp);
138}
139EXPORT_SYMBOL_GPL(init_srcu_struct);
140
141#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
142
143/*
144 * Returns approximate total of the readers' ->lock_count[] values for the
145 * rank of per-CPU counters specified by idx.
146 */
147static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
148{
149 int cpu;
150 unsigned long sum = 0;
151
152 for_each_possible_cpu(cpu) {
153 struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
154
155 sum += READ_ONCE(cpuc->lock_count[idx]);
156 }
157 return sum;
158}
159
160/*
161 * Returns approximate total of the readers' ->unlock_count[] values for the
162 * rank of per-CPU counters specified by idx.
163 */
164static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
165{
166 int cpu;
167 unsigned long sum = 0;
168
169 for_each_possible_cpu(cpu) {
170 struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
171
172 sum += READ_ONCE(cpuc->unlock_count[idx]);
173 }
174 return sum;
175}
176
177/*
178 * Return true if the number of pre-existing readers is determined to
179 * be zero.
180 */
181static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
182{
183 unsigned long unlocks;
184
185 unlocks = srcu_readers_unlock_idx(sp, idx);
186
187 /*
188 * Make sure that a lock is always counted if the corresponding unlock
189 * is counted. Needs to be a smp_mb() as the read side may contain a
190 * read from a variable that is written to before the synchronize_srcu()
191 * in the write side. In this case smp_mb()s A and B act like the store
192 * buffering pattern.
193 *
194 * This smp_mb() also pairs with smp_mb() C to prevent accesses after the
195 * synchronize_srcu() from being executed before the grace period ends.
196 */
197 smp_mb(); /* A */
198
199 /*
200 * If the locks are the same as the unlocks, then there must have
201 * been no readers on this index at some time in between. This does not
202 * mean that there are no more readers, as one could have read the
203 * current index but not have incremented the lock counter yet.
204 *
205 * Possible bug: There is no guarantee that there haven't been ULONG_MAX
206 * increments of ->lock_count[] since the unlocks were counted, meaning
207 * that this could return true even if there are still active readers.
208 * Since there are no memory barriers around srcu_flip(), the CPU is not
209 * required to increment ->completed before running
210 * srcu_readers_unlock_idx(), which means that there could be an
211 * arbitrarily large number of critical sections that execute after
212 * srcu_readers_unlock_idx() but use the old value of ->completed.
213 */
214 return srcu_readers_lock_idx(sp, idx) == unlocks;
215}
216
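
Editorial note, illustrative only: the check above works on sums across all CPUs because a reader may srcu_read_lock() on one CPU and srcu_read_unlock() on another after migrating, so an individual CPU's lock/unlock counters need not balance. A hypothetical helper making that explicit (not part of the file):

    static long demo_srcu_readers_on_idx(struct srcu_struct *sp, int idx)
    {
            int cpu;
            long sum = 0;

            for_each_possible_cpu(cpu) {
                    struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);

                    sum += READ_ONCE(cpuc->lock_count[idx]);        /* May exceed this CPU's unlocks. */
                    sum -= READ_ONCE(cpuc->unlock_count[idx]);      /* May exceed this CPU's locks.   */
            }
            return sum;     /* Approximate count of readers still using idx. */
    }
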
217/**
218 * srcu_readers_active - returns true if there are readers, and false
219 * otherwise.
220 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
221 *
222 * Note that this is not an atomic primitive, and can therefore suffer
223 * severe errors when invoked on an active srcu_struct. That said, it
224 * can be useful as an error check at cleanup time.
225 */
226static bool srcu_readers_active(struct srcu_struct *sp)
227{
228 int cpu;
229 unsigned long sum = 0;
230
231 for_each_possible_cpu(cpu) {
232 struct srcu_array *cpuc = per_cpu_ptr(sp->per_cpu_ref, cpu);
233
234 sum += READ_ONCE(cpuc->lock_count[0]);
235 sum += READ_ONCE(cpuc->lock_count[1]);
236 sum -= READ_ONCE(cpuc->unlock_count[0]);
237 sum -= READ_ONCE(cpuc->unlock_count[1]);
238 }
239 return sum;
240}
241
242/**
243 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
244 * @sp: structure to clean up.
245 *
246 * Must invoke this only after you are finished using a given srcu_struct
247 * that was initialized via init_srcu_struct(). This code does some
248 * probabilistic checking, spotting late uses of srcu_read_lock(),
249 * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu().
250 * If any such late uses are detected, the per-CPU memory associated with
251 * the srcu_struct is simply leaked and WARN_ON() is invoked. If the
252 * caller frees the srcu_struct itself, a use-after-free crash will likely
253 * ensue, but at least there will be a warning printed.
254 */
255void cleanup_srcu_struct(struct srcu_struct *sp)
256{
257 if (WARN_ON(srcu_readers_active(sp)))
258 return; /* Leakage unless caller handles error. */
259 free_percpu(sp->per_cpu_ref);
260 sp->per_cpu_ref = NULL;
261}
262EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
263
264/*
265 * Counts the new reader in the appropriate per-CPU element of the
266 * srcu_struct.
267 * Returns an index that must be passed to the matching srcu_read_unlock().
268 */
269int __srcu_read_lock(struct srcu_struct *sp)
270{
271 int idx;
272
273 idx = READ_ONCE(sp->completed) & 0x1;
274 this_cpu_inc(sp->per_cpu_ref->lock_count[idx]);
275 smp_mb(); /* B */ /* Avoid leaking the critical section. */
276 return idx;
277}
278EXPORT_SYMBOL_GPL(__srcu_read_lock);
279
280/*
281 * Removes the count for the old reader from the appropriate per-CPU
282 * element of the srcu_struct. Note that this may well be a different
283 * CPU than that which was incremented by the corresponding srcu_read_lock().
284 */
285void __srcu_read_unlock(struct srcu_struct *sp, int idx)
286{
287 smp_mb(); /* C */ /* Avoid leaking the critical section. */
288 this_cpu_inc(sp->per_cpu_ref->unlock_count[idx]);
289}
290EXPORT_SYMBOL_GPL(__srcu_read_unlock);
291
292/*
293 * We use an adaptive strategy for synchronize_srcu() and especially for
294 * synchronize_srcu_expedited(). We spin for a fixed time period
295 * (defined below) to allow SRCU readers to exit their read-side critical
296 * sections. If there are still some readers after 10 microseconds,
297 * we repeatedly block for 1-millisecond time periods. This approach
298 * has done well in testing, so there is no need for a config parameter.
299 */
300#define SRCU_RETRY_CHECK_DELAY 5
301#define SYNCHRONIZE_SRCU_TRYCOUNT 2
302#define SYNCHRONIZE_SRCU_EXP_TRYCOUNT 12
303
304/*
305 * @@@ Wait until all pre-existing readers complete. Such readers
306 * will have used the index specified by "idx".
307 * The caller should ensure that ->completed is not changed while checking,
308 * and that idx = (->completed & 1) ^ 1.
309 */
310static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
311{
312 for (;;) {
313 if (srcu_readers_active_idx_check(sp, idx))
314 return true;
315 if (--trycount <= 0)
316 return false;
317 udelay(SRCU_RETRY_CHECK_DELAY);
318 }
319}
320
321/*
322 * Increment the ->completed counter so that future SRCU readers will
323 * use the other rank of the ->(un)lock_count[] arrays. This allows
324 * us to wait for pre-existing readers in a starvation-free manner.
325 */
326static void srcu_flip(struct srcu_struct *sp)
327{
328 WRITE_ONCE(sp->completed, sp->completed + 1);
329
330 /*
331 * Ensure that if the updater misses an __srcu_read_unlock()
332 * increment, that task's next __srcu_read_lock() will see the
333 * above counter update. Note that both this memory barrier
334 * and the one in srcu_readers_active_idx_check() provide the
335 * guarantee for __srcu_read_lock().
336 */
337 smp_mb(); /* D */ /* Pairs with C. */
338}
339
340/*
341 * Enqueue an SRCU callback on the specified srcu_struct structure,
342 * initiating grace-period processing if it is not already running.
343 *
344 * Note that all CPUs must agree that the grace period extended beyond
345 * all pre-existing SRCU read-side critical sections. On systems with
346 * more than one CPU, this means that when "func()" is invoked, each CPU
347 * is guaranteed to have executed a full memory barrier since the end of
348 * its last corresponding SRCU read-side critical section whose beginning
349 * preceded the call to call_srcu(). It also means that each CPU executing
350 * an SRCU read-side critical section that continues beyond the start of
351 * "func()" must have executed a memory barrier after the call_srcu()
352 * but before the beginning of that SRCU read-side critical section.
353 * Note that these guarantees include CPUs that are offline, idle, or
354 * executing in user mode, as well as CPUs that are executing in the kernel.
355 *
356 * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
357 * resulting SRCU callback function "func()", then both CPU A and CPU
358 * B are guaranteed to execute a full memory barrier during the time
359 * interval between the call to call_srcu() and the invocation of "func()".
360 * This guarantee applies even if CPU A and CPU B are the same CPU (but
361 * again only if the system has more than one CPU).
362 *
363 * Of course, these guarantees apply only for invocations of call_srcu(),
364 * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
365 * srcu_struct structure.
366 */
367void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
368 rcu_callback_t func)
369{
370 unsigned long flags;
371
372 head->next = NULL;
373 head->func = func;
374 spin_lock_irqsave(&sp->queue_lock, flags);
375 smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
376 rcu_batch_queue(&sp->batch_queue, head);
377 if (!sp->running) {
378 sp->running = true;
379 queue_delayed_work(system_power_efficient_wq, &sp->work, 0);
380 }
381 spin_unlock_irqrestore(&sp->queue_lock, flags);
382}
383EXPORT_SYMBOL_GPL(call_srcu);
384
385static void srcu_advance_batches(struct srcu_struct *sp, int trycount);
386static void srcu_reschedule(struct srcu_struct *sp);
387
388/*
389 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
390 */
391static void __synchronize_srcu(struct srcu_struct *sp, int trycount)
392{
393 struct rcu_synchronize rcu;
394 struct rcu_head *head = &rcu.head;
395 bool done = false;
396
397 RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
398 lock_is_held(&rcu_bh_lock_map) ||
399 lock_is_held(&rcu_lock_map) ||
400 lock_is_held(&rcu_sched_lock_map),
401 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
402
403 might_sleep();
404 init_completion(&rcu.completion);
405
406 head->next = NULL;
407 head->func = wakeme_after_rcu;
408 spin_lock_irq(&sp->queue_lock);
409 smp_mb__after_unlock_lock(); /* Caller's prior accesses before GP. */
410 if (!sp->running) {
411 /* steal the processing owner */
412 sp->running = true;
413 rcu_batch_queue(&sp->batch_check0, head);
414 spin_unlock_irq(&sp->queue_lock);
415
416 srcu_advance_batches(sp, trycount);
417 if (!rcu_batch_empty(&sp->batch_done)) {
418 BUG_ON(sp->batch_done.head != head);
419 rcu_batch_dequeue(&sp->batch_done);
420 done = true;
421 }
422 /* give the processing owner to work_struct */
423 srcu_reschedule(sp);
424 } else {
425 rcu_batch_queue(&sp->batch_queue, head);
426 spin_unlock_irq(&sp->queue_lock);
427 }
428
429 if (!done) {
430 wait_for_completion(&rcu.completion);
431 smp_mb(); /* Caller's later accesses after GP. */
432 }
433
434}
435
436/**
437 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
438 * @sp: srcu_struct with which to synchronize.
439 *
440 * Wait for the counts of both indexes to drain to zero. To avoid
441 * possible starvation of synchronize_srcu(), it first waits for the count of
442 * index=((->completed & 1) ^ 1) to drain to zero,
443 * and then flips ->completed and waits for the count of the other index.
444 *
445 * Can block; must be called from process context.
446 *
447 * Note that it is illegal to call synchronize_srcu() from the corresponding
448 * SRCU read-side critical section; doing so will result in deadlock.
449 * However, it is perfectly legal to call synchronize_srcu() on one
450 * srcu_struct from some other srcu_struct's read-side critical section,
451 * as long as the resulting graph of srcu_structs is acyclic.
452 *
453 * There are memory-ordering constraints implied by synchronize_srcu().
454 * On systems with more than one CPU, when synchronize_srcu() returns,
455 * each CPU is guaranteed to have executed a full memory barrier since
456 * the end of its last corresponding SRCU read-side critical section
457 * whose beginning preceded the call to synchronize_srcu(). In addition,
458 * each CPU having an SRCU read-side critical section that extends beyond
459 * the return from synchronize_srcu() is guaranteed to have executed a
460 * full memory barrier after the beginning of synchronize_srcu() and before
461 * the beginning of that SRCU read-side critical section. Note that these
462 * guarantees include CPUs that are offline, idle, or executing in user mode,
463 * as well as CPUs that are executing in the kernel.
464 *
465 * Furthermore, if CPU A invoked synchronize_srcu(), which returned
466 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
467 * to have executed a full memory barrier during the execution of
468 * synchronize_srcu(). This guarantee applies even if CPU A and CPU B
469 * are the same CPU, but again only if the system has more than one CPU.
470 *
471 * Of course, these memory-ordering guarantees apply only when
472 * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
473 * passed the same srcu_struct structure.
474 */
475void synchronize_srcu(struct srcu_struct *sp)
476{
477 __synchronize_srcu(sp, (rcu_gp_is_expedited() && !rcu_gp_is_normal())
478 ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT
479 : SYNCHRONIZE_SRCU_TRYCOUNT);
480}
481EXPORT_SYMBOL_GPL(synchronize_srcu);
482
483/**
484 * synchronize_srcu_expedited - Brute-force SRCU grace period
485 * @sp: srcu_struct with which to synchronize.
486 *
487 * Wait for an SRCU grace period to elapse, but be more aggressive about
488 * spinning rather than blocking when waiting.
489 *
490 * Note that synchronize_srcu_expedited() has the same deadlock and
491 * memory-ordering properties as does synchronize_srcu().
492 */
493void synchronize_srcu_expedited(struct srcu_struct *sp)
494{
495 __synchronize_srcu(sp, SYNCHRONIZE_SRCU_EXP_TRYCOUNT);
496}
497EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
498
499/**
500 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
501 * @sp: srcu_struct on which to wait for in-flight callbacks.
502 */
503void srcu_barrier(struct srcu_struct *sp)
504{
505 synchronize_srcu(sp);
506}
507EXPORT_SYMBOL_GPL(srcu_barrier);
508
509/**
510 * srcu_batches_completed - return batches completed.
511 * @sp: srcu_struct on which to report batch completion.
512 *
513 * Report the number of batches, correlated with, but not necessarily
514 * precisely the same as, the number of grace periods that have elapsed.
515 */
516unsigned long srcu_batches_completed(struct srcu_struct *sp)
517{
518 return sp->completed;
519}
520EXPORT_SYMBOL_GPL(srcu_batches_completed);
521
522#define SRCU_CALLBACK_BATCH 10
523#define SRCU_INTERVAL 1
524
525/*
526 * Move any new SRCU callbacks to the first stage of the SRCU grace
527 * period pipeline.
528 */
529static void srcu_collect_new(struct srcu_struct *sp)
530{
531 if (!rcu_batch_empty(&sp->batch_queue)) {
532 spin_lock_irq(&sp->queue_lock);
533 rcu_batch_move(&sp->batch_check0, &sp->batch_queue);
534 spin_unlock_irq(&sp->queue_lock);
535 }
536}
537
538/*
539 * Core SRCU state machine. Advance callbacks from ->batch_check0 to
540 * ->batch_check1 and then to ->batch_done as readers drain.
541 */
542static void srcu_advance_batches(struct srcu_struct *sp, int trycount)
543{
544 int idx = 1 ^ (sp->completed & 1);
545
546 /*
547 * Because readers might be delayed for an extended period after
548 * fetching ->completed for their index, at any point in time there
549 * might well be readers using both idx=0 and idx=1. We therefore
550 * need to wait for readers to clear from both index values before
551 * invoking a callback.
552 */
553
554 if (rcu_batch_empty(&sp->batch_check0) &&
555 rcu_batch_empty(&sp->batch_check1))
556 return; /* no callbacks need to be advanced */
557
558 if (!try_check_zero(sp, idx, trycount))
559 return; /* failed to advance, will try after SRCU_INTERVAL */
560
561 /*
562 * The callbacks in ->batch_check1 already had their first zero check
563 * and counter flip back when they were enqueued on ->batch_check0 in
564 * a previous invocation of srcu_advance_batches().
565 * (Presumably try_check_zero() returned false during that
566 * invocation, leaving the callbacks stranded on ->batch_check1.)
567 * They are therefore ready to invoke, so move them to ->batch_done.
568 */
569 rcu_batch_move(&sp->batch_done, &sp->batch_check1);
570
571 if (rcu_batch_empty(&sp->batch_check0))
572 return; /* no callbacks need to be advanced */
573 srcu_flip(sp);
574
575 /*
576 * The callbacks in ->batch_check0 just finished their
577 * first zero check and flip, so move them to ->batch_check1
578 * for future checking on the other idx.
579 */
580 rcu_batch_move(&sp->batch_check1, &sp->batch_check0);
581
582 /*
583 * SRCU read-side critical sections are normally short, so check
584 * at least twice in quick succession after a flip.
585 */
586 trycount = trycount < 2 ? 2 : trycount;
587 if (!try_check_zero(sp, idx^1, trycount))
588 return; /* failed to advance, will try after SRCU_INTERVAL */
589
590 /*
591 * The callbacks in ->batch_check1 have now waited for all
592 * pre-existing readers using both idx values. They are therefore
593 * ready to invoke, so move them to ->batch_done.
594 */
595 rcu_batch_move(&sp->batch_done, &sp->batch_check1);
596}
597
598/*
599 * Invoke a limited number of SRCU callbacks that have passed through
600 * their grace period. If there are more to do, SRCU will reschedule
601 * the workqueue. Note that needed memory barriers have been executed
602 * in this task's context by srcu_readers_active_idx_check().
603 */
604static void srcu_invoke_callbacks(struct srcu_struct *sp)
605{
606 int i;
607 struct rcu_head *head;
608
609 for (i = 0; i < SRCU_CALLBACK_BATCH; i++) {
610 head = rcu_batch_dequeue(&sp->batch_done);
611 if (!head)
612 break;
613 local_bh_disable();
614 head->func(head);
615 local_bh_enable();
616 }
617}
618
619/*
620 * Finished one round of SRCU grace-period processing. Start another if there are
621 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
622 */
623static void srcu_reschedule(struct srcu_struct *sp)
624{
625 bool pending = true;
626
627 if (rcu_batch_empty(&sp->batch_done) &&
628 rcu_batch_empty(&sp->batch_check1) &&
629 rcu_batch_empty(&sp->batch_check0) &&
630 rcu_batch_empty(&sp->batch_queue)) {
631 spin_lock_irq(&sp->queue_lock);
632 if (rcu_batch_empty(&sp->batch_done) &&
633 rcu_batch_empty(&sp->batch_check1) &&
634 rcu_batch_empty(&sp->batch_check0) &&
635 rcu_batch_empty(&sp->batch_queue)) {
636 sp->running = false;
637 pending = false;
638 }
639 spin_unlock_irq(&sp->queue_lock);
640 }
641
642 if (pending)
643 queue_delayed_work(system_power_efficient_wq,
644 &sp->work, SRCU_INTERVAL);
645}
646
647/*
648 * This is the work-queue function that handles SRCU grace periods.
649 */
650void process_srcu(struct work_struct *work)
651{
652 struct srcu_struct *sp;
653
654 sp = container_of(work, struct srcu_struct, work.work);
655
656 srcu_collect_new(sp);
657 srcu_advance_batches(sp, 1);
658 srcu_invoke_callbacks(sp);
659 srcu_reschedule(sp);
660}
661EXPORT_SYMBOL_GPL(process_srcu);
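
Editorial note on the deletion above: only the implementation moves (to srcutree.c and srcutiny.c); the SRCU API is unchanged for callers. A minimal, illustrative reader/updater pair that works the same before and after this series (demo_* names are hypothetical, and init_srcu_struct(&demo_srcu) must run before use):

    #include <linux/slab.h>
    #include <linux/srcu.h>

    static struct srcu_struct demo_srcu;
    static int __rcu *demo_ptr;

    static int demo_reader(void)
    {
            int idx, val = 0;
            int *p;

            idx = srcu_read_lock(&demo_srcu);
            p = srcu_dereference(demo_ptr, &demo_srcu);
            if (p)
                    val = *p;
            srcu_read_unlock(&demo_srcu, idx);
            return val;
    }

    static void demo_update(int *newp)
    {
            int *old = rcu_dereference_protected(demo_ptr, 1);

            rcu_assign_pointer(demo_ptr, newp);
            synchronize_srcu(&demo_srcu);   /* Wait only for pre-existing readers. */
            kfree(old);
    }
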
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 32798eb14853..1a1c1047d2ed 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -38,8 +38,8 @@ static int init_srcu_struct_fields(struct srcu_struct *sp)
38 sp->srcu_lock_nesting[0] = 0; 38 sp->srcu_lock_nesting[0] = 0;
39 sp->srcu_lock_nesting[1] = 0; 39 sp->srcu_lock_nesting[1] = 0;
40 init_swait_queue_head(&sp->srcu_wq); 40 init_swait_queue_head(&sp->srcu_wq);
41 sp->srcu_gp_seq = 0; 41 sp->srcu_cb_head = NULL;
42 rcu_segcblist_init(&sp->srcu_cblist); 42 sp->srcu_cb_tail = &sp->srcu_cb_head;
43 sp->srcu_gp_running = false; 43 sp->srcu_gp_running = false;
44 sp->srcu_gp_waiting = false; 44 sp->srcu_gp_waiting = false;
45 sp->srcu_idx = 0; 45 sp->srcu_idx = 0;
@@ -88,30 +88,14 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
88{ 88{
89 WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); 89 WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
90 flush_work(&sp->srcu_work); 90 flush_work(&sp->srcu_work);
91 WARN_ON(rcu_seq_state(sp->srcu_gp_seq));
92 WARN_ON(sp->srcu_gp_running); 91 WARN_ON(sp->srcu_gp_running);
93 WARN_ON(sp->srcu_gp_waiting); 92 WARN_ON(sp->srcu_gp_waiting);
94 WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)); 93 WARN_ON(sp->srcu_cb_head);
94 WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
95} 95}
96EXPORT_SYMBOL_GPL(cleanup_srcu_struct); 96EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
97 97
98/* 98/*
99 * Counts the new reader in the appropriate per-CPU element of the
100 * srcu_struct. Can be invoked from irq/bh handlers, but the matching
101 * __srcu_read_unlock() must be in the same handler instance. Returns an
102 * index that must be passed to the matching srcu_read_unlock().
103 */
104int __srcu_read_lock(struct srcu_struct *sp)
105{
106 int idx;
107
108 idx = READ_ONCE(sp->srcu_idx);
109 WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1);
110 return idx;
111}
112EXPORT_SYMBOL_GPL(__srcu_read_lock);
113
114/*
115 * Removes the count for the old reader from the appropriate element of 99 * Removes the count for the old reader from the appropriate element of
116 * the srcu_struct. 100 * the srcu_struct.
117 */ 101 */
@@ -133,52 +117,44 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock);
133void srcu_drive_gp(struct work_struct *wp) 117void srcu_drive_gp(struct work_struct *wp)
134{ 118{
135 int idx; 119 int idx;
136 struct rcu_cblist ready_cbs; 120 struct rcu_head *lh;
137 struct srcu_struct *sp;
138 struct rcu_head *rhp; 121 struct rcu_head *rhp;
122 struct srcu_struct *sp;
139 123
140 sp = container_of(wp, struct srcu_struct, srcu_work); 124 sp = container_of(wp, struct srcu_struct, srcu_work);
141 if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist)) 125 if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head))
142 return; /* Already running or nothing to do. */ 126 return; /* Already running or nothing to do. */
143 127
144 /* Tag recently arrived callbacks and wait for readers. */ 128 /* Remove recently arrived callbacks and wait for readers. */
145 WRITE_ONCE(sp->srcu_gp_running, true); 129 WRITE_ONCE(sp->srcu_gp_running, true);
146 rcu_segcblist_accelerate(&sp->srcu_cblist, 130 local_irq_disable();
147 rcu_seq_snap(&sp->srcu_gp_seq)); 131 lh = sp->srcu_cb_head;
148 rcu_seq_start(&sp->srcu_gp_seq); 132 sp->srcu_cb_head = NULL;
133 sp->srcu_cb_tail = &sp->srcu_cb_head;
134 local_irq_enable();
149 idx = sp->srcu_idx; 135 idx = sp->srcu_idx;
150 WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); 136 WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx);
151 WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ 137 WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
152 swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); 138 swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx]));
153 WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ 139 WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
154 rcu_seq_end(&sp->srcu_gp_seq); 140
155 141 /* Invoke the callbacks we removed above. */
156 /* Update callback list based on GP, and invoke ready callbacks. */ 142 while (lh) {
157 rcu_segcblist_advance(&sp->srcu_cblist, 143 rhp = lh;
158 rcu_seq_current(&sp->srcu_gp_seq)); 144 lh = lh->next;
159 if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) { 145 local_bh_disable();
160 rcu_cblist_init(&ready_cbs); 146 rhp->func(rhp);
161 local_irq_disable(); 147 local_bh_enable();
162 rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs);
163 local_irq_enable();
164 rhp = rcu_cblist_dequeue(&ready_cbs);
165 for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
166 local_bh_disable();
167 rhp->func(rhp);
168 local_bh_enable();
169 }
170 local_irq_disable();
171 rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs);
172 local_irq_enable();
173 } 148 }
174 WRITE_ONCE(sp->srcu_gp_running, false);
175 149
176 /* 150 /*
177 * If more callbacks, reschedule ourselves. This can race with 151 * Enable rescheduling, and if there are more callbacks,
178 * a call_srcu() at interrupt level, but the ->srcu_gp_running 152 * reschedule ourselves. This can race with a call_srcu()
179 * checks will straighten that out. 153 * at interrupt level, but the ->srcu_gp_running checks will
154 * straighten that out.
180 */ 155 */
181 if (!rcu_segcblist_empty(&sp->srcu_cblist)) 156 WRITE_ONCE(sp->srcu_gp_running, false);
157 if (READ_ONCE(sp->srcu_cb_head))
182 schedule_work(&sp->srcu_work); 158 schedule_work(&sp->srcu_work);
183} 159}
184EXPORT_SYMBOL_GPL(srcu_drive_gp); 160EXPORT_SYMBOL_GPL(srcu_drive_gp);
@@ -187,14 +163,16 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp);
187 * Enqueue an SRCU callback on the specified srcu_struct structure, 163 * Enqueue an SRCU callback on the specified srcu_struct structure,
188 * initiating grace-period processing if it is not already running. 164 * initiating grace-period processing if it is not already running.
189 */ 165 */
190void call_srcu(struct srcu_struct *sp, struct rcu_head *head, 166void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
191 rcu_callback_t func) 167 rcu_callback_t func)
192{ 168{
193 unsigned long flags; 169 unsigned long flags;
194 170
195 head->func = func; 171 rhp->func = func;
172 rhp->next = NULL;
196 local_irq_save(flags); 173 local_irq_save(flags);
197 rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); 174 *sp->srcu_cb_tail = rhp;
175 sp->srcu_cb_tail = &rhp->next;
198 local_irq_restore(flags); 176 local_irq_restore(flags);
199 if (!READ_ONCE(sp->srcu_gp_running)) 177 if (!READ_ONCE(sp->srcu_gp_running))
200 schedule_work(&sp->srcu_work); 178 schedule_work(&sp->srcu_work);
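
Editorial note, illustrative only: Tiny SRCU now keeps its callbacks on an open-coded head/tail-pointer list instead of an rcu_segcblist. The sketch below (demo_* names hypothetical) shows the two list operations the patch relies on: O(1) append through the tail pointer in call_srcu(), and O(1) detach of the whole list in srcu_drive_gp(). Initialize with head = NULL and tail = &head, as init_srcu_struct_fields() does above.

    struct demo_cb_list {
            struct rcu_head *head;
            struct rcu_head **tail;         /* &head when empty, else &last->next. */
    };

    static void demo_cb_enqueue(struct demo_cb_list *l, struct rcu_head *rhp)
    {
            rhp->next = NULL;
            *l->tail = rhp;                 /* Append after the current last element. */
            l->tail = &rhp->next;
    }

    static struct rcu_head *demo_cb_detach_all(struct demo_cb_list *l)
    {
            struct rcu_head *lh = l->head;

            l->head = NULL;                 /* List is now empty... */
            l->tail = &l->head;             /* ...and ready for new enqueues. */
            return lh;                      /* Caller walks ->next, invoking ->func. */
    }
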
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 157654fa436a..d0ca524bf042 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -40,9 +40,15 @@
40#include "rcu.h" 40#include "rcu.h"
41#include "rcu_segcblist.h" 41#include "rcu_segcblist.h"
42 42
43ulong exp_holdoff = 25 * 1000; /* Holdoff (ns) for auto-expediting. */ 43/* Holdoff in nanoseconds for auto-expediting. */
44#define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
45static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
44module_param(exp_holdoff, ulong, 0444); 46module_param(exp_holdoff, ulong, 0444);
45 47
48/* Overflow-check frequency. N bits roughly says every 2**N grace periods. */
49static ulong counter_wrap_check = (ULONG_MAX >> 2);
50module_param(counter_wrap_check, ulong, 0444);
51
46static void srcu_invoke_callbacks(struct work_struct *work); 52static void srcu_invoke_callbacks(struct work_struct *work);
47static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); 53static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
48 54
@@ -70,7 +76,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
70 76
71 /* Each pass through this loop initializes one srcu_node structure. */ 77 /* Each pass through this loop initializes one srcu_node structure. */
72 rcu_for_each_node_breadth_first(sp, snp) { 78 rcu_for_each_node_breadth_first(sp, snp) {
73 spin_lock_init(&snp->lock); 79 raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
74 WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != 80 WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
75 ARRAY_SIZE(snp->srcu_data_have_cbs)); 81 ARRAY_SIZE(snp->srcu_data_have_cbs));
76 for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) { 82 for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
@@ -104,7 +110,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
104 snp_first = sp->level[level]; 110 snp_first = sp->level[level];
105 for_each_possible_cpu(cpu) { 111 for_each_possible_cpu(cpu) {
106 sdp = per_cpu_ptr(sp->sda, cpu); 112 sdp = per_cpu_ptr(sp->sda, cpu);
107 spin_lock_init(&sdp->lock); 113 raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
108 rcu_segcblist_init(&sdp->srcu_cblist); 114 rcu_segcblist_init(&sdp->srcu_cblist);
109 sdp->srcu_cblist_invoking = false; 115 sdp->srcu_cblist_invoking = false;
110 sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; 116 sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
@@ -163,7 +169,7 @@ int __init_srcu_struct(struct srcu_struct *sp, const char *name,
163 /* Don't re-initialize a lock while it is held. */ 169 /* Don't re-initialize a lock while it is held. */
164 debug_check_no_locks_freed((void *)sp, sizeof(*sp)); 170 debug_check_no_locks_freed((void *)sp, sizeof(*sp));
165 lockdep_init_map(&sp->dep_map, name, key, 0); 171 lockdep_init_map(&sp->dep_map, name, key, 0);
166 spin_lock_init(&sp->gp_lock); 172 raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
167 return init_srcu_struct_fields(sp, false); 173 return init_srcu_struct_fields(sp, false);
168} 174}
169EXPORT_SYMBOL_GPL(__init_srcu_struct); 175EXPORT_SYMBOL_GPL(__init_srcu_struct);
@@ -180,7 +186,7 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct);
180 */ 186 */
181int init_srcu_struct(struct srcu_struct *sp) 187int init_srcu_struct(struct srcu_struct *sp)
182{ 188{
183 spin_lock_init(&sp->gp_lock); 189 raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
184 return init_srcu_struct_fields(sp, false); 190 return init_srcu_struct_fields(sp, false);
185} 191}
186EXPORT_SYMBOL_GPL(init_srcu_struct); 192EXPORT_SYMBOL_GPL(init_srcu_struct);
@@ -191,7 +197,7 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
191 * First-use initialization of statically allocated srcu_struct 197 * First-use initialization of statically allocated srcu_struct
192 * structure. Wiring up the combining tree is more than can be 198 * structure. Wiring up the combining tree is more than can be
193 * done with compile-time initialization, so this check is added 199 * done with compile-time initialization, so this check is added
194 * to each update-side SRCU primitive. Use ->gp_lock, which -is- 200 * to each update-side SRCU primitive. Use sp->lock, which -is-
195 * compile-time initialized, to resolve races involving multiple 201 * compile-time initialized, to resolve races involving multiple
196 * CPUs trying to garner first-use privileges. 202 * CPUs trying to garner first-use privileges.
197 */ 203 */
@@ -203,13 +209,13 @@ static void check_init_srcu_struct(struct srcu_struct *sp)
203 /* The smp_load_acquire() pairs with the smp_store_release(). */ 209 /* The smp_load_acquire() pairs with the smp_store_release(). */
204 if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ 210 if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
205 return; /* Already initialized. */ 211 return; /* Already initialized. */
206 spin_lock_irqsave(&sp->gp_lock, flags); 212 raw_spin_lock_irqsave_rcu_node(sp, flags);
207 if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { 213 if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
208 spin_unlock_irqrestore(&sp->gp_lock, flags); 214 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
209 return; 215 return;
210 } 216 }
211 init_srcu_struct_fields(sp, true); 217 init_srcu_struct_fields(sp, true);
212 spin_unlock_irqrestore(&sp->gp_lock, flags); 218 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
213} 219}
214 220
215/* 221/*
@@ -275,15 +281,20 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
275 * not mean that there are no more readers, as one could have read 281 * not mean that there are no more readers, as one could have read
276 * the current index but not have incremented the lock counter yet. 282 * the current index but not have incremented the lock counter yet.
277 * 283 *
278 * Possible bug: There is no guarantee that there haven't been 284 * So suppose that the updater is preempted here for so long
279 * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were 285 * that more than ULONG_MAX non-nested readers come and go in
280 * counted, meaning that this could return true even if there are 286 * the meantime. It turns out that this cannot result in overflow
281 * still active readers. Since there are no memory barriers around 287 * because if a reader modifies its unlock count after we read it
282 * srcu_flip(), the CPU is not required to increment ->srcu_idx 288 * above, then that reader's next load of ->srcu_idx is guaranteed
283 * before running srcu_readers_unlock_idx(), which means that there 289 * to get the new value, which will cause it to operate on the
284 * could be an arbitrarily large number of critical sections that 290 * other bank of counters, where it cannot contribute to the
285 * execute after srcu_readers_unlock_idx() but use the old value 291 * overflow of these counters. This means that there is a maximum
286 * of ->srcu_idx. 292 * of 2*NR_CPUS increments, which cannot overflow given current
293 * systems, especially not on 64-bit systems.
294 *
295 * OK, how about nesting? This does impose a limit on nesting
296 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
297 * especially on 64-bit systems.
287 */ 298 */
288 return srcu_readers_lock_idx(sp, idx) == unlocks; 299 return srcu_readers_lock_idx(sp, idx) == unlocks;
289} 300}
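
Editorial note, a back-of-the-envelope check of the nesting bound stated in the new comment above (illustrative arithmetic only):

    /*
     * With 64-bit unsigned long and NR_CPUS = 4096:
     *
     *      ULONG_MAX / NR_CPUS / 2 = 18446744073709551615 / 4096 / 2
     *                              ~= 2.25e15
     *
     * nested srcu_read_lock() calls per reader before the counters could be
     * fooled, and even a 32-bit kernel with NR_CPUS = 8 allows roughly
     * 2.7e8 levels of nesting, far beyond anything practical.
     */
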
@@ -400,8 +411,7 @@ static void srcu_gp_start(struct srcu_struct *sp)
400 struct srcu_data *sdp = this_cpu_ptr(sp->sda); 411 struct srcu_data *sdp = this_cpu_ptr(sp->sda);
401 int state; 412 int state;
402 413
403 RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock), 414 lockdep_assert_held(&sp->lock);
404 "Invoked srcu_gp_start() without ->gp_lock!");
405 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); 415 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
406 rcu_segcblist_advance(&sdp->srcu_cblist, 416 rcu_segcblist_advance(&sdp->srcu_cblist,
407 rcu_seq_current(&sp->srcu_gp_seq)); 417 rcu_seq_current(&sp->srcu_gp_seq));
@@ -489,17 +499,20 @@ static void srcu_gp_end(struct srcu_struct *sp)
489{ 499{
490 unsigned long cbdelay; 500 unsigned long cbdelay;
491 bool cbs; 501 bool cbs;
502 int cpu;
503 unsigned long flags;
492 unsigned long gpseq; 504 unsigned long gpseq;
493 int idx; 505 int idx;
494 int idxnext; 506 int idxnext;
495 unsigned long mask; 507 unsigned long mask;
508 struct srcu_data *sdp;
496 struct srcu_node *snp; 509 struct srcu_node *snp;
497 510
498 /* Prevent more than one additional grace period. */ 511 /* Prevent more than one additional grace period. */
499 mutex_lock(&sp->srcu_cb_mutex); 512 mutex_lock(&sp->srcu_cb_mutex);
500 513
501 /* End the current grace period. */ 514 /* End the current grace period. */
502 spin_lock_irq(&sp->gp_lock); 515 raw_spin_lock_irq_rcu_node(sp);
503 idx = rcu_seq_state(sp->srcu_gp_seq); 516 idx = rcu_seq_state(sp->srcu_gp_seq);
504 WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); 517 WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
505 cbdelay = srcu_get_delay(sp); 518 cbdelay = srcu_get_delay(sp);
@@ -508,7 +521,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
508 gpseq = rcu_seq_current(&sp->srcu_gp_seq); 521 gpseq = rcu_seq_current(&sp->srcu_gp_seq);
509 if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) 522 if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
510 sp->srcu_gp_seq_needed_exp = gpseq; 523 sp->srcu_gp_seq_needed_exp = gpseq;
511 spin_unlock_irq(&sp->gp_lock); 524 raw_spin_unlock_irq_rcu_node(sp);
512 mutex_unlock(&sp->srcu_gp_mutex); 525 mutex_unlock(&sp->srcu_gp_mutex);
513 /* A new grace period can start at this point. But only one. */ 526 /* A new grace period can start at this point. But only one. */
514 527
@@ -516,7 +529,7 @@ static void srcu_gp_end(struct srcu_struct *sp)
516 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); 529 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
517 idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); 530 idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
518 rcu_for_each_node_breadth_first(sp, snp) { 531 rcu_for_each_node_breadth_first(sp, snp) {
519 spin_lock_irq(&snp->lock); 532 raw_spin_lock_irq_rcu_node(snp);
520 cbs = false; 533 cbs = false;
521 if (snp >= sp->level[rcu_num_lvls - 1]) 534 if (snp >= sp->level[rcu_num_lvls - 1])
522 cbs = snp->srcu_have_cbs[idx] == gpseq; 535 cbs = snp->srcu_have_cbs[idx] == gpseq;
@@ -526,28 +539,37 @@ static void srcu_gp_end(struct srcu_struct *sp)
526 snp->srcu_gp_seq_needed_exp = gpseq; 539 snp->srcu_gp_seq_needed_exp = gpseq;
527 mask = snp->srcu_data_have_cbs[idx]; 540 mask = snp->srcu_data_have_cbs[idx];
528 snp->srcu_data_have_cbs[idx] = 0; 541 snp->srcu_data_have_cbs[idx] = 0;
529 spin_unlock_irq(&snp->lock); 542 raw_spin_unlock_irq_rcu_node(snp);
530 if (cbs) { 543 if (cbs)
531 smp_mb(); /* GP end before CB invocation. */
532 srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); 544 srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
533 } 545
546 /* Occasionally prevent srcu_data counter wrap. */
547 if (!(gpseq & counter_wrap_check))
548 for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
549 sdp = per_cpu_ptr(sp->sda, cpu);
550 raw_spin_lock_irqsave_rcu_node(sdp, flags);
551 if (ULONG_CMP_GE(gpseq,
552 sdp->srcu_gp_seq_needed + 100))
553 sdp->srcu_gp_seq_needed = gpseq;
554 raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
555 }
534 } 556 }
535 557
536 /* Callback initiation done, allow grace periods after next. */ 558 /* Callback initiation done, allow grace periods after next. */
537 mutex_unlock(&sp->srcu_cb_mutex); 559 mutex_unlock(&sp->srcu_cb_mutex);
538 560
539 /* Start a new grace period if needed. */ 561 /* Start a new grace period if needed. */
540 spin_lock_irq(&sp->gp_lock); 562 raw_spin_lock_irq_rcu_node(sp);
541 gpseq = rcu_seq_current(&sp->srcu_gp_seq); 563 gpseq = rcu_seq_current(&sp->srcu_gp_seq);
542 if (!rcu_seq_state(gpseq) && 564 if (!rcu_seq_state(gpseq) &&
543 ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { 565 ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
544 srcu_gp_start(sp); 566 srcu_gp_start(sp);
545 spin_unlock_irq(&sp->gp_lock); 567 raw_spin_unlock_irq_rcu_node(sp);
546 /* Throttle expedited grace periods: Should be rare! */ 568 /* Throttle expedited grace periods: Should be rare! */
547 srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff 569 srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
548 ? 0 : SRCU_INTERVAL); 570 ? 0 : SRCU_INTERVAL);
549 } else { 571 } else {
550 spin_unlock_irq(&sp->gp_lock); 572 raw_spin_unlock_irq_rcu_node(sp);
551 } 573 }
552} 574}
553 575
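
Editorial note, illustrative only: the new "Occasionally prevent srcu_data counter wrap" scan above runs when (gpseq & counter_wrap_check) == 0. With a mask of 2^N - 1 that happens once every 2^N values of ->srcu_gp_seq, so the default of ULONG_MAX >> 2 makes the scan a practically-never safety net that testers can tighten via the counter_wrap_check module parameter. A hypothetical helper stating the predicate:

    static bool demo_wrap_scan_due(unsigned long gpseq, unsigned long mask)
    {
            /* True once every (mask + 1) gpseq values when mask == 2^N - 1. */
            return (gpseq & mask) == 0;
    }
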
@@ -567,18 +589,18 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
567 if (rcu_seq_done(&sp->srcu_gp_seq, s) || 589 if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
568 ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) 590 ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
569 return; 591 return;
570 spin_lock_irqsave(&snp->lock, flags); 592 raw_spin_lock_irqsave_rcu_node(snp, flags);
571 if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) { 593 if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
572 spin_unlock_irqrestore(&snp->lock, flags); 594 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
573 return; 595 return;
574 } 596 }
575 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); 597 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
576 spin_unlock_irqrestore(&snp->lock, flags); 598 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
577 } 599 }
578 spin_lock_irqsave(&sp->gp_lock, flags); 600 raw_spin_lock_irqsave_rcu_node(sp, flags);
579 if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) 601 if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
580 sp->srcu_gp_seq_needed_exp = s; 602 sp->srcu_gp_seq_needed_exp = s;
581 spin_unlock_irqrestore(&sp->gp_lock, flags); 603 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
582} 604}
583 605
584/* 606/*
@@ -600,14 +622,13 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
600 for (; snp != NULL; snp = snp->srcu_parent) { 622 for (; snp != NULL; snp = snp->srcu_parent) {
601 if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) 623 if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
602 return; /* GP already done and CBs recorded. */ 624 return; /* GP already done and CBs recorded. */
603 spin_lock_irqsave(&snp->lock, flags); 625 raw_spin_lock_irqsave_rcu_node(snp, flags);
604 if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { 626 if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
605 snp_seq = snp->srcu_have_cbs[idx]; 627 snp_seq = snp->srcu_have_cbs[idx];
606 if (snp == sdp->mynode && snp_seq == s) 628 if (snp == sdp->mynode && snp_seq == s)
607 snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 629 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
608 spin_unlock_irqrestore(&snp->lock, flags); 630 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
609 if (snp == sdp->mynode && snp_seq != s) { 631 if (snp == sdp->mynode && snp_seq != s) {
610 smp_mb(); /* CBs after GP! */
611 srcu_schedule_cbs_sdp(sdp, do_norm 632 srcu_schedule_cbs_sdp(sdp, do_norm
612 ? SRCU_INTERVAL 633 ? SRCU_INTERVAL
613 : 0); 634 : 0);
@@ -622,11 +643,11 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
622 snp->srcu_data_have_cbs[idx] |= sdp->grpmask; 643 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
623 if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s)) 644 if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
624 snp->srcu_gp_seq_needed_exp = s; 645 snp->srcu_gp_seq_needed_exp = s;
625 spin_unlock_irqrestore(&snp->lock, flags); 646 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
626 } 647 }
627 648
628 /* Top of tree, must ensure the grace period will be started. */ 649 /* Top of tree, must ensure the grace period will be started. */
629 spin_lock_irqsave(&sp->gp_lock, flags); 650 raw_spin_lock_irqsave_rcu_node(sp, flags);
630 if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { 651 if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
631 /* 652 /*
632 * Record need for grace period s. Pair with load 653 * Record need for grace period s. Pair with load
@@ -645,7 +666,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
645 queue_delayed_work(system_power_efficient_wq, &sp->work, 666 queue_delayed_work(system_power_efficient_wq, &sp->work,
646 srcu_get_delay(sp)); 667 srcu_get_delay(sp));
647 } 668 }
648 spin_unlock_irqrestore(&sp->gp_lock, flags); 669 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
649} 670}
650 671
651/* 672/*
@@ -671,6 +692,16 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
671 */ 692 */
672static void srcu_flip(struct srcu_struct *sp) 693static void srcu_flip(struct srcu_struct *sp)
673{ 694{
695 /*
696 * Ensure that if this updater saw a given reader's increment
697 * from __srcu_read_lock(), that reader was using an old value
698 * of ->srcu_idx. Also ensure that if a given reader sees the
699 * new value of ->srcu_idx, this updater's earlier scans cannot
700 * have seen that reader's increments (which is OK, because this
701 * grace period need not wait on that reader).
702 */
703 smp_mb(); /* E */ /* Pairs with B and C. */
704
674 WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); 705 WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
675 706
676 /* 707 /*
@@ -745,6 +776,13 @@ static bool srcu_might_be_idle(struct srcu_struct *sp)
745} 776}
746 777
747/* 778/*
779 * SRCU callback function to leak a callback.
780 */
781static void srcu_leak_callback(struct rcu_head *rhp)
782{
783}
784
785/*
748 * Enqueue an SRCU callback on the srcu_data structure associated with 786 * Enqueue an SRCU callback on the srcu_data structure associated with
749 * the current CPU and the specified srcu_struct structure, initiating 787 * the current CPU and the specified srcu_struct structure, initiating
750 * grace-period processing if it is not already running. 788 * grace-period processing if it is not already running.
@@ -782,10 +820,16 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
782 struct srcu_data *sdp; 820 struct srcu_data *sdp;
783 821
784 check_init_srcu_struct(sp); 822 check_init_srcu_struct(sp);
823 if (debug_rcu_head_queue(rhp)) {
824 /* Probable double call_srcu(), so leak the callback. */
825 WRITE_ONCE(rhp->func, srcu_leak_callback);
826 WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
827 return;
828 }
785 rhp->func = func; 829 rhp->func = func;
786 local_irq_save(flags); 830 local_irq_save(flags);
787 sdp = this_cpu_ptr(sp->sda); 831 sdp = this_cpu_ptr(sp->sda);
788 spin_lock(&sdp->lock); 832 raw_spin_lock_rcu_node(sdp);
789 rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); 833 rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
790 rcu_segcblist_advance(&sdp->srcu_cblist, 834 rcu_segcblist_advance(&sdp->srcu_cblist,
791 rcu_seq_current(&sp->srcu_gp_seq)); 835 rcu_seq_current(&sp->srcu_gp_seq));
@@ -799,13 +843,30 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
799 sdp->srcu_gp_seq_needed_exp = s; 843 sdp->srcu_gp_seq_needed_exp = s;
800 needexp = true; 844 needexp = true;
801 } 845 }
802 spin_unlock_irqrestore(&sdp->lock, flags); 846 raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
803 if (needgp) 847 if (needgp)
804 srcu_funnel_gp_start(sp, sdp, s, do_norm); 848 srcu_funnel_gp_start(sp, sdp, s, do_norm);
805 else if (needexp) 849 else if (needexp)
806 srcu_funnel_exp_start(sp, sdp->mynode, s); 850 srcu_funnel_exp_start(sp, sdp->mynode, s);
807} 851}
808 852
853/**
854 * call_srcu() - Queue a callback for invocation after an SRCU grace period
855 * @sp: srcu_struct on which to queue the callback
856 * @head: structure to be used for queueing the SRCU callback.
857 * @func: function to be invoked after the SRCU grace period
858 *
859 * The callback function will be invoked some time after a full SRCU
860 * grace period elapses, in other words after all pre-existing SRCU
861 * read-side critical sections have completed. However, the callback
862 * function might well execute concurrently with other SRCU read-side
863 * critical sections that started after call_srcu() was invoked. SRCU
864 * read-side critical sections are delimited by srcu_read_lock() and
865 * srcu_read_unlock(), and may be nested.
866 *
867 * The callback will be invoked from process context, but must nevertheless
868 * be fast and must not block.
869 */
809void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, 870void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
810 rcu_callback_t func) 871 rcu_callback_t func)
811{ 872{
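
Editorial note, an illustrative use of the interface documented above (demo_* names are hypothetical): the updater publishes a replacement and retires the old object via call_srcu() rather than blocking in synchronize_srcu().

    #include <linux/slab.h>
    #include <linux/srcu.h>

    struct demo_obj {
            int data;
            struct rcu_head rh;
    };

    DEFINE_SRCU(demo_domain);
    static struct demo_obj __rcu *demo_cur;

    static void demo_free_cb(struct rcu_head *rhp)
    {
            kfree(container_of(rhp, struct demo_obj, rh));  /* Runs after a full SRCU GP. */
    }

    static void demo_replace(struct demo_obj *newp)
    {
            struct demo_obj *old = rcu_dereference_protected(demo_cur, 1);

            rcu_assign_pointer(demo_cur, newp);
            if (old)
                    call_srcu(&demo_domain, &old->rh, demo_free_cb);
    }
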
@@ -953,13 +1014,16 @@ void srcu_barrier(struct srcu_struct *sp)
953 */ 1014 */
954 for_each_possible_cpu(cpu) { 1015 for_each_possible_cpu(cpu) {
955 sdp = per_cpu_ptr(sp->sda, cpu); 1016 sdp = per_cpu_ptr(sp->sda, cpu);
956 spin_lock_irq(&sdp->lock); 1017 raw_spin_lock_irq_rcu_node(sdp);
957 atomic_inc(&sp->srcu_barrier_cpu_cnt); 1018 atomic_inc(&sp->srcu_barrier_cpu_cnt);
958 sdp->srcu_barrier_head.func = srcu_barrier_cb; 1019 sdp->srcu_barrier_head.func = srcu_barrier_cb;
1020 debug_rcu_head_queue(&sdp->srcu_barrier_head);
959 if (!rcu_segcblist_entrain(&sdp->srcu_cblist, 1021 if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
960 &sdp->srcu_barrier_head, 0)) 1022 &sdp->srcu_barrier_head, 0)) {
1023 debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
961 atomic_dec(&sp->srcu_barrier_cpu_cnt); 1024 atomic_dec(&sp->srcu_barrier_cpu_cnt);
962 spin_unlock_irq(&sdp->lock); 1025 }
1026 raw_spin_unlock_irq_rcu_node(sdp);
963 } 1027 }
964 1028
965 /* Remove the initial count, at which point reaching zero can happen. */ 1029 /* Remove the initial count, at which point reaching zero can happen. */
@@ -1008,17 +1072,17 @@ static void srcu_advance_state(struct srcu_struct *sp)
1008 */ 1072 */
1009 idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ 1073 idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
1010 if (idx == SRCU_STATE_IDLE) { 1074 if (idx == SRCU_STATE_IDLE) {
1011 spin_lock_irq(&sp->gp_lock); 1075 raw_spin_lock_irq_rcu_node(sp);
1012 if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { 1076 if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
1013 WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); 1077 WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
1014 spin_unlock_irq(&sp->gp_lock); 1078 raw_spin_unlock_irq_rcu_node(sp);
1015 mutex_unlock(&sp->srcu_gp_mutex); 1079 mutex_unlock(&sp->srcu_gp_mutex);
1016 return; 1080 return;
1017 } 1081 }
1018 idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); 1082 idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
1019 if (idx == SRCU_STATE_IDLE) 1083 if (idx == SRCU_STATE_IDLE)
1020 srcu_gp_start(sp); 1084 srcu_gp_start(sp);
1021 spin_unlock_irq(&sp->gp_lock); 1085 raw_spin_unlock_irq_rcu_node(sp);
1022 if (idx != SRCU_STATE_IDLE) { 1086 if (idx != SRCU_STATE_IDLE) {
1023 mutex_unlock(&sp->srcu_gp_mutex); 1087 mutex_unlock(&sp->srcu_gp_mutex);
1024 return; /* Someone else started the grace period. */ 1088 return; /* Someone else started the grace period. */
@@ -1067,22 +1131,22 @@ static void srcu_invoke_callbacks(struct work_struct *work)
1067 sdp = container_of(work, struct srcu_data, work.work); 1131 sdp = container_of(work, struct srcu_data, work.work);
1068 sp = sdp->sp; 1132 sp = sdp->sp;
1069 rcu_cblist_init(&ready_cbs); 1133 rcu_cblist_init(&ready_cbs);
1070 spin_lock_irq(&sdp->lock); 1134 raw_spin_lock_irq_rcu_node(sdp);
1071 smp_mb(); /* Old grace periods before callback invocation! */
1072 rcu_segcblist_advance(&sdp->srcu_cblist, 1135 rcu_segcblist_advance(&sdp->srcu_cblist,
1073 rcu_seq_current(&sp->srcu_gp_seq)); 1136 rcu_seq_current(&sp->srcu_gp_seq));
1074 if (sdp->srcu_cblist_invoking || 1137 if (sdp->srcu_cblist_invoking ||
1075 !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { 1138 !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1076 spin_unlock_irq(&sdp->lock); 1139 raw_spin_unlock_irq_rcu_node(sdp);
1077 return; /* Someone else on the job or nothing to do. */ 1140 return; /* Someone else on the job or nothing to do. */
1078 } 1141 }
1079 1142
1080 /* We are on the job! Extract and invoke ready callbacks. */ 1143 /* We are on the job! Extract and invoke ready callbacks. */
1081 sdp->srcu_cblist_invoking = true; 1144 sdp->srcu_cblist_invoking = true;
1082 rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); 1145 rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1083 spin_unlock_irq(&sdp->lock); 1146 raw_spin_unlock_irq_rcu_node(sdp);
1084 rhp = rcu_cblist_dequeue(&ready_cbs); 1147 rhp = rcu_cblist_dequeue(&ready_cbs);
1085 for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { 1148 for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
1149 debug_rcu_head_unqueue(rhp);
1086 local_bh_disable(); 1150 local_bh_disable();
1087 rhp->func(rhp); 1151 rhp->func(rhp);
1088 local_bh_enable(); 1152 local_bh_enable();
@@ -1092,13 +1156,13 @@ static void srcu_invoke_callbacks(struct work_struct *work)
1092 * Update counts, accelerate new callbacks, and if needed, 1156 * Update counts, accelerate new callbacks, and if needed,
1093 * schedule another round of callback invocation. 1157 * schedule another round of callback invocation.
1094 */ 1158 */
1095 spin_lock_irq(&sdp->lock); 1159 raw_spin_lock_irq_rcu_node(sdp);
1096 rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); 1160 rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
1097 (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, 1161 (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
1098 rcu_seq_snap(&sp->srcu_gp_seq)); 1162 rcu_seq_snap(&sp->srcu_gp_seq));
1099 sdp->srcu_cblist_invoking = false; 1163 sdp->srcu_cblist_invoking = false;
1100 more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); 1164 more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1101 spin_unlock_irq(&sdp->lock); 1165 raw_spin_unlock_irq_rcu_node(sdp);
1102 if (more) 1166 if (more)
1103 srcu_schedule_cbs_sdp(sdp, 0); 1167 srcu_schedule_cbs_sdp(sdp, 0);
1104} 1168}
@@ -1111,7 +1175,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
1111{ 1175{
1112 bool pushgp = true; 1176 bool pushgp = true;
1113 1177
1114 spin_lock_irq(&sp->gp_lock); 1178 raw_spin_lock_irq_rcu_node(sp);
1115 if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { 1179 if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
1116 if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { 1180 if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
1117 /* All requests fulfilled, time to go idle. */ 1181 /* All requests fulfilled, time to go idle. */
@@ -1121,7 +1185,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
1121 /* Outstanding request and no GP. Start one. */ 1185 /* Outstanding request and no GP. Start one. */
1122 srcu_gp_start(sp); 1186 srcu_gp_start(sp);
1123 } 1187 }
1124 spin_unlock_irq(&sp->gp_lock); 1188 raw_spin_unlock_irq_rcu_node(sp);
1125 1189
1126 if (pushgp) 1190 if (pushgp)
1127 queue_delayed_work(system_power_efficient_wq, &sp->work, delay); 1191 queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
@@ -1152,3 +1216,12 @@ void srcutorture_get_gp_data(enum rcutorture_type test_type,
1152 *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed); 1216 *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
1153} 1217}
1154EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); 1218EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1219
1220static int __init srcu_bootup_announce(void)
1221{
1222 pr_info("Hierarchical SRCU implementation.\n");
1223 if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
1224 pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
1225 return 0;
1226}
1227early_initcall(srcu_bootup_announce);
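
The srcu_barrier() hunks above lean on a biased-initial-count completion scheme: ->srcu_barrier_cpu_cnt starts at one so it cannot reach zero while callbacks are still being enqueued, each queued callback holds one reference, and only the "Remove the initial count, at which point reaching zero can happen" step after the enqueue pass allows completion. A minimal user-space C sketch of that pattern follows; the names and pthread scaffolding are invented for illustration and are not kernel code.

/* Illustration only: biased-initial-count completion, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_CALLBACKS 4

static atomic_int barrier_cpu_cnt;

/* Whichever decrement takes the count to zero completes the barrier. */
static void put_barrier_ref(const char *who)
{
	if (atomic_fetch_sub(&barrier_cpu_cnt, 1) == 1)
		printf("%s dropped the last reference: barrier completes\n", who);
}

static void *callback(void *arg)
{
	(void)arg;
	/* Simulate a queued per-CPU barrier callback being invoked. */
	put_barrier_ref("callback");
	return NULL;
}

int main(void)
{
	pthread_t tid[NR_CALLBACKS];
	int i;

	/* Bias the count so it cannot hit zero during enqueueing. */
	atomic_store(&barrier_cpu_cnt, 1);

	for (i = 0; i < NR_CALLBACKS; i++) {
		atomic_fetch_add(&barrier_cpu_cnt, 1);	/* one reference per callback */
		pthread_create(&tid[i], NULL, callback, NULL);
	}

	/* "Remove the initial count, at which point reaching zero can happen." */
	put_barrier_ref("enqueuer");

	for (i = 0; i < NR_CALLBACKS; i++)
		pthread_join(tid[i], NULL);
	return 0;
}

Because the waiter's reference is dropped only after every callback has been queued, the count cannot reach zero early no matter how quickly the individual callbacks run.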
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index e5385731e391..f8488965250f 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -35,15 +35,26 @@
35#include <linux/time.h> 35#include <linux/time.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/prefetch.h> 37#include <linux/prefetch.h>
38#include <linux/trace_events.h>
39 38
40#include "rcu.h" 39#include "rcu.h"
41 40
42/* Forward declarations for tiny_plugin.h. */ 41/* Global control variables for rcupdate callback mechanism. */
43struct rcu_ctrlblk; 42struct rcu_ctrlblk {
44static void __call_rcu(struct rcu_head *head, 43 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
45 rcu_callback_t func, 44 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
46 struct rcu_ctrlblk *rcp); 45 struct rcu_head **curtail; /* ->next pointer of last CB. */
46};
47
48/* Definition for rcupdate control block. */
49static struct rcu_ctrlblk rcu_sched_ctrlblk = {
50 .donetail = &rcu_sched_ctrlblk.rcucblist,
51 .curtail = &rcu_sched_ctrlblk.rcucblist,
52};
53
54static struct rcu_ctrlblk rcu_bh_ctrlblk = {
55 .donetail = &rcu_bh_ctrlblk.rcucblist,
56 .curtail = &rcu_bh_ctrlblk.rcucblist,
57};
47 58
48#include "tiny_plugin.h" 59#include "tiny_plugin.h"
49 60
@@ -59,19 +70,6 @@ void rcu_barrier_sched(void)
59} 70}
60EXPORT_SYMBOL(rcu_barrier_sched); 71EXPORT_SYMBOL(rcu_barrier_sched);
61 72
62#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE)
63
64/*
65 * Test whether RCU thinks that the current CPU is idle.
66 */
67bool notrace __rcu_is_watching(void)
68{
69 return true;
70}
71EXPORT_SYMBOL(__rcu_is_watching);
72
73#endif /* defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */
74
75/* 73/*
76 * Helper function for rcu_sched_qs() and rcu_bh_qs(). 74 * Helper function for rcu_sched_qs() and rcu_bh_qs().
77 * Also irqs are disabled to avoid confusion due to interrupt handlers 75 * Also irqs are disabled to avoid confusion due to interrupt handlers
@@ -79,7 +77,6 @@ EXPORT_SYMBOL(__rcu_is_watching);
79 */ 77 */
80static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) 78static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
81{ 79{
82 RCU_TRACE(reset_cpu_stall_ticks(rcp);)
83 if (rcp->donetail != rcp->curtail) { 80 if (rcp->donetail != rcp->curtail) {
84 rcp->donetail = rcp->curtail; 81 rcp->donetail = rcp->curtail;
85 return 1; 82 return 1;
@@ -125,7 +122,6 @@ void rcu_bh_qs(void)
125 */ 122 */
126void rcu_check_callbacks(int user) 123void rcu_check_callbacks(int user)
127{ 124{
128 RCU_TRACE(check_cpu_stalls();)
129 if (user) 125 if (user)
130 rcu_sched_qs(); 126 rcu_sched_qs();
131 else if (!in_softirq()) 127 else if (!in_softirq())
@@ -140,10 +136,8 @@ void rcu_check_callbacks(int user)
140 */ 136 */
141static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) 137static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
142{ 138{
143 const char *rn = NULL;
144 struct rcu_head *next, *list; 139 struct rcu_head *next, *list;
145 unsigned long flags; 140 unsigned long flags;
146 RCU_TRACE(int cb_count = 0;)
147 141
148 /* Move the ready-to-invoke callbacks to a local list. */ 142 /* Move the ready-to-invoke callbacks to a local list. */
149 local_irq_save(flags); 143 local_irq_save(flags);
@@ -152,7 +146,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
152 local_irq_restore(flags); 146 local_irq_restore(flags);
153 return; 147 return;
154 } 148 }
155 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1);)
156 list = rcp->rcucblist; 149 list = rcp->rcucblist;
157 rcp->rcucblist = *rcp->donetail; 150 rcp->rcucblist = *rcp->donetail;
158 *rcp->donetail = NULL; 151 *rcp->donetail = NULL;
@@ -162,22 +155,15 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
162 local_irq_restore(flags); 155 local_irq_restore(flags);
163 156
164 /* Invoke the callbacks on the local list. */ 157 /* Invoke the callbacks on the local list. */
165 RCU_TRACE(rn = rcp->name;)
166 while (list) { 158 while (list) {
167 next = list->next; 159 next = list->next;
168 prefetch(next); 160 prefetch(next);
169 debug_rcu_head_unqueue(list); 161 debug_rcu_head_unqueue(list);
170 local_bh_disable(); 162 local_bh_disable();
171 __rcu_reclaim(rn, list); 163 __rcu_reclaim("", list);
172 local_bh_enable(); 164 local_bh_enable();
173 list = next; 165 list = next;
174 RCU_TRACE(cb_count++;)
175 } 166 }
176 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);)
177 RCU_TRACE(trace_rcu_batch_end(rcp->name,
178 cb_count, 0, need_resched(),
179 is_idle_task(current),
180 false));
181} 167}
182 168
183static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused) 169static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused)
@@ -221,7 +207,6 @@ static void __call_rcu(struct rcu_head *head,
221 local_irq_save(flags); 207 local_irq_save(flags);
222 *rcp->curtail = head; 208 *rcp->curtail = head;
223 rcp->curtail = &head->next; 209 rcp->curtail = &head->next;
224 RCU_TRACE(rcp->qlen++;)
225 local_irq_restore(flags); 210 local_irq_restore(flags);
226 211
227 if (unlikely(is_idle_task(current))) { 212 if (unlikely(is_idle_task(current))) {
@@ -254,8 +239,5 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
254void __init rcu_init(void) 239void __init rcu_init(void)
255{ 240{
256 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 241 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
257 RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk);)
258 RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk);)
259
260 rcu_early_boot_tests(); 242 rcu_early_boot_tests();
261} 243}
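
struct rcu_ctrlblk, moved into tiny.c above, keeps callbacks on a singly linked list driven by two tail pointers: __call_rcu() appends through ->curtail, a quiescent state advances ->donetail to ->curtail, and __rcu_process_callbacks() splices off everything up to *->donetail before invoking it. The following self-contained user-space sketch mirrors that idiom; the field names follow the kernel structure, but the rest is invented for illustration.

/* Illustration only: the ->curtail/->donetail tail-pointer list idiom. */
#include <stdio.h>
#include <stdlib.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

struct ctrlblk {
	struct cb *cblist;	/* List of pending callbacks. */
	struct cb **donetail;	/* ->next pointer of last "done" callback. */
	struct cb **curtail;	/* ->next pointer of last callback. */
};

static void enqueue(struct ctrlblk *rcp, struct cb *head, void (*func)(struct cb *))
{
	head->func = func;
	head->next = NULL;
	*rcp->curtail = head;		/* append at the tail... */
	rcp->curtail = &head->next;	/* ...and remember the new tail. */
}

static void note_quiescent_state(struct ctrlblk *rcp)
{
	rcp->donetail = rcp->curtail;	/* everything queued so far is now done */
}

static void process_callbacks(struct ctrlblk *rcp)
{
	struct cb *list = rcp->cblist, *next;

	rcp->cblist = *rcp->donetail;	/* detach the done sublist */
	*rcp->donetail = NULL;
	if (rcp->curtail == rcp->donetail)
		rcp->curtail = &rcp->cblist;
	rcp->donetail = &rcp->cblist;

	for (; list; list = next) {	/* invoke the detached callbacks */
		next = list->next;
		list->func(list);
	}
}

static void print_cb(struct cb *c)
{
	printf("invoked %p\n", (void *)c);
	free(c);
}

int main(void)
{
	struct ctrlblk rcp = { .donetail = &rcp.cblist, .curtail = &rcp.cblist };

	enqueue(&rcp, malloc(sizeof(struct cb)), print_cb);
	enqueue(&rcp, malloc(sizeof(struct cb)), print_cb);
	note_quiescent_state(&rcp);
	process_callbacks(&rcp);
	return 0;
}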
diff --git a/kernel/rcu/tiny_plugin.h b/kernel/rcu/tiny_plugin.h
index 371034e77f87..f0a01b2a3062 100644
--- a/kernel/rcu/tiny_plugin.h
+++ b/kernel/rcu/tiny_plugin.h
@@ -22,36 +22,6 @@
22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> 22 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
23 */ 23 */
24 24
25#include <linux/kthread.h>
26#include <linux/init.h>
27#include <linux/debugfs.h>
28#include <linux/seq_file.h>
29
30/* Global control variables for rcupdate callback mechanism. */
31struct rcu_ctrlblk {
32 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
33 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
34 struct rcu_head **curtail; /* ->next pointer of last CB. */
35 RCU_TRACE(long qlen); /* Number of pending CBs. */
36 RCU_TRACE(unsigned long gp_start); /* Start time for stalls. */
37 RCU_TRACE(unsigned long ticks_this_gp); /* Statistic for stalls. */
38 RCU_TRACE(unsigned long jiffies_stall); /* Jiffies at next stall. */
39 RCU_TRACE(const char *name); /* Name of RCU type. */
40};
41
42/* Definition for rcupdate control block. */
43static struct rcu_ctrlblk rcu_sched_ctrlblk = {
44 .donetail = &rcu_sched_ctrlblk.rcucblist,
45 .curtail = &rcu_sched_ctrlblk.rcucblist,
46 RCU_TRACE(.name = "rcu_sched")
47};
48
49static struct rcu_ctrlblk rcu_bh_ctrlblk = {
50 .donetail = &rcu_bh_ctrlblk.rcucblist,
51 .curtail = &rcu_bh_ctrlblk.rcucblist,
52 RCU_TRACE(.name = "rcu_bh")
53};
54
55#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) 25#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU)
56#include <linux/kernel_stat.h> 26#include <linux/kernel_stat.h>
57 27
@@ -75,96 +45,3 @@ void __init rcu_scheduler_starting(void)
75} 45}
76 46
77#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ 47#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */
78
79#ifdef CONFIG_RCU_TRACE
80
81static void rcu_trace_sub_qlen(struct rcu_ctrlblk *rcp, int n)
82{
83 unsigned long flags;
84
85 local_irq_save(flags);
86 rcp->qlen -= n;
87 local_irq_restore(flags);
88}
89
90/*
91 * Dump statistics for TINY_RCU, such as they are.
92 */
93static int show_tiny_stats(struct seq_file *m, void *unused)
94{
95 seq_printf(m, "rcu_sched: qlen: %ld\n", rcu_sched_ctrlblk.qlen);
96 seq_printf(m, "rcu_bh: qlen: %ld\n", rcu_bh_ctrlblk.qlen);
97 return 0;
98}
99
100static int show_tiny_stats_open(struct inode *inode, struct file *file)
101{
102 return single_open(file, show_tiny_stats, NULL);
103}
104
105static const struct file_operations show_tiny_stats_fops = {
106 .owner = THIS_MODULE,
107 .open = show_tiny_stats_open,
108 .read = seq_read,
109 .llseek = seq_lseek,
110 .release = single_release,
111};
112
113static struct dentry *rcudir;
114
115static int __init rcutiny_trace_init(void)
116{
117 struct dentry *retval;
118
119 rcudir = debugfs_create_dir("rcu", NULL);
120 if (!rcudir)
121 goto free_out;
122 retval = debugfs_create_file("rcudata", 0444, rcudir,
123 NULL, &show_tiny_stats_fops);
124 if (!retval)
125 goto free_out;
126 return 0;
127free_out:
128 debugfs_remove_recursive(rcudir);
129 return 1;
130}
131device_initcall(rcutiny_trace_init);
132
133static void check_cpu_stall(struct rcu_ctrlblk *rcp)
134{
135 unsigned long j;
136 unsigned long js;
137
138 if (rcu_cpu_stall_suppress)
139 return;
140 rcp->ticks_this_gp++;
141 j = jiffies;
142 js = READ_ONCE(rcp->jiffies_stall);
143 if (rcp->rcucblist && ULONG_CMP_GE(j, js)) {
144 pr_err("INFO: %s stall on CPU (%lu ticks this GP) idle=%llx (t=%lu jiffies q=%ld)\n",
145 rcp->name, rcp->ticks_this_gp, DYNTICK_TASK_EXIT_IDLE,
146 jiffies - rcp->gp_start, rcp->qlen);
147 dump_stack();
148 WRITE_ONCE(rcp->jiffies_stall,
149 jiffies + 3 * rcu_jiffies_till_stall_check() + 3);
150 } else if (ULONG_CMP_GE(j, js)) {
151 WRITE_ONCE(rcp->jiffies_stall,
152 jiffies + rcu_jiffies_till_stall_check());
153 }
154}
155
156static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp)
157{
158 rcp->ticks_this_gp = 0;
159 rcp->gp_start = jiffies;
160 WRITE_ONCE(rcp->jiffies_stall,
161 jiffies + rcu_jiffies_till_stall_check());
162}
163
164static void check_cpu_stalls(void)
165{
166 RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk);)
167 RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk);)
168}
169
170#endif /* #ifdef CONFIG_RCU_TRACE */
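
The deleted check_cpu_stall() code compares jiffies values with ULONG_CMP_GE() rather than a plain >= so that the stall test keeps working after the counter wraps. A small user-space sketch of that wraparound-safe comparison idiom follows; the macro body is illustrative rather than copied from the kernel headers.

/* Illustration only: wraparound-safe "a is at or after b" comparison. */
#include <limits.h>
#include <stdio.h>

#define TIME_CMP_GE(a, b)	(ULONG_MAX / 2 >= (unsigned long)((a) - (b)))

int main(void)
{
	/* "now" has wrapped past the recorded stall deadline. */
	unsigned long now = 5, stall_at = ULONG_MAX - 5;

	printf("naive now >= stall_at: %d\n", now >= stall_at);		/* 0: wrong */
	printf("wrap-safe comparison:  %d\n", TIME_CMP_GE(now, stall_at));	/* 1: right */
	return 0;
}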
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e354e475e645..51d4c3acf32d 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -168,35 +168,17 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp,
168static void sync_sched_exp_online_cleanup(int cpu); 168static void sync_sched_exp_online_cleanup(int cpu);
169 169
170/* rcuc/rcub kthread realtime priority */ 170/* rcuc/rcub kthread realtime priority */
171#ifdef CONFIG_RCU_KTHREAD_PRIO
172static int kthread_prio = CONFIG_RCU_KTHREAD_PRIO;
173#else /* #ifdef CONFIG_RCU_KTHREAD_PRIO */
174static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0; 171static int kthread_prio = IS_ENABLED(CONFIG_RCU_BOOST) ? 1 : 0;
175#endif /* #else #ifdef CONFIG_RCU_KTHREAD_PRIO */
176module_param(kthread_prio, int, 0644); 172module_param(kthread_prio, int, 0644);
177 173
178/* Delay in jiffies for grace-period initialization delays, debug only. */ 174/* Delay in jiffies for grace-period initialization delays, debug only. */
179 175
180#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT 176static int gp_preinit_delay;
181static int gp_preinit_delay = CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY; 177module_param(gp_preinit_delay, int, 0444);
182module_param(gp_preinit_delay, int, 0644); 178static int gp_init_delay;
183#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ 179module_param(gp_init_delay, int, 0444);
184static const int gp_preinit_delay; 180static int gp_cleanup_delay;
185#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT */ 181module_param(gp_cleanup_delay, int, 0444);
186
187#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT
188static int gp_init_delay = CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY;
189module_param(gp_init_delay, int, 0644);
190#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
191static const int gp_init_delay;
192#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_INIT */
193
194#ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP
195static int gp_cleanup_delay = CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY;
196module_param(gp_cleanup_delay, int, 0644);
197#else /* #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
198static const int gp_cleanup_delay;
199#endif /* #else #ifdef CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP */
200 182
201/* 183/*
202 * Number of grace periods between delays, normalized by the duration of 184 * Number of grace periods between delays, normalized by the duration of
@@ -250,6 +232,7 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
250 */ 232 */
251void rcu_sched_qs(void) 233void rcu_sched_qs(void)
252{ 234{
235 RCU_LOCKDEP_WARN(preemptible(), "rcu_sched_qs() invoked with preemption enabled!!!");
253 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s)) 236 if (!__this_cpu_read(rcu_sched_data.cpu_no_qs.s))
254 return; 237 return;
255 trace_rcu_grace_period(TPS("rcu_sched"), 238 trace_rcu_grace_period(TPS("rcu_sched"),
@@ -265,6 +248,7 @@ void rcu_sched_qs(void)
265 248
266void rcu_bh_qs(void) 249void rcu_bh_qs(void)
267{ 250{
251 RCU_LOCKDEP_WARN(preemptible(), "rcu_bh_qs() invoked with preemption enabled!!!");
268 if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) { 252 if (__this_cpu_read(rcu_bh_data.cpu_no_qs.s)) {
269 trace_rcu_grace_period(TPS("rcu_bh"), 253 trace_rcu_grace_period(TPS("rcu_bh"),
270 __this_cpu_read(rcu_bh_data.gpnum), 254 __this_cpu_read(rcu_bh_data.gpnum),
@@ -286,10 +270,6 @@ void rcu_bh_qs(void)
286static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { 270static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
287 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE, 271 .dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
288 .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR), 272 .dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
289#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
290 .dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
291 .dynticks_idle = ATOMIC_INIT(1),
292#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
293}; 273};
294 274
295/* 275/*
@@ -478,7 +458,7 @@ void rcu_note_context_switch(bool preempt)
478 barrier(); /* Avoid RCU read-side critical sections leaking down. */ 458 barrier(); /* Avoid RCU read-side critical sections leaking down. */
479 trace_rcu_utilization(TPS("Start context switch")); 459 trace_rcu_utilization(TPS("Start context switch"));
480 rcu_sched_qs(); 460 rcu_sched_qs();
481 rcu_preempt_note_context_switch(); 461 rcu_preempt_note_context_switch(preempt);
482 /* Load rcu_urgent_qs before other flags. */ 462 /* Load rcu_urgent_qs before other flags. */
483 if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs))) 463 if (!smp_load_acquire(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs)))
484 goto out; 464 goto out;
@@ -534,9 +514,12 @@ void rcu_all_qs(void)
534} 514}
535EXPORT_SYMBOL_GPL(rcu_all_qs); 515EXPORT_SYMBOL_GPL(rcu_all_qs);
536 516
537static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */ 517#define DEFAULT_RCU_BLIMIT 10 /* Maximum callbacks per rcu_do_batch. */
538static long qhimark = 10000; /* If this many pending, ignore blimit. */ 518static long blimit = DEFAULT_RCU_BLIMIT;
539static long qlowmark = 100; /* Once only this many pending, use blimit. */ 519#define DEFAULT_RCU_QHIMARK 10000 /* If this many pending, ignore blimit. */
520static long qhimark = DEFAULT_RCU_QHIMARK;
521#define DEFAULT_RCU_QLOMARK 100 /* Once only this many pending, use blimit. */
522static long qlowmark = DEFAULT_RCU_QLOMARK;
540 523
541module_param(blimit, long, 0444); 524module_param(blimit, long, 0444);
542module_param(qhimark, long, 0444); 525module_param(qhimark, long, 0444);
@@ -559,10 +542,7 @@ module_param(jiffies_till_sched_qs, ulong, 0644);
559 542
560static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 543static bool rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
561 struct rcu_data *rdp); 544 struct rcu_data *rdp);
562static void force_qs_rnp(struct rcu_state *rsp, 545static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp));
563 int (*f)(struct rcu_data *rsp, bool *isidle,
564 unsigned long *maxj),
565 bool *isidle, unsigned long *maxj);
566static void force_quiescent_state(struct rcu_state *rsp); 546static void force_quiescent_state(struct rcu_state *rsp);
567static int rcu_pending(void); 547static int rcu_pending(void);
568 548
@@ -757,6 +737,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
757 int idx = (READ_ONCE(rnp->completed) + 1) & 0x1; 737 int idx = (READ_ONCE(rnp->completed) + 1) & 0x1;
758 int *fp = &rnp->need_future_gp[idx]; 738 int *fp = &rnp->need_future_gp[idx];
759 739
740 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_future_needs_gp() invoked with irqs enabled!!!");
760 return READ_ONCE(*fp); 741 return READ_ONCE(*fp);
761} 742}
762 743
@@ -768,6 +749,7 @@ static int rcu_future_needs_gp(struct rcu_state *rsp)
768static bool 749static bool
769cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp) 750cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
770{ 751{
752 RCU_LOCKDEP_WARN(!irqs_disabled(), "cpu_needs_another_gp() invoked with irqs enabled!!!");
771 if (rcu_gp_in_progress(rsp)) 753 if (rcu_gp_in_progress(rsp))
772 return false; /* No, a grace period is already in progress. */ 754 return false; /* No, a grace period is already in progress. */
773 if (rcu_future_needs_gp(rsp)) 755 if (rcu_future_needs_gp(rsp))
@@ -794,6 +776,7 @@ static void rcu_eqs_enter_common(bool user)
794 struct rcu_data *rdp; 776 struct rcu_data *rdp;
795 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 777 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
796 778
779 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_enter_common() invoked with irqs enabled!!!");
797 trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0); 780 trace_rcu_dyntick(TPS("Start"), rdtp->dynticks_nesting, 0);
798 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && 781 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
799 !user && !is_idle_task(current)) { 782 !user && !is_idle_task(current)) {
@@ -864,7 +847,6 @@ void rcu_idle_enter(void)
864 847
865 local_irq_save(flags); 848 local_irq_save(flags);
866 rcu_eqs_enter(false); 849 rcu_eqs_enter(false);
867 rcu_sysidle_enter(0);
868 local_irq_restore(flags); 850 local_irq_restore(flags);
869} 851}
870EXPORT_SYMBOL_GPL(rcu_idle_enter); 852EXPORT_SYMBOL_GPL(rcu_idle_enter);
@@ -914,7 +896,6 @@ void rcu_irq_exit(void)
914 trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1); 896 trace_rcu_dyntick(TPS("--="), rdtp->dynticks_nesting, rdtp->dynticks_nesting - 1);
915 rdtp->dynticks_nesting--; 897 rdtp->dynticks_nesting--;
916 } 898 }
917 rcu_sysidle_enter(1);
918} 899}
919 900
920/* 901/*
@@ -967,6 +948,7 @@ static void rcu_eqs_exit(bool user)
967 struct rcu_dynticks *rdtp; 948 struct rcu_dynticks *rdtp;
968 long long oldval; 949 long long oldval;
969 950
951 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_eqs_exit() invoked with irqs enabled!!!");
970 rdtp = this_cpu_ptr(&rcu_dynticks); 952 rdtp = this_cpu_ptr(&rcu_dynticks);
971 oldval = rdtp->dynticks_nesting; 953 oldval = rdtp->dynticks_nesting;
972 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0); 954 WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && oldval < 0);
@@ -995,7 +977,6 @@ void rcu_idle_exit(void)
995 977
996 local_irq_save(flags); 978 local_irq_save(flags);
997 rcu_eqs_exit(false); 979 rcu_eqs_exit(false);
998 rcu_sysidle_exit(0);
999 local_irq_restore(flags); 980 local_irq_restore(flags);
1000} 981}
1001EXPORT_SYMBOL_GPL(rcu_idle_exit); 982EXPORT_SYMBOL_GPL(rcu_idle_exit);
@@ -1047,7 +1028,6 @@ void rcu_irq_enter(void)
1047 trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting); 1028 trace_rcu_dyntick(TPS("++="), oldval, rdtp->dynticks_nesting);
1048 else 1029 else
1049 rcu_eqs_exit_common(oldval, true); 1030 rcu_eqs_exit_common(oldval, true);
1050 rcu_sysidle_exit(1);
1051} 1031}
1052 1032
1053/* 1033/*
@@ -1130,22 +1110,11 @@ void rcu_nmi_exit(void)
1130} 1110}
1131 1111
1132/** 1112/**
1133 * __rcu_is_watching - are RCU read-side critical sections safe?
1134 *
1135 * Return true if RCU is watching the running CPU, which means that
1136 * this CPU can safely enter RCU read-side critical sections. Unlike
1137 * rcu_is_watching(), the caller of __rcu_is_watching() must have at
1138 * least disabled preemption.
1139 */
1140bool notrace __rcu_is_watching(void)
1141{
1142 return !rcu_dynticks_curr_cpu_in_eqs();
1143}
1144
1145/**
1146 * rcu_is_watching - see if RCU thinks that the current CPU is idle 1113 * rcu_is_watching - see if RCU thinks that the current CPU is idle
1147 * 1114 *
1148 * If the current CPU is in its idle loop and is neither in an interrupt 1115 * Return true if RCU is watching the running CPU, which means that this
1116 * CPU can safely enter RCU read-side critical sections. In other words,
1117 * if the current CPU is in its idle loop and is neither in an interrupt
1149 * or NMI handler, return true. 1118 * or NMI handler, return true.
1150 */ 1119 */
1151bool notrace rcu_is_watching(void) 1120bool notrace rcu_is_watching(void)
@@ -1153,7 +1122,7 @@ bool notrace rcu_is_watching(void)
1153 bool ret; 1122 bool ret;
1154 1123
1155 preempt_disable_notrace(); 1124 preempt_disable_notrace();
1156 ret = __rcu_is_watching(); 1125 ret = !rcu_dynticks_curr_cpu_in_eqs();
1157 preempt_enable_notrace(); 1126 preempt_enable_notrace();
1158 return ret; 1127 return ret;
1159} 1128}
@@ -1237,11 +1206,9 @@ static int rcu_is_cpu_rrupt_from_idle(void)
1237 * credit them with an implicit quiescent state. Return 1 if this CPU 1206 * credit them with an implicit quiescent state. Return 1 if this CPU
1238 * is in dynticks idle mode, which is an extended quiescent state. 1207 * is in dynticks idle mode, which is an extended quiescent state.
1239 */ 1208 */
1240static int dyntick_save_progress_counter(struct rcu_data *rdp, 1209static int dyntick_save_progress_counter(struct rcu_data *rdp)
1241 bool *isidle, unsigned long *maxj)
1242{ 1210{
1243 rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks); 1211 rdp->dynticks_snap = rcu_dynticks_snap(rdp->dynticks);
1244 rcu_sysidle_check_cpu(rdp, isidle, maxj);
1245 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) { 1212 if (rcu_dynticks_in_eqs(rdp->dynticks_snap)) {
1246 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti")); 1213 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("dti"));
1247 if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4, 1214 if (ULONG_CMP_LT(READ_ONCE(rdp->gpnum) + ULONG_MAX / 4,
@@ -1258,8 +1225,7 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp,
1258 * idle state since the last call to dyntick_save_progress_counter() 1225 * idle state since the last call to dyntick_save_progress_counter()
1259 * for this same CPU, or by virtue of having been offline. 1226 * for this same CPU, or by virtue of having been offline.
1260 */ 1227 */
1261static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, 1228static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
1262 bool *isidle, unsigned long *maxj)
1263{ 1229{
1264 unsigned long jtsq; 1230 unsigned long jtsq;
1265 bool *rnhqp; 1231 bool *rnhqp;
@@ -1674,6 +1640,8 @@ void rcu_cpu_stall_reset(void)
1674static unsigned long rcu_cbs_completed(struct rcu_state *rsp, 1640static unsigned long rcu_cbs_completed(struct rcu_state *rsp,
1675 struct rcu_node *rnp) 1641 struct rcu_node *rnp)
1676{ 1642{
1643 lockdep_assert_held(&rnp->lock);
1644
1677 /* 1645 /*
1678 * If RCU is idle, we just wait for the next grace period. 1646 * If RCU is idle, we just wait for the next grace period.
1679 * But we can only be sure that RCU is idle if we are looking 1647 * But we can only be sure that RCU is idle if we are looking
@@ -1719,6 +1687,8 @@ rcu_start_future_gp(struct rcu_node *rnp, struct rcu_data *rdp,
1719 bool ret = false; 1687 bool ret = false;
1720 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp); 1688 struct rcu_node *rnp_root = rcu_get_root(rdp->rsp);
1721 1689
1690 lockdep_assert_held(&rnp->lock);
1691
1722 /* 1692 /*
1723 * Pick up grace-period number for new callbacks. If this 1693 * Pick up grace-period number for new callbacks. If this
1724 * grace period is already marked as needed, return to the caller. 1694 * grace period is already marked as needed, return to the caller.
@@ -1845,6 +1815,8 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1845{ 1815{
1846 bool ret = false; 1816 bool ret = false;
1847 1817
1818 lockdep_assert_held(&rnp->lock);
1819
1848 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ 1820 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
1849 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) 1821 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1850 return false; 1822 return false;
@@ -1883,6 +1855,8 @@ static bool rcu_accelerate_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1883static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp, 1855static bool rcu_advance_cbs(struct rcu_state *rsp, struct rcu_node *rnp,
1884 struct rcu_data *rdp) 1856 struct rcu_data *rdp)
1885{ 1857{
1858 lockdep_assert_held(&rnp->lock);
1859
1886 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */ 1860 /* If no pending (not yet ready to invoke) callbacks, nothing to do. */
1887 if (!rcu_segcblist_pend_cbs(&rdp->cblist)) 1861 if (!rcu_segcblist_pend_cbs(&rdp->cblist))
1888 return false; 1862 return false;
@@ -1909,6 +1883,8 @@ static bool __note_gp_changes(struct rcu_state *rsp, struct rcu_node *rnp,
1909 bool ret; 1883 bool ret;
1910 bool need_gp; 1884 bool need_gp;
1911 1885
1886 lockdep_assert_held(&rnp->lock);
1887
1912 /* Handle the ends of any preceding grace periods first. */ 1888 /* Handle the ends of any preceding grace periods first. */
1913 if (rdp->completed == rnp->completed && 1889 if (rdp->completed == rnp->completed &&
1914 !unlikely(READ_ONCE(rdp->gpwrap))) { 1890 !unlikely(READ_ONCE(rdp->gpwrap))) {
@@ -2115,25 +2091,16 @@ static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
2115 */ 2091 */
2116static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time) 2092static void rcu_gp_fqs(struct rcu_state *rsp, bool first_time)
2117{ 2093{
2118 bool isidle = false;
2119 unsigned long maxj;
2120 struct rcu_node *rnp = rcu_get_root(rsp); 2094 struct rcu_node *rnp = rcu_get_root(rsp);
2121 2095
2122 WRITE_ONCE(rsp->gp_activity, jiffies); 2096 WRITE_ONCE(rsp->gp_activity, jiffies);
2123 rsp->n_force_qs++; 2097 rsp->n_force_qs++;
2124 if (first_time) { 2098 if (first_time) {
2125 /* Collect dyntick-idle snapshots. */ 2099 /* Collect dyntick-idle snapshots. */
2126 if (is_sysidle_rcu_state(rsp)) { 2100 force_qs_rnp(rsp, dyntick_save_progress_counter);
2127 isidle = true;
2128 maxj = jiffies - ULONG_MAX / 4;
2129 }
2130 force_qs_rnp(rsp, dyntick_save_progress_counter,
2131 &isidle, &maxj);
2132 rcu_sysidle_report_gp(rsp, isidle, maxj);
2133 } else { 2101 } else {
2134 /* Handle dyntick-idle and offline CPUs. */ 2102 /* Handle dyntick-idle and offline CPUs. */
2135 isidle = true; 2103 force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
2136 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
2137 } 2104 }
2138 /* Clear flag to prevent immediate re-entry. */ 2105 /* Clear flag to prevent immediate re-entry. */
2139 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 2106 if (READ_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
@@ -2341,6 +2308,7 @@ static bool
2341rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp, 2308rcu_start_gp_advanced(struct rcu_state *rsp, struct rcu_node *rnp,
2342 struct rcu_data *rdp) 2309 struct rcu_data *rdp)
2343{ 2310{
2311 lockdep_assert_held(&rnp->lock);
2344 if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) { 2312 if (!rsp->gp_kthread || !cpu_needs_another_gp(rsp, rdp)) {
2345 /* 2313 /*
2346 * Either we have not yet spawned the grace-period 2314 * Either we have not yet spawned the grace-period
@@ -2402,6 +2370,7 @@ static bool rcu_start_gp(struct rcu_state *rsp)
2402static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) 2370static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
2403 __releases(rcu_get_root(rsp)->lock) 2371 __releases(rcu_get_root(rsp)->lock)
2404{ 2372{
2373 lockdep_assert_held(&rcu_get_root(rsp)->lock);
2405 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 2374 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
2406 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS); 2375 WRITE_ONCE(rsp->gp_flags, READ_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS);
2407 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags); 2376 raw_spin_unlock_irqrestore_rcu_node(rcu_get_root(rsp), flags);
@@ -2426,6 +2395,8 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
2426 unsigned long oldmask = 0; 2395 unsigned long oldmask = 0;
2427 struct rcu_node *rnp_c; 2396 struct rcu_node *rnp_c;
2428 2397
2398 lockdep_assert_held(&rnp->lock);
2399
2429 /* Walk up the rcu_node hierarchy. */ 2400 /* Walk up the rcu_node hierarchy. */
2430 for (;;) { 2401 for (;;) {
2431 if (!(rnp->qsmask & mask) || rnp->gpnum != gps) { 2402 if (!(rnp->qsmask & mask) || rnp->gpnum != gps) {
@@ -2486,6 +2457,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_state *rsp,
2486 unsigned long mask; 2457 unsigned long mask;
2487 struct rcu_node *rnp_p; 2458 struct rcu_node *rnp_p;
2488 2459
2460 lockdep_assert_held(&rnp->lock);
2489 if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p || 2461 if (rcu_state_p == &rcu_sched_state || rsp != rcu_state_p ||
2490 rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 2462 rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
2491 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 2463 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -2599,6 +2571,8 @@ static void
2599rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, 2571rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
2600 struct rcu_node *rnp, struct rcu_data *rdp) 2572 struct rcu_node *rnp, struct rcu_data *rdp)
2601{ 2573{
2574 lockdep_assert_held(&rsp->orphan_lock);
2575
2602 /* No-CBs CPUs do not have orphanable callbacks. */ 2576 /* No-CBs CPUs do not have orphanable callbacks. */
2603 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu)) 2577 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || rcu_is_nocb_cpu(rdp->cpu))
2604 return; 2578 return;
@@ -2639,6 +2613,8 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags)
2639{ 2613{
2640 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 2614 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
2641 2615
2616 lockdep_assert_held(&rsp->orphan_lock);
2617
2642 /* No-CBs CPUs are handled specially. */ 2618 /* No-CBs CPUs are handled specially. */
2643 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || 2619 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2644 rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags)) 2620 rcu_nocb_adopt_orphan_cbs(rsp, rdp, flags))
@@ -2705,6 +2681,7 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf)
2705 long mask; 2681 long mask;
2706 struct rcu_node *rnp = rnp_leaf; 2682 struct rcu_node *rnp = rnp_leaf;
2707 2683
2684 lockdep_assert_held(&rnp->lock);
2708 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) || 2685 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU) ||
2709 rnp->qsmaskinit || rcu_preempt_has_tasks(rnp)) 2686 rnp->qsmaskinit || rcu_preempt_has_tasks(rnp))
2710 return; 2687 return;
@@ -2895,10 +2872,7 @@ void rcu_check_callbacks(int user)
2895 * 2872 *
2896 * The caller must have suppressed start of new grace periods. 2873 * The caller must have suppressed start of new grace periods.
2897 */ 2874 */
2898static void force_qs_rnp(struct rcu_state *rsp, 2875static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *rsp))
2899 int (*f)(struct rcu_data *rsp, bool *isidle,
2900 unsigned long *maxj),
2901 bool *isidle, unsigned long *maxj)
2902{ 2876{
2903 int cpu; 2877 int cpu;
2904 unsigned long flags; 2878 unsigned long flags;
@@ -2937,7 +2911,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2937 for_each_leaf_node_possible_cpu(rnp, cpu) { 2911 for_each_leaf_node_possible_cpu(rnp, cpu) {
2938 unsigned long bit = leaf_node_cpu_bit(rnp, cpu); 2912 unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
2939 if ((rnp->qsmask & bit) != 0) { 2913 if ((rnp->qsmask & bit) != 0) {
2940 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) 2914 if (f(per_cpu_ptr(rsp->rda, cpu)))
2941 mask |= bit; 2915 mask |= bit;
2942 } 2916 }
2943 } 2917 }
@@ -3143,9 +3117,14 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
3143 WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); 3117 WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1));
3144 3118
3145 if (debug_rcu_head_queue(head)) { 3119 if (debug_rcu_head_queue(head)) {
3146 /* Probable double call_rcu(), so leak the callback. */ 3120 /*
3121 * Probable double call_rcu(), so leak the callback.
3122 * Use rcu:rcu_callback trace event to find the previous
3123 * time callback was passed to __call_rcu().
3124 */
3125 WARN_ONCE(1, "__call_rcu(): Double-freed CB %p->%pF()!!!\n",
3126 head, head->func);
3147 WRITE_ONCE(head->func, rcu_leak_callback); 3127 WRITE_ONCE(head->func, rcu_leak_callback);
3148 WARN_ONCE(1, "__call_rcu(): Leaked duplicate callback\n");
3149 return; 3128 return;
3150 } 3129 }
3151 head->func = func; 3130 head->func = func;
@@ -3194,8 +3173,24 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func,
3194 local_irq_restore(flags); 3173 local_irq_restore(flags);
3195} 3174}
3196 3175
3197/* 3176/**
3198 * Queue an RCU-sched callback for invocation after a grace period. 3177 * call_rcu_sched() - Queue an RCU callback for invocation after a sched grace period.
3178 * @head: structure to be used for queueing the RCU updates.
3179 * @func: actual callback function to be invoked after the grace period
3180 *
3181 * The callback function will be invoked some time after a full grace
3182 * period elapses, in other words after all currently executing RCU
3183 * read-side critical sections have completed. call_rcu_sched() assumes
3184 * that the read-side critical sections end on enabling of preemption
3185 * or on voluntary preemption.
3186 * RCU read-side critical sections are delimited by:
3187 * - rcu_read_lock_sched() and rcu_read_unlock_sched(), OR
3188 * - anything that disables preemption.
3189 *
3190 * These may be nested.
3191 *
3192 * See the description of call_rcu() for more detailed information on
3193 * memory ordering guarantees.
3199 */ 3194 */
3200void call_rcu_sched(struct rcu_head *head, rcu_callback_t func) 3195void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
3201{ 3196{
@@ -3203,8 +3198,26 @@ void call_rcu_sched(struct rcu_head *head, rcu_callback_t func)
3203} 3198}
3204EXPORT_SYMBOL_GPL(call_rcu_sched); 3199EXPORT_SYMBOL_GPL(call_rcu_sched);
3205 3200
3206/* 3201/**
3207 * Queue an RCU callback for invocation after a quicker grace period. 3202 * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
3203 * @head: structure to be used for queueing the RCU updates.
3204 * @func: actual callback function to be invoked after the grace period
3205 *
3206 * The callback function will be invoked some time after a full grace
3207 * period elapses, in other words after all currently executing RCU
3208 * read-side critical sections have completed. call_rcu_bh() assumes
3209 * that the read-side critical sections end on completion of a softirq
3210 * handler. This means that read-side critical sections in process
3211 * context must not be interrupted by softirqs. This interface is to be
3212 * used when most of the read-side critical sections are in softirq context.
3213 * RCU read-side critical sections are delimited by:
3214 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
3215 * OR
3216 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
3217 * These may be nested.
3218 *
3219 * See the description of call_rcu() for more detailed information on
3220 * memory ordering guarantees.
3208 */ 3221 */
3209void call_rcu_bh(struct rcu_head *head, rcu_callback_t func) 3222void call_rcu_bh(struct rcu_head *head, rcu_callback_t func)
3210{ 3223{
@@ -3280,12 +3293,6 @@ static inline int rcu_blocking_is_gp(void)
3280 * to have executed a full memory barrier during the execution of 3293 * to have executed a full memory barrier during the execution of
3281 * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but 3294 * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
3282 * again only if the system has more than one CPU). 3295 * again only if the system has more than one CPU).
3283 *
3284 * This primitive provides the guarantees made by the (now removed)
3285 * synchronize_kernel() API. In contrast, synchronize_rcu() only
3286 * guarantees that rcu_read_lock() sections will have completed.
3287 * In "classic RCU", these two guarantees happen to be one and
3288 * the same, but can differ in realtime RCU implementations.
3289 */ 3296 */
3290void synchronize_sched(void) 3297void synchronize_sched(void)
3291{ 3298{
@@ -3578,8 +3585,14 @@ static void rcu_barrier_func(void *type)
3578 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); 3585 struct rcu_data *rdp = raw_cpu_ptr(rsp->rda);
3579 3586
3580 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence); 3587 _rcu_barrier_trace(rsp, "IRQ", -1, rsp->barrier_sequence);
3581 atomic_inc(&rsp->barrier_cpu_count); 3588 rdp->barrier_head.func = rcu_barrier_callback;
3582 rsp->call(&rdp->barrier_head, rcu_barrier_callback); 3589 debug_rcu_head_queue(&rdp->barrier_head);
3590 if (rcu_segcblist_entrain(&rdp->cblist, &rdp->barrier_head, 0)) {
3591 atomic_inc(&rsp->barrier_cpu_count);
3592 } else {
3593 debug_rcu_head_unqueue(&rdp->barrier_head);
3594 _rcu_barrier_trace(rsp, "IRQNQ", -1, rsp->barrier_sequence);
3595 }
3583} 3596}
3584 3597
3585/* 3598/*
@@ -3698,6 +3711,7 @@ static void rcu_init_new_rnp(struct rcu_node *rnp_leaf)
3698 long mask; 3711 long mask;
3699 struct rcu_node *rnp = rnp_leaf; 3712 struct rcu_node *rnp = rnp_leaf;
3700 3713
3714 lockdep_assert_held(&rnp->lock);
3701 for (;;) { 3715 for (;;) {
3702 mask = rnp->grpmask; 3716 mask = rnp->grpmask;
3703 rnp = rnp->parent; 3717 rnp = rnp->parent;
@@ -3753,7 +3767,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp)
3753 !init_nocb_callback_list(rdp)) 3767 !init_nocb_callback_list(rdp))
3754 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */ 3768 rcu_segcblist_init(&rdp->cblist); /* Re-enable callbacks. */
3755 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; 3769 rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
3756 rcu_sysidle_init_percpu_data(rdp->dynticks);
3757 rcu_dynticks_eqs_online(); 3770 rcu_dynticks_eqs_online();
3758 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */ 3771 raw_spin_unlock_rcu_node(rnp); /* irqs remain disabled. */
3759 3772
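
The new kernel-doc for call_rcu_sched() and call_rcu_bh() above spells out when the callback may run, but not how a caller typically wires it up. A minimal usage sketch follows, assuming a made-up struct foo that embeds the rcu_head; it is illustration only and not part of this patch.

/* Illustration only: typical deferred-free usage of call_rcu_sched(). */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {				/* hypothetical RCU-protected object */
	int data;
	struct rcu_head rh;
};

static void foo_reclaim(struct rcu_head *rh)
{
	/* Runs after all pre-existing preempt-disabled regions have ended. */
	kfree(container_of(rh, struct foo, rh));
}

static void foo_release(struct foo *fp)
{
	/* Readers hold rcu_read_lock_sched() or have preemption disabled. */
	call_rcu_sched(&fp->rh, foo_reclaim);
}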
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ba38262c3554..9af0f31d6847 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -45,14 +45,6 @@ struct rcu_dynticks {
45 bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */ 45 bool rcu_need_heavy_qs; /* GP old, need heavy quiescent state. */
46 unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */ 46 unsigned long rcu_qs_ctr; /* Light universal quiescent state ctr. */
47 bool rcu_urgent_qs; /* GP old need light quiescent state. */ 47 bool rcu_urgent_qs; /* GP old need light quiescent state. */
48#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
49 long long dynticks_idle_nesting;
50 /* irq/process nesting level from idle. */
51 atomic_t dynticks_idle; /* Even value for idle, else odd. */
52 /* "Idle" excludes userspace execution. */
53 unsigned long dynticks_idle_jiffies;
54 /* End of last non-NMI non-idle period. */
55#endif /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
56#ifdef CONFIG_RCU_FAST_NO_HZ 48#ifdef CONFIG_RCU_FAST_NO_HZ
57 bool all_lazy; /* Are all CPU's CBs lazy? */ 49 bool all_lazy; /* Are all CPU's CBs lazy? */
58 unsigned long nonlazy_posted; 50 unsigned long nonlazy_posted;
@@ -160,19 +152,6 @@ struct rcu_node {
160 /* Number of tasks boosted for expedited GP. */ 152 /* Number of tasks boosted for expedited GP. */
161 unsigned long n_normal_boosts; 153 unsigned long n_normal_boosts;
162 /* Number of tasks boosted for normal GP. */ 154 /* Number of tasks boosted for normal GP. */
163 unsigned long n_balk_blkd_tasks;
164 /* Refused to boost: no blocked tasks. */
165 unsigned long n_balk_exp_gp_tasks;
166 /* Refused to boost: nothing blocking GP. */
167 unsigned long n_balk_boost_tasks;
168 /* Refused to boost: already boosting. */
169 unsigned long n_balk_notblocked;
170 /* Refused to boost: RCU RS CS still running. */
171 unsigned long n_balk_notyet;
172 /* Refused to boost: not yet time. */
173 unsigned long n_balk_nos;
174 /* Refused to boost: not sure why, though. */
175 /* This can happen due to race conditions. */
176#ifdef CONFIG_RCU_NOCB_CPU 155#ifdef CONFIG_RCU_NOCB_CPU
177 struct swait_queue_head nocb_gp_wq[2]; 156 struct swait_queue_head nocb_gp_wq[2];
178 /* Place for rcu_nocb_kthread() to wait GP. */ 157 /* Place for rcu_nocb_kthread() to wait GP. */
@@ -312,9 +291,9 @@ struct rcu_data {
312}; 291};
313 292
314/* Values for nocb_defer_wakeup field in struct rcu_data. */ 293/* Values for nocb_defer_wakeup field in struct rcu_data. */
315#define RCU_NOGP_WAKE_NOT 0 294#define RCU_NOCB_WAKE_NOT 0
316#define RCU_NOGP_WAKE 1 295#define RCU_NOCB_WAKE 1
317#define RCU_NOGP_WAKE_FORCE 2 296#define RCU_NOCB_WAKE_FORCE 2
318 297
319#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) 298#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
320 /* For jiffies_till_first_fqs and */ 299 /* For jiffies_till_first_fqs and */
@@ -477,7 +456,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work);
477 456
478/* Forward declarations for rcutree_plugin.h */ 457/* Forward declarations for rcutree_plugin.h */
479static void rcu_bootup_announce(void); 458static void rcu_bootup_announce(void);
480static void rcu_preempt_note_context_switch(void); 459static void rcu_preempt_note_context_switch(bool preempt);
481static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); 460static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp);
482#ifdef CONFIG_HOTPLUG_CPU 461#ifdef CONFIG_HOTPLUG_CPU
483static bool rcu_preempt_has_tasks(struct rcu_node *rnp); 462static bool rcu_preempt_has_tasks(struct rcu_node *rnp);
@@ -529,15 +508,7 @@ static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
529#endif /* #ifdef CONFIG_RCU_NOCB_CPU */ 508#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
530static void __maybe_unused rcu_kick_nohz_cpu(int cpu); 509static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
531static bool init_nocb_callback_list(struct rcu_data *rdp); 510static bool init_nocb_callback_list(struct rcu_data *rdp);
532static void rcu_sysidle_enter(int irq);
533static void rcu_sysidle_exit(int irq);
534static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
535 unsigned long *maxj);
536static bool is_sysidle_rcu_state(struct rcu_state *rsp);
537static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
538 unsigned long maxj);
539static void rcu_bind_gp_kthread(void); 511static void rcu_bind_gp_kthread(void);
540static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
541static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 512static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
542static void rcu_dynticks_task_enter(void); 513static void rcu_dynticks_task_enter(void);
543static void rcu_dynticks_task_exit(void); 514static void rcu_dynticks_task_exit(void);
@@ -551,75 +522,3 @@ void srcu_offline_cpu(unsigned int cpu) { }
551#endif /* #else #ifdef CONFIG_SRCU */ 522#endif /* #else #ifdef CONFIG_SRCU */
552 523
553#endif /* #ifndef RCU_TREE_NONCORE */ 524#endif /* #ifndef RCU_TREE_NONCORE */
554
555#ifdef CONFIG_RCU_TRACE
556/* Read out queue lengths for tracing. */
557static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
558{
559#ifdef CONFIG_RCU_NOCB_CPU
560 *ql = atomic_long_read(&rdp->nocb_q_count);
561 *qll = atomic_long_read(&rdp->nocb_q_count_lazy);
562#else /* #ifdef CONFIG_RCU_NOCB_CPU */
563 *ql = 0;
564 *qll = 0;
565#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
566}
567#endif /* #ifdef CONFIG_RCU_TRACE */
568
569/*
570 * Wrappers for the rcu_node::lock acquire and release.
571 *
572 * Because the rcu_nodes form a tree, the tree traversal locking will observe
573 * different lock values, this in turn means that an UNLOCK of one level
574 * followed by a LOCK of another level does not imply a full memory barrier;
575 * and most importantly transitivity is lost.
576 *
577 * In order to restore full ordering between tree levels, augment the regular
578 * lock acquire functions with smp_mb__after_unlock_lock().
579 *
580 * As ->lock of struct rcu_node is a __private field, therefore one should use
581 * these wrappers rather than directly call raw_spin_{lock,unlock}* on ->lock.
582 */
583static inline void raw_spin_lock_rcu_node(struct rcu_node *rnp)
584{
585 raw_spin_lock(&ACCESS_PRIVATE(rnp, lock));
586 smp_mb__after_unlock_lock();
587}
588
589static inline void raw_spin_unlock_rcu_node(struct rcu_node *rnp)
590{
591 raw_spin_unlock(&ACCESS_PRIVATE(rnp, lock));
592}
593
594static inline void raw_spin_lock_irq_rcu_node(struct rcu_node *rnp)
595{
596 raw_spin_lock_irq(&ACCESS_PRIVATE(rnp, lock));
597 smp_mb__after_unlock_lock();
598}
599
600static inline void raw_spin_unlock_irq_rcu_node(struct rcu_node *rnp)
601{
602 raw_spin_unlock_irq(&ACCESS_PRIVATE(rnp, lock));
603}
604
605#define raw_spin_lock_irqsave_rcu_node(rnp, flags) \
606do { \
607 typecheck(unsigned long, flags); \
608 raw_spin_lock_irqsave(&ACCESS_PRIVATE(rnp, lock), flags); \
609 smp_mb__after_unlock_lock(); \
610} while (0)
611
612#define raw_spin_unlock_irqrestore_rcu_node(rnp, flags) \
613do { \
614 typecheck(unsigned long, flags); \
615 raw_spin_unlock_irqrestore(&ACCESS_PRIVATE(rnp, lock), flags); \
616} while (0)
617
618static inline bool raw_spin_trylock_rcu_node(struct rcu_node *rnp)
619{
620 bool locked = raw_spin_trylock(&ACCESS_PRIVATE(rnp, lock));
621
622 if (locked)
623 smp_mb__after_unlock_lock();
624 return locked;
625}
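
The wrappers deleted from tree.h above pair the raw rcu_node lock operations with smp_mb__after_unlock_lock() so that unlocking one rcu_node lock and locking another still provides transitive ordering, and the tree.c hunks earlier in this patch add lockdep_assert_held() to helpers that expect the lock to be held by their caller. A short sketch of how the two fit together follows; the function bodies are invented for illustration and assume the rcu_node declarations and lock wrappers kept elsewhere in this series.

/* Illustration only: caller/helper locking idiom around an rcu_node. */
static void example_helper(struct rcu_node *rnp)
{
	lockdep_assert_held(&rnp->lock);	/* document the locking contract */
	/* ... update state protected by rnp->lock ... */
}

static void example_caller(struct rcu_node *rnp)
{
	unsigned long flags;

	/* Raw lock plus smp_mb__after_unlock_lock() for transitive ordering. */
	raw_spin_lock_irqsave_rcu_node(rnp, flags);
	example_helper(rnp);
	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}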
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index e513b4ab1197..dd21ca47e4b4 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -147,7 +147,7 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
147 * 147 *
148 * Caller must hold the rcu_state's exp_mutex. 148 * Caller must hold the rcu_state's exp_mutex.
149 */ 149 */
150static int sync_rcu_preempt_exp_done(struct rcu_node *rnp) 150static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
151{ 151{
152 return rnp->exp_tasks == NULL && 152 return rnp->exp_tasks == NULL &&
153 READ_ONCE(rnp->expmask) == 0; 153 READ_ONCE(rnp->expmask) == 0;
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index c9a48657512a..908b309d60d7 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -70,7 +70,7 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */
70static void __init rcu_bootup_announce_oddness(void) 70static void __init rcu_bootup_announce_oddness(void)
71{ 71{
72 if (IS_ENABLED(CONFIG_RCU_TRACE)) 72 if (IS_ENABLED(CONFIG_RCU_TRACE))
73 pr_info("\tRCU debugfs-based tracing is enabled.\n"); 73 pr_info("\tRCU event tracing is enabled.\n");
74 if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) || 74 if ((IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 64) ||
75 (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32)) 75 (!IS_ENABLED(CONFIG_64BIT) && RCU_FANOUT != 32))
76 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", 76 pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n",
@@ -90,8 +90,32 @@ static void __init rcu_bootup_announce_oddness(void)
90 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 90 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
91 if (nr_cpu_ids != NR_CPUS) 91 if (nr_cpu_ids != NR_CPUS)
92 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 92 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
93 if (IS_ENABLED(CONFIG_RCU_BOOST)) 93#ifdef CONFIG_RCU_BOOST
94 pr_info("\tRCU kthread priority: %d.\n", kthread_prio); 94 pr_info("\tRCU priority boosting: priority %d delay %d ms.\n", kthread_prio, CONFIG_RCU_BOOST_DELAY);
95#endif
96 if (blimit != DEFAULT_RCU_BLIMIT)
97 pr_info("\tBoot-time adjustment of callback invocation limit to %ld.\n", blimit);
98 if (qhimark != DEFAULT_RCU_QHIMARK)
99 pr_info("\tBoot-time adjustment of callback high-water mark to %ld.\n", qhimark);
100 if (qlowmark != DEFAULT_RCU_QLOMARK)
101 pr_info("\tBoot-time adjustment of callback low-water mark to %ld.\n", qlowmark);
102 if (jiffies_till_first_fqs != ULONG_MAX)
103 pr_info("\tBoot-time adjustment of first FQS scan delay to %ld jiffies.\n", jiffies_till_first_fqs);
104 if (jiffies_till_next_fqs != ULONG_MAX)
105 pr_info("\tBoot-time adjustment of subsequent FQS scan delay to %ld jiffies.\n", jiffies_till_next_fqs);
106 if (rcu_kick_kthreads)
107 pr_info("\tKick kthreads if too-long grace period.\n");
108 if (IS_ENABLED(CONFIG_DEBUG_OBJECTS_RCU_HEAD))
109 pr_info("\tRCU callback double-/use-after-free debug enabled.\n");
110 if (gp_preinit_delay)
111 pr_info("\tRCU debug GP pre-init slowdown %d jiffies.\n", gp_preinit_delay);
112 if (gp_init_delay)
113 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_init_delay);
114 if (gp_cleanup_delay)
115 pr_info("\tRCU debug GP init slowdown %d jiffies.\n", gp_cleanup_delay);
116 if (IS_ENABLED(CONFIG_RCU_EQS_DEBUG))
117 pr_info("\tRCU debug extended QS entry/exit.\n");
118 rcupdate_announce_bootup_oddness();
95} 119}
96 120
97#ifdef CONFIG_PREEMPT_RCU 121#ifdef CONFIG_PREEMPT_RCU
@@ -155,6 +179,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
155 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0); 179 (rnp->expmask & rdp->grpmask ? RCU_EXP_BLKD : 0);
156 struct task_struct *t = current; 180 struct task_struct *t = current;
157 181
182 lockdep_assert_held(&rnp->lock);
183
158 /* 184 /*
159 * Decide where to queue the newly blocked task. In theory, 185 * Decide where to queue the newly blocked task. In theory,
160 * this could be an if-statement. In practice, when I tried 186 * this could be an if-statement. In practice, when I tried
@@ -263,6 +289,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
263 */ 289 */
264static void rcu_preempt_qs(void) 290static void rcu_preempt_qs(void)
265{ 291{
292 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_qs() invoked with preemption enabled!!!\n");
266 if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) { 293 if (__this_cpu_read(rcu_data_p->cpu_no_qs.s)) {
267 trace_rcu_grace_period(TPS("rcu_preempt"), 294 trace_rcu_grace_period(TPS("rcu_preempt"),
268 __this_cpu_read(rcu_data_p->gpnum), 295 __this_cpu_read(rcu_data_p->gpnum),
@@ -286,12 +313,14 @@ static void rcu_preempt_qs(void)
286 * 313 *
287 * Caller must disable interrupts. 314 * Caller must disable interrupts.
288 */ 315 */
289static void rcu_preempt_note_context_switch(void) 316static void rcu_preempt_note_context_switch(bool preempt)
290{ 317{
291 struct task_struct *t = current; 318 struct task_struct *t = current;
292 struct rcu_data *rdp; 319 struct rcu_data *rdp;
293 struct rcu_node *rnp; 320 struct rcu_node *rnp;
294 321
322 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_preempt_note_context_switch() invoked with interrupts enabled!!!\n");
323 WARN_ON_ONCE(!preempt && t->rcu_read_lock_nesting > 0);
295 if (t->rcu_read_lock_nesting > 0 && 324 if (t->rcu_read_lock_nesting > 0 &&
296 !t->rcu_read_unlock_special.b.blocked) { 325 !t->rcu_read_unlock_special.b.blocked) {
297 326
@@ -607,6 +636,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
607 */ 636 */
608static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) 637static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
609{ 638{
639 RCU_LOCKDEP_WARN(preemptible(), "rcu_preempt_check_blocked_tasks() invoked with preemption enabled!!!\n");
610 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp)); 640 WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
611 if (rcu_preempt_has_tasks(rnp)) 641 if (rcu_preempt_has_tasks(rnp))
612 rnp->gp_tasks = rnp->blkd_tasks.next; 642 rnp->gp_tasks = rnp->blkd_tasks.next;
@@ -643,8 +673,37 @@ static void rcu_preempt_do_callbacks(void)
643 673
644#endif /* #ifdef CONFIG_RCU_BOOST */ 674#endif /* #ifdef CONFIG_RCU_BOOST */
645 675
646/* 676/**
647 * Queue a preemptible-RCU callback for invocation after a grace period. 677 * call_rcu() - Queue an RCU callback for invocation after a grace period.
678 * @head: structure to be used for queueing the RCU updates.
679 * @func: actual callback function to be invoked after the grace period
680 *
681 * The callback function will be invoked some time after a full grace
682 * period elapses, in other words after all pre-existing RCU read-side
683 * critical sections have completed. However, the callback function
684 * might well execute concurrently with RCU read-side critical sections
685 * that started after call_rcu() was invoked. RCU read-side critical
686 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
687 * and may be nested.
688 *
689 * Note that all CPUs must agree that the grace period extended beyond
690 * all pre-existing RCU read-side critical sections. On systems with more
691 * than one CPU, this means that when "func()" is invoked, each CPU is
692 * guaranteed to have executed a full memory barrier since the end of its
693 * last RCU read-side critical section whose beginning preceded the call
694 * to call_rcu(). It also means that each CPU executing an RCU read-side
695 * critical section that continues beyond the start of "func()" must have
696 * executed a memory barrier after the call_rcu() but before the beginning
697 * of that RCU read-side critical section. Note that these guarantees
698 * include CPUs that are offline, idle, or executing in user mode, as
699 * well as CPUs that are executing in the kernel.
700 *
701 * Furthermore, if CPU A invoked call_rcu() and CPU B invoked the
702 * resulting RCU callback function "func()", then both CPU A and CPU B are
703 * guaranteed to execute a full memory barrier during the time interval
704 * between the call to call_rcu() and the invocation of "func()" -- even
705 * if CPU A and CPU B are the same CPU (but again only if the system has
706 * more than one CPU).
648 */ 707 */
649void call_rcu(struct rcu_head *head, rcu_callback_t func) 708void call_rcu(struct rcu_head *head, rcu_callback_t func)
650{ 709{
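As a usage illustration of the guarantees documented above, here is a sketch assuming a hypothetical struct foo on an RCU-protected list (not code from this series): the updater unlinks the element, and call_rcu() defers the kfree() until all pre-existing readers have finished.

	#include <linux/rculist.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct foo {
		struct list_head list;
		struct rcu_head rcu;
		int key;
	};

	static void foo_reclaim(struct rcu_head *rcu)
	{
		kfree(container_of(rcu, struct foo, rcu));
	}

	/* Caller holds the update-side lock protecting the list. */
	static void foo_del(struct foo *p)
	{
		list_del_rcu(&p->list);		/* unpublish the element */
		call_rcu(&p->rcu, foo_reclaim);	/* free it after a grace period */
	}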
@@ -663,8 +722,13 @@ EXPORT_SYMBOL_GPL(call_rcu);
663 * synchronize_rcu() was waiting. RCU read-side critical sections are 722 * synchronize_rcu() was waiting. RCU read-side critical sections are
664 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. 723 * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested.
665 * 724 *
666 * See the description of synchronize_sched() for more detailed information 725 * See the description of synchronize_sched() for more detailed
667 * on memory ordering guarantees. 726 * information on memory-ordering guarantees. However, please note
727 * that -only- the memory-ordering guarantees apply. For example,
728 * synchronize_rcu() is -not- guaranteed to wait on things like code
 729 * protected by preempt_disable(); instead, synchronize_rcu() is -only-
730 * guaranteed to wait on RCU read-side critical sections, that is, sections
731 * of code protected by rcu_read_lock().
668 */ 732 */
669void synchronize_rcu(void) 733void synchronize_rcu(void)
670{ 734{
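The added caveat is easy to get wrong, so a hedged sketch (hypothetical pointer gp, not from this series): only the rcu_read_lock() reader below is guaranteed to be waited for by synchronize_rcu(); the preempt_disable() region is what synchronize_sched() waits for.

	#include <linux/preempt.h>
	#include <linux/printk.h>
	#include <linux/rcupdate.h>

	struct foo { int a; };
	static struct foo __rcu *gp;	/* hypothetical RCU-protected pointer */

	static void reader_rcu(void)	/* waited for by synchronize_rcu() */
	{
		struct foo *p;

		rcu_read_lock();
		p = rcu_dereference(gp);
		if (p)
			pr_info("a=%d\n", p->a);
		rcu_read_unlock();
	}

	static void reader_sched(void)	/* not guaranteed to be waited for by
					 * synchronize_rcu(); covered by
					 * synchronize_sched() instead */
	{
		struct foo *p;

		preempt_disable();
		p = rcu_dereference_sched(gp);
		if (p)
			pr_info("a=%d\n", p->a);
		preempt_enable();
	}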
@@ -738,7 +802,7 @@ static void __init rcu_bootup_announce(void)
738 * Because preemptible RCU does not exist, we never have to check for 802 * Because preemptible RCU does not exist, we never have to check for
739 * CPUs being in quiescent states. 803 * CPUs being in quiescent states.
740 */ 804 */
741static void rcu_preempt_note_context_switch(void) 805static void rcu_preempt_note_context_switch(bool preempt)
742{ 806{
743} 807}
744 808
@@ -835,33 +899,6 @@ void exit_rcu(void)
835 899
836#include "../locking/rtmutex_common.h" 900#include "../locking/rtmutex_common.h"
837 901
838#ifdef CONFIG_RCU_TRACE
839
840static void rcu_initiate_boost_trace(struct rcu_node *rnp)
841{
842 if (!rcu_preempt_has_tasks(rnp))
843 rnp->n_balk_blkd_tasks++;
844 else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
845 rnp->n_balk_exp_gp_tasks++;
846 else if (rnp->gp_tasks != NULL && rnp->boost_tasks != NULL)
847 rnp->n_balk_boost_tasks++;
848 else if (rnp->gp_tasks != NULL && rnp->qsmask != 0)
849 rnp->n_balk_notblocked++;
850 else if (rnp->gp_tasks != NULL &&
851 ULONG_CMP_LT(jiffies, rnp->boost_time))
852 rnp->n_balk_notyet++;
853 else
854 rnp->n_balk_nos++;
855}
856
857#else /* #ifdef CONFIG_RCU_TRACE */
858
859static void rcu_initiate_boost_trace(struct rcu_node *rnp)
860{
861}
862
863#endif /* #else #ifdef CONFIG_RCU_TRACE */
864
865static void rcu_wake_cond(struct task_struct *t, int status) 902static void rcu_wake_cond(struct task_struct *t, int status)
866{ 903{
867 /* 904 /*
@@ -992,8 +1029,8 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
992{ 1029{
993 struct task_struct *t; 1030 struct task_struct *t;
994 1031
1032 lockdep_assert_held(&rnp->lock);
995 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) { 1033 if (!rcu_preempt_blocked_readers_cgp(rnp) && rnp->exp_tasks == NULL) {
996 rnp->n_balk_exp_gp_tasks++;
997 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1034 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
998 return; 1035 return;
999 } 1036 }
@@ -1009,7 +1046,6 @@ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags)
1009 if (t) 1046 if (t)
1010 rcu_wake_cond(t, rnp->boost_kthread_status); 1047 rcu_wake_cond(t, rnp->boost_kthread_status);
1011 } else { 1048 } else {
1012 rcu_initiate_boost_trace(rnp);
1013 raw_spin_unlock_irqrestore_rcu_node(rnp, flags); 1049 raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
1014 } 1050 }
1015} 1051}
@@ -1260,8 +1296,7 @@ static void rcu_prepare_kthreads(int cpu)
1260int rcu_needs_cpu(u64 basemono, u64 *nextevt) 1296int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1261{ 1297{
1262 *nextevt = KTIME_MAX; 1298 *nextevt = KTIME_MAX;
1263 return IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) 1299 return rcu_cpu_has_callbacks(NULL);
1264 ? 0 : rcu_cpu_has_callbacks(NULL);
1265} 1300}
1266 1301
1267/* 1302/*
@@ -1372,10 +1407,7 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt)
1372 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks); 1407 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
1373 unsigned long dj; 1408 unsigned long dj;
1374 1409
1375 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL)) { 1410 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_needs_cpu() invoked with irqs enabled!!!");
1376 *nextevt = KTIME_MAX;
1377 return 0;
1378 }
1379 1411
1380 /* Snapshot to detect later posting of non-lazy callback. */ 1412 /* Snapshot to detect later posting of non-lazy callback. */
1381 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; 1413 rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
@@ -1424,8 +1456,8 @@ static void rcu_prepare_for_idle(void)
1424 struct rcu_state *rsp; 1456 struct rcu_state *rsp;
1425 int tne; 1457 int tne;
1426 1458
1427 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || 1459 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_prepare_for_idle() invoked with irqs enabled!!!");
1428 rcu_is_nocb_cpu(smp_processor_id())) 1460 if (rcu_is_nocb_cpu(smp_processor_id()))
1429 return; 1461 return;
1430 1462
1431 /* Handle nohz enablement switches conservatively. */ 1463 /* Handle nohz enablement switches conservatively. */
@@ -1479,8 +1511,8 @@ static void rcu_prepare_for_idle(void)
1479 */ 1511 */
1480static void rcu_cleanup_after_idle(void) 1512static void rcu_cleanup_after_idle(void)
1481{ 1513{
1482 if (IS_ENABLED(CONFIG_RCU_NOCB_CPU_ALL) || 1514 RCU_LOCKDEP_WARN(!irqs_disabled(), "rcu_cleanup_after_idle() invoked with irqs enabled!!!");
1483 rcu_is_nocb_cpu(smp_processor_id())) 1515 if (rcu_is_nocb_cpu(smp_processor_id()))
1484 return; 1516 return;
1485 if (rcu_try_advance_all_cbs()) 1517 if (rcu_try_advance_all_cbs())
1486 invoke_rcu_core(); 1518 invoke_rcu_core();
@@ -1747,7 +1779,6 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
1747 init_swait_queue_head(&rnp->nocb_gp_wq[1]); 1779 init_swait_queue_head(&rnp->nocb_gp_wq[1]);
1748} 1780}
1749 1781
1750#ifndef CONFIG_RCU_NOCB_CPU_ALL
1751/* Is the specified CPU a no-CBs CPU? */ 1782/* Is the specified CPU a no-CBs CPU? */
1752bool rcu_is_nocb_cpu(int cpu) 1783bool rcu_is_nocb_cpu(int cpu)
1753{ 1784{
@@ -1755,7 +1786,6 @@ bool rcu_is_nocb_cpu(int cpu)
1755 return cpumask_test_cpu(cpu, rcu_nocb_mask); 1786 return cpumask_test_cpu(cpu, rcu_nocb_mask);
1756 return false; 1787 return false;
1757} 1788}
1758#endif /* #ifndef CONFIG_RCU_NOCB_CPU_ALL */
1759 1789
1760/* 1790/*
1761 * Kick the leader kthread for this NOCB group. 1791 * Kick the leader kthread for this NOCB group.
@@ -1769,6 +1799,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
1769 if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) { 1799 if (READ_ONCE(rdp_leader->nocb_leader_sleep) || force) {
1770 /* Prior smp_mb__after_atomic() orders against prior enqueue. */ 1800 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
1771 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false); 1801 WRITE_ONCE(rdp_leader->nocb_leader_sleep, false);
1802 smp_mb(); /* ->nocb_leader_sleep before swake_up(). */
1772 swake_up(&rdp_leader->nocb_wq); 1803 swake_up(&rdp_leader->nocb_wq);
1773 } 1804 }
1774} 1805}
@@ -1860,7 +1891,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1860 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1891 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1861 TPS("WakeEmpty")); 1892 TPS("WakeEmpty"));
1862 } else { 1893 } else {
1863 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE); 1894 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE);
1864 /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ 1895 /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
1865 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); 1896 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
1866 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1897 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -1874,7 +1905,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
1874 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1905 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
1875 TPS("WakeOvf")); 1906 TPS("WakeOvf"));
1876 } else { 1907 } else {
1877 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE); 1908 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_FORCE);
1878 /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ 1909 /* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */
1879 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); 1910 smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true);
1880 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 1911 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
@@ -2023,6 +2054,7 @@ wait_again:
2023 * nocb_gp_head, where they await a grace period. 2054 * nocb_gp_head, where they await a grace period.
2024 */ 2055 */
2025 gotcbs = false; 2056 gotcbs = false;
2057 smp_mb(); /* wakeup before ->nocb_head reads. */
2026 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) { 2058 for (rdp = my_rdp; rdp; rdp = rdp->nocb_next_follower) {
2027 rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head); 2059 rdp->nocb_gp_head = READ_ONCE(rdp->nocb_head);
2028 if (!rdp->nocb_gp_head) 2060 if (!rdp->nocb_gp_head)
@@ -2201,8 +2233,8 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2201 if (!rcu_nocb_need_deferred_wakeup(rdp)) 2233 if (!rcu_nocb_need_deferred_wakeup(rdp))
2202 return; 2234 return;
2203 ndw = READ_ONCE(rdp->nocb_defer_wakeup); 2235 ndw = READ_ONCE(rdp->nocb_defer_wakeup);
2204 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_NOT); 2236 WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT);
2205 wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE); 2237 wake_nocb_leader(rdp, ndw == RCU_NOCB_WAKE_FORCE);
2206 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake")); 2238 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
2207} 2239}
2208 2240
@@ -2212,10 +2244,6 @@ void __init rcu_init_nohz(void)
2212 bool need_rcu_nocb_mask = true; 2244 bool need_rcu_nocb_mask = true;
2213 struct rcu_state *rsp; 2245 struct rcu_state *rsp;
2214 2246
2215#ifdef CONFIG_RCU_NOCB_CPU_NONE
2216 need_rcu_nocb_mask = false;
2217#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
2218
2219#if defined(CONFIG_NO_HZ_FULL) 2247#if defined(CONFIG_NO_HZ_FULL)
2220 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask)) 2248 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
2221 need_rcu_nocb_mask = true; 2249 need_rcu_nocb_mask = true;
@@ -2231,14 +2259,6 @@ void __init rcu_init_nohz(void)
2231 if (!have_rcu_nocb_mask) 2259 if (!have_rcu_nocb_mask)
2232 return; 2260 return;
2233 2261
2234#ifdef CONFIG_RCU_NOCB_CPU_ZERO
2235 pr_info("\tOffload RCU callbacks from CPU 0\n");
2236 cpumask_set_cpu(0, rcu_nocb_mask);
2237#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
2238#ifdef CONFIG_RCU_NOCB_CPU_ALL
2239 pr_info("\tOffload RCU callbacks from all CPUs\n");
2240 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
2241#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
2242#if defined(CONFIG_NO_HZ_FULL) 2262#if defined(CONFIG_NO_HZ_FULL)
2243 if (tick_nohz_full_running) 2263 if (tick_nohz_full_running)
2244 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask); 2264 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
@@ -2491,421 +2511,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
2491#endif /* #ifdef CONFIG_NO_HZ_FULL */ 2511#endif /* #ifdef CONFIG_NO_HZ_FULL */
2492} 2512}
2493 2513
2494
2495#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
2496
2497static int full_sysidle_state; /* Current system-idle state. */
2498#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
2499#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
2500#define RCU_SYSIDLE_LONG 2 /* All CPUs idle for long enough. */
2501#define RCU_SYSIDLE_FULL 3 /* All CPUs idle, ready for sysidle. */
2502#define RCU_SYSIDLE_FULL_NOTED 4 /* Actually entered sysidle state. */
2503
2504/*
2505 * Invoked to note exit from irq or task transition to idle. Note that
2506 * usermode execution does -not- count as idle here! After all, we want
2507 * to detect full-system idle states, not RCU quiescent states and grace
2508 * periods. The caller must have disabled interrupts.
2509 */
2510static void rcu_sysidle_enter(int irq)
2511{
2512 unsigned long j;
2513 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
2514
2515 /* If there are no nohz_full= CPUs, no need to track this. */
2516 if (!tick_nohz_full_enabled())
2517 return;
2518
2519 /* Adjust nesting, check for fully idle. */
2520 if (irq) {
2521 rdtp->dynticks_idle_nesting--;
2522 WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
2523 if (rdtp->dynticks_idle_nesting != 0)
2524 return; /* Still not fully idle. */
2525 } else {
2526 if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
2527 DYNTICK_TASK_NEST_VALUE) {
2528 rdtp->dynticks_idle_nesting = 0;
2529 } else {
2530 rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
2531 WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
2532 return; /* Still not fully idle. */
2533 }
2534 }
2535
2536 /* Record start of fully idle period. */
2537 j = jiffies;
2538 WRITE_ONCE(rdtp->dynticks_idle_jiffies, j);
2539 smp_mb__before_atomic();
2540 atomic_inc(&rdtp->dynticks_idle);
2541 smp_mb__after_atomic();
2542 WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
2543}
2544
2545/*
2546 * Unconditionally force exit from full system-idle state. This is
2547 * invoked when a normal CPU exits idle, but must be called separately
2548 * for the timekeeping CPU (tick_do_timer_cpu). The reason for this
2549 * is that the timekeeping CPU is permitted to take scheduling-clock
2550 * interrupts while the system is in system-idle state, and of course
2551 * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
2552 * interrupt from any other type of interrupt.
2553 */
2554void rcu_sysidle_force_exit(void)
2555{
2556 int oldstate = READ_ONCE(full_sysidle_state);
2557 int newoldstate;
2558
2559 /*
2560 * Each pass through the following loop attempts to exit full
2561 * system-idle state. If contention proves to be a problem,
2562 * a trylock-based contention tree could be used here.
2563 */
2564 while (oldstate > RCU_SYSIDLE_SHORT) {
2565 newoldstate = cmpxchg(&full_sysidle_state,
2566 oldstate, RCU_SYSIDLE_NOT);
2567 if (oldstate == newoldstate &&
2568 oldstate == RCU_SYSIDLE_FULL_NOTED) {
2569 rcu_kick_nohz_cpu(tick_do_timer_cpu);
2570 return; /* We cleared it, done! */
2571 }
2572 oldstate = newoldstate;
2573 }
2574 smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
2575}
2576
2577/*
2578 * Invoked to note entry to irq or task transition from idle. Note that
2579 * usermode execution does -not- count as idle here! The caller must
2580 * have disabled interrupts.
2581 */
2582static void rcu_sysidle_exit(int irq)
2583{
2584 struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
2585
2586 /* If there are no nohz_full= CPUs, no need to track this. */
2587 if (!tick_nohz_full_enabled())
2588 return;
2589
2590 /* Adjust nesting, check for already non-idle. */
2591 if (irq) {
2592 rdtp->dynticks_idle_nesting++;
2593 WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
2594 if (rdtp->dynticks_idle_nesting != 1)
2595 return; /* Already non-idle. */
2596 } else {
2597 /*
2598 * Allow for irq misnesting. Yes, it really is possible
2599 * to enter an irq handler then never leave it, and maybe
2600 * also vice versa. Handle both possibilities.
2601 */
2602 if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
2603 rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
2604 WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
2605 return; /* Already non-idle. */
2606 } else {
2607 rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
2608 }
2609 }
2610
2611 /* Record end of idle period. */
2612 smp_mb__before_atomic();
2613 atomic_inc(&rdtp->dynticks_idle);
2614 smp_mb__after_atomic();
2615 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
2616
2617 /*
2618 * If we are the timekeeping CPU, we are permitted to be non-idle
2619 * during a system-idle state. This must be the case, because
2620 * the timekeeping CPU has to take scheduling-clock interrupts
2621 * during the time that the system is transitioning to full
2622 * system-idle state. This means that the timekeeping CPU must
2623 * invoke rcu_sysidle_force_exit() directly if it does anything
2624 * more than take a scheduling-clock interrupt.
2625 */
2626 if (smp_processor_id() == tick_do_timer_cpu)
2627 return;
2628
2629 /* Update system-idle state: We are clearly no longer fully idle! */
2630 rcu_sysidle_force_exit();
2631}
2632
2633/*
2634 * Check to see if the current CPU is idle. Note that usermode execution
2635 * does not count as idle. The caller must have disabled interrupts,
2636 * and must be running on tick_do_timer_cpu.
2637 */
2638static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2639 unsigned long *maxj)
2640{
2641 int cur;
2642 unsigned long j;
2643 struct rcu_dynticks *rdtp = rdp->dynticks;
2644
2645 /* If there are no nohz_full= CPUs, don't check system-wide idleness. */
2646 if (!tick_nohz_full_enabled())
2647 return;
2648
2649 /*
2650 * If some other CPU has already reported non-idle, if this is
2651 * not the flavor of RCU that tracks sysidle state, or if this
2652 * is an offline or the timekeeping CPU, nothing to do.
2653 */
2654 if (!*isidle || rdp->rsp != rcu_state_p ||
2655 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
2656 return;
2657 /* Verify affinity of current kthread. */
2658 WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
2659
2660 /* Pick up current idle and NMI-nesting counter and check. */
2661 cur = atomic_read(&rdtp->dynticks_idle);
2662 if (cur & 0x1) {
2663 *isidle = false; /* We are not idle! */
2664 return;
2665 }
2666 smp_mb(); /* Read counters before timestamps. */
2667
2668 /* Pick up timestamps. */
2669 j = READ_ONCE(rdtp->dynticks_idle_jiffies);
2670 /* If this CPU entered idle more recently, update maxj timestamp. */
2671 if (ULONG_CMP_LT(*maxj, j))
2672 *maxj = j;
2673}
2674
2675/*
2676 * Is this the flavor of RCU that is handling full-system idle?
2677 */
2678static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2679{
2680 return rsp == rcu_state_p;
2681}
2682
2683/*
2684 * Return a delay in jiffies based on the number of CPUs, rcu_node
2685 * leaf fanout, and jiffies tick rate. The idea is to allow larger
2686 * systems more time to transition to full-idle state in order to
2687 * avoid the cache thrashing that otherwise occurs on the state variable.
2688 * Really small systems (less than a couple of tens of CPUs) should
2689 * instead use a single global atomically incremented counter, and later
2690 * versions of this will automatically reconfigure themselves accordingly.
2691 */
2692static unsigned long rcu_sysidle_delay(void)
2693{
2694 if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
2695 return 0;
2696 return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
2697}
2698
2699/*
2700 * Advance the full-system-idle state. This is invoked when all of
2701 * the non-timekeeping CPUs are idle.
2702 */
2703static void rcu_sysidle(unsigned long j)
2704{
2705 /* Check the current state. */
2706 switch (READ_ONCE(full_sysidle_state)) {
2707 case RCU_SYSIDLE_NOT:
2708
2709 /* First time all are idle, so note a short idle period. */
2710 WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_SHORT);
2711 break;
2712
2713 case RCU_SYSIDLE_SHORT:
2714
2715 /*
2716 * Idle for a bit, time to advance to next state?
2717 * cmpxchg failure means race with non-idle, let them win.
2718 */
2719 if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
2720 (void)cmpxchg(&full_sysidle_state,
2721 RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
2722 break;
2723
2724 case RCU_SYSIDLE_LONG:
2725
2726 /*
2727 * Do an additional check pass before advancing to full.
2728 * cmpxchg failure means race with non-idle, let them win.
2729 */
2730 if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
2731 (void)cmpxchg(&full_sysidle_state,
2732 RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
2733 break;
2734
2735 default:
2736 break;
2737 }
2738}
2739
2740/*
2741 * Found a non-idle non-timekeeping CPU, so kick the system-idle state
2742 * back to the beginning.
2743 */
2744static void rcu_sysidle_cancel(void)
2745{
2746 smp_mb();
2747 if (full_sysidle_state > RCU_SYSIDLE_SHORT)
2748 WRITE_ONCE(full_sysidle_state, RCU_SYSIDLE_NOT);
2749}
2750
2751/*
2752 * Update the sysidle state based on the results of a force-quiescent-state
2753 * scan of the CPUs' dyntick-idle state.
2754 */
2755static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2756 unsigned long maxj, bool gpkt)
2757{
2758 if (rsp != rcu_state_p)
2759 return; /* Wrong flavor, ignore. */
2760 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
2761 return; /* Running state machine from timekeeping CPU. */
2762 if (isidle)
2763 rcu_sysidle(maxj); /* More idle! */
2764 else
2765 rcu_sysidle_cancel(); /* Idle is over. */
2766}
2767
2768/*
2769 * Wrapper for rcu_sysidle_report() when called from the grace-period
2770 * kthread's context.
2771 */
2772static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2773 unsigned long maxj)
2774{
2775 /* If there are no nohz_full= CPUs, no need to track this. */
2776 if (!tick_nohz_full_enabled())
2777 return;
2778
2779 rcu_sysidle_report(rsp, isidle, maxj, true);
2780}
2781
2782/* Callback and function for forcing an RCU grace period. */
2783struct rcu_sysidle_head {
2784 struct rcu_head rh;
2785 int inuse;
2786};
2787
2788static void rcu_sysidle_cb(struct rcu_head *rhp)
2789{
2790 struct rcu_sysidle_head *rshp;
2791
2792 /*
2793 * The following memory barrier is needed to replace the
2794 * memory barriers that would normally be in the memory
2795 * allocator.
2796 */
2797 smp_mb(); /* grace period precedes setting inuse. */
2798
2799 rshp = container_of(rhp, struct rcu_sysidle_head, rh);
2800 WRITE_ONCE(rshp->inuse, 0);
2801}
2802
2803/*
2804 * Check to see if the system is fully idle, other than the timekeeping CPU.
2805 * The caller must have disabled interrupts. This is not intended to be
2806 * called unless tick_nohz_full_enabled().
2807 */
2808bool rcu_sys_is_idle(void)
2809{
2810 static struct rcu_sysidle_head rsh;
2811 int rss = READ_ONCE(full_sysidle_state);
2812
2813 if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
2814 return false;
2815
2816 /* Handle small-system case by doing a full scan of CPUs. */
2817 if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
2818 int oldrss = rss - 1;
2819
2820 /*
2821 * One pass to advance to each state up to _FULL.
2822 * Give up if any pass fails to advance the state.
2823 */
2824 while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
2825 int cpu;
2826 bool isidle = true;
2827 unsigned long maxj = jiffies - ULONG_MAX / 4;
2828 struct rcu_data *rdp;
2829
2830 /* Scan all the CPUs looking for nonidle CPUs. */
2831 for_each_possible_cpu(cpu) {
2832 rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
2833 rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
2834 if (!isidle)
2835 break;
2836 }
2837 rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
2838 oldrss = rss;
2839 rss = READ_ONCE(full_sysidle_state);
2840 }
2841 }
2842
2843 /* If this is the first observation of an idle period, record it. */
2844 if (rss == RCU_SYSIDLE_FULL) {
2845 rss = cmpxchg(&full_sysidle_state,
2846 RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
2847 return rss == RCU_SYSIDLE_FULL;
2848 }
2849
2850 smp_mb(); /* ensure rss load happens before later caller actions. */
2851
2852 /* If already fully idle, tell the caller (in case of races). */
2853 if (rss == RCU_SYSIDLE_FULL_NOTED)
2854 return true;
2855
2856 /*
2857 * If we aren't there yet, and a grace period is not in flight,
2858 * initiate a grace period. Either way, tell the caller that
2859 * we are not there yet. We use an xchg() rather than an assignment
2860 * to make up for the memory barriers that would otherwise be
2861 * provided by the memory allocator.
2862 */
2863 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
2864 !rcu_gp_in_progress(rcu_state_p) &&
2865 !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
2866 call_rcu(&rsh.rh, rcu_sysidle_cb);
2867 return false;
2868}
2869
2870/*
2871 * Initialize dynticks sysidle state for CPUs coming online.
2872 */
2873static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
2874{
2875 rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
2876}
2877
2878#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
2879
2880static void rcu_sysidle_enter(int irq)
2881{
2882}
2883
2884static void rcu_sysidle_exit(int irq)
2885{
2886}
2887
2888static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2889 unsigned long *maxj)
2890{
2891}
2892
2893static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2894{
2895 return false;
2896}
2897
2898static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2899 unsigned long maxj)
2900{
2901}
2902
2903static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
2904{
2905}
2906
2907#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
2908
2909/* 2514/*
2910 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the 2515 * Is this CPU a NO_HZ_FULL CPU that should ignore RCU so that the
2911 * grace-period kthread will do force_quiescent_state() processing? 2516 * grace-period kthread will do force_quiescent_state() processing?
@@ -2936,13 +2541,7 @@ static void rcu_bind_gp_kthread(void)
2936 2541
2937 if (!tick_nohz_full_enabled()) 2542 if (!tick_nohz_full_enabled())
2938 return; 2543 return;
2939#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
2940 cpu = tick_do_timer_cpu;
2941 if (cpu >= 0 && cpu < nr_cpu_ids)
2942 set_cpus_allowed_ptr(current, cpumask_of(cpu));
2943#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
2944 housekeeping_affine(current); 2544 housekeeping_affine(current);
2945#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
2946} 2545}
2947 2546
2948/* Record the current task on dyntick-idle entry. */ 2547/* Record the current task on dyntick-idle entry. */
diff --git a/kernel/rcu/tree_trace.c b/kernel/rcu/tree_trace.c
deleted file mode 100644
index 6cea17a1ea30..000000000000
--- a/kernel/rcu/tree_trace.c
+++ /dev/null
@@ -1,494 +0,0 @@
1/*
2 * Read-Copy Update tracing for hierarchical implementation.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, you can access it online at
16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 *
18 * Copyright IBM Corporation, 2008
19 * Author: Paul E. McKenney
20 *
21 * Papers: http://www.rdrop.com/users/paulmck/RCU
22 *
23 * For detailed explanation of Read-Copy Update mechanism see -
24 * Documentation/RCU
25 *
26 */
27#include <linux/types.h>
28#include <linux/kernel.h>
29#include <linux/init.h>
30#include <linux/spinlock.h>
31#include <linux/smp.h>
32#include <linux/rcupdate.h>
33#include <linux/interrupt.h>
34#include <linux/sched.h>
35#include <linux/atomic.h>
36#include <linux/bitops.h>
37#include <linux/completion.h>
38#include <linux/percpu.h>
39#include <linux/notifier.h>
40#include <linux/cpu.h>
41#include <linux/mutex.h>
42#include <linux/debugfs.h>
43#include <linux/seq_file.h>
44#include <linux/prefetch.h>
45
46#define RCU_TREE_NONCORE
47#include "tree.h"
48#include "rcu.h"
49
50static int r_open(struct inode *inode, struct file *file,
51 const struct seq_operations *op)
52{
53 int ret = seq_open(file, op);
54 if (!ret) {
55 struct seq_file *m = (struct seq_file *)file->private_data;
56 m->private = inode->i_private;
57 }
58 return ret;
59}
60
61static void *r_start(struct seq_file *m, loff_t *pos)
62{
63 struct rcu_state *rsp = (struct rcu_state *)m->private;
64 *pos = cpumask_next(*pos - 1, cpu_possible_mask);
65 if ((*pos) < nr_cpu_ids)
66 return per_cpu_ptr(rsp->rda, *pos);
67 return NULL;
68}
69
70static void *r_next(struct seq_file *m, void *v, loff_t *pos)
71{
72 (*pos)++;
73 return r_start(m, pos);
74}
75
76static void r_stop(struct seq_file *m, void *v)
77{
78}
79
80static int show_rcubarrier(struct seq_file *m, void *v)
81{
82 struct rcu_state *rsp = (struct rcu_state *)m->private;
83 seq_printf(m, "bcc: %d bseq: %lu\n",
84 atomic_read(&rsp->barrier_cpu_count),
85 rsp->barrier_sequence);
86 return 0;
87}
88
89static int rcubarrier_open(struct inode *inode, struct file *file)
90{
91 return single_open(file, show_rcubarrier, inode->i_private);
92}
93
94static const struct file_operations rcubarrier_fops = {
95 .owner = THIS_MODULE,
96 .open = rcubarrier_open,
97 .read = seq_read,
98 .llseek = no_llseek,
99 .release = single_release,
100};
101
102#ifdef CONFIG_RCU_BOOST
103
104static char convert_kthread_status(unsigned int kthread_status)
105{
106 if (kthread_status > RCU_KTHREAD_MAX)
107 return '?';
108 return "SRWOY"[kthread_status];
109}
110
111#endif /* #ifdef CONFIG_RCU_BOOST */
112
113static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
114{
115 long ql, qll;
116
117 if (!rdp->beenonline)
118 return;
119 seq_printf(m, "%3d%cc=%ld g=%ld cnq=%d/%d:%d",
120 rdp->cpu,
121 cpu_is_offline(rdp->cpu) ? '!' : ' ',
122 ulong2long(rdp->completed), ulong2long(rdp->gpnum),
123 rdp->cpu_no_qs.b.norm,
124 rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu),
125 rdp->core_needs_qs);
126 seq_printf(m, " dt=%d/%llx/%d df=%lu",
127 rcu_dynticks_snap(rdp->dynticks),
128 rdp->dynticks->dynticks_nesting,
129 rdp->dynticks->dynticks_nmi_nesting,
130 rdp->dynticks_fqs);
131 seq_printf(m, " of=%lu", rdp->offline_fqs);
132 rcu_nocb_q_lengths(rdp, &ql, &qll);
133 qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist);
134 ql += rcu_segcblist_n_cbs(&rdp->cblist);
135 seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
136 qll, ql,
137 ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)],
138 ".R"[!rcu_segcblist_segempty(&rdp->cblist,
139 RCU_NEXT_READY_TAIL)],
140 ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)],
141 ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]);
142#ifdef CONFIG_RCU_BOOST
143 seq_printf(m, " kt=%d/%c ktl=%x",
144 per_cpu(rcu_cpu_has_work, rdp->cpu),
145 convert_kthread_status(per_cpu(rcu_cpu_kthread_status,
146 rdp->cpu)),
147 per_cpu(rcu_cpu_kthread_loops, rdp->cpu) & 0xffff);
148#endif /* #ifdef CONFIG_RCU_BOOST */
149 seq_printf(m, " b=%ld", rdp->blimit);
150 seq_printf(m, " ci=%lu nci=%lu co=%lu ca=%lu\n",
151 rdp->n_cbs_invoked, rdp->n_nocbs_invoked,
152 rdp->n_cbs_orphaned, rdp->n_cbs_adopted);
153}
154
155static int show_rcudata(struct seq_file *m, void *v)
156{
157 print_one_rcu_data(m, (struct rcu_data *)v);
158 return 0;
159}
160
161static const struct seq_operations rcudate_op = {
162 .start = r_start,
163 .next = r_next,
164 .stop = r_stop,
165 .show = show_rcudata,
166};
167
168static int rcudata_open(struct inode *inode, struct file *file)
169{
170 return r_open(inode, file, &rcudate_op);
171}
172
173static const struct file_operations rcudata_fops = {
174 .owner = THIS_MODULE,
175 .open = rcudata_open,
176 .read = seq_read,
177 .llseek = no_llseek,
178 .release = seq_release,
179};
180
181static int show_rcuexp(struct seq_file *m, void *v)
182{
183 int cpu;
184 struct rcu_state *rsp = (struct rcu_state *)m->private;
185 struct rcu_data *rdp;
186 unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
187
188 for_each_possible_cpu(cpu) {
189 rdp = per_cpu_ptr(rsp->rda, cpu);
190 s0 += atomic_long_read(&rdp->exp_workdone0);
191 s1 += atomic_long_read(&rdp->exp_workdone1);
192 s2 += atomic_long_read(&rdp->exp_workdone2);
193 s3 += atomic_long_read(&rdp->exp_workdone3);
194 }
195 seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu enq=%d sc=%lu\n",
196 rsp->expedited_sequence, s0, s1, s2, s3,
197 atomic_read(&rsp->expedited_need_qs),
198 rsp->expedited_sequence / 2);
199 return 0;
200}
201
202static int rcuexp_open(struct inode *inode, struct file *file)
203{
204 return single_open(file, show_rcuexp, inode->i_private);
205}
206
207static const struct file_operations rcuexp_fops = {
208 .owner = THIS_MODULE,
209 .open = rcuexp_open,
210 .read = seq_read,
211 .llseek = no_llseek,
212 .release = single_release,
213};
214
215#ifdef CONFIG_RCU_BOOST
216
217static void print_one_rcu_node_boost(struct seq_file *m, struct rcu_node *rnp)
218{
219 seq_printf(m, "%d:%d tasks=%c%c%c%c kt=%c ntb=%lu neb=%lu nnb=%lu ",
220 rnp->grplo, rnp->grphi,
221 "T."[list_empty(&rnp->blkd_tasks)],
222 "N."[!rnp->gp_tasks],
223 "E."[!rnp->exp_tasks],
224 "B."[!rnp->boost_tasks],
225 convert_kthread_status(rnp->boost_kthread_status),
226 rnp->n_tasks_boosted, rnp->n_exp_boosts,
227 rnp->n_normal_boosts);
228 seq_printf(m, "j=%04x bt=%04x\n",
229 (int)(jiffies & 0xffff),
230 (int)(rnp->boost_time & 0xffff));
231 seq_printf(m, " balk: nt=%lu egt=%lu bt=%lu nb=%lu ny=%lu nos=%lu\n",
232 rnp->n_balk_blkd_tasks,
233 rnp->n_balk_exp_gp_tasks,
234 rnp->n_balk_boost_tasks,
235 rnp->n_balk_notblocked,
236 rnp->n_balk_notyet,
237 rnp->n_balk_nos);
238}
239
240static int show_rcu_node_boost(struct seq_file *m, void *unused)
241{
242 struct rcu_node *rnp;
243
244 rcu_for_each_leaf_node(&rcu_preempt_state, rnp)
245 print_one_rcu_node_boost(m, rnp);
246 return 0;
247}
248
249static int rcu_node_boost_open(struct inode *inode, struct file *file)
250{
251 return single_open(file, show_rcu_node_boost, NULL);
252}
253
254static const struct file_operations rcu_node_boost_fops = {
255 .owner = THIS_MODULE,
256 .open = rcu_node_boost_open,
257 .read = seq_read,
258 .llseek = no_llseek,
259 .release = single_release,
260};
261
262#endif /* #ifdef CONFIG_RCU_BOOST */
263
264static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp)
265{
266 unsigned long gpnum;
267 int level = 0;
268 struct rcu_node *rnp;
269
270 gpnum = rsp->gpnum;
271 seq_printf(m, "c=%ld g=%ld s=%d jfq=%ld j=%x ",
272 ulong2long(rsp->completed), ulong2long(gpnum),
273 rsp->gp_state,
274 (long)(rsp->jiffies_force_qs - jiffies),
275 (int)(jiffies & 0xffff));
276 seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n",
277 rsp->n_force_qs, rsp->n_force_qs_ngp,
278 rsp->n_force_qs - rsp->n_force_qs_ngp,
279 READ_ONCE(rsp->n_force_qs_lh),
280 rsp->orphan_done.len_lazy,
281 rsp->orphan_done.len);
282 for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) {
283 if (rnp->level != level) {
284 seq_puts(m, "\n");
285 level = rnp->level;
286 }
287 seq_printf(m, "%lx/%lx->%lx %c%c>%c %d:%d ^%d ",
288 rnp->qsmask, rnp->qsmaskinit, rnp->qsmaskinitnext,
289 ".G"[rnp->gp_tasks != NULL],
290 ".E"[rnp->exp_tasks != NULL],
291 ".T"[!list_empty(&rnp->blkd_tasks)],
292 rnp->grplo, rnp->grphi, rnp->grpnum);
293 }
294 seq_puts(m, "\n");
295}
296
297static int show_rcuhier(struct seq_file *m, void *v)
298{
299 struct rcu_state *rsp = (struct rcu_state *)m->private;
300 print_one_rcu_state(m, rsp);
301 return 0;
302}
303
304static int rcuhier_open(struct inode *inode, struct file *file)
305{
306 return single_open(file, show_rcuhier, inode->i_private);
307}
308
309static const struct file_operations rcuhier_fops = {
310 .owner = THIS_MODULE,
311 .open = rcuhier_open,
312 .read = seq_read,
313 .llseek = no_llseek,
314 .release = single_release,
315};
316
317static void show_one_rcugp(struct seq_file *m, struct rcu_state *rsp)
318{
319 unsigned long flags;
320 unsigned long completed;
321 unsigned long gpnum;
322 unsigned long gpage;
323 unsigned long gpmax;
324 struct rcu_node *rnp = &rsp->node[0];
325
326 raw_spin_lock_irqsave_rcu_node(rnp, flags);
327 completed = READ_ONCE(rsp->completed);
328 gpnum = READ_ONCE(rsp->gpnum);
329 if (completed == gpnum)
330 gpage = 0;
331 else
332 gpage = jiffies - rsp->gp_start;
333 gpmax = rsp->gp_max;
334 raw_spin_unlock_irqrestore(&rnp->lock, flags);
335 seq_printf(m, "completed=%ld gpnum=%ld age=%ld max=%ld\n",
336 ulong2long(completed), ulong2long(gpnum), gpage, gpmax);
337}
338
339static int show_rcugp(struct seq_file *m, void *v)
340{
341 struct rcu_state *rsp = (struct rcu_state *)m->private;
342 show_one_rcugp(m, rsp);
343 return 0;
344}
345
346static int rcugp_open(struct inode *inode, struct file *file)
347{
348 return single_open(file, show_rcugp, inode->i_private);
349}
350
351static const struct file_operations rcugp_fops = {
352 .owner = THIS_MODULE,
353 .open = rcugp_open,
354 .read = seq_read,
355 .llseek = no_llseek,
356 .release = single_release,
357};
358
359static void print_one_rcu_pending(struct seq_file *m, struct rcu_data *rdp)
360{
361 if (!rdp->beenonline)
362 return;
363 seq_printf(m, "%3d%cnp=%ld ",
364 rdp->cpu,
365 cpu_is_offline(rdp->cpu) ? '!' : ' ',
366 rdp->n_rcu_pending);
367 seq_printf(m, "qsp=%ld rpq=%ld cbr=%ld cng=%ld ",
368 rdp->n_rp_core_needs_qs,
369 rdp->n_rp_report_qs,
370 rdp->n_rp_cb_ready,
371 rdp->n_rp_cpu_needs_gp);
372 seq_printf(m, "gpc=%ld gps=%ld nn=%ld ndw%ld\n",
373 rdp->n_rp_gp_completed,
374 rdp->n_rp_gp_started,
375 rdp->n_rp_nocb_defer_wakeup,
376 rdp->n_rp_need_nothing);
377}
378
379static int show_rcu_pending(struct seq_file *m, void *v)
380{
381 print_one_rcu_pending(m, (struct rcu_data *)v);
382 return 0;
383}
384
385static const struct seq_operations rcu_pending_op = {
386 .start = r_start,
387 .next = r_next,
388 .stop = r_stop,
389 .show = show_rcu_pending,
390};
391
392static int rcu_pending_open(struct inode *inode, struct file *file)
393{
394 return r_open(inode, file, &rcu_pending_op);
395}
396
397static const struct file_operations rcu_pending_fops = {
398 .owner = THIS_MODULE,
399 .open = rcu_pending_open,
400 .read = seq_read,
401 .llseek = no_llseek,
402 .release = seq_release,
403};
404
405static int show_rcutorture(struct seq_file *m, void *unused)
406{
407 seq_printf(m, "rcutorture test sequence: %lu %s\n",
408 rcutorture_testseq >> 1,
409 (rcutorture_testseq & 0x1) ? "(test in progress)" : "");
410 seq_printf(m, "rcutorture update version number: %lu\n",
411 rcutorture_vernum);
412 return 0;
413}
414
415static int rcutorture_open(struct inode *inode, struct file *file)
416{
417 return single_open(file, show_rcutorture, NULL);
418}
419
420static const struct file_operations rcutorture_fops = {
421 .owner = THIS_MODULE,
422 .open = rcutorture_open,
423 .read = seq_read,
424 .llseek = seq_lseek,
425 .release = single_release,
426};
427
428static struct dentry *rcudir;
429
430static int __init rcutree_trace_init(void)
431{
432 struct rcu_state *rsp;
433 struct dentry *retval;
434 struct dentry *rspdir;
435
436 rcudir = debugfs_create_dir("rcu", NULL);
437 if (!rcudir)
438 goto free_out;
439
440 for_each_rcu_flavor(rsp) {
441 rspdir = debugfs_create_dir(rsp->name, rcudir);
442 if (!rspdir)
443 goto free_out;
444
445 retval = debugfs_create_file("rcudata", 0444,
446 rspdir, rsp, &rcudata_fops);
447 if (!retval)
448 goto free_out;
449
450 retval = debugfs_create_file("rcuexp", 0444,
451 rspdir, rsp, &rcuexp_fops);
452 if (!retval)
453 goto free_out;
454
455 retval = debugfs_create_file("rcu_pending", 0444,
456 rspdir, rsp, &rcu_pending_fops);
457 if (!retval)
458 goto free_out;
459
460 retval = debugfs_create_file("rcubarrier", 0444,
461 rspdir, rsp, &rcubarrier_fops);
462 if (!retval)
463 goto free_out;
464
465#ifdef CONFIG_RCU_BOOST
466 if (rsp == &rcu_preempt_state) {
467 retval = debugfs_create_file("rcuboost", 0444,
468 rspdir, NULL, &rcu_node_boost_fops);
469 if (!retval)
470 goto free_out;
471 }
472#endif
473
474 retval = debugfs_create_file("rcugp", 0444,
475 rspdir, rsp, &rcugp_fops);
476 if (!retval)
477 goto free_out;
478
479 retval = debugfs_create_file("rcuhier", 0444,
480 rspdir, rsp, &rcuhier_fops);
481 if (!retval)
482 goto free_out;
483 }
484
485 retval = debugfs_create_file("rcutorture", 0444, rcudir,
486 NULL, &rcutorture_fops);
487 if (!retval)
488 goto free_out;
489 return 0;
490free_out:
491 debugfs_remove_recursive(rcudir);
492 return 1;
493}
494device_initcall(rcutree_trace_init);
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 273e869ca21d..00e77c470017 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -62,7 +62,9 @@
62#define MODULE_PARAM_PREFIX "rcupdate." 62#define MODULE_PARAM_PREFIX "rcupdate."
63 63
64#ifndef CONFIG_TINY_RCU 64#ifndef CONFIG_TINY_RCU
65extern int rcu_expedited; /* from sysctl */
65module_param(rcu_expedited, int, 0); 66module_param(rcu_expedited, int, 0);
67extern int rcu_normal; /* from sysctl */
66module_param(rcu_normal, int, 0); 68module_param(rcu_normal, int, 0);
67static int rcu_normal_after_boot; 69static int rcu_normal_after_boot;
68module_param(rcu_normal_after_boot, int, 0); 70module_param(rcu_normal_after_boot, int, 0);
@@ -379,6 +381,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
379 struct rcu_synchronize *rs_array) 381 struct rcu_synchronize *rs_array)
380{ 382{
381 int i; 383 int i;
384 int j;
382 385
383 /* Initialize and register callbacks for each flavor specified. */ 386 /* Initialize and register callbacks for each flavor specified. */
384 for (i = 0; i < n; i++) { 387 for (i = 0; i < n; i++) {
@@ -390,7 +393,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
390 } 393 }
391 init_rcu_head_on_stack(&rs_array[i].head); 394 init_rcu_head_on_stack(&rs_array[i].head);
392 init_completion(&rs_array[i].completion); 395 init_completion(&rs_array[i].completion);
393 (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu); 396 for (j = 0; j < i; j++)
397 if (crcu_array[j] == crcu_array[i])
398 break;
399 if (j == i)
400 (crcu_array[i])(&rs_array[i].head, wakeme_after_rcu);
394 } 401 }
395 402
396 /* Wait for all callbacks to be invoked. */ 403 /* Wait for all callbacks to be invoked. */
@@ -399,7 +406,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array,
399 (crcu_array[i] == call_rcu || 406 (crcu_array[i] == call_rcu ||
400 crcu_array[i] == call_rcu_bh)) 407 crcu_array[i] == call_rcu_bh))
401 continue; 408 continue;
402 wait_for_completion(&rs_array[i].completion); 409 for (j = 0; j < i; j++)
410 if (crcu_array[j] == crcu_array[i])
411 break;
412 if (j == i)
413 wait_for_completion(&rs_array[i].completion);
403 destroy_rcu_head_on_stack(&rs_array[i].head); 414 destroy_rcu_head_on_stack(&rs_array[i].head);
404 } 415 }
405} 416}
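The two new j-loops skip any entry whose callback function duplicates an earlier one, so a caller such as synchronize_rcu_mult(call_rcu, call_rcu_sched) does not post and wait on a redundant callback in configurations where call_rcu() aliases call_rcu_sched(); the kernel/sched/core.c hunk later in this patch relies on that to drop its CONFIG_PREEMPT special case. A stand-alone userspace analogue of the skip-duplicates idiom, for illustration only:

	#include <stdio.h>

	typedef void (*crcu_t)(void);
	static void flavor_a(void) { }
	static void flavor_b(void) { }

	int main(void)
	{
		/* Last entry aliases the first, as call_rcu can alias call_rcu_sched. */
		crcu_t crcu_array[] = { flavor_a, flavor_b, flavor_a };
		int n = 3, i, j;

		for (i = 0; i < n; i++) {
			for (j = 0; j < i; j++)
				if (crcu_array[j] == crcu_array[i])
					break;
			if (j == i)
				printf("entry %d: post callback\n", i);
			else
				printf("entry %d: duplicate of %d, skipped\n", i, j);
		}
		return 0;
	}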
@@ -560,15 +571,30 @@ static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
560DEFINE_SRCU(tasks_rcu_exit_srcu); 571DEFINE_SRCU(tasks_rcu_exit_srcu);
561 572
562/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ 573/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
563static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10; 574#define RCU_TASK_STALL_TIMEOUT (HZ * 60 * 10)
575static int rcu_task_stall_timeout __read_mostly = RCU_TASK_STALL_TIMEOUT;
564module_param(rcu_task_stall_timeout, int, 0644); 576module_param(rcu_task_stall_timeout, int, 0644);
565 577
566static void rcu_spawn_tasks_kthread(void); 578static void rcu_spawn_tasks_kthread(void);
567static struct task_struct *rcu_tasks_kthread_ptr; 579static struct task_struct *rcu_tasks_kthread_ptr;
568 580
569/* 581/**
 570 * Post an RCU-tasks callback. First call must be from process context 582 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
 571 * after the scheduler is fully operational. 583 * @rhp: structure to be used for queueing the RCU updates.
584 * @func: actual callback function to be invoked after the grace period
585 *
586 * The callback function will be invoked some time after a full grace
587 * period elapses, in other words after all currently executing RCU
588 * read-side critical sections have completed. call_rcu_tasks() assumes
589 * that the read-side critical sections end at a voluntary context
590 * switch (not a preemption!), entry into idle, or transition to usermode
591 * execution. As such, there are no read-side primitives analogous to
592 * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
593 * to determine that all tasks have passed through a safe state, not so
 594 * much for data-structure synchronization.
595 *
596 * See the description of call_rcu() for more detailed information on
597 * memory ordering guarantees.
572 */ 598 */
573void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func) 599void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func)
574{ 600{
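A sketch of the intended usage pattern, with a hypothetical trampoline descriptor and helpers that are not taken from this series: retire a piece of dynamically generated code only after every task has passed through a voluntary context switch, idle, or usermode execution, so that no task can still be running in it.

	#include <linux/moduleloader.h>
	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct trampoline {		/* hypothetical generated-code descriptor */
		void *text;
		struct rcu_head rcu;
	};

	static void trampoline_free(struct rcu_head *rhp)
	{
		struct trampoline *tr = container_of(rhp, struct trampoline, rcu);

		module_memfree(tr->text);	/* assumed release of the executable page */
		kfree(tr);
	}

	static void trampoline_retire(struct trampoline *tr)
	{
		/* All pointers to tr->text must already be unpublished. */
		call_rcu_tasks(&tr->rcu, trampoline_free);
	}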
@@ -851,6 +877,23 @@ static void rcu_spawn_tasks_kthread(void)
851 877
852#endif /* #ifdef CONFIG_TASKS_RCU */ 878#endif /* #ifdef CONFIG_TASKS_RCU */
853 879
880#ifndef CONFIG_TINY_RCU
881
882/*
883 * Print any non-default Tasks RCU settings.
884 */
885static void __init rcu_tasks_bootup_oddness(void)
886{
887#ifdef CONFIG_TASKS_RCU
888 if (rcu_task_stall_timeout != RCU_TASK_STALL_TIMEOUT)
889 pr_info("\tTasks-RCU CPU stall warnings timeout set to %d (rcu_task_stall_timeout).\n", rcu_task_stall_timeout);
890 else
891 pr_info("\tTasks RCU enabled.\n");
892#endif /* #ifdef CONFIG_TASKS_RCU */
893}
894
895#endif /* #ifndef CONFIG_TINY_RCU */
896
854#ifdef CONFIG_PROVE_RCU 897#ifdef CONFIG_PROVE_RCU
855 898
856/* 899/*
@@ -935,3 +978,25 @@ late_initcall(rcu_verify_early_boot_tests);
935#else 978#else
936void rcu_early_boot_tests(void) {} 979void rcu_early_boot_tests(void) {}
937#endif /* CONFIG_PROVE_RCU */ 980#endif /* CONFIG_PROVE_RCU */
981
982#ifndef CONFIG_TINY_RCU
983
984/*
985 * Print any significant non-default boot-time settings.
986 */
987void __init rcupdate_announce_bootup_oddness(void)
988{
989 if (rcu_normal)
990 pr_info("\tNo expedited grace period (rcu_normal).\n");
991 else if (rcu_normal_after_boot)
992 pr_info("\tNo expedited grace period (rcu_normal_after_boot).\n");
993 else if (rcu_expedited)
994 pr_info("\tAll grace periods are expedited (rcu_expedited).\n");
995 if (rcu_cpu_stall_suppress)
996 pr_info("\tRCU CPU stall warnings suppressed (rcu_cpu_stall_suppress).\n");
997 if (rcu_cpu_stall_timeout != CONFIG_RCU_CPU_STALL_TIMEOUT)
998 pr_info("\tRCU CPU stall warnings timeout set to %d (rcu_cpu_stall_timeout).\n", rcu_cpu_stall_timeout);
999 rcu_tasks_bootup_oddness();
1000}
1001
1002#endif /* #ifndef CONFIG_TINY_RCU */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 803c3bc274c4..e91138fcde86 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5874,15 +5874,9 @@ int sched_cpu_deactivate(unsigned int cpu)
5874 * users of this state to go away such that all new such users will 5874 * users of this state to go away such that all new such users will
5875 * observe it. 5875 * observe it.
5876 * 5876 *
5877 * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might
5878 * not imply sync_sched(), so wait for both.
5879 *
5880 * Do sync before park smpboot threads to take care the rcu boost case. 5877 * Do sync before park smpboot threads to take care the rcu boost case.
5881 */ 5878 */
5882 if (IS_ENABLED(CONFIG_PREEMPT)) 5879 synchronize_rcu_mult(call_rcu, call_rcu_sched);
5883 synchronize_rcu_mult(call_rcu, call_rcu_sched);
5884 else
5885 synchronize_rcu();
5886 5880
5887 if (!sched_smp_initialized) 5881 if (!sched_smp_initialized)
5888 return 0; 5882 return 0;
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 4008d9f95dd7..ac09bc29eb08 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -126,56 +126,6 @@ config NO_HZ_FULL_ALL
126 Note the boot CPU will still be kept outside the range to 126 Note the boot CPU will still be kept outside the range to
127 handle the timekeeping duty. 127 handle the timekeeping duty.
128 128
129config NO_HZ_FULL_SYSIDLE
130 bool "Detect full-system idle state for full dynticks system"
131 depends on NO_HZ_FULL
132 default n
133 help
134 At least one CPU must keep the scheduling-clock tick running for
135 timekeeping purposes whenever there is a non-idle CPU, where
136 "non-idle" also includes dynticks CPUs as long as they are
137 running non-idle tasks. Because the underlying adaptive-tick
138 support cannot distinguish between all CPUs being idle and
139 all CPUs each running a single task in dynticks mode, the
140 underlying support simply ensures that there is always a CPU
141 handling the scheduling-clock tick, whether or not all CPUs
142 are idle. This Kconfig option enables scalable detection of
143 the all-CPUs-idle state, thus allowing the scheduling-clock
144 tick to be disabled when all CPUs are idle. Note that scalable
145 detection of the all-CPUs-idle state means that larger systems
146 will be slower to declare the all-CPUs-idle state.
147
148 Say Y if you would like to help debug all-CPUs-idle detection.
149
150 Say N if you are unsure.
151
152config NO_HZ_FULL_SYSIDLE_SMALL
153 int "Number of CPUs above which large-system approach is used"
154 depends on NO_HZ_FULL_SYSIDLE
155 range 1 NR_CPUS
156 default 8
157 help
158 The full-system idle detection mechanism takes a lazy approach
159 on large systems, as is required to attain decent scalability.
160 However, on smaller systems, scalability is not anywhere near as
161 large a concern as is energy efficiency. The sysidle subsystem
162 therefore uses a fast but non-scalable algorithm for small
163 systems and a lazier but scalable algorithm for large systems.
164 This Kconfig parameter defines the number of CPUs in the largest
165 system that will be considered to be "small".
166
167 The default value will be fine in most cases. Battery-powered
168 systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
169 numbers of CPUs, and (3) are suffering from battery-lifetime
170 problems due to long sysidle latencies might wish to experiment
171 with larger values for this Kconfig parameter. On the other
172 hand, they might be even better served by disabling NO_HZ_FULL
173 entirely, given that NO_HZ_FULL is intended for HPC and
174 real-time workloads that at present do not tend to be run on
175 battery-powered systems.
176
177 Take the default if you are unsure.
178
179config NO_HZ 129config NO_HZ
180 bool "Old Idle dynticks config" 130 bool "Old Idle dynticks config"
181 depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS 131 depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e4587ebe52c7..a7a751a75cfd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1301,189 +1301,7 @@ config DEBUG_CREDENTIALS
1301 1301
1302 If unsure, say N. 1302 If unsure, say N.
1303 1303
1304menu "RCU Debugging" 1304source "kernel/rcu/Kconfig.debug"
1305
1306config PROVE_RCU
1307 def_bool PROVE_LOCKING
1308
1309config PROVE_RCU_REPEATEDLY
1310 bool "RCU debugging: don't disable PROVE_RCU on first splat"
1311 depends on PROVE_RCU
1312 default n
1313 help
1314 By itself, PROVE_RCU will disable checking upon issuing the
1315 first warning (or "splat"). This feature prevents such
1316 disabling, allowing multiple RCU-lockdep warnings to be printed
1317 on a single reboot.
1318
1319 Say Y to allow multiple RCU-lockdep warnings per boot.
1320
1321 Say N if you are unsure.
1322
1323config SPARSE_RCU_POINTER
1324 bool "RCU debugging: sparse-based checks for pointer usage"
1325 default n
1326 help
1327 This feature enables the __rcu sparse annotation for
1328 RCU-protected pointers. This annotation will cause sparse
1329 to flag any non-RCU use of annotated pointers. This can be
1330 helpful when debugging RCU usage. Please note that this feature
1331 is not intended to enforce code cleanliness; it is instead merely
1332 a debugging aid.
1333
1334 Say Y to make sparse flag questionable use of RCU-protected pointers
1335
1336 Say N if you are unsure.
1337
1338config TORTURE_TEST
1339 tristate
1340 default n
1341
1342config RCU_PERF_TEST
1343 tristate "performance tests for RCU"
1344 depends on DEBUG_KERNEL
1345 select TORTURE_TEST
1346 select SRCU
1347 select TASKS_RCU
1348 default n
1349 help
1350 This option provides a kernel module that runs performance
1351 tests on the RCU infrastructure. The kernel module may be built
1352 after the fact on the running kernel to be tested, if desired.
1353
1354 Say Y here if you want RCU performance tests to be built into
1355 the kernel.
1356 Say M if you want the RCU performance tests to build as a module.
1357 Say N if you are unsure.
1358
1359config RCU_TORTURE_TEST
1360 tristate "torture tests for RCU"
1361 depends on DEBUG_KERNEL
1362 select TORTURE_TEST
1363 select SRCU
1364 select TASKS_RCU
1365 default n
1366 help
1367 This option provides a kernel module that runs torture tests
1368 on the RCU infrastructure. The kernel module may be built
1369 after the fact on the running kernel to be tested, if desired.
1370
1371 Say Y here if you want RCU torture tests to be built into
1372 the kernel.
1373 Say M if you want the RCU torture tests to build as a module.
1374 Say N if you are unsure.
1375
1376config RCU_TORTURE_TEST_SLOW_PREINIT
1377 bool "Slow down RCU grace-period pre-initialization to expose races"
1378 depends on RCU_TORTURE_TEST
1379 help
1380 This option delays grace-period pre-initialization (the
1381 propagation of CPU-hotplug changes up the rcu_node combining
1382 tree) for a few jiffies between initializing each pair of
1383 consecutive rcu_node structures. This helps to expose races
1384 involving grace-period pre-initialization, in other words, it
1385 makes your kernel less stable. It can also greatly increase
1386 grace-period latency, especially on systems with large numbers
1387 of CPUs. This is useful when torture-testing RCU, but in
1388 almost no other circumstance.
1389
1390 Say Y here if you want your system to crash and hang more often.
1391 Say N if you want a sane system.
1392
1393config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
1394 int "How much to slow down RCU grace-period pre-initialization"
1395 range 0 5
1396 default 3
1397 depends on RCU_TORTURE_TEST_SLOW_PREINIT
1398 help
1399 This option specifies the number of jiffies to wait between
1400 each rcu_node structure pre-initialization step.
1401
1402config RCU_TORTURE_TEST_SLOW_INIT
1403 bool "Slow down RCU grace-period initialization to expose races"
1404 depends on RCU_TORTURE_TEST
1405 help
1406 This option delays grace-period initialization for a few
1407 jiffies between initializing each pair of consecutive
1408 rcu_node structures. This helps to expose races involving
1409 grace-period initialization, in other words, it makes your
1410 kernel less stable. It can also greatly increase grace-period
1411 latency, especially on systems with large numbers of CPUs.
1412 This is useful when torture-testing RCU, but in almost no
1413 other circumstance.
1414
1415 Say Y here if you want your system to crash and hang more often.
1416 Say N if you want a sane system.
1417
1418config RCU_TORTURE_TEST_SLOW_INIT_DELAY
1419 int "How much to slow down RCU grace-period initialization"
1420 range 0 5
1421 default 3
1422 depends on RCU_TORTURE_TEST_SLOW_INIT
1423 help
1424 This option specifies the number of jiffies to wait between
1425 each rcu_node structure initialization.
1426
1427config RCU_TORTURE_TEST_SLOW_CLEANUP
1428 bool "Slow down RCU grace-period cleanup to expose races"
1429 depends on RCU_TORTURE_TEST
1430 help
1431 This option delays grace-period cleanup for a few jiffies
1432 between cleaning up each pair of consecutive rcu_node
1433 structures. This helps to expose races involving grace-period
1434 cleanup, in other words, it makes your kernel less stable.
1435 It can also greatly increase grace-period latency, especially
1436 on systems with large numbers of CPUs. This is useful when
1437 torture-testing RCU, but in almost no other circumstance.
1438
1439 Say Y here if you want your system to crash and hang more often.
1440 Say N if you want a sane system.
1441
1442config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
1443 int "How much to slow down RCU grace-period cleanup"
1444 range 0 5
1445 default 3
1446 depends on RCU_TORTURE_TEST_SLOW_CLEANUP
1447 help
1448 This option specifies the number of jiffies to wait between
1449 each rcu_node structure cleanup operation.
1450
1451config RCU_CPU_STALL_TIMEOUT
1452 int "RCU CPU stall timeout in seconds"
1453 depends on RCU_STALL_COMMON
1454 range 3 300
1455 default 21
1456 help
1457	  If a given RCU grace period extends for more than the specified
1458 number of seconds, a CPU stall warning is printed. If the
1459 RCU grace period persists, additional CPU stall warnings are
1460 printed at more widely spaced intervals.
1461
1462config RCU_TRACE
1463 bool "Enable tracing for RCU"
1464 depends on DEBUG_KERNEL
1465 default y if TREE_RCU
1466 select TRACE_CLOCK
1467 help
1468	  This option provides tracing for RCU, which presents statistics
1469	  in debugfs for debugging the RCU implementation. It also enables
1470 additional tracepoints for ftrace-style event tracing.
1471
1472	  Say Y here if you want to enable RCU tracing.
1473 Say N if you are unsure.
1474
1475config RCU_EQS_DEBUG
1476 bool "Provide debugging asserts for adding NO_HZ support to an arch"
1477 depends on DEBUG_KERNEL
1478 help
1479 This option provides consistency checks in RCU's handling of
1480 NO_HZ. These checks have proven quite helpful in detecting
1481 bugs in arch-specific NO_HZ code.
1482
1483	  Say N here if you need ultimate kernel/user switch latencies.
1484	  Say Y if you are unsure.
1485
1486endmenu # "RCU Debugging"
1487 1305
1488config DEBUG_WQ_FORCE_RR_CPU 1306config DEBUG_WQ_FORCE_RR_CPU
1489 bool "Force round-robin CPU selection for unbound work items" 1307 bool "Force round-robin CPU selection for unbound work items"
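The stall-timeout, tracing, and EQS-debug options above appear to survive this series (they are still referenced by the rcutorture configs below), so they can still be toggled in a test .config. A minimal sketch, assuming the kernel's scripts/config helper and a .config at the top of the source tree; the values shown are only illustrative:

    # Set the options discussed in the help text above, then refresh
    # dependent symbols.
    ./scripts/config --file .config --set-val RCU_CPU_STALL_TIMEOUT 21
    ./scripts/config --file .config --enable RCU_TRACE
    ./scripts/config --file .config --enable RCU_EQS_DEBUG
    make olddefconfig
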
diff --git a/lib/Makefile b/lib/Makefile
index 0166fbc0fa81..07fbe6a75692 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -25,9 +25,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
25 earlycpio.o seq_buf.o siphash.o \ 25 earlycpio.o seq_buf.o siphash.o \
26 nmi_backtrace.o nodemask.o win_minmax.o 26 nmi_backtrace.o nodemask.o win_minmax.o
27 27
28CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER
29CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER
30
31lib-$(CONFIG_MMU) += ioremap.o 28lib-$(CONFIG_MMU) += ioremap.o
32lib-$(CONFIG_SMP) += cpumask.o 29lib-$(CONFIG_SMP) += cpumask.o
33lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o 30lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o
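With the per-object CONFIG_SPARSE_RCU_POINTER defines gone, RCU pointer checking under sparse no longer needs special CFLAGS for these files. A minimal sketch of re-checking the two affected objects, assuming sparse is installed; C=2 forces the check even when the objects are already built:

    # Run sparse (including its RCU address-space checks) on the two
    # objects that previously carried the extra define.
    make C=2 lib/radix-tree.o lib/idr.o
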
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 4b9569fa931b..c7e4d73fe1ce 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -5533,23 +5533,6 @@ sub process {
5533 } 5533 }
5534 } 5534 }
5535 5535
5536# Check for expedited grace periods that interrupt non-idle non-nohz
5537# online CPUs. These expedited can therefore degrade real-time response
5538# if used carelessly, and should be avoided where not absolutely
5539# needed. It is always OK to use synchronize_rcu_expedited() and
5540# synchronize_sched_expedited() at boot time (before real-time applications
5541# start) and in error situations where real-time response is compromised in
5542# any case. Note that synchronize_srcu_expedited() does -not- interrupt
5543# other CPUs, so don't warn on uses of synchronize_srcu_expedited().
5544# Of course, nothing comes for free, and srcu_read_lock() and
5545# srcu_read_unlock() do contain full memory barriers in payment for
5546# synchronize_srcu_expedited() non-interruption properties.
5547 if ($line =~ /\b(synchronize_rcu_expedited|synchronize_sched_expedited)\(/) {
5548 WARN("EXPEDITED_RCU_GRACE_PERIOD",
5549 "expedited RCU grace periods should be avoided where they can degrade real-time response\n" . $herecurr);
5550
5551 }
5552
5553# check of hardware specific defines 5536# check of hardware specific defines
5554 if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) { 5537 if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
5555 CHK("ARCH_DEFINES", 5538 CHK("ARCH_DEFINES",
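With the EXPEDITED_RCU_GRACE_PERIOD check deleted, checkpatch no longer flags patches that call synchronize_rcu_expedited() or synchronize_sched_expedited(). A quick way to confirm, where the patch file name is only a placeholder:

    # Run checkpatch on a patch that adds an expedited grace period;
    # no EXPEDITED_RCU_GRACE_PERIOD warning should appear after this change.
    ./scripts/checkpatch.pl --strict 0001-example.patch
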
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index eee31e261bf7..70fca318a82b 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -27,7 +27,7 @@ cat $1 > $T/.config
27 27
28cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' | 28cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
29awk ' 29awk '
30BEGIN { 30{
31 print "if grep -q \"" $0 "\" < '"$T/.config"'"; 31 print "if grep -q \"" $0 "\" < '"$T/.config"'";
32 print "then"; 32 print "then";
33 print "\t:"; 33 print "\t:";
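The awk change above moves the generated grep commands out of a BEGIN block and into an unqualified rule, so one check is emitted per expected Kconfig line instead of a single check against an empty record. A small illustration of the difference (the CONFIG names are arbitrary):

    # BEGIN runs once before any input is read, so $0 is empty here:
    printf 'CONFIG_A=y\nCONFIG_B=y\n' | awk 'BEGIN { print "saw: " $0 }'
    # An unqualified rule runs once per input line:
    printf 'CONFIG_A=y\nCONFIG_B=y\n' | awk '{ print "saw: " $0 }'
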
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 00cb0db2643d..c29f2ec0bf9f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -45,7 +45,7 @@ T=/tmp/test-linux.sh.$$
45trap 'rm -rf $T' 0 45trap 'rm -rf $T' 0
46mkdir $T 46mkdir $T
47 47
48grep -v 'CONFIG_[A-Z]*_TORTURE_TEST' < ${config_template} > $T/config 48grep -v 'CONFIG_[A-Z]*_TORTURE_TEST=' < ${config_template} > $T/config
49cat << ___EOF___ >> $T/config 49cat << ___EOF___ >> $T/config
50CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD" 50CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
51CONFIG_VIRTIO_PCI=y 51CONFIG_VIRTIO_PCI=y
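The tightened grep pattern strips only lines that actually assign a *_TORTURE_TEST option, rather than every line containing that substring. A small illustration; the longer option name is hypothetical:

    # The assignment line is dropped; the (hypothetical) longer option
    # name that merely contains the substring is kept.
    printf 'CONFIG_RCU_TORTURE_TEST=m\nCONFIG_RCU_TORTURE_TEST_EXAMPLE=y\n' |
        grep -v 'CONFIG_[A-Z]*_TORTURE_TEST='
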
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 3b3c1b693ee1..50091de3a911 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -296,10 +296,7 @@ if test -d .git
296then 296then
297 git status >> $resdir/$ds/testid.txt 297 git status >> $resdir/$ds/testid.txt
298 git rev-parse HEAD >> $resdir/$ds/testid.txt 298 git rev-parse HEAD >> $resdir/$ds/testid.txt
299 if ! git diff HEAD > $T/git-diff 2>&1 299 git diff HEAD >> $resdir/$ds/testid.txt
300 then
301 cp $T/git-diff $resdir/$ds
302 fi
303fi 300fi
304___EOF___ 301___EOF___
305awk < $T/cfgcpu.pack \ 302awk < $T/cfgcpu.pack \
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index a3a1a05a2b5c..6a0b9f69faad 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -9,6 +9,8 @@ TREE08
9TREE09 9TREE09
10SRCU-N 10SRCU-N
11SRCU-P 11SRCU-P
12SRCU-t
13SRCU-u
12TINY01 14TINY01
13TINY02 15TINY02
14TASKS01 16TASKS01
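The new SRCU-t and SRCU-u entries exercise Tiny SRCU. A sketch of running just those scenarios from the top of a kernel tree; --configs and --duration are existing kvm.sh options, and the 10-minute duration is only illustrative:

    # Build and boot only the new Tiny SRCU rcutorture scenarios.
    tools/testing/selftests/rcutorture/bin/kvm.sh --configs "SRCU-t SRCU-u" --duration 10
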
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
new file mode 100644
index 000000000000..84a7d51b7481
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-C.boot
@@ -0,0 +1 @@
rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 1a087c3c8bb8..2da8b49589a0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -5,4 +5,4 @@ CONFIG_HOTPLUG_CPU=y
5CONFIG_PREEMPT_NONE=y 5CONFIG_PREEMPT_NONE=y
6CONFIG_PREEMPT_VOLUNTARY=n 6CONFIG_PREEMPT_VOLUNTARY=n
7CONFIG_PREEMPT=n 7CONFIG_PREEMPT=n
8CONFIG_RCU_EXPERT=y 8#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
index 4837430a71c0..ab7ccd38232b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -2,7 +2,11 @@ CONFIG_RCU_TRACE=n
2CONFIG_SMP=y 2CONFIG_SMP=y
3CONFIG_NR_CPUS=8 3CONFIG_NR_CPUS=8
4CONFIG_HOTPLUG_CPU=y 4CONFIG_HOTPLUG_CPU=y
5CONFIG_RCU_EXPERT=y
6CONFIG_RCU_FANOUT=2
7CONFIG_RCU_FANOUT_LEAF=2
5CONFIG_PREEMPT_NONE=n 8CONFIG_PREEMPT_NONE=n
6CONFIG_PREEMPT_VOLUNTARY=n 9CONFIG_PREEMPT_VOLUNTARY=n
7CONFIG_PREEMPT=y 10CONFIG_PREEMPT=y
8#CHECK#CONFIG_RCU_EXPERT=n 11CONFIG_DEBUG_LOCK_ALLOC=y
12CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
new file mode 100644
index 000000000000..6c78022c8cd8
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -0,0 +1,10 @@
1CONFIG_SMP=n
2CONFIG_PREEMPT_NONE=y
3CONFIG_PREEMPT_VOLUNTARY=n
4CONFIG_PREEMPT=n
5#CHECK#CONFIG_TINY_SRCU=y
6CONFIG_RCU_TRACE=n
7CONFIG_DEBUG_LOCK_ALLOC=n
8CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
9CONFIG_DEBUG_ATOMIC_SLEEP=y
10#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
new file mode 100644
index 000000000000..238bfe3bd0cc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
@@ -0,0 +1 @@
rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
new file mode 100644
index 000000000000..6bc24e99862f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -0,0 +1,9 @@
1CONFIG_SMP=n
2CONFIG_PREEMPT_NONE=y
3CONFIG_PREEMPT_VOLUNTARY=n
4CONFIG_PREEMPT=n
5#CHECK#CONFIG_TINY_SRCU=y
6CONFIG_RCU_TRACE=n
7CONFIG_DEBUG_LOCK_ALLOC=n
8CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
9CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
new file mode 100644
index 000000000000..84a7d51b7481
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -0,0 +1 @@
rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index a59f7686e219..d8674264318d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -6,10 +6,9 @@ CONFIG_PREEMPT=n
6CONFIG_HZ_PERIODIC=y 6CONFIG_HZ_PERIODIC=y
7CONFIG_NO_HZ_IDLE=n 7CONFIG_NO_HZ_IDLE=n
8CONFIG_NO_HZ_FULL=n 8CONFIG_NO_HZ_FULL=n
9CONFIG_RCU_TRACE=y
10CONFIG_PROVE_LOCKING=y 9CONFIG_PROVE_LOCKING=y
11CONFIG_PROVE_RCU_REPEATEDLY=y
12#CHECK#CONFIG_PROVE_RCU=y 10#CHECK#CONFIG_PROVE_RCU=y
13CONFIG_DEBUG_LOCK_ALLOC=y 11CONFIG_DEBUG_LOCK_ALLOC=y
12CONFIG_DEBUG_OBJECTS=y
14CONFIG_DEBUG_OBJECTS_RCU_HEAD=y 13CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
15CONFIG_PREEMPT_COUNT=y 14CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 359cb258f639..b5b53973c01e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -10,12 +10,9 @@ CONFIG_RCU_FAST_NO_HZ=y
10CONFIG_RCU_TRACE=y 10CONFIG_RCU_TRACE=y
11CONFIG_HOTPLUG_CPU=y 11CONFIG_HOTPLUG_CPU=y
12CONFIG_MAXSMP=y 12CONFIG_MAXSMP=y
13CONFIG_CPUMASK_OFFSTACK=y
13CONFIG_RCU_NOCB_CPU=y 14CONFIG_RCU_NOCB_CPU=y
14CONFIG_RCU_NOCB_CPU_ZERO=y
15CONFIG_DEBUG_LOCK_ALLOC=n 15CONFIG_DEBUG_LOCK_ALLOC=n
16CONFIG_RCU_BOOST=n 16CONFIG_RCU_BOOST=n
17CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 17CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
18CONFIG_RCU_EXPERT=y 18CONFIG_RCU_EXPERT=y
19CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
20CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
21CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index adc3abc82fb8..1d14e1383016 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1 +1,5 @@
1rcutorture.torture_type=rcu_bh maxcpus=8 1rcutorture.torture_type=rcu_bh maxcpus=8
2rcutree.gp_preinit_delay=3
3rcutree.gp_init_delay=3
4rcutree.gp_cleanup_delay=3
5rcu_nocbs=0
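The grace-period delays that used to be compile-time CONFIG_RCU_TORTURE_TEST_SLOW_* options are now passed as rcutree boot parameters. A hedged way to confirm the values on a running test kernel, assuming the parameters are exposed under /sys/module/rcutree/parameters:

    # Read back the grace-period delay settings supplied on the boot line.
    cat /sys/module/rcutree/parameters/gp_preinit_delay \
        /sys/module/rcutree/parameters/gp_init_delay \
        /sys/module/rcutree/parameters/gp_cleanup_delay
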
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index c1ab5926568b..35e639e39366 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -18,9 +18,6 @@ CONFIG_RCU_NOCB_CPU=n
18CONFIG_DEBUG_LOCK_ALLOC=y 18CONFIG_DEBUG_LOCK_ALLOC=y
19CONFIG_PROVE_LOCKING=n 19CONFIG_PROVE_LOCKING=n
20CONFIG_RCU_BOOST=n 20CONFIG_RCU_BOOST=n
21CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
22CONFIG_RCU_EXPERT=y 21CONFIG_RCU_EXPERT=y
23CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y 22CONFIG_DEBUG_OBJECTS=y
24CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
25CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
26CONFIG_DEBUG_OBJECTS_RCU_HEAD=y 23CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
index 3b93ee544e70..2dc31b16e506 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -14,9 +14,5 @@ CONFIG_RCU_FANOUT_LEAF=2
14CONFIG_RCU_NOCB_CPU=n 14CONFIG_RCU_NOCB_CPU=n
15CONFIG_DEBUG_LOCK_ALLOC=n 15CONFIG_DEBUG_LOCK_ALLOC=n
16CONFIG_RCU_BOOST=y 16CONFIG_RCU_BOOST=y
17CONFIG_RCU_KTHREAD_PRIO=2
18CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 17CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
19CONFIG_RCU_EXPERT=y 18CONFIG_RCU_EXPERT=y
20CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
21CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
22CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 120c0c88d100..5d2cc0bd50a0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -1 +1,5 @@
1rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 1rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30
2rcutree.gp_preinit_delay=3
3rcutree.gp_init_delay=3
4rcutree.gp_cleanup_delay=3
5rcutree.kthread_prio=2
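CONFIG_RCU_KTHREAD_PRIO is likewise replaced by the rcutree.kthread_prio boot parameter. A hedged spot check that the priority took effect, assuming the grace-period kthread is named rcu_preempt in this CONFIG_PREEMPT_RCU scenario:

    # Show the scheduling policy and priority of the RCU grace-period kthread.
    chrt -p "$(pgrep -x rcu_preempt)"
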
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index 5af758e783c7..27d22695d64c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -15,11 +15,7 @@ CONFIG_SUSPEND=n
15CONFIG_HIBERNATION=n 15CONFIG_HIBERNATION=n
16CONFIG_RCU_FANOUT=4 16CONFIG_RCU_FANOUT=4
17CONFIG_RCU_FANOUT_LEAF=3 17CONFIG_RCU_FANOUT_LEAF=3
18CONFIG_RCU_NOCB_CPU=n
19CONFIG_DEBUG_LOCK_ALLOC=n 18CONFIG_DEBUG_LOCK_ALLOC=n
20CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 19CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
21CONFIG_RCU_EXPERT=y 20CONFIG_RCU_EXPERT=y
22CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
23CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
24CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
25CONFIG_RCU_EQS_DEBUG=y 21CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index d4cdc0d74e16..2dde0d9964e3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -13,12 +13,8 @@ CONFIG_HOTPLUG_CPU=y
13CONFIG_RCU_FANOUT=6 13CONFIG_RCU_FANOUT=6
14CONFIG_RCU_FANOUT_LEAF=6 14CONFIG_RCU_FANOUT_LEAF=6
15CONFIG_RCU_NOCB_CPU=y 15CONFIG_RCU_NOCB_CPU=y
16CONFIG_RCU_NOCB_CPU_NONE=y
17CONFIG_DEBUG_LOCK_ALLOC=y 16CONFIG_DEBUG_LOCK_ALLOC=y
18CONFIG_PROVE_LOCKING=y 17CONFIG_PROVE_LOCKING=y
19#CHECK#CONFIG_PROVE_RCU=y 18#CHECK#CONFIG_PROVE_RCU=y
20CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 19CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
21CONFIG_RCU_EXPERT=y 20CONFIG_RCU_EXPERT=y
22CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
23CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
24CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
index 15b3e1a86f74..c7fd050dfcd9 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -1,2 +1,5 @@
1rcutorture.torture_type=sched 1rcutorture.torture_type=sched
2rcupdate.rcu_self_test_sched=1 2rcupdate.rcu_self_test_sched=1
3rcutree.gp_preinit_delay=3
4rcutree.gp_init_delay=3
5rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index 4cb02bd28f08..05a4eec3f27b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -18,8 +18,6 @@ CONFIG_RCU_NOCB_CPU=n
18CONFIG_DEBUG_LOCK_ALLOC=y 18CONFIG_DEBUG_LOCK_ALLOC=y
19CONFIG_PROVE_LOCKING=y 19CONFIG_PROVE_LOCKING=y
20#CHECK#CONFIG_PROVE_RCU=y 20#CHECK#CONFIG_PROVE_RCU=y
21CONFIG_DEBUG_OBJECTS=y
21CONFIG_DEBUG_OBJECTS_RCU_HEAD=y 22CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
22CONFIG_RCU_EXPERT=y 23CONFIG_RCU_EXPERT=y
23CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
24CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
25CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
index dd90f28ed700..ad18b52a2cad 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -2,3 +2,6 @@ rcupdate.rcu_self_test=1
2rcupdate.rcu_self_test_bh=1 2rcupdate.rcu_self_test_bh=1
3rcupdate.rcu_self_test_sched=1 3rcupdate.rcu_self_test_sched=1
4rcutree.rcu_fanout_exact=1 4rcutree.rcu_fanout_exact=1
5rcutree.gp_preinit_delay=3
6rcutree.gp_init_delay=3
7rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index b12a3ea1867e..0f4759f4232e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,6 +1,5 @@
1CONFIG_SMP=y 1CONFIG_SMP=y
2CONFIG_NR_CPUS=16 2CONFIG_NR_CPUS=16
3CONFIG_CPUMASK_OFFSTACK=y
4CONFIG_PREEMPT_NONE=y 3CONFIG_PREEMPT_NONE=y
5CONFIG_PREEMPT_VOLUNTARY=n 4CONFIG_PREEMPT_VOLUNTARY=n
6CONFIG_PREEMPT=n 5CONFIG_PREEMPT=n
@@ -9,16 +8,11 @@ CONFIG_HZ_PERIODIC=n
9CONFIG_NO_HZ_IDLE=n 8CONFIG_NO_HZ_IDLE=n
10CONFIG_NO_HZ_FULL=y 9CONFIG_NO_HZ_FULL=y
11CONFIG_NO_HZ_FULL_ALL=n 10CONFIG_NO_HZ_FULL_ALL=n
12CONFIG_NO_HZ_FULL_SYSIDLE=y
13CONFIG_RCU_FAST_NO_HZ=n 11CONFIG_RCU_FAST_NO_HZ=n
14CONFIG_RCU_TRACE=y 12CONFIG_RCU_TRACE=y
15CONFIG_HOTPLUG_CPU=y 13CONFIG_HOTPLUG_CPU=y
16CONFIG_RCU_FANOUT=2 14CONFIG_RCU_FANOUT=2
17CONFIG_RCU_FANOUT_LEAF=2 15CONFIG_RCU_FANOUT_LEAF=2
18CONFIG_RCU_NOCB_CPU=n
19CONFIG_DEBUG_LOCK_ALLOC=n 16CONFIG_DEBUG_LOCK_ALLOC=n
20CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 17CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
21CONFIG_RCU_EXPERT=y 18CONFIG_RCU_EXPERT=y
22CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y
23CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y
24CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index 099cc63c6a3b..fb1c763c10c5 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -15,7 +15,6 @@ CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3 15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_LEAF=2 16CONFIG_RCU_FANOUT_LEAF=2
17CONFIG_RCU_NOCB_CPU=y 17CONFIG_RCU_NOCB_CPU=y
18CONFIG_RCU_NOCB_CPU_ALL=y
19CONFIG_DEBUG_LOCK_ALLOC=n 18CONFIG_DEBUG_LOCK_ALLOC=n
20CONFIG_PROVE_LOCKING=n 19CONFIG_PROVE_LOCKING=n
21CONFIG_RCU_BOOST=n 20CONFIG_RCU_BOOST=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
deleted file mode 100644
index 2ad13f0d29cc..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T
+++ /dev/null
@@ -1,21 +0,0 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=16
3CONFIG_PREEMPT_NONE=n
4CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=y
6#CHECK#CONFIG_PREEMPT_RCU=y
7CONFIG_HZ_PERIODIC=n
8CONFIG_NO_HZ_IDLE=y
9CONFIG_NO_HZ_FULL=n
10CONFIG_RCU_FAST_NO_HZ=n
11CONFIG_RCU_TRACE=y
12CONFIG_HOTPLUG_CPU=n
13CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_LEAF=2
17CONFIG_RCU_NOCB_CPU=y
18CONFIG_RCU_NOCB_CPU_ALL=y
19CONFIG_DEBUG_LOCK_ALLOC=n
20CONFIG_RCU_BOOST=n
21CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index fb066dc82769..1bd8efc4141e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -2,3 +2,4 @@ rcutorture.torture_type=sched
2rcupdate.rcu_self_test=1 2rcupdate.rcu_self_test=1
3rcupdate.rcu_self_test_sched=1 3rcupdate.rcu_self_test_sched=1
4rcutree.rcu_fanout_exact=1 4rcutree.rcu_fanout_exact=1
5rcu_nocbs=0-7
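With CONFIG_RCU_NOCB_CPU_ALL gone, full callback offloading is requested here with rcu_nocbs=0-7 on the boot line instead. A hedged check on the booted guest, assuming the no-CBs kthreads keep their usual rcuo<flavor>/<cpu> naming:

    # List the per-CPU no-CBs callback kthreads created by rcu_nocbs=.
    ps -eo comm | grep '^rcuo'
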
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
index 917d2517b5b5..fb05ef5279b4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
@@ -1,21 +1,16 @@
1CONFIG_SMP=y 1CONFIG_SMP=n
2CONFIG_NR_CPUS=8 2CONFIG_PREEMPT_NONE=y
3CONFIG_PREEMPT_NONE=n
4CONFIG_PREEMPT_VOLUNTARY=n 3CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=y 4CONFIG_PREEMPT=n
6#CHECK#CONFIG_PREEMPT_RCU=y 5#CHECK#CONFIG_TINY_RCU=y
7CONFIG_HZ_PERIODIC=n 6CONFIG_HZ_PERIODIC=n
8CONFIG_NO_HZ_IDLE=y 7CONFIG_NO_HZ_IDLE=y
9CONFIG_NO_HZ_FULL=n 8CONFIG_NO_HZ_FULL=n
10CONFIG_RCU_FAST_NO_HZ=n 9CONFIG_RCU_FAST_NO_HZ=n
11CONFIG_RCU_TRACE=y
12CONFIG_HOTPLUG_CPU=n
13CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n
15CONFIG_RCU_FANOUT=3
16CONFIG_RCU_FANOUT_LEAF=3
17CONFIG_RCU_NOCB_CPU=n 10CONFIG_RCU_NOCB_CPU=n
18CONFIG_DEBUG_LOCK_ALLOC=y 11CONFIG_DEBUG_LOCK_ALLOC=n
19CONFIG_PROVE_LOCKING=n 12CONFIG_PROVE_LOCKING=n
20CONFIG_RCU_BOOST=n 13CONFIG_RCU_BOOST=n
21CONFIG_DEBUG_OBJECTS_RCU_HEAD=n 14CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
15CONFIG_RCU_EXPERT=y
16CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
index a312f671a29a..721cfda76ab2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -7,7 +7,6 @@ CONFIG_HZ_PERIODIC=n
7CONFIG_NO_HZ_IDLE=y 7CONFIG_NO_HZ_IDLE=y
8CONFIG_NO_HZ_FULL=n 8CONFIG_NO_HZ_FULL=n
9CONFIG_RCU_FAST_NO_HZ=n 9CONFIG_RCU_FAST_NO_HZ=n
10CONFIG_RCU_TRACE=n
11CONFIG_HOTPLUG_CPU=n 10CONFIG_HOTPLUG_CPU=n
12CONFIG_SUSPEND=n 11CONFIG_SUSPEND=n
13CONFIG_HIBERNATION=n 12CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
index 985fb170d13c..7629f5dd73b2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -8,7 +8,6 @@ CONFIG_HZ_PERIODIC=n
8CONFIG_NO_HZ_IDLE=y 8CONFIG_NO_HZ_IDLE=y
9CONFIG_NO_HZ_FULL=n 9CONFIG_NO_HZ_FULL=n
10CONFIG_RCU_FAST_NO_HZ=n 10CONFIG_RCU_FAST_NO_HZ=n
11CONFIG_RCU_TRACE=n
12CONFIG_HOTPLUG_CPU=n 11CONFIG_HOTPLUG_CPU=n
13CONFIG_SUSPEND=n 12CONFIG_SUSPEND=n
14CONFIG_HIBERNATION=n 13CONFIG_HIBERNATION=n
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
index 24396ae8355b..a75b16991a92 100644
--- a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -18,7 +18,6 @@ CONFIG_PROVE_RCU
18 18
19 In common code tested by TREE_RCU test cases. 19 In common code tested by TREE_RCU test cases.
20 20
21CONFIG_NO_HZ_FULL_SYSIDLE
22CONFIG_RCU_NOCB_CPU 21CONFIG_RCU_NOCB_CPU
23 22
24 Meaningless for TINY_RCU. 23 Meaningless for TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 364801b1a230..9ad3f89c8dc7 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -9,28 +9,20 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
9CONFIG_HOTPLUG_CPU -- Do half. (Every second.) 9CONFIG_HOTPLUG_CPU -- Do half. (Every second.)
10CONFIG_HZ_PERIODIC -- Do one. 10CONFIG_HZ_PERIODIC -- Do one.
11CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) 11CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
12CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. 12CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
13CONFIG_NO_HZ_FULL_SYSIDLE -- Do one.
14CONFIG_PREEMPT -- Do half. (First three and #8.) 13CONFIG_PREEMPT -- Do half. (First three and #8.)
15CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. 14CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
16CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. 15CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
17CONFIG_PROVE_RCU_REPEATEDLY -- Do one.
18CONFIG_RCU_BOOST -- one of PREEMPT_RCU. 16CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
19CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing.
20CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. 17CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
21CONFIG_RCU_FANOUT_LEAF -- Do one non-default. 18CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
22CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. 19CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
23CONFIG_RCU_NOCB_CPU -- Do three, see below. 20CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
24CONFIG_RCU_NOCB_CPU_ALL -- Do one. 21 rcu_nocbs=0, and one with all rcu_nocbs CPUs.
25CONFIG_RCU_NOCB_CPU_NONE -- Do one.
26CONFIG_RCU_NOCB_CPU_ZERO -- Do one.
27CONFIG_RCU_TRACE -- Do half. 22CONFIG_RCU_TRACE -- Do half.
28CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. 23CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
29CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations. 24CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
30CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not. 25CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
31CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP -- Do for all but a couple TREE scenarios.
32CONFIG_RCU_TORTURE_TEST_SLOW_INIT -- Do for all but a couple TREE scenarios.
33CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT -- Do for all but a couple TREE scenarios.
34 26
35RCU-bh: Do one with PREEMPT and one with !PREEMPT. 27RCU-bh: Do one with PREEMPT and one with !PREEMPT.
36RCU-sched: Do one with PREEMPT but not BOOST. 28RCU-sched: Do one with PREEMPT but not BOOST.
@@ -52,10 +44,6 @@ CONFIG_64BIT
52 44
53 Used only to check CONFIG_RCU_FANOUT value, inspection suffices. 45 Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
54 46
55CONFIG_NO_HZ_FULL_SYSIDLE_SMALL
56
57 Defer until Frederic uses this.
58
59CONFIG_PREEMPT_COUNT 47CONFIG_PREEMPT_COUNT
60CONFIG_PREEMPT_RCU 48CONFIG_PREEMPT_RCU
61 49
@@ -78,30 +66,16 @@ CONFIG_RCU_TORTURE_TEST_RUNNABLE
78 66
79 Always used in KVM testing. 67 Always used in KVM testing.
80 68
81CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT_DELAY
82CONFIG_RCU_TORTURE_TEST_SLOW_INIT_DELAY
83CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY
84
85 Inspection suffices, ignore.
86
87CONFIG_PREEMPT_RCU 69CONFIG_PREEMPT_RCU
88CONFIG_TREE_RCU 70CONFIG_TREE_RCU
89CONFIG_TINY_RCU 71CONFIG_TINY_RCU
90 72
91 These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP. 73 These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
92 74
93CONFIG_SPARSE_RCU_POINTER
94
95 Makes sense only for sparse runs, not for kernel builds.
96
97CONFIG_SRCU 75CONFIG_SRCU
98CONFIG_TASKS_RCU 76CONFIG_TASKS_RCU
99 77
100 Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable. 78 Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
101 79
102CONFIG_RCU_TRACE
103
104 Implied by CONFIG_RCU_TRACE for Tree RCU.
105
106 80
107boot parameters ignored: TBD 81boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
index 8ff89043d0a9..c9e8bc5082a7 100755
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -1,4 +1,4 @@
1#!/bin/awk -f 1#!/usr/bin/awk -f
2 2
3# Modify SRCU for formal verification. The first argument should be srcu.h and 3# Modify SRCU for formal verification. The first argument should be srcu.h and
4# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the 4# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the