-rw-r--r--  Documentation/atomic_ops.txt | 45
-rw-r--r--  Documentation/kernel-parameters.txt | 14
-rw-r--r--  Documentation/kernel-per-CPU-kthreads.txt | 34
-rw-r--r--  Documentation/memory-barriers.txt | 42
-rw-r--r--  Documentation/timers/NO_HZ.txt | 10
-rw-r--r--  include/linux/lockdep.h | 7
-rw-r--r--  include/linux/rcupdate.h | 38
-rw-r--r--  include/linux/srcu.h | 2
-rw-r--r--  init/Kconfig | 13
-rw-r--r--  kernel/rcu/rcutorture.c | 27
-rw-r--r--  kernel/rcu/srcu.c | 19
-rw-r--r--  kernel/rcu/tiny.c | 14
-rw-r--r--  kernel/rcu/tree.c | 129
-rw-r--r--  kernel/rcu/tree_plugin.h | 100
-rw-r--r--  kernel/rcu/update.c | 72
-rw-r--r--  lib/Kconfig.debug | 11
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm.sh | 2
17 files changed, 357 insertions, 222 deletions
diff --git a/Documentation/atomic_ops.txt b/Documentation/atomic_ops.txt
index 183e41bdcb69..dab6da3382d9 100644
--- a/Documentation/atomic_ops.txt
+++ b/Documentation/atomic_ops.txt
@@ -201,11 +201,11 @@ These routines add 1 and subtract 1, respectively, from the given | |||
201 | atomic_t and return the new counter value after the operation is | 201 | atomic_t and return the new counter value after the operation is |
202 | performed. | 202 | performed. |
203 | 203 | ||
204 | Unlike the above routines, it is required that explicit memory | 204 | Unlike the above routines, it is required that these primitives |
205 | barriers are performed before and after the operation. It must be | 205 | include explicit memory barriers that are performed before and after |
206 | done such that all memory operations before and after the atomic | 206 | the operation. It must be done such that all memory operations before |
207 | operation calls are strongly ordered with respect to the atomic | 207 | and after the atomic operation calls are strongly ordered with respect |
208 | operation itself. | 208 | to the atomic operation itself. |
209 | 209 | ||
210 | For example, it should behave as if a smp_mb() call existed both | 210 | For example, it should behave as if a smp_mb() call existed both |
211 | before and after the atomic operation. | 211 | before and after the atomic operation. |
@@ -233,21 +233,21 @@ These two routines increment and decrement by 1, respectively, the | |||
233 | given atomic counter. They return a boolean indicating whether the | 233 | given atomic counter. They return a boolean indicating whether the |
234 | resulting counter value was zero or not. | 234 | resulting counter value was zero or not. |
235 | 235 | ||
236 | It requires explicit memory barrier semantics around the operation as | 236 | Again, these primitives provide explicit memory barrier semantics around |
237 | above. | 237 | the atomic operation. |
238 | 238 | ||
239 | int atomic_sub_and_test(int i, atomic_t *v); | 239 | int atomic_sub_and_test(int i, atomic_t *v); |
240 | 240 | ||
241 | This is identical to atomic_dec_and_test() except that an explicit | 241 | This is identical to atomic_dec_and_test() except that an explicit |
242 | decrement is given instead of the implicit "1". It requires explicit | 242 | decrement is given instead of the implicit "1". This primitive must |
243 | memory barrier semantics around the operation. | 243 | provide explicit memory barrier semantics around the operation. |
244 | 244 | ||
245 | int atomic_add_negative(int i, atomic_t *v); | 245 | int atomic_add_negative(int i, atomic_t *v); |
246 | 246 | ||
247 | The given increment is added to the given atomic counter value. A | 247 | The given increment is added to the given atomic counter value. A boolean |
248 | boolean is return which indicates whether the resulting counter value | 248 | is return which indicates whether the resulting counter value is negative. |
249 | is negative. It requires explicit memory barrier semantics around the | 249 | This primitive must provide explicit memory barrier semantics around |
250 | operation. | 250 | the operation. |
251 | 251 | ||
252 | Then: | 252 | Then: |
253 | 253 | ||
@@ -257,7 +257,7 @@ This performs an atomic exchange operation on the atomic variable v, setting | |||
257 | the given new value. It returns the old value that the atomic variable v had | 257 | the given new value. It returns the old value that the atomic variable v had |
258 | just before the operation. | 258 | just before the operation. |
259 | 259 | ||
260 | atomic_xchg requires explicit memory barriers around the operation. | 260 | atomic_xchg must provide explicit memory barriers around the operation. |
261 | 261 | ||
262 | int atomic_cmpxchg(atomic_t *v, int old, int new); | 262 | int atomic_cmpxchg(atomic_t *v, int old, int new); |
263 | 263 | ||
@@ -266,7 +266,7 @@ with the given old and new values. Like all atomic_xxx operations, | |||
266 | atomic_cmpxchg will only satisfy its atomicity semantics as long as all | 266 | atomic_cmpxchg will only satisfy its atomicity semantics as long as all |
267 | other accesses of *v are performed through atomic_xxx operations. | 267 | other accesses of *v are performed through atomic_xxx operations. |
268 | 268 | ||
269 | atomic_cmpxchg requires explicit memory barriers around the operation. | 269 | atomic_cmpxchg must provide explicit memory barriers around the operation. |
270 | 270 | ||
271 | The semantics for atomic_cmpxchg are the same as those defined for 'cas' | 271 | The semantics for atomic_cmpxchg are the same as those defined for 'cas' |
272 | below. | 272 | below. |
@@ -279,8 +279,8 @@ If the atomic value v is not equal to u, this function adds a to v, and | |||
279 | returns non zero. If v is equal to u then it returns zero. This is done as | 279 | returns non zero. If v is equal to u then it returns zero. This is done as |
280 | an atomic operation. | 280 | an atomic operation. |
281 | 281 | ||
282 | atomic_add_unless requires explicit memory barriers around the operation | 282 | atomic_add_unless must provide explicit memory barriers around the |
283 | unless it fails (returns 0). | 283 | operation unless it fails (returns 0). |
284 | 284 | ||
285 | atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0) | 285 | atomic_inc_not_zero, equivalent to atomic_add_unless(v, 1, 0) |
286 | 286 | ||
@@ -460,9 +460,9 @@ the return value into an int. There are other places where things | |||
460 | like this occur as well. | 460 | like this occur as well. |
461 | 461 | ||
462 | These routines, like the atomic_t counter operations returning values, | 462 | These routines, like the atomic_t counter operations returning values, |
463 | require explicit memory barrier semantics around their execution. All | 463 | must provide explicit memory barrier semantics around their execution. |
464 | memory operations before the atomic bit operation call must be made | 464 | All memory operations before the atomic bit operation call must be |
465 | visible globally before the atomic bit operation is made visible. | 465 | made visible globally before the atomic bit operation is made visible. |
466 | Likewise, the atomic bit operation must be visible globally before any | 466 | Likewise, the atomic bit operation must be visible globally before any |
467 | subsequent memory operation is made visible. For example: | 467 | subsequent memory operation is made visible. For example: |
468 | 468 | ||
@@ -536,8 +536,9 @@ except that two underscores are prefixed to the interface name. | |||
536 | These non-atomic variants also do not require any special memory | 536 | These non-atomic variants also do not require any special memory |
537 | barrier semantics. | 537 | barrier semantics. |
538 | 538 | ||
539 | The routines xchg() and cmpxchg() need the same exact memory barriers | 539 | The routines xchg() and cmpxchg() must provide the same exact |
540 | as the atomic and bit operations returning values. | 540 | memory-barrier semantics as the atomic and bit operations returning |
541 | values. | ||
541 | 542 | ||
542 | Spinlocks and rwlocks have memory barrier expectations as well. | 543 | Spinlocks and rwlocks have memory barrier expectations as well. |
543 | The rule to follow is simple: | 544 | The rule to follow is simple: |
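For illustration only (not part of this patch), here is a minimal sketch of the classic refcount-release pattern that depends on the full-barrier behavior the revised atomic_ops.txt wording describes; the structure and function names are hypothetical:

#include <linux/atomic.h>
#include <linux/slab.h>

/* Hypothetical object whose lifetime is managed by an atomic_t refcount. */
struct my_obj {
        atomic_t refcount;
        int payload;
};

static void my_obj_put(struct my_obj *p)
{
        /*
         * atomic_dec_and_test() must behave as if smp_mb() were issued
         * both before and after the decrement: the "before" barrier makes
         * this CPU's earlier stores to p->payload globally visible no
         * later than the decrement, and the "after" barrier keeps the CPU
         * that sees the count hit zero from reordering later accesses
         * ahead of the decrement.
         */
        if (atomic_dec_and_test(&p->refcount))
                kfree(p);
}

Without those implied barriers, the CPU whose decrement takes the count to zero could free and reuse the object while another CPU's earlier stores to it were still propagating.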
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 94de410ec341..5368ba701de2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2997,11 +2997,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2997 | value is one, and maximum value is HZ. | 2997 | value is one, and maximum value is HZ. |
2998 | 2998 | ||
2999 | rcutree.kthread_prio= [KNL,BOOT] | 2999 | rcutree.kthread_prio= [KNL,BOOT] |
3000 | Set the SCHED_FIFO priority of the RCU | 3000 | Set the SCHED_FIFO priority of the RCU per-CPU |
3001 | per-CPU kthreads (rcuc/N). This value is also | 3001 | kthreads (rcuc/N). This value is also used for |
3002 | used for the priority of the RCU boost threads | 3002 | the priority of the RCU boost threads (rcub/N) |
3003 | (rcub/N). Valid values are 1-99 and the default | 3003 | and for the RCU grace-period kthreads (rcu_bh, |
3004 | is 1 (the least-favored priority). | 3004 | rcu_preempt, and rcu_sched). If RCU_BOOST is |
3005 | set, valid values are 1-99 and the default is 1 | ||
3006 | (the least-favored priority). Otherwise, when | ||
3007 | RCU_BOOST is not set, valid values are 0-99 and | ||
3008 | the default is zero (non-realtime operation). | ||
3005 | 3009 | ||
3006 | rcutree.rcu_nocb_leader_stride= [KNL] | 3010 | rcutree.rcu_nocb_leader_stride= [KNL] |
3007 | Set the number of NOCB kthread groups, which | 3011 | Set the number of NOCB kthread groups, which |
diff --git a/Documentation/kernel-per-CPU-kthreads.txt b/Documentation/kernel-per-CPU-kthreads.txt
index f3cd299fcc41..f4cbfe0ba108 100644
--- a/Documentation/kernel-per-CPU-kthreads.txt
+++ b/Documentation/kernel-per-CPU-kthreads.txt
@@ -190,20 +190,24 @@ To reduce its OS jitter, do any of the following: | |||
190 | on each CPU, including cs_dbs_timer() and od_dbs_timer(). | 190 | on each CPU, including cs_dbs_timer() and od_dbs_timer(). |
191 | WARNING: Please check your CPU specifications to | 191 | WARNING: Please check your CPU specifications to |
192 | make sure that this is safe on your particular system. | 192 | make sure that this is safe on your particular system. |
193 | d. It is not possible to entirely get rid of OS jitter | 193 | d. As of v3.18, Christoph Lameter's on-demand vmstat workers |
194 | from vmstat_update() on CONFIG_SMP=y systems, but you | 194 | commit prevents OS jitter due to vmstat_update() on |
195 | can decrease its frequency by writing a large value | 195 | CONFIG_SMP=y systems. Before v3.18, it is not possible |
196 | to /proc/sys/vm/stat_interval. The default value is | 196 | to entirely get rid of the OS jitter, but you can |
197 | HZ, for an interval of one second. Of course, larger | 197 | decrease its frequency by writing a large value to |
198 | values will make your virtual-memory statistics update | 198 | /proc/sys/vm/stat_interval. The default value is HZ, |
199 | more slowly. Of course, you can also run your workload | 199 | for an interval of one second. Of course, larger values |
200 | at a real-time priority, thus preempting vmstat_update(), | 200 | will make your virtual-memory statistics update more |
201 | slowly. Of course, you can also run your workload at | ||
202 | a real-time priority, thus preempting vmstat_update(), | ||
201 | but if your workload is CPU-bound, this is a bad idea. | 203 | but if your workload is CPU-bound, this is a bad idea. |
202 | However, there is an RFC patch from Christoph Lameter | 204 | However, there is an RFC patch from Christoph Lameter |
203 | (based on an earlier one from Gilad Ben-Yossef) that | 205 | (based on an earlier one from Gilad Ben-Yossef) that |
204 | reduces or even eliminates vmstat overhead for some | 206 | reduces or even eliminates vmstat overhead for some |
205 | workloads at https://lkml.org/lkml/2013/9/4/379. | 207 | workloads at https://lkml.org/lkml/2013/9/4/379. |
206 | e. If running on high-end powerpc servers, build with | 208 | e. Boot with "elevator=noop" to avoid workqueue use by |
209 | the block layer. | ||
210 | f. If running on high-end powerpc servers, build with | ||
207 | CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS | 211 | CONFIG_PPC_RTAS_DAEMON=n. This prevents the RTAS |
208 | daemon from running on each CPU every second or so. | 212 | daemon from running on each CPU every second or so. |
209 | (This will require editing Kconfig files and will defeat | 213 | (This will require editing Kconfig files and will defeat |
@@ -211,12 +215,12 @@ To reduce its OS jitter, do any of the following: | |||
211 | due to the rtas_event_scan() function. | 215 | due to the rtas_event_scan() function. |
212 | WARNING: Please check your CPU specifications to | 216 | WARNING: Please check your CPU specifications to |
213 | make sure that this is safe on your particular system. | 217 | make sure that this is safe on your particular system. |
214 | f. If running on Cell Processor, build your kernel with | 218 | g. If running on Cell Processor, build your kernel with |
215 | CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from | 219 | CBE_CPUFREQ_SPU_GOVERNOR=n to avoid OS jitter from |
216 | spu_gov_work(). | 220 | spu_gov_work(). |
217 | WARNING: Please check your CPU specifications to | 221 | WARNING: Please check your CPU specifications to |
218 | make sure that this is safe on your particular system. | 222 | make sure that this is safe on your particular system. |
219 | g. If running on PowerMAC, build your kernel with | 223 | h. If running on PowerMAC, build your kernel with |
220 | CONFIG_PMAC_RACKMETER=n to disable the CPU-meter, | 224 | CONFIG_PMAC_RACKMETER=n to disable the CPU-meter, |
221 | avoiding OS jitter from rackmeter_do_timer(). | 225 | avoiding OS jitter from rackmeter_do_timer(). |
222 | 226 | ||
@@ -258,8 +262,12 @@ Purpose: Detect software lockups on each CPU. | |||
258 | To reduce its OS jitter, do at least one of the following: | 262 | To reduce its OS jitter, do at least one of the following: |
259 | 1. Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these | 263 | 1. Build with CONFIG_LOCKUP_DETECTOR=n, which will prevent these |
260 | kthreads from being created in the first place. | 264 | kthreads from being created in the first place. |
261 | 2. Echo a zero to /proc/sys/kernel/watchdog to disable the | 265 | 2. Boot with "nosoftlockup=0", which will also prevent these kthreads |
266 | from being created. Other related watchdog and softlockup boot | ||
267 | parameters may be found in Documentation/kernel-parameters.txt | ||
268 | and Documentation/watchdog/watchdog-parameters.txt. | ||
269 | 3. Echo a zero to /proc/sys/kernel/watchdog to disable the | ||
262 | watchdog timer. | 270 | watchdog timer. |
263 | 3. Echo a large number of /proc/sys/kernel/watchdog_thresh in | 271 | 4. Echo a large number of /proc/sys/kernel/watchdog_thresh in |
264 | order to reduce the frequency of OS jitter due to the watchdog | 272 | order to reduce the frequency of OS jitter due to the watchdog |
265 | timer down to a level that is acceptable for your workload. | 273 | timer down to a level that is acceptable for your workload. |
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index ca2387ef27ab..6974f1c2b4e1 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -592,9 +592,9 @@ See also the subsection on "Cache Coherency" for a more thorough example. | |||
592 | CONTROL DEPENDENCIES | 592 | CONTROL DEPENDENCIES |
593 | -------------------- | 593 | -------------------- |
594 | 594 | ||
595 | A control dependency requires a full read memory barrier, not simply a data | 595 | A load-load control dependency requires a full read memory barrier, not |
596 | dependency barrier to make it work correctly. Consider the following bit of | 596 | simply a data dependency barrier to make it work correctly. Consider the |
597 | code: | 597 | following bit of code: |
598 | 598 | ||
599 | q = ACCESS_ONCE(a); | 599 | q = ACCESS_ONCE(a); |
600 | if (q) { | 600 | if (q) { |
@@ -615,14 +615,15 @@ case what's actually required is: | |||
615 | } | 615 | } |
616 | 616 | ||
617 | However, stores are not speculated. This means that ordering -is- provided | 617 | However, stores are not speculated. This means that ordering -is- provided |
618 | in the following example: | 618 | for load-store control dependencies, as in the following example: |
619 | 619 | ||
620 | q = ACCESS_ONCE(a); | 620 | q = ACCESS_ONCE(a); |
621 | if (q) { | 621 | if (q) { |
622 | ACCESS_ONCE(b) = p; | 622 | ACCESS_ONCE(b) = p; |
623 | } | 623 | } |
624 | 624 | ||
625 | Please note that ACCESS_ONCE() is not optional! Without the | 625 | Control dependencies pair normally with other types of barriers. |
626 | That said, please note that ACCESS_ONCE() is not optional! Without the | ||
626 | ACCESS_ONCE(), might combine the load from 'a' with other loads from | 627 | ACCESS_ONCE(), might combine the load from 'a' with other loads from |
627 | 'a', and the store to 'b' with other stores to 'b', with possible highly | 628 | 'a', and the store to 'b' with other stores to 'b', with possible highly |
628 | counterintuitive effects on ordering. | 629 | counterintuitive effects on ordering. |
@@ -813,6 +814,8 @@ In summary: | |||
813 | barrier() can help to preserve your control dependency. Please | 814 | barrier() can help to preserve your control dependency. Please |
814 | see the Compiler Barrier section for more information. | 815 | see the Compiler Barrier section for more information. |
815 | 816 | ||
817 | (*) Control dependencies pair normally with other types of barriers. | ||
818 | |||
816 | (*) Control dependencies do -not- provide transitivity. If you | 819 | (*) Control dependencies do -not- provide transitivity. If you |
817 | need transitivity, use smp_mb(). | 820 | need transitivity, use smp_mb(). |
818 | 821 | ||
@@ -823,14 +826,14 @@ SMP BARRIER PAIRING | |||
823 | When dealing with CPU-CPU interactions, certain types of memory barrier should | 826 | When dealing with CPU-CPU interactions, certain types of memory barrier should |
824 | always be paired. A lack of appropriate pairing is almost certainly an error. | 827 | always be paired. A lack of appropriate pairing is almost certainly an error. |
825 | 828 | ||
826 | General barriers pair with each other, though they also pair with | 829 | General barriers pair with each other, though they also pair with most |
827 | most other types of barriers, albeit without transitivity. An acquire | 830 | other types of barriers, albeit without transitivity. An acquire barrier |
828 | barrier pairs with a release barrier, but both may also pair with other | 831 | pairs with a release barrier, but both may also pair with other barriers, |
829 | barriers, including of course general barriers. A write barrier pairs | 832 | including of course general barriers. A write barrier pairs with a data |
830 | with a data dependency barrier, an acquire barrier, a release barrier, | 833 | dependency barrier, a control dependency, an acquire barrier, a release |
831 | a read barrier, or a general barrier. Similarly a read barrier or a | 834 | barrier, a read barrier, or a general barrier. Similarly a read barrier, |
832 | data dependency barrier pairs with a write barrier, an acquire barrier, | 835 | control dependency, or a data dependency barrier pairs with a write |
833 | a release barrier, or a general barrier: | 836 | barrier, an acquire barrier, a release barrier, or a general barrier: |
834 | 837 | ||
835 | CPU 1 CPU 2 | 838 | CPU 1 CPU 2 |
836 | =============== =============== | 839 | =============== =============== |
@@ -850,6 +853,19 @@ Or: | |||
850 | <data dependency barrier> | 853 | <data dependency barrier> |
851 | y = *x; | 854 | y = *x; |
852 | 855 | ||
856 | Or even: | ||
857 | |||
858 | CPU 1 CPU 2 | ||
859 | =============== =============================== | ||
860 | r1 = ACCESS_ONCE(y); | ||
861 | <general barrier> | ||
862 | ACCESS_ONCE(x) = 1; if (r2 = ACCESS_ONCE(x)) { | ||
863 | <implicit control dependency> | ||
864 | ACCESS_ONCE(y) = 1; | ||
865 | } | ||
866 | |||
867 | assert(r1 == 0 || r2 == 0); | ||
868 | |||
853 | Basically, the read barrier always has to be there, even though it can be of | 869 | Basically, the read barrier always has to be there, even though it can be of |
854 | the "weaker" type. | 870 | the "weaker" type. |
855 | 871 | ||
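To restate the barrier-pairing example added above as compilable code (a sketch only: x, y, r1, and r2 are hypothetical globals that start at zero, and each function is assumed to run on its own CPU):

#include <linux/compiler.h>     /* ACCESS_ONCE() */
#include <asm/barrier.h>        /* smp_mb() */

static int x, y;
static int r1, r2;

static void cpu1(void)
{
        r1 = ACCESS_ONCE(y);
        smp_mb();                       /* the general barrier */
        ACCESS_ONCE(x) = 1;
}

static void cpu2(void)
{
        r2 = ACCESS_ONCE(x);
        if (r2) {
                /*
                 * Implicit control dependency: this store cannot be
                 * reordered before the load of x above.
                 */
                ACCESS_ONCE(y) = 1;
        }
}

/* After both CPUs finish, the pairing guarantees r1 == 0 || r2 == 0. */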
diff --git a/Documentation/timers/NO_HZ.txt b/Documentation/timers/NO_HZ.txt
index cca122f25120..6eaf576294f3 100644
--- a/Documentation/timers/NO_HZ.txt
+++ b/Documentation/timers/NO_HZ.txt
@@ -158,13 +158,9 @@ not come for free: | |||
158 | to the need to inform kernel subsystems (such as RCU) about | 158 | to the need to inform kernel subsystems (such as RCU) about |
159 | the change in mode. | 159 | the change in mode. |
160 | 160 | ||
161 | 3. POSIX CPU timers on adaptive-tick CPUs may miss their deadlines | 161 | 3. POSIX CPU timers prevent CPUs from entering adaptive-tick mode. |
162 | (perhaps indefinitely) because they currently rely on | 162 | Real-time applications needing to take actions based on CPU time |
163 | scheduling-tick interrupts. This will likely be fixed in | 163 | consumption need to use other means of doing so. |
164 | one of two ways: (1) Prevent CPUs with POSIX CPU timers from | ||
165 | entering adaptive-tick mode, or (2) Use hrtimers or other | ||
166 | adaptive-ticks-immune mechanism to cause the POSIX CPU timer to | ||
167 | fire properly. | ||
168 | 164 | ||
169 | 4. If there are more perf events pending than the hardware can | 165 | 4. If there are more perf events pending than the hardware can |
170 | accommodate, they are normally round-robined so as to collect | 166 | accommodate, they are normally round-robined so as to collect |
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 74ab23176e9b..066ba4157541 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -531,8 +531,13 @@ do { \ | |||
531 | # define might_lock_read(lock) do { } while (0) | 531 | # define might_lock_read(lock) do { } while (0) |
532 | #endif | 532 | #endif |
533 | 533 | ||
534 | #ifdef CONFIG_PROVE_RCU | 534 | #ifdef CONFIG_LOCKDEP |
535 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s); | 535 | void lockdep_rcu_suspicious(const char *file, const int line, const char *s); |
536 | #else | ||
537 | static inline void | ||
538 | lockdep_rcu_suspicious(const char *file, const int line, const char *s) | ||
539 | { | ||
540 | } | ||
536 | #endif | 541 | #endif |
537 | 542 | ||
538 | #endif /* __LINUX_LOCKDEP_H */ | 543 | #endif /* __LINUX_LOCKDEP_H */ |
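The lockdep.h hunk above widens the lockdep_rcu_suspicious() declaration from CONFIG_PROVE_RCU to CONFIG_LOCKDEP and adds an empty inline stub for the CONFIG_LOCKDEP=n case, so callers no longer need their own #ifdef. A sketch of the resulting call pattern (the surrounding function is hypothetical; the tree_plugin.h hunk later in this patch uses the same pattern for real):

#include <linux/hardirq.h>
#include <linux/lockdep.h>

static void example_complain_if_in_irq(void)
{
        /*
         * Safe to call unconditionally: with CONFIG_LOCKDEP=n this
         * resolves to the empty inline stub added above.
         */
        if (in_irq())
                lockdep_rcu_suspicious(__FILE__, __LINE__,
                                       "unexpected call from irq context");
}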
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 762022f07afd..573a5afd5ed8 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -48,6 +48,26 @@ | |||
48 | 48 | ||
49 | extern int rcu_expedited; /* for sysctl */ | 49 | extern int rcu_expedited; /* for sysctl */ |
50 | 50 | ||
51 | #ifdef CONFIG_TINY_RCU | ||
52 | /* Tiny RCU doesn't expedite, as its purpose in life is instead to be tiny. */ | ||
53 | static inline bool rcu_gp_is_expedited(void) /* Internal RCU use. */ | ||
54 | { | ||
55 | return false; | ||
56 | } | ||
57 | |||
58 | static inline void rcu_expedite_gp(void) | ||
59 | { | ||
60 | } | ||
61 | |||
62 | static inline void rcu_unexpedite_gp(void) | ||
63 | { | ||
64 | } | ||
65 | #else /* #ifdef CONFIG_TINY_RCU */ | ||
66 | bool rcu_gp_is_expedited(void); /* Internal RCU use. */ | ||
67 | void rcu_expedite_gp(void); | ||
68 | void rcu_unexpedite_gp(void); | ||
69 | #endif /* #else #ifdef CONFIG_TINY_RCU */ | ||
70 | |||
51 | enum rcutorture_type { | 71 | enum rcutorture_type { |
52 | RCU_FLAVOR, | 72 | RCU_FLAVOR, |
53 | RCU_BH_FLAVOR, | 73 | RCU_BH_FLAVOR, |
@@ -195,6 +215,15 @@ void call_rcu_sched(struct rcu_head *head, | |||
195 | 215 | ||
196 | void synchronize_sched(void); | 216 | void synchronize_sched(void); |
197 | 217 | ||
218 | /* | ||
219 | * Structure allowing asynchronous waiting on RCU. | ||
220 | */ | ||
221 | struct rcu_synchronize { | ||
222 | struct rcu_head head; | ||
223 | struct completion completion; | ||
224 | }; | ||
225 | void wakeme_after_rcu(struct rcu_head *head); | ||
226 | |||
198 | /** | 227 | /** |
199 | * call_rcu_tasks() - Queue an RCU for invocation task-based grace period | 228 | * call_rcu_tasks() - Queue an RCU for invocation task-based grace period |
200 | * @head: structure to be used for queueing the RCU updates. | 229 | * @head: structure to be used for queueing the RCU updates. |
@@ -258,6 +287,7 @@ static inline int rcu_preempt_depth(void) | |||
258 | 287 | ||
259 | /* Internal to kernel */ | 288 | /* Internal to kernel */ |
260 | void rcu_init(void); | 289 | void rcu_init(void); |
290 | void rcu_end_inkernel_boot(void); | ||
261 | void rcu_sched_qs(void); | 291 | void rcu_sched_qs(void); |
262 | void rcu_bh_qs(void); | 292 | void rcu_bh_qs(void); |
263 | void rcu_check_callbacks(int user); | 293 | void rcu_check_callbacks(int user); |
@@ -722,7 +752,7 @@ static inline void rcu_preempt_sleep_check(void) | |||
722 | * annotated as __rcu. | 752 | * annotated as __rcu. |
723 | */ | 753 | */ |
724 | #define rcu_dereference_check(p, c) \ | 754 | #define rcu_dereference_check(p, c) \ |
725 | __rcu_dereference_check((p), rcu_read_lock_held() || (c), __rcu) | 755 | __rcu_dereference_check((p), (c) || rcu_read_lock_held(), __rcu) |
726 | 756 | ||
727 | /** | 757 | /** |
728 | * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking | 758 | * rcu_dereference_bh_check() - rcu_dereference_bh with debug checking |
@@ -732,7 +762,7 @@ static inline void rcu_preempt_sleep_check(void) | |||
732 | * This is the RCU-bh counterpart to rcu_dereference_check(). | 762 | * This is the RCU-bh counterpart to rcu_dereference_check(). |
733 | */ | 763 | */ |
734 | #define rcu_dereference_bh_check(p, c) \ | 764 | #define rcu_dereference_bh_check(p, c) \ |
735 | __rcu_dereference_check((p), rcu_read_lock_bh_held() || (c), __rcu) | 765 | __rcu_dereference_check((p), (c) || rcu_read_lock_bh_held(), __rcu) |
736 | 766 | ||
737 | /** | 767 | /** |
738 | * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking | 768 | * rcu_dereference_sched_check() - rcu_dereference_sched with debug checking |
@@ -742,7 +772,7 @@ static inline void rcu_preempt_sleep_check(void) | |||
742 | * This is the RCU-sched counterpart to rcu_dereference_check(). | 772 | * This is the RCU-sched counterpart to rcu_dereference_check(). |
743 | */ | 773 | */ |
744 | #define rcu_dereference_sched_check(p, c) \ | 774 | #define rcu_dereference_sched_check(p, c) \ |
745 | __rcu_dereference_check((p), rcu_read_lock_sched_held() || (c), \ | 775 | __rcu_dereference_check((p), (c) || rcu_read_lock_sched_held(), \ |
746 | __rcu) | 776 | __rcu) |
747 | 777 | ||
748 | #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ | 778 | #define rcu_dereference_raw(p) rcu_dereference_check(p, 1) /*@@@ needed? @@@*/ |
@@ -935,9 +965,9 @@ static inline void rcu_read_unlock(void) | |||
935 | { | 965 | { |
936 | rcu_lockdep_assert(rcu_is_watching(), | 966 | rcu_lockdep_assert(rcu_is_watching(), |
937 | "rcu_read_unlock() used illegally while idle"); | 967 | "rcu_read_unlock() used illegally while idle"); |
938 | rcu_lock_release(&rcu_lock_map); | ||
939 | __release(RCU); | 968 | __release(RCU); |
940 | __rcu_read_unlock(); | 969 | __rcu_read_unlock(); |
970 | rcu_lock_release(&rcu_lock_map); /* Keep acq info for rls diags. */ | ||
941 | } | 971 | } |
942 | 972 | ||
943 | /** | 973 | /** |
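The rcu_synchronize structure and wakeme_after_rcu() are hoisted into rcupdate.h above so that more than one RCU flavor can share them (the private copy removed from kernel/rcu/srcu.c later in this patch is the old one). For orientation, a minimal sketch of how the pair turns the asynchronous call_rcu() into a synchronous wait; this mirrors what wait_rcu_gp() already does rather than introducing new API:

#include <linux/completion.h>
#include <linux/rcupdate.h>

static void example_wait_for_grace_period(void)
{
        struct rcu_synchronize rcu;

        init_rcu_head_on_stack(&rcu.head);
        init_completion(&rcu.completion);
        /* wakeme_after_rcu() does complete(&rcu.completion) after a GP. */
        call_rcu(&rcu.head, wakeme_after_rcu);
        wait_for_completion(&rcu.completion);
        destroy_rcu_head_on_stack(&rcu.head);
}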
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9cfd9623fb03..bdeb4567b71e 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -182,7 +182,7 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) | |||
182 | * lockdep_is_held() calls. | 182 | * lockdep_is_held() calls. |
183 | */ | 183 | */ |
184 | #define srcu_dereference_check(p, sp, c) \ | 184 | #define srcu_dereference_check(p, sp, c) \ |
185 | __rcu_dereference_check((p), srcu_read_lock_held(sp) || (c), __rcu) | 185 | __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) |
186 | 186 | ||
187 | /** | 187 | /** |
188 | * srcu_dereference - fetch SRCU-protected pointer for later dereferencing | 188 | * srcu_dereference - fetch SRCU-protected pointer for later dereferencing |
diff --git a/init/Kconfig b/init/Kconfig
index f5dbc6d4261b..9a0592516f48 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -791,6 +791,19 @@ config RCU_NOCB_CPU_ALL | |||
791 | 791 | ||
792 | endchoice | 792 | endchoice |
793 | 793 | ||
794 | config RCU_EXPEDITE_BOOT | ||
795 | bool | ||
796 | default n | ||
797 | help | ||
798 | This option enables expedited grace periods at boot time, | ||
799 | as if rcu_expedite_gp() had been invoked early in boot. | ||
800 | The corresponding rcu_unexpedite_gp() is invoked from | ||
801 | rcu_end_inkernel_boot(), which is intended to be invoked | ||
802 | at the end of the kernel-only boot sequence, just before | ||
803 | init is exec'ed. | ||
804 | |||
805 | Accept the default if unsure. | ||
806 | |||
794 | endmenu # "RCU Subsystem" | 807 | endmenu # "RCU Subsystem" |
795 | 808 | ||
796 | config BUILD_BIN2C | 809 | config BUILD_BIN2C |
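The RCU_EXPEDITE_BOOT help text above describes a counted, nestable request: grace periods stay expedited while at least one rcu_expedite_gp() request is outstanding, and rcu_end_inkernel_boot() drops the boot-time request just before init is exec'ed. A sketch of that sequence under the stated assumptions (the call sites shown are illustrative, not copied from this patch):

#include <linux/rcupdate.h>

static void example_boot_sequence(void)
{
        /*
         * With CONFIG_RCU_EXPEDITE_BOOT=y the kernel behaves as if this
         * call had been made very early in boot, so synchronize_rcu()
         * and friends take the expedited path during kernel-only boot.
         */
        rcu_expedite_gp();

        /* ... kernel-only boot work full of synchronous grace periods ... */

        /*
         * Invoked just before init is exec'ed; per the help text it calls
         * rcu_unexpedite_gp(), and once the request count reaches zero,
         * rcu_gp_is_expedited() reports false again (unless the separate
         * rcu_expedited sysctl is set).
         */
        rcu_end_inkernel_boot();
}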
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 30d42aa55d83..8dbe27611ec3 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -853,6 +853,8 @@ rcu_torture_fqs(void *arg) | |||
853 | static int | 853 | static int |
854 | rcu_torture_writer(void *arg) | 854 | rcu_torture_writer(void *arg) |
855 | { | 855 | { |
856 | bool can_expedite = !rcu_gp_is_expedited(); | ||
857 | int expediting = 0; | ||
856 | unsigned long gp_snap; | 858 | unsigned long gp_snap; |
857 | bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; | 859 | bool gp_cond1 = gp_cond, gp_exp1 = gp_exp, gp_normal1 = gp_normal; |
858 | bool gp_sync1 = gp_sync; | 860 | bool gp_sync1 = gp_sync; |
@@ -865,9 +867,15 @@ rcu_torture_writer(void *arg) | |||
865 | int nsynctypes = 0; | 867 | int nsynctypes = 0; |
866 | 868 | ||
867 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); | 869 | VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); |
870 | pr_alert("%s" TORTURE_FLAG | ||
871 | " Grace periods expedited from boot/sysfs for %s,\n", | ||
872 | torture_type, cur_ops->name); | ||
873 | pr_alert("%s" TORTURE_FLAG | ||
874 | " Testing of dynamic grace-period expediting diabled.\n", | ||
875 | torture_type); | ||
868 | 876 | ||
869 | /* Initialize synctype[] array. If none set, take default. */ | 877 | /* Initialize synctype[] array. If none set, take default. */ |
870 | if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync) | 878 | if (!gp_cond1 && !gp_exp1 && !gp_normal1 && !gp_sync1) |
871 | gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; | 879 | gp_cond1 = gp_exp1 = gp_normal1 = gp_sync1 = true; |
872 | if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) | 880 | if (gp_cond1 && cur_ops->get_state && cur_ops->cond_sync) |
873 | synctype[nsynctypes++] = RTWS_COND_GET; | 881 | synctype[nsynctypes++] = RTWS_COND_GET; |
@@ -949,9 +957,26 @@ rcu_torture_writer(void *arg) | |||
949 | } | 957 | } |
950 | } | 958 | } |
951 | rcutorture_record_progress(++rcu_torture_current_version); | 959 | rcutorture_record_progress(++rcu_torture_current_version); |
960 | /* Cycle through nesting levels of rcu_expedite_gp() calls. */ | ||
961 | if (can_expedite && | ||
962 | !(torture_random(&rand) & 0xff & (!!expediting - 1))) { | ||
963 | WARN_ON_ONCE(expediting == 0 && rcu_gp_is_expedited()); | ||
964 | if (expediting >= 0) | ||
965 | rcu_expedite_gp(); | ||
966 | else | ||
967 | rcu_unexpedite_gp(); | ||
968 | if (++expediting > 3) | ||
969 | expediting = -expediting; | ||
970 | } | ||
952 | rcu_torture_writer_state = RTWS_STUTTER; | 971 | rcu_torture_writer_state = RTWS_STUTTER; |
953 | stutter_wait("rcu_torture_writer"); | 972 | stutter_wait("rcu_torture_writer"); |
954 | } while (!torture_must_stop()); | 973 | } while (!torture_must_stop()); |
974 | /* Reset expediting back to unexpedited. */ | ||
975 | if (expediting > 0) | ||
976 | expediting = -expediting; | ||
977 | while (can_expedite && expediting++ < 0) | ||
978 | rcu_unexpedite_gp(); | ||
979 | WARN_ON_ONCE(can_expedite && rcu_gp_is_expedited()); | ||
955 | rcu_torture_writer_state = RTWS_STOPPING; | 980 | rcu_torture_writer_state = RTWS_STOPPING; |
956 | torture_kthread_stopping("rcu_torture_writer"); | 981 | torture_kthread_stopping("rcu_torture_writer"); |
957 | return 0; | 982 | return 0; |
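The gating expression in the rcu_torture_writer() hunk above is terse; the following standalone sketch (illustrative only) spells out what it computes:

#include <stdbool.h>

/* Mirrors: !(torture_random(&rand) & 0xff & (!!expediting - 1)) */
static bool take_expedite_step(unsigned long rnd, int expediting)
{
        /*
         * expediting == 0: (!!expediting - 1) is -1 (all bits set), so the
         * test passes only when the low eight random bits are all zero;
         * a new expedite/unexpedite nesting cycle therefore starts roughly
         * once every 256 passes.
         * expediting != 0: the mask collapses to 0, so the test always
         * passes and the cycle advances on every pass until it completes.
         */
        return !(rnd & 0xff & (!!expediting - 1));
}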
diff --git a/kernel/rcu/srcu.c b/kernel/rcu/srcu.c
index 445bf8ffe3fb..cad76e76b4e7 100644
--- a/kernel/rcu/srcu.c
+++ b/kernel/rcu/srcu.c
@@ -402,23 +402,6 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | |||
402 | } | 402 | } |
403 | EXPORT_SYMBOL_GPL(call_srcu); | 403 | EXPORT_SYMBOL_GPL(call_srcu); |
404 | 404 | ||
405 | struct rcu_synchronize { | ||
406 | struct rcu_head head; | ||
407 | struct completion completion; | ||
408 | }; | ||
409 | |||
410 | /* | ||
411 | * Awaken the corresponding synchronize_srcu() instance now that a | ||
412 | * grace period has elapsed. | ||
413 | */ | ||
414 | static void wakeme_after_rcu(struct rcu_head *head) | ||
415 | { | ||
416 | struct rcu_synchronize *rcu; | ||
417 | |||
418 | rcu = container_of(head, struct rcu_synchronize, head); | ||
419 | complete(&rcu->completion); | ||
420 | } | ||
421 | |||
422 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); | 405 | static void srcu_advance_batches(struct srcu_struct *sp, int trycount); |
423 | static void srcu_reschedule(struct srcu_struct *sp); | 406 | static void srcu_reschedule(struct srcu_struct *sp); |
424 | 407 | ||
@@ -507,7 +490,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, int trycount) | |||
507 | */ | 490 | */ |
508 | void synchronize_srcu(struct srcu_struct *sp) | 491 | void synchronize_srcu(struct srcu_struct *sp) |
509 | { | 492 | { |
510 | __synchronize_srcu(sp, rcu_expedited | 493 | __synchronize_srcu(sp, rcu_gp_is_expedited() |
511 | ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT | 494 | ? SYNCHRONIZE_SRCU_EXP_TRYCOUNT |
512 | : SYNCHRONIZE_SRCU_TRYCOUNT); | 495 | : SYNCHRONIZE_SRCU_TRYCOUNT); |
513 | } | 496 | } |
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index cc9ceca7bde1..069742d61c68 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -103,8 +103,7 @@ EXPORT_SYMBOL(__rcu_is_watching); | |||
103 | static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | 103 | static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) |
104 | { | 104 | { |
105 | RCU_TRACE(reset_cpu_stall_ticks(rcp)); | 105 | RCU_TRACE(reset_cpu_stall_ticks(rcp)); |
106 | if (rcp->rcucblist != NULL && | 106 | if (rcp->donetail != rcp->curtail) { |
107 | rcp->donetail != rcp->curtail) { | ||
108 | rcp->donetail = rcp->curtail; | 107 | rcp->donetail = rcp->curtail; |
109 | return 1; | 108 | return 1; |
110 | } | 109 | } |
@@ -169,17 +168,6 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
169 | unsigned long flags; | 168 | unsigned long flags; |
170 | RCU_TRACE(int cb_count = 0); | 169 | RCU_TRACE(int cb_count = 0); |
171 | 170 | ||
172 | /* If no RCU callbacks ready to invoke, just return. */ | ||
173 | if (&rcp->rcucblist == rcp->donetail) { | ||
174 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, 0, -1)); | ||
175 | RCU_TRACE(trace_rcu_batch_end(rcp->name, 0, | ||
176 | !!ACCESS_ONCE(rcp->rcucblist), | ||
177 | need_resched(), | ||
178 | is_idle_task(current), | ||
179 | false)); | ||
180 | return; | ||
181 | } | ||
182 | |||
183 | /* Move the ready-to-invoke callbacks to a local list. */ | 171 | /* Move the ready-to-invoke callbacks to a local list. */ |
184 | local_irq_save(flags); | 172 | local_irq_save(flags); |
185 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); | 173 | RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); |
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8fcc64ed858c..233165da782f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -91,8 +91,10 @@ static const char *tp_##sname##_varname __used __tracepoint_string = sname##_var | |||
91 | 91 | ||
92 | #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ | 92 | #define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ |
93 | DEFINE_RCU_TPS(sname) \ | 93 | DEFINE_RCU_TPS(sname) \ |
94 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data); \ | ||
94 | struct rcu_state sname##_state = { \ | 95 | struct rcu_state sname##_state = { \ |
95 | .level = { &sname##_state.node[0] }, \ | 96 | .level = { &sname##_state.node[0] }, \ |
97 | .rda = &sname##_data, \ | ||
96 | .call = cr, \ | 98 | .call = cr, \ |
97 | .fqs_state = RCU_GP_IDLE, \ | 99 | .fqs_state = RCU_GP_IDLE, \ |
98 | .gpnum = 0UL - 300UL, \ | 100 | .gpnum = 0UL - 300UL, \ |
@@ -103,8 +105,7 @@ struct rcu_state sname##_state = { \ | |||
103 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ | 105 | .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ |
104 | .name = RCU_STATE_NAME(sname), \ | 106 | .name = RCU_STATE_NAME(sname), \ |
105 | .abbr = sabbr, \ | 107 | .abbr = sabbr, \ |
106 | }; \ | 108 | } |
107 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, sname##_data) | ||
108 | 109 | ||
109 | RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); | 110 | RCU_STATE_INITIALIZER(rcu_sched, 's', call_rcu_sched); |
110 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); | 111 | RCU_STATE_INITIALIZER(rcu_bh, 'b', call_rcu_bh); |
@@ -310,10 +311,10 @@ void rcu_note_context_switch(void) | |||
310 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); | 311 | EXPORT_SYMBOL_GPL(rcu_note_context_switch); |
311 | 312 | ||
312 | /* | 313 | /* |
313 | * Register a quiesecent state for all RCU flavors. If there is an | 314 | * Register a quiescent state for all RCU flavors. If there is an |
314 | * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight | 315 | * emergency, invoke rcu_momentary_dyntick_idle() to do a heavy-weight |
315 | * dyntick-idle quiescent state visible to other CPUs (but only for those | 316 | * dyntick-idle quiescent state visible to other CPUs (but only for those |
316 | * RCU flavors in desparate need of a quiescent state, which will normally | 317 | * RCU flavors in desperate need of a quiescent state, which will normally |
317 | * be none of them). Either way, do a lightweight quiescent state for | 318 | * be none of them). Either way, do a lightweight quiescent state for |
318 | * all RCU flavors. | 319 | * all RCU flavors. |
319 | */ | 320 | */ |
@@ -428,6 +429,15 @@ void rcu_bh_force_quiescent_state(void) | |||
428 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); | 429 | EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state); |
429 | 430 | ||
430 | /* | 431 | /* |
432 | * Force a quiescent state for RCU-sched. | ||
433 | */ | ||
434 | void rcu_sched_force_quiescent_state(void) | ||
435 | { | ||
436 | force_quiescent_state(&rcu_sched_state); | ||
437 | } | ||
438 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); | ||
439 | |||
440 | /* | ||
431 | * Show the state of the grace-period kthreads. | 441 | * Show the state of the grace-period kthreads. |
432 | */ | 442 | */ |
433 | void show_rcu_gp_kthreads(void) | 443 | void show_rcu_gp_kthreads(void) |
@@ -501,15 +511,6 @@ void rcutorture_record_progress(unsigned long vernum) | |||
501 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); | 511 | EXPORT_SYMBOL_GPL(rcutorture_record_progress); |
502 | 512 | ||
503 | /* | 513 | /* |
504 | * Force a quiescent state for RCU-sched. | ||
505 | */ | ||
506 | void rcu_sched_force_quiescent_state(void) | ||
507 | { | ||
508 | force_quiescent_state(&rcu_sched_state); | ||
509 | } | ||
510 | EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state); | ||
511 | |||
512 | /* | ||
513 | * Does the CPU have callbacks ready to be invoked? | 514 | * Does the CPU have callbacks ready to be invoked? |
514 | */ | 515 | */ |
515 | static int | 516 | static int |
@@ -1347,20 +1348,30 @@ void rcu_cpu_stall_reset(void) | |||
1347 | } | 1348 | } |
1348 | 1349 | ||
1349 | /* | 1350 | /* |
1350 | * Initialize the specified rcu_data structure's callback list to empty. | 1351 | * Initialize the specified rcu_data structure's default callback list |
1352 | * to empty. The default callback list is the one that is not used by | ||
1353 | * no-callbacks CPUs. | ||
1351 | */ | 1354 | */ |
1352 | static void init_callback_list(struct rcu_data *rdp) | 1355 | static void init_default_callback_list(struct rcu_data *rdp) |
1353 | { | 1356 | { |
1354 | int i; | 1357 | int i; |
1355 | 1358 | ||
1356 | if (init_nocb_callback_list(rdp)) | ||
1357 | return; | ||
1358 | rdp->nxtlist = NULL; | 1359 | rdp->nxtlist = NULL; |
1359 | for (i = 0; i < RCU_NEXT_SIZE; i++) | 1360 | for (i = 0; i < RCU_NEXT_SIZE; i++) |
1360 | rdp->nxttail[i] = &rdp->nxtlist; | 1361 | rdp->nxttail[i] = &rdp->nxtlist; |
1361 | } | 1362 | } |
1362 | 1363 | ||
1363 | /* | 1364 | /* |
1365 | * Initialize the specified rcu_data structure's callback list to empty. | ||
1366 | */ | ||
1367 | static void init_callback_list(struct rcu_data *rdp) | ||
1368 | { | ||
1369 | if (init_nocb_callback_list(rdp)) | ||
1370 | return; | ||
1371 | init_default_callback_list(rdp); | ||
1372 | } | ||
1373 | |||
1374 | /* | ||
1364 | * Determine the value that ->completed will have at the end of the | 1375 | * Determine the value that ->completed will have at the end of the |
1365 | * next subsequent grace period. This is used to tag callbacks so that | 1376 | * next subsequent grace period. This is used to tag callbacks so that |
1366 | * a CPU can invoke callbacks in a timely fashion even if that CPU has | 1377 | * a CPU can invoke callbacks in a timely fashion even if that CPU has |
@@ -1727,7 +1738,6 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1727 | struct rcu_node *rnp = rcu_get_root(rsp); | 1738 | struct rcu_node *rnp = rcu_get_root(rsp); |
1728 | 1739 | ||
1729 | ACCESS_ONCE(rsp->gp_activity) = jiffies; | 1740 | ACCESS_ONCE(rsp->gp_activity) = jiffies; |
1730 | rcu_bind_gp_kthread(); | ||
1731 | raw_spin_lock_irq(&rnp->lock); | 1741 | raw_spin_lock_irq(&rnp->lock); |
1732 | smp_mb__after_unlock_lock(); | 1742 | smp_mb__after_unlock_lock(); |
1733 | if (!ACCESS_ONCE(rsp->gp_flags)) { | 1743 | if (!ACCESS_ONCE(rsp->gp_flags)) { |
@@ -1822,8 +1832,8 @@ static int rcu_gp_init(struct rcu_state *rsp) | |||
1822 | rcu_preempt_check_blocked_tasks(rnp); | 1832 | rcu_preempt_check_blocked_tasks(rnp); |
1823 | rnp->qsmask = rnp->qsmaskinit; | 1833 | rnp->qsmask = rnp->qsmaskinit; |
1824 | ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; | 1834 | ACCESS_ONCE(rnp->gpnum) = rsp->gpnum; |
1825 | WARN_ON_ONCE(rnp->completed != rsp->completed); | 1835 | if (WARN_ON_ONCE(rnp->completed != rsp->completed)) |
1826 | ACCESS_ONCE(rnp->completed) = rsp->completed; | 1836 | ACCESS_ONCE(rnp->completed) = rsp->completed; |
1827 | if (rnp == rdp->mynode) | 1837 | if (rnp == rdp->mynode) |
1828 | (void)__note_gp_changes(rsp, rnp, rdp); | 1838 | (void)__note_gp_changes(rsp, rnp, rdp); |
1829 | rcu_preempt_boost_start_gp(rnp); | 1839 | rcu_preempt_boost_start_gp(rnp); |
@@ -1866,7 +1876,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in) | |||
1866 | fqs_state = RCU_FORCE_QS; | 1876 | fqs_state = RCU_FORCE_QS; |
1867 | } else { | 1877 | } else { |
1868 | /* Handle dyntick-idle and offline CPUs. */ | 1878 | /* Handle dyntick-idle and offline CPUs. */ |
1869 | isidle = false; | 1879 | isidle = true; |
1870 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); | 1880 | force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); |
1871 | } | 1881 | } |
1872 | /* Clear flag to prevent immediate re-entry. */ | 1882 | /* Clear flag to prevent immediate re-entry. */ |
@@ -1965,6 +1975,7 @@ static int __noreturn rcu_gp_kthread(void *arg) | |||
1965 | struct rcu_state *rsp = arg; | 1975 | struct rcu_state *rsp = arg; |
1966 | struct rcu_node *rnp = rcu_get_root(rsp); | 1976 | struct rcu_node *rnp = rcu_get_root(rsp); |
1967 | 1977 | ||
1978 | rcu_bind_gp_kthread(); | ||
1968 | for (;;) { | 1979 | for (;;) { |
1969 | 1980 | ||
1970 | /* Handle grace-period start. */ | 1981 | /* Handle grace-period start. */ |
@@ -2750,8 +2761,8 @@ static void force_qs_rnp(struct rcu_state *rsp, | |||
2750 | bit = 1; | 2761 | bit = 1; |
2751 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { | 2762 | for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { |
2752 | if ((rnp->qsmask & bit) != 0) { | 2763 | if ((rnp->qsmask & bit) != 0) { |
2753 | if ((rnp->qsmaskinit & bit) != 0) | 2764 | if ((rnp->qsmaskinit & bit) == 0) |
2754 | *isidle = false; | 2765 | *isidle = false; /* Pending hotplug. */ |
2755 | if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) | 2766 | if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) |
2756 | mask |= bit; | 2767 | mask |= bit; |
2757 | } | 2768 | } |
@@ -2895,7 +2906,7 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp, | |||
2895 | * If called from an extended quiescent state, invoke the RCU | 2906 | * If called from an extended quiescent state, invoke the RCU |
2896 | * core in order to force a re-evaluation of RCU's idleness. | 2907 | * core in order to force a re-evaluation of RCU's idleness. |
2897 | */ | 2908 | */ |
2898 | if (!rcu_is_watching() && cpu_online(smp_processor_id())) | 2909 | if (!rcu_is_watching()) |
2899 | invoke_rcu_core(); | 2910 | invoke_rcu_core(); |
2900 | 2911 | ||
2901 | /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ | 2912 | /* If interrupts were disabled or CPU offline, don't invoke RCU core. */ |
@@ -2981,11 +2992,22 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
2981 | 2992 | ||
2982 | if (cpu != -1) | 2993 | if (cpu != -1) |
2983 | rdp = per_cpu_ptr(rsp->rda, cpu); | 2994 | rdp = per_cpu_ptr(rsp->rda, cpu); |
2984 | offline = !__call_rcu_nocb(rdp, head, lazy, flags); | 2995 | if (likely(rdp->mynode)) { |
2985 | WARN_ON_ONCE(offline); | 2996 | /* Post-boot, so this should be for a no-CBs CPU. */ |
2986 | /* _call_rcu() is illegal on offline CPU; leak the callback. */ | 2997 | offline = !__call_rcu_nocb(rdp, head, lazy, flags); |
2987 | local_irq_restore(flags); | 2998 | WARN_ON_ONCE(offline); |
2988 | return; | 2999 | /* Offline CPU, _call_rcu() illegal, leak callback. */ |
3000 | local_irq_restore(flags); | ||
3001 | return; | ||
3002 | } | ||
3003 | /* | ||
3004 | * Very early boot, before rcu_init(). Initialize if needed | ||
3005 | * and then drop through to queue the callback. | ||
3006 | */ | ||
3007 | BUG_ON(cpu != -1); | ||
3008 | WARN_ON_ONCE(!rcu_is_watching()); | ||
3009 | if (!likely(rdp->nxtlist)) | ||
3010 | init_default_callback_list(rdp); | ||
2989 | } | 3011 | } |
2990 | ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; | 3012 | ACCESS_ONCE(rdp->qlen) = rdp->qlen + 1; |
2991 | if (lazy) | 3013 | if (lazy) |
@@ -3108,7 +3130,7 @@ void synchronize_sched(void) | |||
3108 | "Illegal synchronize_sched() in RCU-sched read-side critical section"); | 3130 | "Illegal synchronize_sched() in RCU-sched read-side critical section"); |
3109 | if (rcu_blocking_is_gp()) | 3131 | if (rcu_blocking_is_gp()) |
3110 | return; | 3132 | return; |
3111 | if (rcu_expedited) | 3133 | if (rcu_gp_is_expedited()) |
3112 | synchronize_sched_expedited(); | 3134 | synchronize_sched_expedited(); |
3113 | else | 3135 | else |
3114 | wait_rcu_gp(call_rcu_sched); | 3136 | wait_rcu_gp(call_rcu_sched); |
@@ -3135,7 +3157,7 @@ void synchronize_rcu_bh(void) | |||
3135 | "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); | 3157 | "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section"); |
3136 | if (rcu_blocking_is_gp()) | 3158 | if (rcu_blocking_is_gp()) |
3137 | return; | 3159 | return; |
3138 | if (rcu_expedited) | 3160 | if (rcu_gp_is_expedited()) |
3139 | synchronize_rcu_bh_expedited(); | 3161 | synchronize_rcu_bh_expedited(); |
3140 | else | 3162 | else |
3141 | wait_rcu_gp(call_rcu_bh); | 3163 | wait_rcu_gp(call_rcu_bh); |
@@ -3735,7 +3757,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp) | |||
3735 | rdp->qlen_last_fqs_check = 0; | 3757 | rdp->qlen_last_fqs_check = 0; |
3736 | rdp->n_force_qs_snap = rsp->n_force_qs; | 3758 | rdp->n_force_qs_snap = rsp->n_force_qs; |
3737 | rdp->blimit = blimit; | 3759 | rdp->blimit = blimit; |
3738 | init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ | 3760 | if (!rdp->nxtlist) |
3761 | init_callback_list(rdp); /* Re-enable callbacks on this CPU. */ | ||
3739 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | 3762 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
3740 | rcu_sysidle_init_percpu_data(rdp->dynticks); | 3763 | rcu_sysidle_init_percpu_data(rdp->dynticks); |
3741 | atomic_set(&rdp->dynticks->dynticks, | 3764 | atomic_set(&rdp->dynticks->dynticks, |
@@ -3826,11 +3849,12 @@ static int rcu_pm_notify(struct notifier_block *self, | |||
3826 | case PM_HIBERNATION_PREPARE: | 3849 | case PM_HIBERNATION_PREPARE: |
3827 | case PM_SUSPEND_PREPARE: | 3850 | case PM_SUSPEND_PREPARE: |
3828 | if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ | 3851 | if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ |
3829 | rcu_expedited = 1; | 3852 | rcu_expedite_gp(); |
3830 | break; | 3853 | break; |
3831 | case PM_POST_HIBERNATION: | 3854 | case PM_POST_HIBERNATION: |
3832 | case PM_POST_SUSPEND: | 3855 | case PM_POST_SUSPEND: |
3833 | rcu_expedited = 0; | 3856 | if (nr_cpu_ids <= 256) /* Expediting bad for large systems. */ |
3857 | rcu_unexpedite_gp(); | ||
3834 | break; | 3858 | break; |
3835 | default: | 3859 | default: |
3836 | break; | 3860 | break; |
@@ -3900,30 +3924,26 @@ void rcu_scheduler_starting(void) | |||
3900 | * Compute the per-level fanout, either using the exact fanout specified | 3924 | * Compute the per-level fanout, either using the exact fanout specified |
3901 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. | 3925 | * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT. |
3902 | */ | 3926 | */ |
3903 | #ifdef CONFIG_RCU_FANOUT_EXACT | ||
3904 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 3927 | static void __init rcu_init_levelspread(struct rcu_state *rsp) |
3905 | { | 3928 | { |
3906 | int i; | 3929 | int i; |
3907 | 3930 | ||
3908 | rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; | 3931 | if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) { |
3909 | for (i = rcu_num_lvls - 2; i >= 0; i--) | 3932 | rsp->levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; |
3910 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; | 3933 | for (i = rcu_num_lvls - 2; i >= 0; i--) |
3911 | } | 3934 | rsp->levelspread[i] = CONFIG_RCU_FANOUT; |
3912 | #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ | 3935 | } else { |
3913 | static void __init rcu_init_levelspread(struct rcu_state *rsp) | 3936 | int ccur; |
3914 | { | 3937 | int cprv; |
3915 | int ccur; | 3938 | |
3916 | int cprv; | 3939 | cprv = nr_cpu_ids; |
3917 | int i; | 3940 | for (i = rcu_num_lvls - 1; i >= 0; i--) { |
3918 | 3941 | ccur = rsp->levelcnt[i]; | |
3919 | cprv = nr_cpu_ids; | 3942 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; |
3920 | for (i = rcu_num_lvls - 1; i >= 0; i--) { | 3943 | cprv = ccur; |
3921 | ccur = rsp->levelcnt[i]; | 3944 | } |
3922 | rsp->levelspread[i] = (cprv + ccur - 1) / ccur; | ||
3923 | cprv = ccur; | ||
3924 | } | 3945 | } |
3925 | } | 3946 | } |
3926 | #endif /* #else #ifdef CONFIG_RCU_FANOUT_EXACT */ | ||
3927 | 3947 | ||
3928 | /* | 3948 | /* |
3929 | * Helper function for rcu_init() that initializes one rcu_state structure. | 3949 | * Helper function for rcu_init() that initializes one rcu_state structure. |
@@ -3999,7 +4019,6 @@ static void __init rcu_init_one(struct rcu_state *rsp, | |||
3999 | } | 4019 | } |
4000 | } | 4020 | } |
4001 | 4021 | ||
4002 | rsp->rda = rda; | ||
4003 | init_waitqueue_head(&rsp->gp_wq); | 4022 | init_waitqueue_head(&rsp->gp_wq); |
4004 | rnp = rsp->level[rcu_num_lvls - 1]; | 4023 | rnp = rsp->level[rcu_num_lvls - 1]; |
4005 | for_each_possible_cpu(i) { | 4024 | for_each_possible_cpu(i) { |
@@ -4092,6 +4111,8 @@ void __init rcu_init(void) | |||
4092 | { | 4111 | { |
4093 | int cpu; | 4112 | int cpu; |
4094 | 4113 | ||
4114 | rcu_early_boot_tests(); | ||
4115 | |||
4095 | rcu_bootup_announce(); | 4116 | rcu_bootup_announce(); |
4096 | rcu_init_geometry(); | 4117 | rcu_init_geometry(); |
4097 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); | 4118 | rcu_init_one(&rcu_bh_state, &rcu_bh_data); |
@@ -4108,8 +4129,6 @@ void __init rcu_init(void) | |||
4108 | pm_notifier(rcu_pm_notify, 0); | 4129 | pm_notifier(rcu_pm_notify, 0); |
4109 | for_each_online_cpu(cpu) | 4130 | for_each_online_cpu(cpu) |
4110 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); | 4131 | rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu); |
4111 | |||
4112 | rcu_early_boot_tests(); | ||
4113 | } | 4132 | } |
4114 | 4133 | ||
4115 | #include "tree_plugin.h" | 4134 | #include "tree_plugin.h" |
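The rcu_init_levelspread() rewrite earlier in the tree.c diff folds the two #ifdef CONFIG_RCU_FANOUT_EXACT variants into one function using IS_ENABLED(). In the balanced branch, each level's spread is the ceiling of the CPU (or child) count divided by the number of nodes at that level. A standalone userspace sketch of that arithmetic, with illustrative numbers that are not taken from this patch:

#include <stdio.h>

/* Same ceiling-division walk as the balanced branch of rcu_init_levelspread(). */
static void levelspread(int nr_cpu_ids, const int *levelcnt, int nlvls)
{
        int cprv = nr_cpu_ids;
        int i;

        for (i = nlvls - 1; i >= 0; i--) {
                int ccur = levelcnt[i];
                int spread = (cprv + ccur - 1) / ccur;  /* ceil(cprv / ccur) */

                printf("level %d: spread %d\n", i, spread);
                cprv = ccur;
        }
}

int main(void)
{
        int levelcnt[] = { 1, 3 };      /* one root node over three leaf nodes */

        levelspread(40, levelcnt, 2);   /* prints leaf spread 14, root spread 3 */
        return 0;
}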
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index d45e961515c1..8c0ec0f5a027 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -58,38 +58,33 @@ static bool __read_mostly rcu_nocb_poll; /* Offload kthread are to poll. */ | |||
58 | */ | 58 | */ |
59 | static void __init rcu_bootup_announce_oddness(void) | 59 | static void __init rcu_bootup_announce_oddness(void) |
60 | { | 60 | { |
61 | #ifdef CONFIG_RCU_TRACE | 61 | if (IS_ENABLED(CONFIG_RCU_TRACE)) |
62 | pr_info("\tRCU debugfs-based tracing is enabled.\n"); | 62 | pr_info("\tRCU debugfs-based tracing is enabled.\n"); |
63 | #endif | 63 | if ((IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || |
64 | #if (defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 64) || (!defined(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32) | 64 | (!IS_ENABLED(CONFIG_64BIT) && CONFIG_RCU_FANOUT != 32)) |
65 | pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", | 65 | pr_info("\tCONFIG_RCU_FANOUT set to non-default value of %d\n", |
66 | CONFIG_RCU_FANOUT); | 66 | CONFIG_RCU_FANOUT); |
67 | #endif | 67 | if (IS_ENABLED(CONFIG_RCU_FANOUT_EXACT)) |
68 | #ifdef CONFIG_RCU_FANOUT_EXACT | 68 | pr_info("\tHierarchical RCU autobalancing is disabled.\n"); |
69 | pr_info("\tHierarchical RCU autobalancing is disabled.\n"); | 69 | if (IS_ENABLED(CONFIG_RCU_FAST_NO_HZ)) |
70 | #endif | 70 | pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); |
71 | #ifdef CONFIG_RCU_FAST_NO_HZ | 71 | if (IS_ENABLED(CONFIG_PROVE_RCU)) |
72 | pr_info("\tRCU dyntick-idle grace-period acceleration is enabled.\n"); | 72 | pr_info("\tRCU lockdep checking is enabled.\n"); |
73 | #endif | 73 | if (IS_ENABLED(CONFIG_RCU_TORTURE_TEST_RUNNABLE)) |
74 | #ifdef CONFIG_PROVE_RCU | 74 | pr_info("\tRCU torture testing starts during boot.\n"); |
75 | pr_info("\tRCU lockdep checking is enabled.\n"); | 75 | if (IS_ENABLED(CONFIG_RCU_CPU_STALL_INFO)) |
76 | #endif | 76 | pr_info("\tAdditional per-CPU info printed with stalls.\n"); |
77 | #ifdef CONFIG_RCU_TORTURE_TEST_RUNNABLE | 77 | if (NUM_RCU_LVL_4 != 0) |
78 | pr_info("\tRCU torture testing starts during boot.\n"); | 78 | pr_info("\tFour-level hierarchy is enabled.\n"); |
79 | #endif | 79 | if (CONFIG_RCU_FANOUT_LEAF != 16) |
80 | #if defined(CONFIG_RCU_CPU_STALL_INFO) | 80 | pr_info("\tBuild-time adjustment of leaf fanout to %d.\n", |
81 | pr_info("\tAdditional per-CPU info printed with stalls.\n"); | 81 | CONFIG_RCU_FANOUT_LEAF); |
82 | #endif | ||
83 | #if NUM_RCU_LVL_4 != 0 | ||
84 | pr_info("\tFour-level hierarchy is enabled.\n"); | ||
85 | #endif | ||
86 | if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) | 82 | if (rcu_fanout_leaf != CONFIG_RCU_FANOUT_LEAF) |
87 | pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); | 83 | pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); |
88 | if (nr_cpu_ids != NR_CPUS) | 84 | if (nr_cpu_ids != NR_CPUS) |
89 | pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); | 85 | pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); |
90 | #ifdef CONFIG_RCU_BOOST | 86 | if (IS_ENABLED(CONFIG_RCU_BOOST)) |
91 | pr_info("\tRCU kthread priority: %d.\n", kthread_prio); | 87 | pr_info("\tRCU kthread priority: %d.\n", kthread_prio); |
92 | #endif | ||
93 | } | 88 | } |
94 | 89 | ||
95 | #ifdef CONFIG_PREEMPT_RCU | 90 | #ifdef CONFIG_PREEMPT_RCU |
@@ -296,7 +291,13 @@ void rcu_read_unlock_special(struct task_struct *t) | |||
296 | } | 291 | } |
297 | 292 | ||
298 | /* Hardware IRQ handlers cannot block, complain if they get here. */ | 293 | /* Hardware IRQ handlers cannot block, complain if they get here. */ |
299 | if (WARN_ON_ONCE(in_irq() || in_serving_softirq())) { | 294 | if (in_irq() || in_serving_softirq()) { |
295 | lockdep_rcu_suspicious(__FILE__, __LINE__, | ||
296 | "rcu_read_unlock() from irq or softirq with blocking in critical section!!!\n"); | ||
297 | pr_alert("->rcu_read_unlock_special: %#x (b: %d, nq: %d)\n", | ||
298 | t->rcu_read_unlock_special.s, | ||
299 | t->rcu_read_unlock_special.b.blocked, | ||
300 | t->rcu_read_unlock_special.b.need_qs); | ||
300 | local_irq_restore(flags); | 301 | local_irq_restore(flags); |
301 | return; | 302 | return; |
302 | } | 303 | } |
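The replacement above upgrades a bare WARN_ON_ONCE() into a lockdep_rcu_suspicious() complaint plus a dump of the per-task unlock-special flags. The dump is cheap because the individual flags are overlaid with a single scalar, so one value captures the whole state. A simplified, hypothetical layout of that idea (not the kernel's actual rcu_read_unlock_special definition):

    #include <linux/printk.h>
    #include <linux/types.h>

    /* Hypothetical flag word: individual fields for normal use, one scalar
     * view (.s) for cheap testing and diagnostic printing. */
    union unlock_special {
        struct {
            u8 blocked;  /* Task blocked inside an RCU read-side section. */
            u8 need_qs;  /* RCU core wants a quiescent state from this task. */
        } b;
        u16 s;
    };

    static void dump_unlock_special(union unlock_special us)
    {
        pr_alert("->unlock_special: %#x (b: %d, nq: %d)\n",
                 us.s, us.b.blocked, us.b.need_qs);
    }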
@@ -535,7 +536,7 @@ void synchronize_rcu(void) | |||
535 | "Illegal synchronize_rcu() in RCU read-side critical section"); | 536 | "Illegal synchronize_rcu() in RCU read-side critical section"); |
536 | if (!rcu_scheduler_active) | 537 | if (!rcu_scheduler_active) |
537 | return; | 538 | return; |
538 | if (rcu_expedited) | 539 | if (rcu_gp_is_expedited()) |
539 | synchronize_rcu_expedited(); | 540 | synchronize_rcu_expedited(); |
540 | else | 541 | else |
541 | wait_rcu_gp(call_rcu); | 542 | wait_rcu_gp(call_rcu); |
@@ -1940,7 +1941,8 @@ static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu) | |||
1940 | rhp = ACCESS_ONCE(rdp->nocb_follower_head); | 1941 | rhp = ACCESS_ONCE(rdp->nocb_follower_head); |
1941 | 1942 | ||
1942 | /* Having no rcuo kthread but CBs after scheduler starts is bad! */ | 1943 | /* Having no rcuo kthread but CBs after scheduler starts is bad! */ |
1943 | if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp) { | 1944 | if (!ACCESS_ONCE(rdp->nocb_kthread) && rhp && |
1945 | rcu_scheduler_fully_active) { | ||
1944 | /* RCU callback enqueued before CPU first came online??? */ | 1946 | /* RCU callback enqueued before CPU first came online??? */ |
1945 | pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", | 1947 | pr_err("RCU: Never-onlined no-CBs CPU %d has CB %p\n", |
1946 | cpu, rhp->func); | 1948 | cpu, rhp->func); |
@@ -2387,18 +2389,8 @@ void __init rcu_init_nohz(void) | |||
2387 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); | 2389 | pr_info("\tPoll for callbacks from no-CBs CPUs.\n"); |
2388 | 2390 | ||
2389 | for_each_rcu_flavor(rsp) { | 2391 | for_each_rcu_flavor(rsp) { |
2390 | for_each_cpu(cpu, rcu_nocb_mask) { | 2392 | for_each_cpu(cpu, rcu_nocb_mask) |
2391 | struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); | 2393 | init_nocb_callback_list(per_cpu_ptr(rsp->rda, cpu)); |
2392 | |||
2393 | /* | ||
2394 | * If there are early callbacks, they will need | ||
2395 | * to be moved to the nocb lists. | ||
2396 | */ | ||
2397 | WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] != | ||
2398 | &rdp->nxtlist && | ||
2399 | rdp->nxttail[RCU_NEXT_TAIL] != NULL); | ||
2400 | init_nocb_callback_list(rdp); | ||
2401 | } | ||
2402 | rcu_organize_nocb_kthreads(rsp); | 2394 | rcu_organize_nocb_kthreads(rsp); |
2403 | } | 2395 | } |
2404 | } | 2396 | } |
@@ -2535,6 +2527,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) | |||
2535 | if (!rcu_is_nocb_cpu(rdp->cpu)) | 2527 | if (!rcu_is_nocb_cpu(rdp->cpu)) |
2536 | return false; | 2528 | return false; |
2537 | 2529 | ||
2530 | /* If there are early-boot callbacks, move them to nocb lists. */ | ||
2531 | if (rdp->nxtlist) { | ||
2532 | rdp->nocb_head = rdp->nxtlist; | ||
2533 | rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL]; | ||
2534 | atomic_long_set(&rdp->nocb_q_count, rdp->qlen); | ||
2535 | atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy); | ||
2536 | rdp->nxtlist = NULL; | ||
2537 | rdp->qlen = 0; | ||
2538 | rdp->qlen_lazy = 0; | ||
2539 | } | ||
2538 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; | 2540 | rdp->nxttail[RCU_NEXT_TAIL] = NULL; |
2539 | return true; | 2541 | return true; |
2540 | } | 2542 | } |
@@ -2758,7 +2760,8 @@ static void rcu_sysidle_exit(int irq) | |||
2758 | 2760 | ||
2759 | /* | 2761 | /* |
2760 | * Check to see if the current CPU is idle. Note that usermode execution | 2762 | * Check to see if the current CPU is idle. Note that usermode execution |
2761 | * does not count as idle. The caller must have disabled interrupts. | 2763 | * does not count as idle. The caller must have disabled interrupts, |
2764 | * and must be running on tick_do_timer_cpu. | ||
2762 | */ | 2765 | */ |
2763 | static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, | 2766 | static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, |
2764 | unsigned long *maxj) | 2767 | unsigned long *maxj) |
@@ -2779,8 +2782,8 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle, | |||
2779 | if (!*isidle || rdp->rsp != rcu_state_p || | 2782 | if (!*isidle || rdp->rsp != rcu_state_p || |
2780 | cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) | 2783 | cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) |
2781 | return; | 2784 | return; |
2782 | if (rcu_gp_in_progress(rdp->rsp)) | 2785 | /* Verify affinity of current kthread. */ |
2783 | WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); | 2786 | WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu); |
2784 | 2787 | ||
2785 | /* Pick up current idle and NMI-nesting counter and check. */ | 2788 | /* Pick up current idle and NMI-nesting counter and check. */ |
2786 | cur = atomic_read(&rdtp->dynticks_idle); | 2789 | cur = atomic_read(&rdtp->dynticks_idle); |
@@ -3063,11 +3066,10 @@ static void rcu_bind_gp_kthread(void) | |||
3063 | return; | 3066 | return; |
3064 | #ifdef CONFIG_NO_HZ_FULL_SYSIDLE | 3067 | #ifdef CONFIG_NO_HZ_FULL_SYSIDLE |
3065 | cpu = tick_do_timer_cpu; | 3068 | cpu = tick_do_timer_cpu; |
3066 | if (cpu >= 0 && cpu < nr_cpu_ids && raw_smp_processor_id() != cpu) | 3069 | if (cpu >= 0 && cpu < nr_cpu_ids) |
3067 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); | 3070 | set_cpus_allowed_ptr(current, cpumask_of(cpu)); |
3068 | #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ | 3071 | #else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ |
3069 | if (!is_housekeeping_cpu(raw_smp_processor_id())) | 3072 | housekeeping_affine(current); |
3070 | housekeeping_affine(current); | ||
3071 | #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ | 3073 | #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ |
3072 | } | 3074 | } |
3073 | 3075 | ||
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e0d31a345ee6..1f133350da01 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c | |||
@@ -62,6 +62,63 @@ MODULE_ALIAS("rcupdate"); | |||
62 | 62 | ||
63 | module_param(rcu_expedited, int, 0); | 63 | module_param(rcu_expedited, int, 0); |
64 | 64 | ||
65 | #ifndef CONFIG_TINY_RCU | ||
66 | |||
67 | static atomic_t rcu_expedited_nesting = | ||
68 | ATOMIC_INIT(IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT) ? 1 : 0); | ||
69 | |||
70 | /* | ||
71 | * Should normal grace-period primitives be expedited? Intended for | ||
72 | * use within RCU. Note that this function takes the rcu_expedited | ||
73 | * sysfs/boot variable into account as well as the rcu_expedite_gp() | ||
74 | * nesting. So looping on rcu_unexpedite_gp() until rcu_gp_is_expedited() | ||
75 | * returns false is a -really- bad idea. | ||
76 | */ | ||
77 | bool rcu_gp_is_expedited(void) | ||
78 | { | ||
79 | return rcu_expedited || atomic_read(&rcu_expedited_nesting); | ||
80 | } | ||
81 | EXPORT_SYMBOL_GPL(rcu_gp_is_expedited); | ||
82 | |||
83 | /** | ||
84 | * rcu_expedite_gp - Expedite future RCU grace periods | ||
85 | * | ||
86 | * After a call to this function, future calls to synchronize_rcu() and | ||
87 | * friends act as the corresponding synchronize_rcu_expedited() function | ||
88 | * had instead been called. | ||
89 | */ | ||
90 | void rcu_expedite_gp(void) | ||
91 | { | ||
92 | atomic_inc(&rcu_expedited_nesting); | ||
93 | } | ||
94 | EXPORT_SYMBOL_GPL(rcu_expedite_gp); | ||
95 | |||
96 | /** | ||
97 | * rcu_unexpedite_gp - Cancel prior rcu_expedite_gp() invocation | ||
98 | * | ||
99 | * Undo a prior call to rcu_expedite_gp(). If all prior calls to | ||
100 | * rcu_expedite_gp() are undone by a subsequent call to rcu_unexpedite_gp(), | ||
101 | * and if the rcu_expedited sysfs/boot parameter is not set, then all | ||
102 | * subsequent calls to synchronize_rcu() and friends will return to | ||
103 | * their normal non-expedited behavior. | ||
104 | */ | ||
105 | void rcu_unexpedite_gp(void) | ||
106 | { | ||
107 | atomic_dec(&rcu_expedited_nesting); | ||
108 | } | ||
109 | EXPORT_SYMBOL_GPL(rcu_unexpedite_gp); | ||
110 | |||
111 | #endif /* #ifndef CONFIG_TINY_RCU */ | ||
112 | |||
113 | /* | ||
114 | * Inform RCU of the end of the in-kernel boot sequence. | ||
115 | */ | ||
116 | void rcu_end_inkernel_boot(void) | ||
117 | { | ||
118 | if (IS_ENABLED(CONFIG_RCU_EXPEDITE_BOOT)) | ||
119 | rcu_unexpedite_gp(); | ||
120 | } | ||
121 | |||
65 | #ifdef CONFIG_PREEMPT_RCU | 122 | #ifdef CONFIG_PREEMPT_RCU |
66 | 123 | ||
67 | /* | 124 | /* |
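These new primitives maintain a nesting count, so any kernel code can temporarily force synchronize_rcu() and friends down the expedited path and later undo it, independently of the rcu_expedited boot/sysfs setting. A hedged usage sketch (my_apply_updates() and its loop are hypothetical, not from this patch):

    #include <linux/rcupdate.h>

    /* Hypothetical: expedite grace periods across a burst of updates, then
     * restore normal behavior.  Calls nest, so this is safe even if a caller
     * higher up the stack has also expedited. */
    static void my_apply_updates(int nr_updates)
    {
        int i;

        rcu_expedite_gp();
        for (i = 0; i < nr_updates; i++)
            synchronize_rcu();  /* Behaves like synchronize_rcu_expedited(). */
        rcu_unexpedite_gp();
    }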
@@ -199,16 +256,13 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held); | |||
199 | 256 | ||
200 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | 257 | #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ |
201 | 258 | ||
202 | struct rcu_synchronize { | 259 | /** |
203 | struct rcu_head head; | 260 | * wakeme_after_rcu() - Callback function to awaken a task after grace period |
204 | struct completion completion; | 261 | * @head: Pointer to rcu_head member within rcu_synchronize structure |
205 | }; | 262 | * |
206 | 263 | * Awaken the corresponding task now that a grace period has elapsed. | |
207 | /* | ||
208 | * Awaken the corresponding synchronize_rcu() instance now that a | ||
209 | * grace period has elapsed. | ||
210 | */ | 264 | */ |
211 | static void wakeme_after_rcu(struct rcu_head *head) | 265 | void wakeme_after_rcu(struct rcu_head *head) |
212 | { | 266 | { |
213 | struct rcu_synchronize *rcu; | 267 | struct rcu_synchronize *rcu; |
214 | 268 | ||
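Making wakeme_after_rcu() non-static (and, per the diffstat, exposing its rcu_synchronize wrapper via include/linux/rcupdate.h) lets other RCU code reuse the classic wait-for-grace-period idiom: post a callback that completes a completion, then sleep on it. A hedged sketch of that idiom, assuming both symbols are visible as the diffstat suggests:

    #include <linux/completion.h>
    #include <linux/rcupdate.h>

    /* Illustrative only: open-coded equivalent of synchronize_rcu()'s wait. */
    static void my_wait_for_grace_period(void)
    {
        struct rcu_synchronize rcu;

        init_rcu_head_on_stack(&rcu.head);
        init_completion(&rcu.completion);
        call_rcu(&rcu.head, wakeme_after_rcu);  /* Fires after a grace period. */
        wait_for_completion(&rcu.completion);
        destroy_rcu_head_on_stack(&rcu.head);
    }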
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1173afc308ad..1ad74c0df01f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -1180,16 +1180,7 @@ config DEBUG_CREDENTIALS | |||
1180 | menu "RCU Debugging" | 1180 | menu "RCU Debugging" |
1181 | 1181 | ||
1182 | config PROVE_RCU | 1182 | config PROVE_RCU |
1183 | bool "RCU debugging: prove RCU correctness" | 1183 | def_bool PROVE_LOCKING |
1184 | depends on PROVE_LOCKING | ||
1185 | default n | ||
1186 | help | ||
1187 | This feature enables lockdep extensions that check for correct | ||
1188 | use of RCU APIs. This is currently under development. Say Y | ||
1189 | if you want to debug RCU usage or help work on the PROVE_RCU | ||
1190 | feature. | ||
1191 | |||
1192 | Say N if you are unsure. | ||
1193 | 1184 | ||
1194 | config PROVE_RCU_REPEATEDLY | 1185 | config PROVE_RCU_REPEATEDLY |
1195 | bool "RCU debugging: don't disable PROVE_RCU on first splat" | 1186 | bool "RCU debugging: don't disable PROVE_RCU on first splat" |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 368d64ac779e..dd2812ceb0ba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh | |||
@@ -310,7 +310,7 @@ function dump(first, pastlast) | |||
310 | cfr[jn] = cf[j] "." cfrep[cf[j]]; | 310 | cfr[jn] = cf[j] "." cfrep[cf[j]]; |
311 | } | 311 | } |
312 | if (cpusr[jn] > ncpus && ncpus != 0) | 312 | if (cpusr[jn] > ncpus && ncpus != 0) |
313 | ovf = "(!)"; | 313 | ovf = "-ovf"; |
314 | else | 314 | else |
315 | ovf = ""; | 315 | ovf = ""; |
316 | print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; | 316 | print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date`"; |