author     Ingo Molnar <mingo@kernel.org>  2014-09-23 01:21:42 -0400
committer  Ingo Molnar <mingo@kernel.org>  2014-09-23 01:21:42 -0400
commit     62731433591156ece255e23ffd69ea4544b424f1 (patch)
tree       125ce584cdb3166456b1767d03c3d5e72ea79a6e
parent     7c9a3730a5ef4c6240eaaa2d8dcdb8cc1627d715 (diff)
parent     dd56af42bd829c6e770ed69812bd65a04eaeb1e4 (diff)
Merge branch 'rcu/next' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull the v3.18 RCU changes from Paul E. McKenney:

 " * Update RCU documentation.  These were posted to LKML at
     https://lkml.org/lkml/2014/8/28/378.

   * Miscellaneous fixes.  These were posted to LKML at
     https://lkml.org/lkml/2014/8/28/386.  An additional fix that
     eliminates a documented (but now inconvenient) deadlock between
     RCU hotplug and expedited grace periods was posted at
     https://lkml.org/lkml/2014/8/28/573.

   * Changes related to No-CBs CPUs and NO_HZ_FULL.  These were posted
     to LKML at https://lkml.org/lkml/2014/8/28/412.

   * Torture-test updates.  These were posted to LKML at
     https://lkml.org/lkml/2014/8/28/546 and at
     https://lkml.org/lkml/2014/9/11/1114.

   * RCU-tasks implementation.  These were posted to LKML at
     https://lkml.org/lkml/2014/8/28/540. "

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r--  Documentation/RCU/stallwarn.txt | 33
-rw-r--r--  Documentation/kernel-parameters.txt | 68
-rw-r--r--  Documentation/locking/locktorture.txt | 142
-rw-r--r--  Documentation/memory-barriers.txt | 128
-rw-r--r--  fs/file.c | 2
-rw-r--r--  include/linux/cpu.h | 2
-rw-r--r--  include/linux/init_task.h | 12
-rw-r--r--  include/linux/lockdep.h | 1
-rw-r--r--  include/linux/rcupdate.h | 106
-rw-r--r--  include/linux/rcutiny.h | 2
-rw-r--r--  include/linux/sched.h | 39
-rw-r--r--  include/linux/torture.h | 5
-rw-r--r--  include/trace/events/rcu.h | 3
-rw-r--r--  init/Kconfig | 14
-rw-r--r--  init/main.c | 1
-rw-r--r--  kernel/cpu.c | 16
-rw-r--r--  kernel/exit.c | 3
-rw-r--r--  kernel/locking/locktorture.c | 392
-rw-r--r--  kernel/rcu/rcutorture.c | 278
-rw-r--r--  kernel/rcu/tiny.c | 20
-rw-r--r--  kernel/rcu/tree.c | 115
-rw-r--r--  kernel/rcu/tree.h | 18
-rw-r--r--  kernel/rcu/tree_plugin.h | 404
-rw-r--r--  kernel/rcu/update.c | 345
-rw-r--r--  kernel/softirq.c | 2
-rw-r--r--  kernel/sysctl.c | 9
-rw-r--r--  kernel/torture.c | 32
-rw-r--r--  mm/mlock.c | 2
-rwxr-xr-x[-rw-r--r--]  tools/testing/selftests/rcutorture/bin/config2frag.sh | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/configcheck.sh | 4
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/configinit.sh | 4
-rw-r--r--  tools/testing/selftests/rcutorture/bin/functions.sh | 20
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-build.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-recheck.sh | 2
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh | 5
-rwxr-xr-x[-rw-r--r--]  tools/testing/selftests/rcutorture/bin/kvm.sh | 6
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-build.sh | 5
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-console.sh | 9
-rwxr-xr-x  tools/testing/selftests/rcutorture/bin/parse-torture.sh | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/CFLIST | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK02 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK03 | 6
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/CFLIST | 3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS01 | 9
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS02 | 5
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS03 | 13
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01 | 4
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot | 2
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE07 | 3
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot | 1
-rw-r--r--  tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh | 2
-rw-r--r--  tools/testing/selftests/rcutorture/doc/initrd.txt | 1
60 files changed, 1798 insertions, 530 deletions
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt
index 68fe3ad27015..ef5a2fd4ff70 100644
--- a/Documentation/RCU/stallwarn.txt
+++ b/Documentation/RCU/stallwarn.txt
@@ -56,8 +56,20 @@ RCU_STALL_RAT_DELAY
56 two jiffies. (This is a cpp macro, not a kernel configuration 56 two jiffies. (This is a cpp macro, not a kernel configuration
57 parameter.) 57 parameter.)
58 58
59When a CPU detects that it is stalling, it will print a message similar 59rcupdate.rcu_task_stall_timeout
60to the following: 60
61 This boot/sysfs parameter controls the RCU-tasks stall warning
62 interval. A value of zero or less suppresses RCU-tasks stall
63 warnings. A positive value sets the stall-warning interval
 64 in jiffies. An RCU-tasks stall warning starts with the line:
65
66 INFO: rcu_tasks detected stalls on tasks:
67
68 And continues with the output of sched_show_task() for each
69 task stalling the current RCU-tasks grace period.
70
71For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling,
72it will print a message similar to the following:
61 73
62INFO: rcu_sched_state detected stall on CPU 5 (t=2500 jiffies) 74INFO: rcu_sched_state detected stall on CPU 5 (t=2500 jiffies)
63 75
@@ -174,8 +186,12 @@ o A CPU looping with preemption disabled. This condition can
174o A CPU looping with bottom halves disabled. This condition can 186o A CPU looping with bottom halves disabled. This condition can
175 result in RCU-sched and RCU-bh stalls. 187 result in RCU-sched and RCU-bh stalls.
176 188
177o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel 189o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the
178 without invoking schedule(). 190 kernel without invoking schedule(). Note that cond_resched()
191 does not necessarily prevent RCU CPU stall warnings. Therefore,
192 if the looping in the kernel is really expected and desirable
193 behavior, you might need to replace some of the cond_resched()
194 calls with calls to cond_resched_rcu_qs().
179 195
180o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might 196o A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might
181 happen to preempt a low-priority task in the middle of an RCU 197 happen to preempt a low-priority task in the middle of an RCU
@@ -208,11 +224,10 @@ o A hardware failure. This is quite unlikely, but has occurred
208 This resulted in a series of RCU CPU stall warnings, eventually 224 This resulted in a series of RCU CPU stall warnings, eventually
209 leading the realization that the CPU had failed. 225 leading the realization that the CPU had failed.
210 226
211The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning. 227The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall
212SRCU does not have its own CPU stall warnings, but its calls to 228warning. Note that SRCU does -not- have CPU stall warnings. Please note
213synchronize_sched() will result in RCU-sched detecting RCU-sched-related 229that RCU only detects CPU stalls when there is a grace period in progress.
214CPU stalls. Please note that RCU only detects CPU stalls when there is 230No grace period, no CPU stall warnings.
215a grace period in progress. No grace period, no CPU stall warnings.
216 231
217To diagnose the cause of the stall, inspect the stack traces. 232To diagnose the cause of the stall, inspect the stack traces.
218The offending function will usually be near the top of the stack. 233The offending function will usually be near the top of the stack.
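The stallwarn.txt hunk above recommends replacing cond_resched() with the new cond_resched_rcu_qs() in kernel loops that are expected to run for a long time, so that RCU (including RCU-tasks) still sees quiescent states. A minimal sketch of that pattern, assuming a hypothetical long-running loop; struct item and process_one_item() are illustrative and not part of this merge:

    #include <linux/rcupdate.h>             /* cond_resched_rcu_qs() */

    struct item;                            /* hypothetical payload type */
    void process_one_item(struct item *p);  /* hypothetical helper */

    static void process_many_items(struct item **items, int n)
    {
            int i;

            for (i = 0; i < n; i++) {
                    process_one_item(items[i]);
                    /*
                     * Report a quiescent state to all RCU flavors,
                     * including RCU-tasks, even if we do not actually
                     * reschedule.  A bare cond_resched() would not
                     * necessarily do so.
                     */
                    cond_resched_rcu_qs();
            }
    }

The fs/file.c hunk later in this merge makes exactly this substitution in close_files().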
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 10d51c2f10d7..aa0eedc84d00 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1704,6 +1704,49 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1704 lockd.nlm_udpport=M [NFS] Assign UDP port. 1704 lockd.nlm_udpport=M [NFS] Assign UDP port.
1705 Format: <integer> 1705 Format: <integer>
1706 1706
1707 locktorture.nreaders_stress= [KNL]
1708 Set the number of locking read-acquisition kthreads.
1709 Defaults to being automatically set based on the
1710 number of online CPUs.
1711
1712 locktorture.nwriters_stress= [KNL]
1713 Set the number of locking write-acquisition kthreads.
1714
1715 locktorture.onoff_holdoff= [KNL]
1716 Set time (s) after boot for CPU-hotplug testing.
1717
1718 locktorture.onoff_interval= [KNL]
1719 Set time (s) between CPU-hotplug operations, or
1720 zero to disable CPU-hotplug testing.
1721
1722 locktorture.shuffle_interval= [KNL]
1723 Set task-shuffle interval (jiffies). Shuffling
1724 tasks allows some CPUs to go into dyntick-idle
1725 mode during the locktorture test.
1726
1727 locktorture.shutdown_secs= [KNL]
1728 Set time (s) after boot system shutdown. This
1729 is useful for hands-off automated testing.
1730
1731 locktorture.stat_interval= [KNL]
1732 Time (s) between statistics printk()s.
1733
1734 locktorture.stutter= [KNL]
1735 Time (s) to stutter testing, for example,
1736 specifying five seconds causes the test to run for
1737 five seconds, wait for five seconds, and so on.
1738 This tests the locking primitive's ability to
1739 transition abruptly to and from idle.
1740
1741 locktorture.torture_runnable= [BOOT]
1742 Start locktorture running at boot time.
1743
1744 locktorture.torture_type= [KNL]
1745 Specify the locking implementation to test.
1746
1747 locktorture.verbose= [KNL]
1748 Enable additional printk() statements.
1749
1707 logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver 1750 logibm.irq= [HW,MOUSE] Logitech Bus Mouse Driver
1708 Format: <irq> 1751 Format: <irq>
1709 1752
@@ -2881,6 +2924,24 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2881 Lazy RCU callbacks are those which RCU can 2924 Lazy RCU callbacks are those which RCU can
2882 prove do nothing more than free memory. 2925 prove do nothing more than free memory.
2883 2926
2927 rcutorture.cbflood_inter_holdoff= [KNL]
2928 Set holdoff time (jiffies) between successive
2929 callback-flood tests.
2930
2931 rcutorture.cbflood_intra_holdoff= [KNL]
2932 Set holdoff time (jiffies) between successive
2933 bursts of callbacks within a given callback-flood
2934 test.
2935
2936 rcutorture.cbflood_n_burst= [KNL]
2937 Set the number of bursts making up a given
2938 callback-flood test. Set this to zero to
2939 disable callback-flood testing.
2940
2941 rcutorture.cbflood_n_per_burst= [KNL]
2942 Set the number of callbacks to be registered
2943 in a given burst of a callback-flood test.
2944
2884 rcutorture.fqs_duration= [KNL] 2945 rcutorture.fqs_duration= [KNL]
2885 Set duration of force_quiescent_state bursts. 2946 Set duration of force_quiescent_state bursts.
2886 2947
@@ -2920,7 +2981,7 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2920 Set time (s) between CPU-hotplug operations, or 2981 Set time (s) between CPU-hotplug operations, or
2921 zero to disable CPU-hotplug testing. 2982 zero to disable CPU-hotplug testing.
2922 2983
2923 rcutorture.rcutorture_runnable= [BOOT] 2984 rcutorture.torture_runnable= [BOOT]
2924 Start rcutorture running at boot time. 2985 Start rcutorture running at boot time.
2925 2986
2926 rcutorture.shuffle_interval= [KNL] 2987 rcutorture.shuffle_interval= [KNL]
@@ -2982,6 +3043,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
2982 rcupdate.rcu_cpu_stall_timeout= [KNL] 3043 rcupdate.rcu_cpu_stall_timeout= [KNL]
2983 Set timeout for RCU CPU stall warning messages. 3044 Set timeout for RCU CPU stall warning messages.
2984 3045
3046 rcupdate.rcu_task_stall_timeout= [KNL]
3047 Set timeout in jiffies for RCU task stall warning
3048 messages. Disable with a value less than or equal
3049 to zero.
3050
2985 rdinit= [KNL] 3051 rdinit= [KNL]
2986 Format: <full_path> 3052 Format: <full_path>
2987 Run specified binary instead of /init from the ramdisk, 3053 Run specified binary instead of /init from the ramdisk,
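The new rcutorture.cbflood_* and locktorture.* entries above are module parameters. Inside the torture-test modules such parameters are typically declared with the torture_param() helper from include/linux/torture.h, as the locktorture.c hunks later in this diff show for nreaders_stress. A hedged sketch of how the cbflood parameters might be declared; the default values shown are illustrative assumptions, not taken from this merge:

    #include <linux/kernel.h>
    #include <linux/torture.h>

    /*
     * torture_param() expands to a static variable plus module_param()
     * and MODULE_PARM_DESC(), so these become rcutorture.cbflood_* on
     * the kernel command line when built into rcutorture.
     */
    torture_param(int, cbflood_inter_holdoff, HZ,
                  "Holdoff between callback-flood tests (jiffies)");
    torture_param(int, cbflood_intra_holdoff, 1,
                  "Holdoff between bursts within a test (jiffies)");
    torture_param(int, cbflood_n_burst, 3,
                  "Number of bursts per test, zero to disable");
    torture_param(int, cbflood_n_per_burst, 20000,
                  "Number of callbacks registered per burst");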
diff --git a/Documentation/locking/locktorture.txt b/Documentation/locking/locktorture.txt
new file mode 100644
index 000000000000..be715015e0f7
--- /dev/null
+++ b/Documentation/locking/locktorture.txt
@@ -0,0 +1,142 @@
1Kernel Lock Torture Test Operation
2
3CONFIG_LOCK_TORTURE_TEST
4
 5The CONFIG_LOCK_TORTURE_TEST config option provides a kernel module
6that runs torture tests on core kernel locking primitives. The kernel
7module, 'locktorture', may be built after the fact on the running
8kernel to be tested, if desired. The tests periodically output status
 9messages via printk(), which can be examined via the dmesg command (perhaps
10grepping for "torture"). The test is started when the module is loaded,
11and stops when the module is unloaded. This program is based on how RCU
12is tortured, via rcutorture.
13
14This torture test consists of creating a number of kernel threads which
 15acquire the lock and hold it for a specific amount of time, thus simulating
 16different critical region behaviors. The amount of contention on the lock
 17can be varied by enlarging this critical region hold time and/or by
 18creating more kthreads.
19
20
21MODULE PARAMETERS
22
23This module has the following parameters:
24
25
26 ** Locktorture-specific **
27
28nwriters_stress Number of kernel threads that will stress exclusive lock
29 ownership (writers). The default value is twice the number
30 of online CPUs.
31
32nreaders_stress Number of kernel threads that will stress shared lock
33 ownership (readers). The default is the same amount of writer
34 locks. If the user did not specify nwriters_stress, then
35 both readers and writers be the amount of online CPUs.
36
37torture_type Type of lock to torture. By default, only spinlocks will
38 be tortured. This module can torture the following locks,
39 with string values as follows:
40
41 o "lock_busted": Simulates a buggy lock implementation.
42
43 o "spin_lock": spin_lock() and spin_unlock() pairs.
44
45 o "spin_lock_irq": spin_lock_irq() and spin_unlock_irq()
46 pairs.
47
48 o "mutex_lock": mutex_lock() and mutex_unlock() pairs.
49
50 o "rwsem_lock": read/write down() and up() semaphore pairs.
51
52torture_runnable Start locktorture at boot time in the case where the
53 module is built into the kernel, otherwise wait for
54 torture_runnable to be set via sysfs before starting.
55 By default it will begin once the module is loaded.
56
57
58 ** Torture-framework (RCU + locking) **
59
60shutdown_secs The number of seconds to run the test before terminating
61 the test and powering off the system. The default is
62 zero, which disables test termination and system shutdown.
63 This capability is useful for automated testing.
64
65onoff_interval The number of seconds between each attempt to execute a
66 randomly selected CPU-hotplug operation. Defaults
67 to zero, which disables CPU hotplugging. In
68 CONFIG_HOTPLUG_CPU=n kernels, locktorture will silently
69 refuse to do any CPU-hotplug operations regardless of
70 what value is specified for onoff_interval.
71
72onoff_holdoff The number of seconds to wait until starting CPU-hotplug
73 operations. This would normally only be used when
74 locktorture was built into the kernel and started
75 automatically at boot time, in which case it is useful
76 in order to avoid confusing boot-time code with CPUs
77 coming and going. This parameter is only useful if
78 CONFIG_HOTPLUG_CPU is enabled.
79
80stat_interval Number of seconds between statistics-related printk()s.
81 By default, locktorture will report stats every 60 seconds.
 82 Setting the interval to zero causes the statistics to
 83 be printed -only- when the module is unloaded, rather
 84 than at the default 60-second interval.
85
86stutter The length of time to run the test before pausing for this
87 same period of time. Defaults to "stutter=5", so as
88 to run and pause for (roughly) five-second intervals.
89 Specifying "stutter=0" causes the test to run continuously
90 without pausing, which is the old default behavior.
91
92shuffle_interval The number of seconds to keep the test threads affinitied
93 to a particular subset of the CPUs, defaults to 3 seconds.
94 Used in conjunction with test_no_idle_hz.
95
96verbose Enable verbose debugging printing, via printk(). Enabled
97 by default. This extra information is mostly related to
98 high-level errors and reports from the main 'torture'
99 framework.
100
101
102STATISTICS
103
104Statistics are printed in the following format:
105
106spin_lock-torture: Writes: Total: 93746064 Max/Min: 0/0 Fail: 0
107 (A) (B) (C) (D) (E)
108
109(A): Lock type that is being tortured -- torture_type parameter.
110
111(B): Number of writer lock acquisitions. If dealing with a read/write primitive
112 a second "Reads" statistics line is printed.
113
114(C): Number of times the lock was acquired.
115
116(D): Min and max number of times threads failed to acquire the lock.
117
118(E): true/false values if there were errors acquiring the lock. This should
119 -only- be positive if there is a bug in the locking primitive's
120 implementation. Otherwise a lock should never fail (i.e., spin_lock()).
121 Of course, the same applies for (C), above. A dummy example of this is
122 the "lock_busted" type.
123
124USAGE
125
126The following script may be used to torture locks:
127
128 #!/bin/sh
129
130 modprobe locktorture
131 sleep 3600
132 rmmod locktorture
133 dmesg | grep torture:
134
135The output can be manually inspected for the error flag of "!!!".
136One could of course create a more elaborate script that automatically
137checked for such errors. The "rmmod" command forces a "SUCCESS",
138"FAILURE", or "RCU_HOTPLUG" indication to be printk()ed. The first
139two are self-explanatory, while the last indicates that while there
140were no locking failures, CPU-hotplug problems were detected.
141
142Also see: Documentation/RCU/torture.txt
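The torture_type parameter above selects one of the lock_torture_ops implementations in kernel/locking/locktorture.c, and the locktorture.c hunks later in this merge extend that structure with readlock/read_delay/readunlock hooks for shared-acquisition primitives. Purely as an illustration (rwlock_t support is not part of this commit), a new lock type could be plugged in with a sketch like the following, reusing the existing torture_spin_lock_write_delay() helper from locktorture.c:

    static DEFINE_RWLOCK(torture_rwlock);   /* hypothetical lock under test */

    static int torture_rwlock_write_lock(void) __acquires(torture_rwlock)
    {
            write_lock(&torture_rwlock);
            return 0;
    }

    static void torture_rwlock_write_unlock(void) __releases(torture_rwlock)
    {
            write_unlock(&torture_rwlock);
    }

    static int torture_rwlock_read_lock(void) __acquires(torture_rwlock)
    {
            read_lock(&torture_rwlock);
            return 0;
    }

    static void torture_rwlock_read_unlock(void) __releases(torture_rwlock)
    {
            read_unlock(&torture_rwlock);
    }

    static struct lock_torture_ops rw_lock_ops = {
            .writelock      = torture_rwlock_write_lock,
            .write_delay    = torture_spin_lock_write_delay,
            .writeunlock    = torture_rwlock_write_unlock,
            .readlock       = torture_rwlock_read_lock,
            .read_delay     = NULL,
            .readunlock     = torture_rwlock_read_unlock,
            .name           = "rw_lock"
    };

A new type like this would also need to be registered in locktorture's list of supported torture_type values and documented above.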
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index a4de88fb55f0..22a969cdd476 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -574,30 +574,14 @@ However, stores are not speculated. This means that ordering -is- provided
574in the following example: 574in the following example:
575 575
576 q = ACCESS_ONCE(a); 576 q = ACCESS_ONCE(a);
577 if (ACCESS_ONCE(q)) {
578 ACCESS_ONCE(b) = p;
579 }
580
581Please note that ACCESS_ONCE() is not optional! Without the ACCESS_ONCE(),
582the compiler is within its rights to transform this example:
583
584 q = a;
585 if (q) { 577 if (q) {
586 b = p; /* BUG: Compiler can reorder!!! */ 578 ACCESS_ONCE(b) = p;
587 do_something();
588 } else {
589 b = p; /* BUG: Compiler can reorder!!! */
590 do_something_else();
591 } 579 }
592 580
593into this, which of course defeats the ordering: 581Please note that ACCESS_ONCE() is not optional! Without the
 595 b = p; 582ACCESS_ONCE(), the compiler might combine the load from 'a' with other loads
595 b = p; 583'a', and the store to 'b' with other stores to 'b', with possible highly
596 q = a; 584counterintuitive effects on ordering.
597 if (q)
598 do_something();
599 else
600 do_something_else();
601 585
602Worse yet, if the compiler is able to prove (say) that the value of 586Worse yet, if the compiler is able to prove (say) that the value of
603variable 'a' is always non-zero, it would be well within its rights 587variable 'a' is always non-zero, it would be well within its rights
@@ -605,11 +589,12 @@ to optimize the original example by eliminating the "if" statement
605as follows: 589as follows:
606 590
607 q = a; 591 q = a;
608 b = p; /* BUG: Compiler can reorder!!! */ 592 b = p; /* BUG: Compiler and CPU can both reorder!!! */
609 do_something(); 593
594So don't leave out the ACCESS_ONCE().
610 595
611The solution is again ACCESS_ONCE() and barrier(), which preserves the 596It is tempting to try to enforce ordering on identical stores on both
612ordering between the load from variable 'a' and the store to variable 'b': 597branches of the "if" statement as follows:
613 598
614 q = ACCESS_ONCE(a); 599 q = ACCESS_ONCE(a);
615 if (q) { 600 if (q) {
@@ -622,18 +607,11 @@ ordering between the load from variable 'a' and the store to variable 'b':
622 do_something_else(); 607 do_something_else();
623 } 608 }
624 609
625The initial ACCESS_ONCE() is required to prevent the compiler from 610Unfortunately, current compilers will transform this as follows at high
626proving the value of 'a', and the pair of barrier() invocations are 611optimization levels:
627required to prevent the compiler from pulling the two identical stores
628to 'b' out from the legs of the "if" statement.
629
630It is important to note that control dependencies absolutely require a
631a conditional. For example, the following "optimized" version of
632the above example breaks ordering, which is why the barrier() invocations
633are absolutely required if you have identical stores in both legs of
634the "if" statement:
635 612
636 q = ACCESS_ONCE(a); 613 q = ACCESS_ONCE(a);
614 barrier();
637 ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */ 615 ACCESS_ONCE(b) = p; /* BUG: No ordering vs. load from a!!! */
638 if (q) { 616 if (q) {
639 /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */ 617 /* ACCESS_ONCE(b) = p; -- moved up, BUG!!! */
@@ -643,21 +621,36 @@ the "if" statement:
643 do_something_else(); 621 do_something_else();
644 } 622 }
645 623
646It is of course legal for the prior load to be part of the conditional, 624Now there is no conditional between the load from 'a' and the store to
647for example, as follows: 625'b', which means that the CPU is within its rights to reorder them:
626The conditional is absolutely required, and must be present in the
627assembly code even after all compiler optimizations have been applied.
628Therefore, if you need ordering in this example, you need explicit
629memory barriers, for example, smp_store_release():
648 630
649 if (ACCESS_ONCE(a) > 0) { 631 q = ACCESS_ONCE(a);
650 barrier(); 632 if (q) {
651 ACCESS_ONCE(b) = q / 2; 633 smp_store_release(&b, p);
652 do_something(); 634 do_something();
653 } else { 635 } else {
654 barrier(); 636 smp_store_release(&b, p);
655 ACCESS_ONCE(b) = q / 3; 637 do_something_else();
638 }
639
640In contrast, without explicit memory barriers, two-legged-if control
641ordering is guaranteed only when the stores differ, for example:
642
643 q = ACCESS_ONCE(a);
644 if (q) {
645 ACCESS_ONCE(b) = p;
646 do_something();
647 } else {
648 ACCESS_ONCE(b) = r;
656 do_something_else(); 649 do_something_else();
657 } 650 }
658 651
659This will again ensure that the load from variable 'a' is ordered before the 652The initial ACCESS_ONCE() is still required to prevent the compiler from
660stores to variable 'b'. 653proving the value of 'a'.
661 654
662In addition, you need to be careful what you do with the local variable 'q', 655In addition, you need to be careful what you do with the local variable 'q',
663otherwise the compiler might be able to guess the value and again remove 656otherwise the compiler might be able to guess the value and again remove
@@ -665,12 +658,10 @@ the needed conditional. For example:
665 658
666 q = ACCESS_ONCE(a); 659 q = ACCESS_ONCE(a);
667 if (q % MAX) { 660 if (q % MAX) {
668 barrier();
669 ACCESS_ONCE(b) = p; 661 ACCESS_ONCE(b) = p;
670 do_something(); 662 do_something();
671 } else { 663 } else {
672 barrier(); 664 ACCESS_ONCE(b) = r;
673 ACCESS_ONCE(b) = p;
674 do_something_else(); 665 do_something_else();
675 } 666 }
676 667
@@ -682,9 +673,12 @@ transform the above code into the following:
682 ACCESS_ONCE(b) = p; 673 ACCESS_ONCE(b) = p;
683 do_something_else(); 674 do_something_else();
684 675
685This transformation loses the ordering between the load from variable 'a' 676Given this transformation, the CPU is not required to respect the ordering
686and the store to variable 'b'. If you are relying on this ordering, you 677between the load from variable 'a' and the store to variable 'b'. It is
687should do something like the following: 678tempting to add a barrier(), but this does not help. The conditional
679is gone, and the barrier won't bring it back. Therefore, if you are
680relying on this ordering, you should make sure that MAX is greater than
681one, perhaps as follows:
688 682
689 q = ACCESS_ONCE(a); 683 q = ACCESS_ONCE(a);
690 BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */ 684 BUILD_BUG_ON(MAX <= 1); /* Order load from a with store to b. */
@@ -692,35 +686,45 @@ should do something like the following:
692 ACCESS_ONCE(b) = p; 686 ACCESS_ONCE(b) = p;
693 do_something(); 687 do_something();
694 } else { 688 } else {
695 ACCESS_ONCE(b) = p; 689 ACCESS_ONCE(b) = r;
696 do_something_else(); 690 do_something_else();
697 } 691 }
698 692
693Please note once again that the stores to 'b' differ. If they were
694identical, as noted earlier, the compiler could pull this store outside
695of the 'if' statement.
696
699Finally, control dependencies do -not- provide transitivity. This is 697Finally, control dependencies do -not- provide transitivity. This is
700demonstrated by two related examples: 698demonstrated by two related examples, with the initial values of
699x and y both being zero:
701 700
702 CPU 0 CPU 1 701 CPU 0 CPU 1
703 ===================== ===================== 702 ===================== =====================
704 r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y); 703 r1 = ACCESS_ONCE(x); r2 = ACCESS_ONCE(y);
705 if (r1 >= 0) if (r2 >= 0) 704 if (r1 > 0) if (r2 > 0)
706 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1; 705 ACCESS_ONCE(y) = 1; ACCESS_ONCE(x) = 1;
707 706
708 assert(!(r1 == 1 && r2 == 1)); 707 assert(!(r1 == 1 && r2 == 1));
709 708
710The above two-CPU example will never trigger the assert(). However, 709The above two-CPU example will never trigger the assert(). However,
711if control dependencies guaranteed transitivity (which they do not), 710if control dependencies guaranteed transitivity (which they do not),
712then adding the following two CPUs would guarantee a related assertion: 711then adding the following CPU would guarantee a related assertion:
713 712
714 CPU 2 CPU 3 713 CPU 2
715 ===================== ===================== 714 =====================
716 ACCESS_ONCE(x) = 2; ACCESS_ONCE(y) = 2; 715 ACCESS_ONCE(x) = 2;
716
717 assert(!(r1 == 2 && r2 == 1 && x == 2)); /* FAILS!!! */
717 718
718 assert(!(r1 == 2 && r2 == 2 && x == 1 && y == 1)); /* FAILS!!! */ 719But because control dependencies do -not- provide transitivity, the above
720assertion can fail after the combined three-CPU example completes. If you
721need the three-CPU example to provide ordering, you will need smp_mb()
722between the loads and stores in the CPU 0 and CPU 1 code fragments,
723that is, just before or just after the "if" statements.
719 724
720But because control dependencies do -not- provide transitivity, the 725These two examples are the LB and WWC litmus tests from this paper:
721above assertion can fail after the combined four-CPU example completes. 726http://www.cl.cam.ac.uk/users/pes20/ppc-supplemental/test6.pdf and this
722If you need the four-CPU example to provide ordering, you will need 727site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
723smp_mb() between the loads and stores in the CPU 0 and CPU 1 code fragments.
724 728
725In summary: 729In summary:
726 730
diff --git a/fs/file.c b/fs/file.c
index 66923fe3176e..1cafc4c9275b 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -367,7 +367,7 @@ static struct fdtable *close_files(struct files_struct * files)
367 struct file * file = xchg(&fdt->fd[i], NULL); 367 struct file * file = xchg(&fdt->fd[i], NULL);
368 if (file) { 368 if (file) {
369 filp_close(file, files); 369 filp_close(file, files);
370 cond_resched(); 370 cond_resched_rcu_qs();
371 } 371 }
372 } 372 }
373 i++; 373 i++;
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 95978ad7fcdd..b2d9a43012b2 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -213,6 +213,7 @@ extern struct bus_type cpu_subsys;
213extern void cpu_hotplug_begin(void); 213extern void cpu_hotplug_begin(void);
214extern void cpu_hotplug_done(void); 214extern void cpu_hotplug_done(void);
215extern void get_online_cpus(void); 215extern void get_online_cpus(void);
216extern bool try_get_online_cpus(void);
216extern void put_online_cpus(void); 217extern void put_online_cpus(void);
217extern void cpu_hotplug_disable(void); 218extern void cpu_hotplug_disable(void);
218extern void cpu_hotplug_enable(void); 219extern void cpu_hotplug_enable(void);
@@ -230,6 +231,7 @@ int cpu_down(unsigned int cpu);
230static inline void cpu_hotplug_begin(void) {} 231static inline void cpu_hotplug_begin(void) {}
231static inline void cpu_hotplug_done(void) {} 232static inline void cpu_hotplug_done(void) {}
232#define get_online_cpus() do { } while (0) 233#define get_online_cpus() do { } while (0)
234#define try_get_online_cpus() true
233#define put_online_cpus() do { } while (0) 235#define put_online_cpus() do { } while (0)
234#define cpu_hotplug_disable() do { } while (0) 236#define cpu_hotplug_disable() do { } while (0)
235#define cpu_hotplug_enable() do { } while (0) 237#define cpu_hotplug_enable() do { } while (0)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 2bb4c4f3531a..77fc43f8fb72 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -111,12 +111,21 @@ extern struct group_info init_groups;
111#ifdef CONFIG_PREEMPT_RCU 111#ifdef CONFIG_PREEMPT_RCU
112#define INIT_TASK_RCU_PREEMPT(tsk) \ 112#define INIT_TASK_RCU_PREEMPT(tsk) \
113 .rcu_read_lock_nesting = 0, \ 113 .rcu_read_lock_nesting = 0, \
114 .rcu_read_unlock_special = 0, \ 114 .rcu_read_unlock_special.s = 0, \
115 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ 115 .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \
116 INIT_TASK_RCU_TREE_PREEMPT() 116 INIT_TASK_RCU_TREE_PREEMPT()
117#else 117#else
118#define INIT_TASK_RCU_PREEMPT(tsk) 118#define INIT_TASK_RCU_PREEMPT(tsk)
119#endif 119#endif
120#ifdef CONFIG_TASKS_RCU
121#define INIT_TASK_RCU_TASKS(tsk) \
122 .rcu_tasks_holdout = false, \
123 .rcu_tasks_holdout_list = \
124 LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \
125 .rcu_tasks_idle_cpu = -1,
126#else
127#define INIT_TASK_RCU_TASKS(tsk)
128#endif
120 129
121extern struct cred init_cred; 130extern struct cred init_cred;
122 131
@@ -224,6 +233,7 @@ extern struct task_group root_task_group;
224 INIT_FTRACE_GRAPH \ 233 INIT_FTRACE_GRAPH \
225 INIT_TRACE_RECURSION \ 234 INIT_TRACE_RECURSION \
226 INIT_TASK_RCU_PREEMPT(tsk) \ 235 INIT_TASK_RCU_PREEMPT(tsk) \
236 INIT_TASK_RCU_TASKS(tsk) \
227 INIT_CPUSET_SEQ(tsk) \ 237 INIT_CPUSET_SEQ(tsk) \
228 INIT_RT_MUTEXES(tsk) \ 238 INIT_RT_MUTEXES(tsk) \
229 INIT_VTIME(tsk) \ 239 INIT_VTIME(tsk) \
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 008388f920d7..4f86465cc317 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -505,6 +505,7 @@ static inline void print_irqtrace_events(struct task_struct *curr)
505 505
506#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_) 506#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
507#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_) 507#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
508#define lock_map_acquire_tryread(l) lock_acquire_shared_recursive(l, 0, 1, NULL, _THIS_IP_)
508#define lock_map_release(l) lock_release(l, 1, _THIS_IP_) 509#define lock_map_release(l) lock_release(l, 1, _THIS_IP_)
509 510
510#ifdef CONFIG_PROVE_LOCKING 511#ifdef CONFIG_PROVE_LOCKING
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d231aa17b1d7..a4a819ffb2d1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -47,14 +47,12 @@
47#include <asm/barrier.h> 47#include <asm/barrier.h>
48 48
49extern int rcu_expedited; /* for sysctl */ 49extern int rcu_expedited; /* for sysctl */
50#ifdef CONFIG_RCU_TORTURE_TEST
51extern int rcutorture_runnable; /* for sysctl */
52#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
53 50
54enum rcutorture_type { 51enum rcutorture_type {
55 RCU_FLAVOR, 52 RCU_FLAVOR,
56 RCU_BH_FLAVOR, 53 RCU_BH_FLAVOR,
57 RCU_SCHED_FLAVOR, 54 RCU_SCHED_FLAVOR,
55 RCU_TASKS_FLAVOR,
58 SRCU_FLAVOR, 56 SRCU_FLAVOR,
59 INVALID_RCU_FLAVOR 57 INVALID_RCU_FLAVOR
60}; 58};
@@ -197,6 +195,28 @@ void call_rcu_sched(struct rcu_head *head,
197 195
198void synchronize_sched(void); 196void synchronize_sched(void);
199 197
198/**
 199 * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period
200 * @head: structure to be used for queueing the RCU updates.
201 * @func: actual callback function to be invoked after the grace period
202 *
203 * The callback function will be invoked some time after a full grace
204 * period elapses, in other words after all currently executing RCU
205 * read-side critical sections have completed. call_rcu_tasks() assumes
206 * that the read-side critical sections end at a voluntary context
207 * switch (not a preemption!), entry into idle, or transition to usermode
208 * execution. As such, there are no read-side primitives analogous to
209 * rcu_read_lock() and rcu_read_unlock() because this primitive is intended
210 * to determine that all tasks have passed through a safe state, not so
 211 * much for data-structure synchronization.
212 *
213 * See the description of call_rcu() for more detailed information on
214 * memory ordering guarantees.
215 */
216void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head));
217void synchronize_rcu_tasks(void);
218void rcu_barrier_tasks(void);
219
200#ifdef CONFIG_PREEMPT_RCU 220#ifdef CONFIG_PREEMPT_RCU
201 221
202void __rcu_read_lock(void); 222void __rcu_read_lock(void);
@@ -238,8 +258,8 @@ static inline int rcu_preempt_depth(void)
238 258
239/* Internal to kernel */ 259/* Internal to kernel */
240void rcu_init(void); 260void rcu_init(void);
241void rcu_sched_qs(int cpu); 261void rcu_sched_qs(void);
242void rcu_bh_qs(int cpu); 262void rcu_bh_qs(void);
243void rcu_check_callbacks(int cpu, int user); 263void rcu_check_callbacks(int cpu, int user);
244struct notifier_block; 264struct notifier_block;
245void rcu_idle_enter(void); 265void rcu_idle_enter(void);
@@ -269,6 +289,14 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
269 struct task_struct *next) { } 289 struct task_struct *next) { }
270#endif /* CONFIG_RCU_USER_QS */ 290#endif /* CONFIG_RCU_USER_QS */
271 291
292#ifdef CONFIG_RCU_NOCB_CPU
293void rcu_init_nohz(void);
294#else /* #ifdef CONFIG_RCU_NOCB_CPU */
295static inline void rcu_init_nohz(void)
296{
297}
298#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
299
272/** 300/**
273 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers 301 * RCU_NONIDLE - Indicate idle-loop code that needs RCU readers
274 * @a: Code that RCU needs to pay attention to. 302 * @a: Code that RCU needs to pay attention to.
@@ -294,6 +322,36 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev,
294 rcu_irq_exit(); \ 322 rcu_irq_exit(); \
295 } while (0) 323 } while (0)
296 324
325/*
326 * Note a voluntary context switch for RCU-tasks benefit. This is a
327 * macro rather than an inline function to avoid #include hell.
328 */
329#ifdef CONFIG_TASKS_RCU
330#define TASKS_RCU(x) x
331extern struct srcu_struct tasks_rcu_exit_srcu;
332#define rcu_note_voluntary_context_switch(t) \
333 do { \
334 if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \
335 ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \
336 } while (0)
337#else /* #ifdef CONFIG_TASKS_RCU */
338#define TASKS_RCU(x) do { } while (0)
339#define rcu_note_voluntary_context_switch(t) do { } while (0)
340#endif /* #else #ifdef CONFIG_TASKS_RCU */
341
342/**
343 * cond_resched_rcu_qs - Report potential quiescent states to RCU
344 *
345 * This macro resembles cond_resched(), except that it is defined to
346 * report potential quiescent states to RCU-tasks even if the cond_resched()
347 * machinery were to be shut off, as some advocate for PREEMPT kernels.
348 */
349#define cond_resched_rcu_qs() \
350do { \
351 rcu_note_voluntary_context_switch(current); \
352 cond_resched(); \
353} while (0)
354
297#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) 355#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP)
298bool __rcu_is_watching(void); 356bool __rcu_is_watching(void);
299#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ 357#endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */
@@ -349,7 +407,7 @@ bool rcu_lockdep_current_cpu_online(void);
349#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 407#else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
350static inline bool rcu_lockdep_current_cpu_online(void) 408static inline bool rcu_lockdep_current_cpu_online(void)
351{ 409{
352 return 1; 410 return true;
353} 411}
354#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ 412#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
355 413
@@ -371,41 +429,7 @@ extern struct lockdep_map rcu_sched_lock_map;
371extern struct lockdep_map rcu_callback_map; 429extern struct lockdep_map rcu_callback_map;
372int debug_lockdep_rcu_enabled(void); 430int debug_lockdep_rcu_enabled(void);
373 431
374/** 432int rcu_read_lock_held(void);
375 * rcu_read_lock_held() - might we be in RCU read-side critical section?
376 *
377 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
378 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
379 * this assumes we are in an RCU read-side critical section unless it can
380 * prove otherwise. This is useful for debug checks in functions that
381 * require that they be called within an RCU read-side critical section.
382 *
383 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
384 * and while lockdep is disabled.
385 *
386 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
387 * occur in the same context, for example, it is illegal to invoke
388 * rcu_read_unlock() in process context if the matching rcu_read_lock()
389 * was invoked from within an irq handler.
390 *
391 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
392 * offline from an RCU perspective, so check for those as well.
393 */
394static inline int rcu_read_lock_held(void)
395{
396 if (!debug_lockdep_rcu_enabled())
397 return 1;
398 if (!rcu_is_watching())
399 return 0;
400 if (!rcu_lockdep_current_cpu_online())
401 return 0;
402 return lock_is_held(&rcu_lock_map);
403}
404
405/*
406 * rcu_read_lock_bh_held() is defined out of line to avoid #include-file
407 * hell.
408 */
409int rcu_read_lock_bh_held(void); 433int rcu_read_lock_bh_held(void);
410 434
411/** 435/**
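The call_rcu_tasks() kerneldoc added above describes an asynchronous primitive whose grace period waits for every task to pass through a voluntary context switch, the idle loop, or usermode execution. A hedged usage sketch, assuming a hypothetical structure that must not be freed while some task might still be executing inside it (the "trampoline" naming only illustrates the intended kind of use case):

    #include <linux/kernel.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    struct old_trampoline {                 /* hypothetical */
            struct rcu_head rh;
            /* ... code/data that tasks may still be running in ... */
    };

    static void old_trampoline_free(struct rcu_head *rhp)
    {
            kfree(container_of(rhp, struct old_trampoline, rh));
    }

    /* Asynchronous: free once an RCU-tasks grace period has elapsed. */
    static void retire_trampoline(struct old_trampoline *otp)
    {
            call_rcu_tasks(&otp->rh, old_trampoline_free);
    }

    /* Synchronous alternative: wait for the grace period, then free. */
    static void retire_trampoline_sync(struct old_trampoline *otp)
    {
            synchronize_rcu_tasks();
            kfree(otp);
    }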
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index d40a6a451330..38cc5b1e252d 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -80,7 +80,7 @@ static inline void kfree_call_rcu(struct rcu_head *head,
80 80
81static inline void rcu_note_context_switch(int cpu) 81static inline void rcu_note_context_switch(int cpu)
82{ 82{
83 rcu_sched_qs(cpu); 83 rcu_sched_qs();
84} 84}
85 85
86/* 86/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5c2c885ee52b..42888d715fb1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1212,6 +1212,13 @@ struct sched_dl_entity {
1212 struct hrtimer dl_timer; 1212 struct hrtimer dl_timer;
1213}; 1213};
1214 1214
1215union rcu_special {
1216 struct {
1217 bool blocked;
1218 bool need_qs;
1219 } b;
1220 short s;
1221};
1215struct rcu_node; 1222struct rcu_node;
1216 1223
1217enum perf_event_task_context { 1224enum perf_event_task_context {
@@ -1264,12 +1271,18 @@ struct task_struct {
1264 1271
1265#ifdef CONFIG_PREEMPT_RCU 1272#ifdef CONFIG_PREEMPT_RCU
1266 int rcu_read_lock_nesting; 1273 int rcu_read_lock_nesting;
1267 char rcu_read_unlock_special; 1274 union rcu_special rcu_read_unlock_special;
1268 struct list_head rcu_node_entry; 1275 struct list_head rcu_node_entry;
1269#endif /* #ifdef CONFIG_PREEMPT_RCU */ 1276#endif /* #ifdef CONFIG_PREEMPT_RCU */
1270#ifdef CONFIG_TREE_PREEMPT_RCU 1277#ifdef CONFIG_TREE_PREEMPT_RCU
1271 struct rcu_node *rcu_blocked_node; 1278 struct rcu_node *rcu_blocked_node;
1272#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 1279#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
1280#ifdef CONFIG_TASKS_RCU
1281 unsigned long rcu_tasks_nvcsw;
1282 bool rcu_tasks_holdout;
1283 struct list_head rcu_tasks_holdout_list;
1284 int rcu_tasks_idle_cpu;
1285#endif /* #ifdef CONFIG_TASKS_RCU */
1273 1286
1274#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) 1287#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
1275 struct sched_info sched_info; 1288 struct sched_info sched_info;
@@ -1999,29 +2012,21 @@ extern void task_clear_jobctl_trapping(struct task_struct *task);
1999extern void task_clear_jobctl_pending(struct task_struct *task, 2012extern void task_clear_jobctl_pending(struct task_struct *task,
2000 unsigned int mask); 2013 unsigned int mask);
2001 2014
2002#ifdef CONFIG_PREEMPT_RCU
2003
2004#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */
2005#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */
2006
2007static inline void rcu_copy_process(struct task_struct *p) 2015static inline void rcu_copy_process(struct task_struct *p)
2008{ 2016{
2017#ifdef CONFIG_PREEMPT_RCU
2009 p->rcu_read_lock_nesting = 0; 2018 p->rcu_read_lock_nesting = 0;
2010 p->rcu_read_unlock_special = 0; 2019 p->rcu_read_unlock_special.s = 0;
2011#ifdef CONFIG_TREE_PREEMPT_RCU
2012 p->rcu_blocked_node = NULL; 2020 p->rcu_blocked_node = NULL;
2013#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
2014 INIT_LIST_HEAD(&p->rcu_node_entry); 2021 INIT_LIST_HEAD(&p->rcu_node_entry);
2022#endif /* #ifdef CONFIG_PREEMPT_RCU */
2023#ifdef CONFIG_TASKS_RCU
2024 p->rcu_tasks_holdout = false;
2025 INIT_LIST_HEAD(&p->rcu_tasks_holdout_list);
2026 p->rcu_tasks_idle_cpu = -1;
2027#endif /* #ifdef CONFIG_TASKS_RCU */
2015} 2028}
2016 2029
2017#else
2018
2019static inline void rcu_copy_process(struct task_struct *p)
2020{
2021}
2022
2023#endif
2024
2025static inline void tsk_restore_flags(struct task_struct *task, 2030static inline void tsk_restore_flags(struct task_struct *task,
2026 unsigned long orig_flags, unsigned long flags) 2031 unsigned long orig_flags, unsigned long flags)
2027{ 2032{
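The new union rcu_special in the sched.h hunk above lets the two per-task flags be manipulated either individually (via ->b.blocked and ->b.need_qs) or together (via ->s), which is why INIT_TASK_RCU_PREEMPT() and rcu_copy_process() now clear ".s = 0" where they previously zeroed a plain char. A small standalone sketch of that behavior (the union is copied here only for illustration):

    #include <linux/types.h>        /* bool */

    union rcu_special {
            struct {
                    bool blocked;
                    bool need_qs;
            } b;
            short s;
    };

    static void rcu_special_demo(union rcu_special *rs)
    {
            rs->b.blocked = true;   /* flags can be set one at a time... */
            rs->b.need_qs = true;
            if (rs->s)              /* ...tested together... */
                    rs->s = 0;      /* ...and cleared in a single store */
    }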
diff --git a/include/linux/torture.h b/include/linux/torture.h
index 5ca58fcbaf1b..7759fc3c622d 100644
--- a/include/linux/torture.h
+++ b/include/linux/torture.h
@@ -51,7 +51,7 @@
51 51
52/* Definitions for online/offline exerciser. */ 52/* Definitions for online/offline exerciser. */
53int torture_onoff_init(long ooholdoff, long oointerval); 53int torture_onoff_init(long ooholdoff, long oointerval);
54char *torture_onoff_stats(char *page); 54void torture_onoff_stats(void);
55bool torture_onoff_failures(void); 55bool torture_onoff_failures(void);
56 56
57/* Low-rider random number generator. */ 57/* Low-rider random number generator. */
@@ -77,7 +77,8 @@ int torture_stutter_init(int s);
77/* Initialization and cleanup. */ 77/* Initialization and cleanup. */
78bool torture_init_begin(char *ttype, bool v, int *runnable); 78bool torture_init_begin(char *ttype, bool v, int *runnable);
79void torture_init_end(void); 79void torture_init_end(void);
80bool torture_cleanup(void); 80bool torture_cleanup_begin(void);
81void torture_cleanup_end(void);
81bool torture_must_stop(void); 82bool torture_must_stop(void);
82bool torture_must_stop_irq(void); 83bool torture_must_stop_irq(void);
83void torture_kthread_stopping(char *title); 84void torture_kthread_stopping(char *title);
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
index aca382266411..9b56f37148cf 100644
--- a/include/trace/events/rcu.h
+++ b/include/trace/events/rcu.h
@@ -180,9 +180,12 @@ TRACE_EVENT(rcu_grace_period_init,
180 * argument is a string as follows: 180 * argument is a string as follows:
181 * 181 *
182 * "WakeEmpty": Wake rcuo kthread, first CB to empty list. 182 * "WakeEmpty": Wake rcuo kthread, first CB to empty list.
183 * "WakeEmptyIsDeferred": Wake rcuo kthread later, first CB to empty list.
183 * "WakeOvf": Wake rcuo kthread, CB list is huge. 184 * "WakeOvf": Wake rcuo kthread, CB list is huge.
185 * "WakeOvfIsDeferred": Wake rcuo kthread later, CB list is huge.
184 * "WakeNot": Don't wake rcuo kthread. 186 * "WakeNot": Don't wake rcuo kthread.
185 * "WakeNotPoll": Don't wake rcuo kthread because it is polling. 187 * "WakeNotPoll": Don't wake rcuo kthread because it is polling.
188 * "DeferredWake": Carried out the "IsDeferred" wakeup.
186 * "Poll": Start of new polling cycle for rcu_nocb_poll. 189 * "Poll": Start of new polling cycle for rcu_nocb_poll.
187 * "Sleep": Sleep waiting for CBs for !rcu_nocb_poll. 190 * "Sleep": Sleep waiting for CBs for !rcu_nocb_poll.
188 * "WokeEmpty": rcuo kthread woke to find empty list. 191 * "WokeEmpty": rcuo kthread woke to find empty list.
diff --git a/init/Kconfig b/init/Kconfig
index e84c6423a2e5..4fe5500b998f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -507,6 +507,16 @@ config PREEMPT_RCU
507 This option enables preemptible-RCU code that is common between 507 This option enables preemptible-RCU code that is common between
508 TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU. 508 TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU.
509 509
510config TASKS_RCU
511 bool "Task_based RCU implementation using voluntary context switch"
512 default n
513 help
514 This option enables a task-based RCU implementation that uses
515 only voluntary context switch (not preemption!), idle, and
516 user-mode execution as quiescent states.
517
518 If unsure, say N.
519
510config RCU_STALL_COMMON 520config RCU_STALL_COMMON
511 def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) 521 def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE )
512 help 522 help
@@ -737,7 +747,7 @@ choice
737 747
738config RCU_NOCB_CPU_NONE 748config RCU_NOCB_CPU_NONE
739 bool "No build_forced no-CBs CPUs" 749 bool "No build_forced no-CBs CPUs"
740 depends on RCU_NOCB_CPU && !NO_HZ_FULL_ALL 750 depends on RCU_NOCB_CPU
741 help 751 help
742 This option does not force any of the CPUs to be no-CBs CPUs. 752 This option does not force any of the CPUs to be no-CBs CPUs.
743 Only CPUs designated by the rcu_nocbs= boot parameter will be 753 Only CPUs designated by the rcu_nocbs= boot parameter will be
@@ -751,7 +761,7 @@ config RCU_NOCB_CPU_NONE
751 761
752config RCU_NOCB_CPU_ZERO 762config RCU_NOCB_CPU_ZERO
753 bool "CPU 0 is a build_forced no-CBs CPU" 763 bool "CPU 0 is a build_forced no-CBs CPU"
754 depends on RCU_NOCB_CPU && !NO_HZ_FULL_ALL 764 depends on RCU_NOCB_CPU
755 help 765 help
756 This option forces CPU 0 to be a no-CBs CPU, so that its RCU 766 This option forces CPU 0 to be a no-CBs CPU, so that its RCU
757 callbacks are invoked by a per-CPU kthread whose name begins 767 callbacks are invoked by a per-CPU kthread whose name begins
diff --git a/init/main.c b/init/main.c
index bb1aed928f21..e3c4cdd94d5b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -578,6 +578,7 @@ asmlinkage __visible void __init start_kernel(void)
578 idr_init_cache(); 578 idr_init_cache();
579 rcu_init(); 579 rcu_init();
580 tick_nohz_init(); 580 tick_nohz_init();
581 rcu_init_nohz();
581 context_tracking_init(); 582 context_tracking_init();
582 radix_tree_init(); 583 radix_tree_init();
583 /* init some links before init_ISA_irqs() */ 584 /* init some links before init_ISA_irqs() */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 81e2a388a0f6..356450f09c1f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -79,6 +79,8 @@ static struct {
79 79
80/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ 80/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
81#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) 81#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
82#define cpuhp_lock_acquire_tryread() \
83 lock_map_acquire_tryread(&cpu_hotplug.dep_map)
82#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) 84#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
83#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) 85#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
84 86
@@ -91,10 +93,22 @@ void get_online_cpus(void)
91 mutex_lock(&cpu_hotplug.lock); 93 mutex_lock(&cpu_hotplug.lock);
92 cpu_hotplug.refcount++; 94 cpu_hotplug.refcount++;
93 mutex_unlock(&cpu_hotplug.lock); 95 mutex_unlock(&cpu_hotplug.lock);
94
95} 96}
96EXPORT_SYMBOL_GPL(get_online_cpus); 97EXPORT_SYMBOL_GPL(get_online_cpus);
97 98
99bool try_get_online_cpus(void)
100{
101 if (cpu_hotplug.active_writer == current)
102 return true;
103 if (!mutex_trylock(&cpu_hotplug.lock))
104 return false;
105 cpuhp_lock_acquire_tryread();
106 cpu_hotplug.refcount++;
107 mutex_unlock(&cpu_hotplug.lock);
108 return true;
109}
110EXPORT_SYMBOL_GPL(try_get_online_cpus);
111
98void put_online_cpus(void) 112void put_online_cpus(void)
99{ 113{
100 if (cpu_hotplug.active_writer == current) 114 if (cpu_hotplug.active_writer == current)
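try_get_online_cpus(), implemented above, is a non-blocking counterpart to get_online_cpus(): it returns true and takes a read-side reference on the hotplug lock only if that can be done without sleeping, and false otherwise. Per the merge description, this is what lets RCU's expedited grace periods fall back rather than deadlock against CPU hotplug. A hedged sketch of the calling pattern; both helpers invoked inside it are hypothetical stand-ins:

    #include <linux/cpu.h>

    static void fallback_without_hotplug_lock(void)  /* hypothetical */
    {
    }

    static void do_work_assuming_stable_cpus(void)   /* hypothetical */
    {
    }

    static void do_work_needing_stable_cpus(void)
    {
            if (!try_get_online_cpus()) {
                    /* Hotplug lock unavailable: do something that
                     * tolerates CPUs coming and going instead of
                     * blocking on the lock. */
                    fallback_without_hotplug_lock();
                    return;
            }
            /* CPUs cannot come or go until put_online_cpus(). */
            do_work_assuming_stable_cpus();
            put_online_cpus();
    }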
diff --git a/kernel/exit.c b/kernel/exit.c
index 32c58f7433a3..d13f2eec4bb8 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -667,6 +667,7 @@ void do_exit(long code)
667{ 667{
668 struct task_struct *tsk = current; 668 struct task_struct *tsk = current;
669 int group_dead; 669 int group_dead;
670 TASKS_RCU(int tasks_rcu_i);
670 671
671 profile_task_exit(tsk); 672 profile_task_exit(tsk);
672 673
@@ -775,6 +776,7 @@ void do_exit(long code)
775 */ 776 */
776 flush_ptrace_hw_breakpoint(tsk); 777 flush_ptrace_hw_breakpoint(tsk);
777 778
779 TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu));
778 exit_notify(tsk, group_dead); 780 exit_notify(tsk, group_dead);
779 proc_exit_connector(tsk); 781 proc_exit_connector(tsk);
780#ifdef CONFIG_NUMA 782#ifdef CONFIG_NUMA
@@ -814,6 +816,7 @@ void do_exit(long code)
814 if (tsk->nr_dirtied) 816 if (tsk->nr_dirtied)
815 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); 817 __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
816 exit_rcu(); 818 exit_rcu();
819 TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i));
817 820
818 /* 821 /*
819 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed 822 * The setting of TASK_RUNNING by try_to_wake_up() may be delayed
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
index 0955b885d0dc..540d5dfe1112 100644
--- a/kernel/locking/locktorture.c
+++ b/kernel/locking/locktorture.c
@@ -27,6 +27,7 @@
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/err.h> 28#include <linux/err.h>
29#include <linux/spinlock.h> 29#include <linux/spinlock.h>
30#include <linux/mutex.h>
30#include <linux/smp.h> 31#include <linux/smp.h>
31#include <linux/interrupt.h> 32#include <linux/interrupt.h>
32#include <linux/sched.h> 33#include <linux/sched.h>
@@ -51,6 +52,8 @@ MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>");
51 52
52torture_param(int, nwriters_stress, -1, 53torture_param(int, nwriters_stress, -1,
53 "Number of write-locking stress-test threads"); 54 "Number of write-locking stress-test threads");
55torture_param(int, nreaders_stress, -1,
56 "Number of read-locking stress-test threads");
54torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); 57torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
55torture_param(int, onoff_interval, 0, 58torture_param(int, onoff_interval, 0,
56 "Time between CPU hotplugs (s), 0=disable"); 59 "Time between CPU hotplugs (s), 0=disable");
@@ -66,30 +69,28 @@ torture_param(bool, verbose, true,
66static char *torture_type = "spin_lock"; 69static char *torture_type = "spin_lock";
67module_param(torture_type, charp, 0444); 70module_param(torture_type, charp, 0444);
68MODULE_PARM_DESC(torture_type, 71MODULE_PARM_DESC(torture_type,
69 "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); 72 "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)");
70
71static atomic_t n_lock_torture_errors;
72 73
73static struct task_struct *stats_task; 74static struct task_struct *stats_task;
74static struct task_struct **writer_tasks; 75static struct task_struct **writer_tasks;
76static struct task_struct **reader_tasks;
75 77
76static int nrealwriters_stress;
77static bool lock_is_write_held; 78static bool lock_is_write_held;
79static bool lock_is_read_held;
78 80
79struct lock_writer_stress_stats { 81struct lock_stress_stats {
80 long n_write_lock_fail; 82 long n_lock_fail;
81 long n_write_lock_acquired; 83 long n_lock_acquired;
82}; 84};
83static struct lock_writer_stress_stats *lwsa;
84 85
85#if defined(MODULE) 86#if defined(MODULE)
86#define LOCKTORTURE_RUNNABLE_INIT 1 87#define LOCKTORTURE_RUNNABLE_INIT 1
87#else 88#else
88#define LOCKTORTURE_RUNNABLE_INIT 0 89#define LOCKTORTURE_RUNNABLE_INIT 0
89#endif 90#endif
90int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; 91int torture_runnable = LOCKTORTURE_RUNNABLE_INIT;
91module_param(locktorture_runnable, int, 0444); 92module_param(torture_runnable, int, 0444);
92MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at module init"); 93MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init");
93 94
94/* Forward reference. */ 95/* Forward reference. */
95static void lock_torture_cleanup(void); 96static void lock_torture_cleanup(void);
@@ -102,12 +103,25 @@ struct lock_torture_ops {
102 int (*writelock)(void); 103 int (*writelock)(void);
103 void (*write_delay)(struct torture_random_state *trsp); 104 void (*write_delay)(struct torture_random_state *trsp);
104 void (*writeunlock)(void); 105 void (*writeunlock)(void);
106 int (*readlock)(void);
107 void (*read_delay)(struct torture_random_state *trsp);
108 void (*readunlock)(void);
105 unsigned long flags; 109 unsigned long flags;
106 const char *name; 110 const char *name;
107}; 111};
108 112
109static struct lock_torture_ops *cur_ops; 113struct lock_torture_cxt {
110 114 int nrealwriters_stress;
115 int nrealreaders_stress;
116 bool debug_lock;
117 atomic_t n_lock_torture_errors;
118 struct lock_torture_ops *cur_ops;
119 struct lock_stress_stats *lwsa; /* writer statistics */
120 struct lock_stress_stats *lrsa; /* reader statistics */
121};
122static struct lock_torture_cxt cxt = { 0, 0, false,
123 ATOMIC_INIT(0),
124 NULL, NULL};
111/* 125/*
112 * Definitions for lock torture testing. 126 * Definitions for lock torture testing.
113 */ 127 */
@@ -123,10 +137,10 @@ static void torture_lock_busted_write_delay(struct torture_random_state *trsp)
123 137
124 /* We want a long delay occasionally to force massive contention. */ 138 /* We want a long delay occasionally to force massive contention. */
125 if (!(torture_random(trsp) % 139 if (!(torture_random(trsp) %
126 (nrealwriters_stress * 2000 * longdelay_us))) 140 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
127 mdelay(longdelay_us); 141 mdelay(longdelay_us);
128#ifdef CONFIG_PREEMPT 142#ifdef CONFIG_PREEMPT
129 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 143 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
130 preempt_schedule(); /* Allow test to be preempted. */ 144 preempt_schedule(); /* Allow test to be preempted. */
131#endif 145#endif
132} 146}
@@ -140,6 +154,9 @@ static struct lock_torture_ops lock_busted_ops = {
140 .writelock = torture_lock_busted_write_lock, 154 .writelock = torture_lock_busted_write_lock,
141 .write_delay = torture_lock_busted_write_delay, 155 .write_delay = torture_lock_busted_write_delay,
142 .writeunlock = torture_lock_busted_write_unlock, 156 .writeunlock = torture_lock_busted_write_unlock,
157 .readlock = NULL,
158 .read_delay = NULL,
159 .readunlock = NULL,
143 .name = "lock_busted" 160 .name = "lock_busted"
144}; 161};
145 162
@@ -160,13 +177,13 @@ static void torture_spin_lock_write_delay(struct torture_random_state *trsp)
160 * we want a long delay occasionally to force massive contention. 177 * we want a long delay occasionally to force massive contention.
161 */ 178 */
162 if (!(torture_random(trsp) % 179 if (!(torture_random(trsp) %
163 (nrealwriters_stress * 2000 * longdelay_us))) 180 (cxt.nrealwriters_stress * 2000 * longdelay_us)))
164 mdelay(longdelay_us); 181 mdelay(longdelay_us);
165 if (!(torture_random(trsp) % 182 if (!(torture_random(trsp) %
166 (nrealwriters_stress * 2 * shortdelay_us))) 183 (cxt.nrealwriters_stress * 2 * shortdelay_us)))
167 udelay(shortdelay_us); 184 udelay(shortdelay_us);
168#ifdef CONFIG_PREEMPT 185#ifdef CONFIG_PREEMPT
169 if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) 186 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
170 preempt_schedule(); /* Allow test to be preempted. */ 187 preempt_schedule(); /* Allow test to be preempted. */
171#endif 188#endif
172} 189}
@@ -180,6 +197,9 @@ static struct lock_torture_ops spin_lock_ops = {
180 .writelock = torture_spin_lock_write_lock, 197 .writelock = torture_spin_lock_write_lock,
181 .write_delay = torture_spin_lock_write_delay, 198 .write_delay = torture_spin_lock_write_delay,
182 .writeunlock = torture_spin_lock_write_unlock, 199 .writeunlock = torture_spin_lock_write_unlock,
200 .readlock = NULL,
201 .read_delay = NULL,
202 .readunlock = NULL,
183 .name = "spin_lock" 203 .name = "spin_lock"
184}; 204};
185 205
@@ -189,30 +209,137 @@ __acquires(torture_spinlock_irq)
189 unsigned long flags; 209 unsigned long flags;
190 210
191 spin_lock_irqsave(&torture_spinlock, flags); 211 spin_lock_irqsave(&torture_spinlock, flags);
192 cur_ops->flags = flags; 212 cxt.cur_ops->flags = flags;
193 return 0; 213 return 0;
194} 214}
195 215
196static void torture_lock_spin_write_unlock_irq(void) 216static void torture_lock_spin_write_unlock_irq(void)
197__releases(torture_spinlock) 217__releases(torture_spinlock)
198{ 218{
199 spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); 219 spin_unlock_irqrestore(&torture_spinlock, cxt.cur_ops->flags);
200} 220}
201 221
202static struct lock_torture_ops spin_lock_irq_ops = { 222static struct lock_torture_ops spin_lock_irq_ops = {
203 .writelock = torture_spin_lock_write_lock_irq, 223 .writelock = torture_spin_lock_write_lock_irq,
204 .write_delay = torture_spin_lock_write_delay, 224 .write_delay = torture_spin_lock_write_delay,
205 .writeunlock = torture_lock_spin_write_unlock_irq, 225 .writeunlock = torture_lock_spin_write_unlock_irq,
226 .readlock = NULL,
227 .read_delay = NULL,
228 .readunlock = NULL,
206 .name = "spin_lock_irq" 229 .name = "spin_lock_irq"
207}; 230};
208 231
232static DEFINE_MUTEX(torture_mutex);
233
234static int torture_mutex_lock(void) __acquires(torture_mutex)
235{
236 mutex_lock(&torture_mutex);
237 return 0;
238}
239
240static void torture_mutex_delay(struct torture_random_state *trsp)
241{
242 const unsigned long longdelay_ms = 100;
243
244 /* We want a long delay occasionally to force massive contention. */
245 if (!(torture_random(trsp) %
246 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
247 mdelay(longdelay_ms * 5);
248 else
249 mdelay(longdelay_ms / 5);
250#ifdef CONFIG_PREEMPT
251 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
252 preempt_schedule(); /* Allow test to be preempted. */
253#endif
254}
255
256static void torture_mutex_unlock(void) __releases(torture_mutex)
257{
258 mutex_unlock(&torture_mutex);
259}
260
261static struct lock_torture_ops mutex_lock_ops = {
262 .writelock = torture_mutex_lock,
263 .write_delay = torture_mutex_delay,
264 .writeunlock = torture_mutex_unlock,
265 .readlock = NULL,
266 .read_delay = NULL,
267 .readunlock = NULL,
268 .name = "mutex_lock"
269};
270
271static DECLARE_RWSEM(torture_rwsem);
272static int torture_rwsem_down_write(void) __acquires(torture_rwsem)
273{
274 down_write(&torture_rwsem);
275 return 0;
276}
277
278static void torture_rwsem_write_delay(struct torture_random_state *trsp)
279{
280 const unsigned long longdelay_ms = 100;
281
282 /* We want a long delay occasionally to force massive contention. */
283 if (!(torture_random(trsp) %
284 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
285 mdelay(longdelay_ms * 10);
286 else
287 mdelay(longdelay_ms / 10);
288#ifdef CONFIG_PREEMPT
289 if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000)))
290 preempt_schedule(); /* Allow test to be preempted. */
291#endif
292}
293
294static void torture_rwsem_up_write(void) __releases(torture_rwsem)
295{
296 up_write(&torture_rwsem);
297}
298
299static int torture_rwsem_down_read(void) __acquires(torture_rwsem)
300{
301 down_read(&torture_rwsem);
302 return 0;
303}
304
305static void torture_rwsem_read_delay(struct torture_random_state *trsp)
306{
307 const unsigned long longdelay_ms = 100;
308
309 /* We want a long delay occasionally to force massive contention. */
310 if (!(torture_random(trsp) %
311 (cxt.nrealwriters_stress * 2000 * longdelay_ms)))
312 mdelay(longdelay_ms * 2);
313 else
314 mdelay(longdelay_ms / 2);
315#ifdef CONFIG_PREEMPT
316 if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000)))
317 preempt_schedule(); /* Allow test to be preempted. */
318#endif
319}
320
321static void torture_rwsem_up_read(void) __releases(torture_rwsem)
322{
323 up_read(&torture_rwsem);
324}
325
326static struct lock_torture_ops rwsem_lock_ops = {
327 .writelock = torture_rwsem_down_write,
328 .write_delay = torture_rwsem_write_delay,
329 .writeunlock = torture_rwsem_up_write,
330 .readlock = torture_rwsem_down_read,
331 .read_delay = torture_rwsem_read_delay,
332 .readunlock = torture_rwsem_up_read,
333 .name = "rwsem_lock"
334};
335
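
The rwsem flavour above is the first ops table to populate the new read-side hooks; the busted, spinlock, and mutex flavours leave them NULL. A compilable userspace sketch of the same pattern, built on pthread_rwlock_t purely for illustration, shows how a caller probes ->readlock before exercising the read side:

    #include <pthread.h>
    #include <stdio.h>

    struct lock_ops {
            int  (*writelock)(void);
            void (*writeunlock)(void);
            int  (*readlock)(void);         /* NULL => write-only flavour */
            void (*readunlock)(void);
            const char *name;
    };

    static pthread_rwlock_t demo_rwlock = PTHREAD_RWLOCK_INITIALIZER;

    static int  demo_wrlock(void)   { return pthread_rwlock_wrlock(&demo_rwlock); }
    static void demo_wrunlock(void) { pthread_rwlock_unlock(&demo_rwlock); }
    static int  demo_rdlock(void)   { return pthread_rwlock_rdlock(&demo_rwlock); }
    static void demo_rdunlock(void) { pthread_rwlock_unlock(&demo_rwlock); }

    static struct lock_ops demo_ops = {
            .writelock   = demo_wrlock,
            .writeunlock = demo_wrunlock,
            .readlock    = demo_rdlock,
            .readunlock  = demo_rdunlock,
            .name        = "demo_rwlock",
    };

    int main(void)
    {
            struct lock_ops *ops = &demo_ops;

            ops->writelock();               /* exclusive section */
            ops->writeunlock();
            if (ops->readlock) {            /* read side only if the flavour has one */
                    ops->readlock();
                    ops->readunlock();
            }
            printf("exercised %s\n", ops->name);
            return 0;
    }
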
209/* 336/*
210 * Lock torture writer kthread. Repeatedly acquires and releases 337 * Lock torture writer kthread. Repeatedly acquires and releases
211 * the lock, checking for duplicate acquisitions. 338 * the lock, checking for duplicate acquisitions.
212 */ 339 */
213static int lock_torture_writer(void *arg) 340static int lock_torture_writer(void *arg)
214{ 341{
215 struct lock_writer_stress_stats *lwsp = arg; 342 struct lock_stress_stats *lwsp = arg;
216 static DEFINE_TORTURE_RANDOM(rand); 343 static DEFINE_TORTURE_RANDOM(rand);
217 344
218 VERBOSE_TOROUT_STRING("lock_torture_writer task started"); 345 VERBOSE_TOROUT_STRING("lock_torture_writer task started");
@@ -221,14 +348,14 @@ static int lock_torture_writer(void *arg)
221 do { 348 do {
222 if ((torture_random(&rand) & 0xfffff) == 0) 349 if ((torture_random(&rand) & 0xfffff) == 0)
223 schedule_timeout_uninterruptible(1); 350 schedule_timeout_uninterruptible(1);
224 cur_ops->writelock(); 351 cxt.cur_ops->writelock();
225 if (WARN_ON_ONCE(lock_is_write_held)) 352 if (WARN_ON_ONCE(lock_is_write_held))
226 lwsp->n_write_lock_fail++; 353 lwsp->n_lock_fail++;
227 lock_is_write_held = 1; 354 lock_is_write_held = 1;
228 lwsp->n_write_lock_acquired++; 355 lwsp->n_lock_acquired++;
229 cur_ops->write_delay(&rand); 356 cxt.cur_ops->write_delay(&rand);
230 lock_is_write_held = 0; 357 lock_is_write_held = 0;
231 cur_ops->writeunlock(); 358 cxt.cur_ops->writeunlock();
232 stutter_wait("lock_torture_writer"); 359 stutter_wait("lock_torture_writer");
233 } while (!torture_must_stop()); 360 } while (!torture_must_stop());
234 torture_kthread_stopping("lock_torture_writer"); 361 torture_kthread_stopping("lock_torture_writer");
@@ -236,32 +363,61 @@ static int lock_torture_writer(void *arg)
236} 363}
237 364
238/* 365/*
366 * Lock torture reader kthread. Repeatedly acquires and releases
367 * the reader lock.
368 */
369static int lock_torture_reader(void *arg)
370{
371 struct lock_stress_stats *lrsp = arg;
372 static DEFINE_TORTURE_RANDOM(rand);
373
374 VERBOSE_TOROUT_STRING("lock_torture_reader task started");
375 set_user_nice(current, MAX_NICE);
376
377 do {
378 if ((torture_random(&rand) & 0xfffff) == 0)
379 schedule_timeout_uninterruptible(1);
380 cxt.cur_ops->readlock();
381 lock_is_read_held = 1;
382 lrsp->n_lock_acquired++;
383 cxt.cur_ops->read_delay(&rand);
384 lock_is_read_held = 0;
385 cxt.cur_ops->readunlock();
386 stutter_wait("lock_torture_reader");
387 } while (!torture_must_stop());
388 torture_kthread_stopping("lock_torture_reader");
389 return 0;
390}
391
392/*
239 * Create an lock-torture-statistics message in the specified buffer. 393 * Create an lock-torture-statistics message in the specified buffer.
240 */ 394 */
241static void lock_torture_printk(char *page) 395static void __torture_print_stats(char *page,
396 struct lock_stress_stats *statp, bool write)
242{ 397{
243 bool fail = 0; 398 bool fail = 0;
244 int i; 399 int i, n_stress;
245 long max = 0; 400 long max = 0;
246 long min = lwsa[0].n_write_lock_acquired; 401 long min = statp[0].n_lock_acquired;
247 long long sum = 0; 402 long long sum = 0;
248 403
249 for (i = 0; i < nrealwriters_stress; i++) { 404 n_stress = write ? cxt.nrealwriters_stress : cxt.nrealreaders_stress;
250 if (lwsa[i].n_write_lock_fail) 405 for (i = 0; i < n_stress; i++) {
406 if (statp[i].n_lock_fail)
251 fail = true; 407 fail = true;
252 sum += lwsa[i].n_write_lock_acquired; 408 sum += statp[i].n_lock_acquired;
253 if (max < lwsa[i].n_write_lock_fail) 409 if (max < statp[i].n_lock_fail)
254 max = lwsa[i].n_write_lock_fail; 410 max = statp[i].n_lock_fail;
255 if (min > lwsa[i].n_write_lock_fail) 411 if (min > statp[i].n_lock_fail)
256 min = lwsa[i].n_write_lock_fail; 412 min = statp[i].n_lock_fail;
257 } 413 }
258 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG);
259 page += sprintf(page, 414 page += sprintf(page,
260 "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", 415 "%s: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n",
416 write ? "Writes" : "Reads ",
261 sum, max, min, max / 2 > min ? "???" : "", 417 sum, max, min, max / 2 > min ? "???" : "",
262 fail, fail ? "!!!" : ""); 418 fail, fail ? "!!!" : "");
263 if (fail) 419 if (fail)
264 atomic_inc(&n_lock_torture_errors); 420 atomic_inc(&cxt.n_lock_torture_errors);
265} 421}
266 422
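
__torture_print_stats() above reports one Total/Max/Min line for either the writer or the reader array and flags any acquisition failures. A simplified, compilable userspace sketch of that aggregation (it tracks max/min of the acquisition counts, a slight variation on the code above):

    #include <stdbool.h>
    #include <stdio.h>

    struct stress_stats { long n_fail, n_acquired; };

    static void print_stats(const struct stress_stats *statp, int n, bool write)
    {
            long long sum = 0;
            long max = 0, min = statp[0].n_acquired;
            bool fail = false;
            int i;

            for (i = 0; i < n; i++) {
                    sum += statp[i].n_acquired;
                    if (statp[i].n_fail)
                            fail = true;
                    if (statp[i].n_acquired > max)
                            max = statp[i].n_acquired;
                    if (statp[i].n_acquired < min)
                            min = statp[i].n_acquired;
            }
            printf("%s: Total: %lld Max/Min: %ld/%ld%s\n",
                   write ? "Writes" : "Reads ", sum, max, min,
                   fail ? " !!!" : "");
    }

    int main(void)
    {
            struct stress_stats s[3] = { { 0, 10 }, { 0, 12 }, { 1, 7 } };

            print_stats(s, 3, true);
            return 0;
    }
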
267/* 423/*
@@ -274,18 +430,35 @@ static void lock_torture_printk(char *page)
274 */ 430 */
275static void lock_torture_stats_print(void) 431static void lock_torture_stats_print(void)
276{ 432{
277 int size = nrealwriters_stress * 200 + 8192; 433 int size = cxt.nrealwriters_stress * 200 + 8192;
278 char *buf; 434 char *buf;
279 435
436 if (cxt.cur_ops->readlock)
437 size += cxt.nrealreaders_stress * 200 + 8192;
438
280 buf = kmalloc(size, GFP_KERNEL); 439 buf = kmalloc(size, GFP_KERNEL);
281 if (!buf) { 440 if (!buf) {
282 pr_err("lock_torture_stats_print: Out of memory, need: %d", 441 pr_err("lock_torture_stats_print: Out of memory, need: %d",
283 size); 442 size);
284 return; 443 return;
285 } 444 }
286 lock_torture_printk(buf); 445
446 __torture_print_stats(buf, cxt.lwsa, true);
287 pr_alert("%s", buf); 447 pr_alert("%s", buf);
288 kfree(buf); 448 kfree(buf);
449
450 if (cxt.cur_ops->readlock) {
451 buf = kmalloc(size, GFP_KERNEL);
452 if (!buf) {
453 pr_err("lock_torture_stats_print: Out of memory, need: %d",
454 size);
455 return;
456 }
457
458 __torture_print_stats(buf, cxt.lrsa, false);
459 pr_alert("%s", buf);
460 kfree(buf);
461 }
289} 462}
290 463
291/* 464/*
@@ -312,9 +485,10 @@ lock_torture_print_module_parms(struct lock_torture_ops *cur_ops,
312 const char *tag) 485 const char *tag)
313{ 486{
314 pr_alert("%s" TORTURE_FLAG 487 pr_alert("%s" TORTURE_FLAG
315 "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", 488 "--- %s%s: nwriters_stress=%d nreaders_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n",
316 torture_type, tag, nrealwriters_stress, stat_interval, verbose, 489 torture_type, tag, cxt.debug_lock ? " [debug]": "",
317 shuffle_interval, stutter, shutdown_secs, 490 cxt.nrealwriters_stress, cxt.nrealreaders_stress, stat_interval,
491 verbose, shuffle_interval, stutter, shutdown_secs,
318 onoff_interval, onoff_holdoff); 492 onoff_interval, onoff_holdoff);
319} 493}
320 494
@@ -322,46 +496,56 @@ static void lock_torture_cleanup(void)
322{ 496{
323 int i; 497 int i;
324 498
325 if (torture_cleanup()) 499 if (torture_cleanup_begin())
326 return; 500 return;
327 501
328 if (writer_tasks) { 502 if (writer_tasks) {
329 for (i = 0; i < nrealwriters_stress; i++) 503 for (i = 0; i < cxt.nrealwriters_stress; i++)
330 torture_stop_kthread(lock_torture_writer, 504 torture_stop_kthread(lock_torture_writer,
331 writer_tasks[i]); 505 writer_tasks[i]);
332 kfree(writer_tasks); 506 kfree(writer_tasks);
333 writer_tasks = NULL; 507 writer_tasks = NULL;
334 } 508 }
335 509
510 if (reader_tasks) {
511 for (i = 0; i < cxt.nrealreaders_stress; i++)
512 torture_stop_kthread(lock_torture_reader,
513 reader_tasks[i]);
514 kfree(reader_tasks);
515 reader_tasks = NULL;
516 }
517
336 torture_stop_kthread(lock_torture_stats, stats_task); 518 torture_stop_kthread(lock_torture_stats, stats_task);
337 lock_torture_stats_print(); /* -After- the stats thread is stopped! */ 519 lock_torture_stats_print(); /* -After- the stats thread is stopped! */
338 520
339 if (atomic_read(&n_lock_torture_errors)) 521 if (atomic_read(&cxt.n_lock_torture_errors))
340 lock_torture_print_module_parms(cur_ops, 522 lock_torture_print_module_parms(cxt.cur_ops,
341 "End of test: FAILURE"); 523 "End of test: FAILURE");
342 else if (torture_onoff_failures()) 524 else if (torture_onoff_failures())
343 lock_torture_print_module_parms(cur_ops, 525 lock_torture_print_module_parms(cxt.cur_ops,
344 "End of test: LOCK_HOTPLUG"); 526 "End of test: LOCK_HOTPLUG");
345 else 527 else
346 lock_torture_print_module_parms(cur_ops, 528 lock_torture_print_module_parms(cxt.cur_ops,
347 "End of test: SUCCESS"); 529 "End of test: SUCCESS");
530 torture_cleanup_end();
348} 531}
349 532
350static int __init lock_torture_init(void) 533static int __init lock_torture_init(void)
351{ 534{
352 int i; 535 int i, j;
353 int firsterr = 0; 536 int firsterr = 0;
354 static struct lock_torture_ops *torture_ops[] = { 537 static struct lock_torture_ops *torture_ops[] = {
355 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, 538 &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops,
539 &mutex_lock_ops, &rwsem_lock_ops,
356 }; 540 };
357 541
358 if (!torture_init_begin(torture_type, verbose, &locktorture_runnable)) 542 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
359 return -EBUSY; 543 return -EBUSY;
360 544
361 /* Process args and tell the world that the torturer is on the job. */ 545 /* Process args and tell the world that the torturer is on the job. */
362 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 546 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
363 cur_ops = torture_ops[i]; 547 cxt.cur_ops = torture_ops[i];
364 if (strcmp(torture_type, cur_ops->name) == 0) 548 if (strcmp(torture_type, cxt.cur_ops->name) == 0)
365 break; 549 break;
366 } 550 }
367 if (i == ARRAY_SIZE(torture_ops)) { 551 if (i == ARRAY_SIZE(torture_ops)) {
@@ -374,31 +558,68 @@ static int __init lock_torture_init(void)
374 torture_init_end(); 558 torture_init_end();
375 return -EINVAL; 559 return -EINVAL;
376 } 560 }
377 if (cur_ops->init) 561 if (cxt.cur_ops->init)
378 cur_ops->init(); /* no "goto unwind" prior to this point!!! */ 562 cxt.cur_ops->init(); /* no "goto unwind" prior to this point!!! */
379 563
380 if (nwriters_stress >= 0) 564 if (nwriters_stress >= 0)
381 nrealwriters_stress = nwriters_stress; 565 cxt.nrealwriters_stress = nwriters_stress;
382 else 566 else
383 nrealwriters_stress = 2 * num_online_cpus(); 567 cxt.nrealwriters_stress = 2 * num_online_cpus();
384 lock_torture_print_module_parms(cur_ops, "Start of test"); 568
569#ifdef CONFIG_DEBUG_MUTEXES
570 if (strncmp(torture_type, "mutex", 5) == 0)
571 cxt.debug_lock = true;
572#endif
573#ifdef CONFIG_DEBUG_SPINLOCK
574 if (strncmp(torture_type, "spin", 4) == 0)
575 cxt.debug_lock = true;
576#endif
385 577
386 /* Initialize the statistics so that each run gets its own numbers. */ 578 /* Initialize the statistics so that each run gets its own numbers. */
387 579
388 lock_is_write_held = 0; 580 lock_is_write_held = 0;
389 lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); 581 cxt.lwsa = kmalloc(sizeof(*cxt.lwsa) * cxt.nrealwriters_stress, GFP_KERNEL);
390 if (lwsa == NULL) { 582 if (cxt.lwsa == NULL) {
391 VERBOSE_TOROUT_STRING("lwsa: Out of memory"); 583 VERBOSE_TOROUT_STRING("cxt.lwsa: Out of memory");
392 firsterr = -ENOMEM; 584 firsterr = -ENOMEM;
393 goto unwind; 585 goto unwind;
394 } 586 }
395 for (i = 0; i < nrealwriters_stress; i++) { 587 for (i = 0; i < cxt.nrealwriters_stress; i++) {
396 lwsa[i].n_write_lock_fail = 0; 588 cxt.lwsa[i].n_lock_fail = 0;
397 lwsa[i].n_write_lock_acquired = 0; 589 cxt.lwsa[i].n_lock_acquired = 0;
398 } 590 }
399 591
400 /* Start up the kthreads. */ 592 if (cxt.cur_ops->readlock) {
593 if (nreaders_stress >= 0)
594 cxt.nrealreaders_stress = nreaders_stress;
595 else {
596 /*
597 * By default distribute evenly the number of
598 * readers and writers. We still run the same number
599 * of threads as the writer-only locks default.
600 */
601 if (nwriters_stress < 0) /* user doesn't care */
602 cxt.nrealwriters_stress = num_online_cpus();
603 cxt.nrealreaders_stress = cxt.nrealwriters_stress;
604 }
605
606 lock_is_read_held = 0;
607 cxt.lrsa = kmalloc(sizeof(*cxt.lrsa) * cxt.nrealreaders_stress, GFP_KERNEL);
608 if (cxt.lrsa == NULL) {
609 VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory");
610 firsterr = -ENOMEM;
611 kfree(cxt.lwsa);
612 goto unwind;
613 }
614
615 for (i = 0; i < cxt.nrealreaders_stress; i++) {
616 cxt.lrsa[i].n_lock_fail = 0;
617 cxt.lrsa[i].n_lock_acquired = 0;
618 }
619 }
620 lock_torture_print_module_parms(cxt.cur_ops, "Start of test");
401 621
622 /* Prepare torture context. */
402 if (onoff_interval > 0) { 623 if (onoff_interval > 0) {
403 firsterr = torture_onoff_init(onoff_holdoff * HZ, 624 firsterr = torture_onoff_init(onoff_holdoff * HZ,
404 onoff_interval * HZ); 625 onoff_interval * HZ);
@@ -422,18 +643,51 @@ static int __init lock_torture_init(void)
422 goto unwind; 643 goto unwind;
423 } 644 }
424 645
425 writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), 646 writer_tasks = kzalloc(cxt.nrealwriters_stress * sizeof(writer_tasks[0]),
426 GFP_KERNEL); 647 GFP_KERNEL);
427 if (writer_tasks == NULL) { 648 if (writer_tasks == NULL) {
428 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); 649 VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory");
429 firsterr = -ENOMEM; 650 firsterr = -ENOMEM;
430 goto unwind; 651 goto unwind;
431 } 652 }
432 for (i = 0; i < nrealwriters_stress; i++) { 653
433 firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], 654 if (cxt.cur_ops->readlock) {
655 reader_tasks = kzalloc(cxt.nrealreaders_stress * sizeof(reader_tasks[0]),
656 GFP_KERNEL);
657 if (reader_tasks == NULL) {
658 VERBOSE_TOROUT_ERRSTRING("reader_tasks: Out of memory");
659 firsterr = -ENOMEM;
660 goto unwind;
661 }
662 }
663
664 /*
665 * Create the kthreads and start torturing (oh, those poor little locks).
666 *
667 * TODO: Note that we interleave writers with readers, giving writers a
 668 * slight advantage, by creating their kthreads first. This can be modified
669 * for very specific needs, or even let the user choose the policy, if
670 * ever wanted.
671 */
672 for (i = 0, j = 0; i < cxt.nrealwriters_stress ||
673 j < cxt.nrealreaders_stress; i++, j++) {
674 if (i >= cxt.nrealwriters_stress)
675 goto create_reader;
676
677 /* Create writer. */
678 firsterr = torture_create_kthread(lock_torture_writer, &cxt.lwsa[i],
434 writer_tasks[i]); 679 writer_tasks[i]);
435 if (firsterr) 680 if (firsterr)
436 goto unwind; 681 goto unwind;
682
683 create_reader:
684 if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress))
685 continue;
686 /* Create reader. */
687 firsterr = torture_create_kthread(lock_torture_reader, &cxt.lrsa[j],
688 reader_tasks[j]);
689 if (firsterr)
690 goto unwind;
437 } 691 }
438 if (stat_interval > 0) { 692 if (stat_interval > 0) {
439 firsterr = torture_create_kthread(lock_torture_stats, NULL, 693 firsterr = torture_create_kthread(lock_torture_stats, NULL,
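
The creation loop above (and the TODO comment inside it) interleaves writer and reader kthreads, giving writers a slight head start. A compilable userspace analogue of that policy, with stub thread bodies and made-up counts:

    #include <pthread.h>
    #include <stdio.h>

    static void *writer(void *arg) { return NULL; }   /* stand-ins for the */
    static void *reader(void *arg) { return NULL; }   /* torture kthreads  */

    int main(void)
    {
            enum { nwriters = 4, nreaders = 2 };
            pthread_t w[nwriters], r[nreaders];
            int i, j;

            /* Writer i is created just before reader i, as in the loop above. */
            for (i = 0, j = 0; i < nwriters || j < nreaders; i++, j++) {
                    if (i < nwriters)
                            pthread_create(&w[i], NULL, writer, NULL);
                    if (j < nreaders)
                            pthread_create(&r[j], NULL, reader, NULL);
            }
            for (i = 0; i < nwriters; i++)
                    pthread_join(w[i], NULL);
            for (j = 0; j < nreaders; j++)
                    pthread_join(r[j], NULL);
            printf("created %d writers and %d readers\n", nwriters, nreaders);
            return 0;
    }
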
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 948a7693748e..240fa9094f83 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -49,11 +49,19 @@
49#include <linux/trace_clock.h> 49#include <linux/trace_clock.h>
50#include <asm/byteorder.h> 50#include <asm/byteorder.h>
51#include <linux/torture.h> 51#include <linux/torture.h>
52#include <linux/vmalloc.h>
52 53
53MODULE_LICENSE("GPL"); 54MODULE_LICENSE("GPL");
54MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>"); 55MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and Josh Triplett <josh@joshtriplett.org>");
55 56
56 57
58torture_param(int, cbflood_inter_holdoff, HZ,
59 "Holdoff between floods (jiffies)");
60torture_param(int, cbflood_intra_holdoff, 1,
61 "Holdoff between bursts (jiffies)");
62torture_param(int, cbflood_n_burst, 3, "# bursts in flood, zero to disable");
63torture_param(int, cbflood_n_per_burst, 20000,
64 "# callbacks per burst in flood");
57torture_param(int, fqs_duration, 0, 65torture_param(int, fqs_duration, 0,
58 "Duration of fqs bursts (us), 0 to disable"); 66 "Duration of fqs bursts (us), 0 to disable");
59torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)"); 67torture_param(int, fqs_holdoff, 0, "Holdoff time within fqs bursts (us)");
@@ -96,10 +104,12 @@ module_param(torture_type, charp, 0444);
96MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)"); 104MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, ...)");
97 105
98static int nrealreaders; 106static int nrealreaders;
107static int ncbflooders;
99static struct task_struct *writer_task; 108static struct task_struct *writer_task;
100static struct task_struct **fakewriter_tasks; 109static struct task_struct **fakewriter_tasks;
101static struct task_struct **reader_tasks; 110static struct task_struct **reader_tasks;
102static struct task_struct *stats_task; 111static struct task_struct *stats_task;
112static struct task_struct **cbflood_task;
103static struct task_struct *fqs_task; 113static struct task_struct *fqs_task;
104static struct task_struct *boost_tasks[NR_CPUS]; 114static struct task_struct *boost_tasks[NR_CPUS];
105static struct task_struct *stall_task; 115static struct task_struct *stall_task;
@@ -138,6 +148,7 @@ static long n_rcu_torture_boosts;
138static long n_rcu_torture_timers; 148static long n_rcu_torture_timers;
139static long n_barrier_attempts; 149static long n_barrier_attempts;
140static long n_barrier_successes; 150static long n_barrier_successes;
151static atomic_long_t n_cbfloods;
141static struct list_head rcu_torture_removed; 152static struct list_head rcu_torture_removed;
142 153
143static int rcu_torture_writer_state; 154static int rcu_torture_writer_state;
@@ -157,9 +168,9 @@ static int rcu_torture_writer_state;
157#else 168#else
158#define RCUTORTURE_RUNNABLE_INIT 0 169#define RCUTORTURE_RUNNABLE_INIT 0
159#endif 170#endif
160int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT; 171static int torture_runnable = RCUTORTURE_RUNNABLE_INIT;
161module_param(rcutorture_runnable, int, 0444); 172module_param(torture_runnable, int, 0444);
162MODULE_PARM_DESC(rcutorture_runnable, "Start rcutorture at boot"); 173MODULE_PARM_DESC(torture_runnable, "Start rcutorture at boot");
163 174
164#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU) 175#if defined(CONFIG_RCU_BOOST) && !defined(CONFIG_HOTPLUG_CPU)
165#define rcu_can_boost() 1 176#define rcu_can_boost() 1
@@ -182,7 +193,7 @@ static u64 notrace rcu_trace_clock_local(void)
182#endif /* #else #ifdef CONFIG_RCU_TRACE */ 193#endif /* #else #ifdef CONFIG_RCU_TRACE */
183 194
184static unsigned long boost_starttime; /* jiffies of next boost test start. */ 195static unsigned long boost_starttime; /* jiffies of next boost test start. */
185DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */ 196static DEFINE_MUTEX(boost_mutex); /* protect setting boost_starttime */
186 /* and boost task create/destroy. */ 197 /* and boost task create/destroy. */
187static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */ 198static atomic_t barrier_cbs_count; /* Barrier callbacks registered. */
188static bool barrier_phase; /* Test phase. */ 199static bool barrier_phase; /* Test phase. */
@@ -242,7 +253,7 @@ struct rcu_torture_ops {
242 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); 253 void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu));
243 void (*cb_barrier)(void); 254 void (*cb_barrier)(void);
244 void (*fqs)(void); 255 void (*fqs)(void);
245 void (*stats)(char *page); 256 void (*stats)(void);
246 int irq_capable; 257 int irq_capable;
247 int can_boost; 258 int can_boost;
248 const char *name; 259 const char *name;
@@ -525,21 +536,21 @@ static void srcu_torture_barrier(void)
525 srcu_barrier(&srcu_ctl); 536 srcu_barrier(&srcu_ctl);
526} 537}
527 538
528static void srcu_torture_stats(char *page) 539static void srcu_torture_stats(void)
529{ 540{
530 int cpu; 541 int cpu;
531 int idx = srcu_ctl.completed & 0x1; 542 int idx = srcu_ctl.completed & 0x1;
532 543
533 page += sprintf(page, "%s%s per-CPU(idx=%d):", 544 pr_alert("%s%s per-CPU(idx=%d):",
534 torture_type, TORTURE_FLAG, idx); 545 torture_type, TORTURE_FLAG, idx);
535 for_each_possible_cpu(cpu) { 546 for_each_possible_cpu(cpu) {
536 long c0, c1; 547 long c0, c1;
537 548
538 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; 549 c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx];
539 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; 550 c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx];
540 page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); 551 pr_cont(" %d(%ld,%ld)", cpu, c0, c1);
541 } 552 }
542 sprintf(page, "\n"); 553 pr_cont("\n");
543} 554}
544 555
545static void srcu_torture_synchronize_expedited(void) 556static void srcu_torture_synchronize_expedited(void)
@@ -601,6 +612,52 @@ static struct rcu_torture_ops sched_ops = {
601 .name = "sched" 612 .name = "sched"
602}; 613};
603 614
615#ifdef CONFIG_TASKS_RCU
616
617/*
618 * Definitions for RCU-tasks torture testing.
619 */
620
621static int tasks_torture_read_lock(void)
622{
623 return 0;
624}
625
626static void tasks_torture_read_unlock(int idx)
627{
628}
629
630static void rcu_tasks_torture_deferred_free(struct rcu_torture *p)
631{
632 call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb);
633}
634
635static struct rcu_torture_ops tasks_ops = {
636 .ttype = RCU_TASKS_FLAVOR,
637 .init = rcu_sync_torture_init,
638 .readlock = tasks_torture_read_lock,
639 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
640 .readunlock = tasks_torture_read_unlock,
641 .completed = rcu_no_completed,
642 .deferred_free = rcu_tasks_torture_deferred_free,
643 .sync = synchronize_rcu_tasks,
644 .exp_sync = synchronize_rcu_tasks,
645 .call = call_rcu_tasks,
646 .cb_barrier = rcu_barrier_tasks,
647 .fqs = NULL,
648 .stats = NULL,
649 .irq_capable = 1,
650 .name = "tasks"
651};
652
653#define RCUTORTURE_TASKS_OPS &tasks_ops,
654
655#else /* #ifdef CONFIG_TASKS_RCU */
656
657#define RCUTORTURE_TASKS_OPS
658
659#endif /* #else #ifdef CONFIG_TASKS_RCU */
660
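
RCUTORTURE_TASKS_OPS above expands to "&tasks_ops," when CONFIG_TASKS_RCU is set and to nothing otherwise, so the torture_ops[] initializer later in the file picks up the new flavour only when it exists. A small standalone sketch of the same trick, with CONFIG_TASKS_RCU treated as an ordinary preprocessor symbol:

    #include <stdio.h>

    struct ops { const char *name; };

    static struct ops rcu_ops = { "rcu" }, sched_ops = { "sched" };

    #ifdef CONFIG_TASKS_RCU
    static struct ops tasks_ops = { "tasks" };
    # define TASKS_OPS &tasks_ops,          /* note the trailing comma */
    #else
    # define TASKS_OPS                      /* expands to nothing */
    #endif

    int main(void)
    {
            static struct ops *torture_ops[] = { &rcu_ops, &sched_ops, TASKS_OPS };
            unsigned int i;

            for (i = 0; i < sizeof(torture_ops) / sizeof(torture_ops[0]); i++)
                    printf("flavour: %s\n", torture_ops[i]->name);
            return 0;
    }
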
604/* 661/*
605 * RCU torture priority-boost testing. Runs one real-time thread per 662 * RCU torture priority-boost testing. Runs one real-time thread per
606 * CPU for moderate bursts, repeatedly registering RCU callbacks and 663 * CPU for moderate bursts, repeatedly registering RCU callbacks and
@@ -667,7 +724,7 @@ static int rcu_torture_boost(void *arg)
667 } 724 }
668 call_rcu_time = jiffies; 725 call_rcu_time = jiffies;
669 } 726 }
670 cond_resched(); 727 cond_resched_rcu_qs();
671 stutter_wait("rcu_torture_boost"); 728 stutter_wait("rcu_torture_boost");
672 if (torture_must_stop()) 729 if (torture_must_stop())
673 goto checkwait; 730 goto checkwait;
@@ -707,6 +764,58 @@ checkwait: stutter_wait("rcu_torture_boost");
707 return 0; 764 return 0;
708} 765}
709 766
767static void rcu_torture_cbflood_cb(struct rcu_head *rhp)
768{
769}
770
771/*
772 * RCU torture callback-flood kthread. Repeatedly induces bursts of calls
773 * to call_rcu() or analogous, increasing the probability of occurrence
774 * of callback-overflow corner cases.
775 */
776static int
777rcu_torture_cbflood(void *arg)
778{
779 int err = 1;
780 int i;
781 int j;
782 struct rcu_head *rhp;
783
784 if (cbflood_n_per_burst > 0 &&
785 cbflood_inter_holdoff > 0 &&
786 cbflood_intra_holdoff > 0 &&
787 cur_ops->call &&
788 cur_ops->cb_barrier) {
789 rhp = vmalloc(sizeof(*rhp) *
790 cbflood_n_burst * cbflood_n_per_burst);
791 err = !rhp;
792 }
793 if (err) {
794 VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM");
795 while (!torture_must_stop())
796 schedule_timeout_interruptible(HZ);
797 return 0;
798 }
799 VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started");
800 do {
801 schedule_timeout_interruptible(cbflood_inter_holdoff);
802 atomic_long_inc(&n_cbfloods);
803 WARN_ON(signal_pending(current));
804 for (i = 0; i < cbflood_n_burst; i++) {
805 for (j = 0; j < cbflood_n_per_burst; j++) {
806 cur_ops->call(&rhp[i * cbflood_n_per_burst + j],
807 rcu_torture_cbflood_cb);
808 }
809 schedule_timeout_interruptible(cbflood_intra_holdoff);
810 WARN_ON(signal_pending(current));
811 }
812 cur_ops->cb_barrier();
813 stutter_wait("rcu_torture_cbflood");
814 } while (!torture_must_stop());
815 torture_kthread_stopping("rcu_torture_cbflood");
816 return 0;
817}
818
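
rcu_torture_cbflood() above pre-allocates an array of rcu_head structures and then repeatedly queues bursts of no-op callbacks. A kernel-style sketch of a single burst (not the function itself; the sizes and helper name are illustrative) shows why rcu_barrier() must complete before the array is freed:

    #include <linux/errno.h>
    #include <linux/rcupdate.h>
    #include <linux/vmalloc.h>

    static void demo_flood_cb(struct rcu_head *rhp)
    {
            /* Nothing to free: the flood only exercises callback processing. */
    }

    static int demo_flood_once(int n_per_burst)
    {
            struct rcu_head *rhp;
            int i;

            rhp = vmalloc(sizeof(*rhp) * n_per_burst);
            if (!rhp)
                    return -ENOMEM;
            for (i = 0; i < n_per_burst; i++)
                    call_rcu(&rhp[i], demo_flood_cb);
            rcu_barrier();  /* wait for every callback before freeing rhp[] */
            vfree(rhp);
            return 0;
    }
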
710/* 819/*
711 * RCU torture force-quiescent-state kthread. Repeatedly induces 820 * RCU torture force-quiescent-state kthread. Repeatedly induces
712 * bursts of calls to force_quiescent_state(), increasing the probability 821 * bursts of calls to force_quiescent_state(), increasing the probability
@@ -1019,7 +1128,7 @@ rcu_torture_reader(void *arg)
1019 __this_cpu_inc(rcu_torture_batch[completed]); 1128 __this_cpu_inc(rcu_torture_batch[completed]);
1020 preempt_enable(); 1129 preempt_enable();
1021 cur_ops->readunlock(idx); 1130 cur_ops->readunlock(idx);
1022 cond_resched(); 1131 cond_resched_rcu_qs();
1023 stutter_wait("rcu_torture_reader"); 1132 stutter_wait("rcu_torture_reader");
1024 } while (!torture_must_stop()); 1133 } while (!torture_must_stop());
1025 if (irqreader && cur_ops->irq_capable) { 1134 if (irqreader && cur_ops->irq_capable) {
@@ -1031,10 +1140,15 @@ rcu_torture_reader(void *arg)
1031} 1140}
1032 1141
1033/* 1142/*
1034 * Create an RCU-torture statistics message in the specified buffer. 1143 * Print torture statistics. Caller must ensure that there is only
1144 * one call to this function at a given time!!! This is normally
1145 * accomplished by relying on the module system to only have one copy
1146 * of the module loaded, and then by giving the rcu_torture_stats
1147 * kthread full control (or the init/cleanup functions when rcu_torture_stats
1148 * thread is not running).
1035 */ 1149 */
1036static void 1150static void
1037rcu_torture_printk(char *page) 1151rcu_torture_stats_print(void)
1038{ 1152{
1039 int cpu; 1153 int cpu;
1040 int i; 1154 int i;
@@ -1052,55 +1166,61 @@ rcu_torture_printk(char *page)
1052 if (pipesummary[i] != 0) 1166 if (pipesummary[i] != 0)
1053 break; 1167 break;
1054 } 1168 }
1055 page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); 1169
1056 page += sprintf(page, 1170 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1057 "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", 1171 pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ",
1058 rcu_torture_current, 1172 rcu_torture_current,
1059 rcu_torture_current_version, 1173 rcu_torture_current_version,
1060 list_empty(&rcu_torture_freelist), 1174 list_empty(&rcu_torture_freelist),
1061 atomic_read(&n_rcu_torture_alloc), 1175 atomic_read(&n_rcu_torture_alloc),
1062 atomic_read(&n_rcu_torture_alloc_fail), 1176 atomic_read(&n_rcu_torture_alloc_fail),
1063 atomic_read(&n_rcu_torture_free)); 1177 atomic_read(&n_rcu_torture_free));
1064 page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", 1178 pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ",
1065 atomic_read(&n_rcu_torture_mberror), 1179 atomic_read(&n_rcu_torture_mberror),
1066 n_rcu_torture_boost_ktrerror, 1180 n_rcu_torture_boost_ktrerror,
1067 n_rcu_torture_boost_rterror); 1181 n_rcu_torture_boost_rterror);
1068 page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", 1182 pr_cont("rtbf: %ld rtb: %ld nt: %ld ",
1069 n_rcu_torture_boost_failure, 1183 n_rcu_torture_boost_failure,
1070 n_rcu_torture_boosts, 1184 n_rcu_torture_boosts,
1071 n_rcu_torture_timers); 1185 n_rcu_torture_timers);
1072 page = torture_onoff_stats(page); 1186 torture_onoff_stats();
1073 page += sprintf(page, "barrier: %ld/%ld:%ld", 1187 pr_cont("barrier: %ld/%ld:%ld ",
1074 n_barrier_successes, 1188 n_barrier_successes,
1075 n_barrier_attempts, 1189 n_barrier_attempts,
1076 n_rcu_torture_barrier_error); 1190 n_rcu_torture_barrier_error);
1077 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1191 pr_cont("cbflood: %ld\n", atomic_long_read(&n_cbfloods));
1192
1193 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1078 if (atomic_read(&n_rcu_torture_mberror) != 0 || 1194 if (atomic_read(&n_rcu_torture_mberror) != 0 ||
1079 n_rcu_torture_barrier_error != 0 || 1195 n_rcu_torture_barrier_error != 0 ||
1080 n_rcu_torture_boost_ktrerror != 0 || 1196 n_rcu_torture_boost_ktrerror != 0 ||
1081 n_rcu_torture_boost_rterror != 0 || 1197 n_rcu_torture_boost_rterror != 0 ||
1082 n_rcu_torture_boost_failure != 0 || 1198 n_rcu_torture_boost_failure != 0 ||
1083 i > 1) { 1199 i > 1) {
1084 page += sprintf(page, "!!! "); 1200 pr_cont("%s", "!!! ");
1085 atomic_inc(&n_rcu_torture_error); 1201 atomic_inc(&n_rcu_torture_error);
1086 WARN_ON_ONCE(1); 1202 WARN_ON_ONCE(1);
1087 } 1203 }
1088 page += sprintf(page, "Reader Pipe: "); 1204 pr_cont("Reader Pipe: ");
1089 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1205 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1090 page += sprintf(page, " %ld", pipesummary[i]); 1206 pr_cont(" %ld", pipesummary[i]);
1091 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1207 pr_cont("\n");
1092 page += sprintf(page, "Reader Batch: "); 1208
1209 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1210 pr_cont("Reader Batch: ");
1093 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 1211 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
1094 page += sprintf(page, " %ld", batchsummary[i]); 1212 pr_cont(" %ld", batchsummary[i]);
1095 page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); 1213 pr_cont("\n");
1096 page += sprintf(page, "Free-Block Circulation: "); 1214
1215 pr_alert("%s%s ", torture_type, TORTURE_FLAG);
1216 pr_cont("Free-Block Circulation: ");
1097 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { 1217 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) {
1098 page += sprintf(page, " %d", 1218 pr_cont(" %d", atomic_read(&rcu_torture_wcount[i]));
1099 atomic_read(&rcu_torture_wcount[i]));
1100 } 1219 }
1101 page += sprintf(page, "\n"); 1220 pr_cont("\n");
1221
1102 if (cur_ops->stats) 1222 if (cur_ops->stats)
1103 cur_ops->stats(page); 1223 cur_ops->stats();
1104 if (rtcv_snap == rcu_torture_current_version && 1224 if (rtcv_snap == rcu_torture_current_version &&
1105 rcu_torture_current != NULL) { 1225 rcu_torture_current != NULL) {
1106 int __maybe_unused flags; 1226 int __maybe_unused flags;
@@ -1109,10 +1229,9 @@ rcu_torture_printk(char *page)
1109 1229
1110 rcutorture_get_gp_data(cur_ops->ttype, 1230 rcutorture_get_gp_data(cur_ops->ttype,
1111 &flags, &gpnum, &completed); 1231 &flags, &gpnum, &completed);
1112 page += sprintf(page, 1232 pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n",
1113 "??? Writer stall state %d g%lu c%lu f%#x\n", 1233 rcu_torture_writer_state,
1114 rcu_torture_writer_state, 1234 gpnum, completed, flags);
1115 gpnum, completed, flags);
1116 show_rcu_gp_kthreads(); 1235 show_rcu_gp_kthreads();
1117 rcutorture_trace_dump(); 1236 rcutorture_trace_dump();
1118 } 1237 }
@@ -1120,30 +1239,6 @@ rcu_torture_printk(char *page)
1120} 1239}
1121 1240
1122/* 1241/*
1123 * Print torture statistics. Caller must ensure that there is only
1124 * one call to this function at a given time!!! This is normally
1125 * accomplished by relying on the module system to only have one copy
1126 * of the module loaded, and then by giving the rcu_torture_stats
1127 * kthread full control (or the init/cleanup functions when rcu_torture_stats
1128 * thread is not running).
1129 */
1130static void
1131rcu_torture_stats_print(void)
1132{
1133 int size = nr_cpu_ids * 200 + 8192;
1134 char *buf;
1135
1136 buf = kmalloc(size, GFP_KERNEL);
1137 if (!buf) {
1138 pr_err("rcu-torture: Out of memory, need: %d", size);
1139 return;
1140 }
1141 rcu_torture_printk(buf);
1142 pr_alert("%s", buf);
1143 kfree(buf);
1144}
1145
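
The rewrite above drops the kmalloc()ed staging buffer and emits statistics directly with pr_alert()/pr_cont(), one logical line at a time. A kernel-style sketch of that printing pattern (the function and data here are illustrative only):

    #include <linux/printk.h>

    static void demo_print_counts(const long *vals, int n)
    {
            int i;

            pr_alert("demo-torture: counts:");     /* starts the line */
            for (i = 0; i < n; i++)
                    pr_cont(" %ld", vals[i]);      /* continues the same line */
            pr_cont("\n");
    }
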
1146/*
1147 * Periodically prints torture statistics, if periodic statistics printing 1242 * Periodically prints torture statistics, if periodic statistics printing
1148 * was specified via the stat_interval module parameter. 1243 * was specified via the stat_interval module parameter.
1149 */ 1244 */
@@ -1295,7 +1390,8 @@ static int rcu_torture_barrier_cbs(void *arg)
1295 if (atomic_dec_and_test(&barrier_cbs_count)) 1390 if (atomic_dec_and_test(&barrier_cbs_count))
1296 wake_up(&barrier_wq); 1391 wake_up(&barrier_wq);
1297 } while (!torture_must_stop()); 1392 } while (!torture_must_stop());
1298 cur_ops->cb_barrier(); 1393 if (cur_ops->cb_barrier != NULL)
1394 cur_ops->cb_barrier();
1299 destroy_rcu_head_on_stack(&rcu); 1395 destroy_rcu_head_on_stack(&rcu);
1300 torture_kthread_stopping("rcu_torture_barrier_cbs"); 1396 torture_kthread_stopping("rcu_torture_barrier_cbs");
1301 return 0; 1397 return 0;
@@ -1418,7 +1514,7 @@ rcu_torture_cleanup(void)
1418 int i; 1514 int i;
1419 1515
1420 rcutorture_record_test_transition(); 1516 rcutorture_record_test_transition();
1421 if (torture_cleanup()) { 1517 if (torture_cleanup_begin()) {
1422 if (cur_ops->cb_barrier != NULL) 1518 if (cur_ops->cb_barrier != NULL)
1423 cur_ops->cb_barrier(); 1519 cur_ops->cb_barrier();
1424 return; 1520 return;
@@ -1447,6 +1543,8 @@ rcu_torture_cleanup(void)
1447 1543
1448 torture_stop_kthread(rcu_torture_stats, stats_task); 1544 torture_stop_kthread(rcu_torture_stats, stats_task);
1449 torture_stop_kthread(rcu_torture_fqs, fqs_task); 1545 torture_stop_kthread(rcu_torture_fqs, fqs_task);
1546 for (i = 0; i < ncbflooders; i++)
1547 torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]);
1450 if ((test_boost == 1 && cur_ops->can_boost) || 1548 if ((test_boost == 1 && cur_ops->can_boost) ||
1451 test_boost == 2) { 1549 test_boost == 2) {
1452 unregister_cpu_notifier(&rcutorture_cpu_nb); 1550 unregister_cpu_notifier(&rcutorture_cpu_nb);
@@ -1468,6 +1566,7 @@ rcu_torture_cleanup(void)
1468 "End of test: RCU_HOTPLUG"); 1566 "End of test: RCU_HOTPLUG");
1469 else 1567 else
1470 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS"); 1568 rcu_torture_print_module_parms(cur_ops, "End of test: SUCCESS");
1569 torture_cleanup_end();
1471} 1570}
1472 1571
1473#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD 1572#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
@@ -1534,9 +1633,10 @@ rcu_torture_init(void)
1534 int firsterr = 0; 1633 int firsterr = 0;
1535 static struct rcu_torture_ops *torture_ops[] = { 1634 static struct rcu_torture_ops *torture_ops[] = {
1536 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, 1635 &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops,
1636 RCUTORTURE_TASKS_OPS
1537 }; 1637 };
1538 1638
1539 if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) 1639 if (!torture_init_begin(torture_type, verbose, &torture_runnable))
1540 return -EBUSY; 1640 return -EBUSY;
1541 1641
1542 /* Process args and tell the world that the torturer is on the job. */ 1642 /* Process args and tell the world that the torturer is on the job. */
@@ -1693,6 +1793,24 @@ rcu_torture_init(void)
1693 goto unwind; 1793 goto unwind;
1694 if (object_debug) 1794 if (object_debug)
1695 rcu_test_debug_objects(); 1795 rcu_test_debug_objects();
1796 if (cbflood_n_burst > 0) {
1797 /* Create the cbflood threads */
1798 ncbflooders = (num_online_cpus() + 3) / 4;
1799 cbflood_task = kcalloc(ncbflooders, sizeof(*cbflood_task),
1800 GFP_KERNEL);
1801 if (!cbflood_task) {
1802 VERBOSE_TOROUT_ERRSTRING("out of memory");
1803 firsterr = -ENOMEM;
1804 goto unwind;
1805 }
1806 for (i = 0; i < ncbflooders; i++) {
1807 firsterr = torture_create_kthread(rcu_torture_cbflood,
1808 NULL,
1809 cbflood_task[i]);
1810 if (firsterr)
1811 goto unwind;
1812 }
1813 }
1696 rcutorture_record_test_transition(); 1814 rcutorture_record_test_transition();
1697 torture_init_end(); 1815 torture_init_end();
1698 return 0; 1816 return 0;
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index d9efcc13008c..c0623fc47125 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -51,7 +51,7 @@ static long long rcu_dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
51 51
52#include "tiny_plugin.h" 52#include "tiny_plugin.h"
53 53
54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcutree.c. */ 54/* Common code for rcu_idle_enter() and rcu_irq_exit(), see kernel/rcu/tree.c. */
55static void rcu_idle_enter_common(long long newval) 55static void rcu_idle_enter_common(long long newval)
56{ 56{
57 if (newval) { 57 if (newval) {
@@ -62,7 +62,7 @@ static void rcu_idle_enter_common(long long newval)
62 } 62 }
63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"), 63 RCU_TRACE(trace_rcu_dyntick(TPS("Start"),
64 rcu_dynticks_nesting, newval)); 64 rcu_dynticks_nesting, newval));
65 if (!is_idle_task(current)) { 65 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); 66 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
67 67
68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"), 68 RCU_TRACE(trace_rcu_dyntick(TPS("Entry error: not idle task"),
@@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval)
72 current->pid, current->comm, 72 current->pid, current->comm,
73 idle->pid, idle->comm); /* must be idle task! */ 73 idle->pid, idle->comm); /* must be idle task! */
74 } 74 }
75 rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ 75 rcu_sched_qs(); /* implies rcu_bh_inc() */
76 barrier(); 76 barrier();
77 rcu_dynticks_nesting = newval; 77 rcu_dynticks_nesting = newval;
78} 78}
@@ -114,7 +114,7 @@ void rcu_irq_exit(void)
114} 114}
115EXPORT_SYMBOL_GPL(rcu_irq_exit); 115EXPORT_SYMBOL_GPL(rcu_irq_exit);
116 116
117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcutree.c. */ 117/* Common code for rcu_idle_exit() and rcu_irq_enter(), see kernel/rcu/tree.c. */
118static void rcu_idle_exit_common(long long oldval) 118static void rcu_idle_exit_common(long long oldval)
119{ 119{
120 if (oldval) { 120 if (oldval) {
@@ -123,7 +123,7 @@ static void rcu_idle_exit_common(long long oldval)
123 return; 123 return;
124 } 124 }
125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting)); 125 RCU_TRACE(trace_rcu_dyntick(TPS("End"), oldval, rcu_dynticks_nesting));
126 if (!is_idle_task(current)) { 126 if (IS_ENABLED(CONFIG_RCU_TRACE) && !is_idle_task(current)) {
127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id()); 127 struct task_struct *idle __maybe_unused = idle_task(smp_processor_id());
128 128
129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"), 129 RCU_TRACE(trace_rcu_dyntick(TPS("Exit error: not idle task"),
@@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
217 * are at it, given that any rcu quiescent state is also an rcu_bh 217 * are at it, given that any rcu quiescent state is also an rcu_bh
218 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 218 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
219 */ 219 */
220void rcu_sched_qs(int cpu) 220void rcu_sched_qs(void)
221{ 221{
222 unsigned long flags; 222 unsigned long flags;
223 223
@@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu)
231/* 231/*
232 * Record an rcu_bh quiescent state. 232 * Record an rcu_bh quiescent state.
233 */ 233 */
234void rcu_bh_qs(int cpu) 234void rcu_bh_qs(void)
235{ 235{
236 unsigned long flags; 236 unsigned long flags;
237 237
@@ -251,9 +251,11 @@ void rcu_check_callbacks(int cpu, int user)
251{ 251{
252 RCU_TRACE(check_cpu_stalls()); 252 RCU_TRACE(check_cpu_stalls());
253 if (user || rcu_is_cpu_rrupt_from_idle()) 253 if (user || rcu_is_cpu_rrupt_from_idle())
254 rcu_sched_qs(cpu); 254 rcu_sched_qs();
255 else if (!in_softirq()) 255 else if (!in_softirq())
256 rcu_bh_qs(cpu); 256 rcu_bh_qs();
257 if (user)
258 rcu_note_voluntary_context_switch(current);
257} 259}
258 260
259/* 261/*
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 1b70cb6fbe3c..133e47223095 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -79,9 +79,18 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
79 * the tracing userspace tools to be able to decipher the string 79 * the tracing userspace tools to be able to decipher the string
80 * address to the matching string. 80 * address to the matching string.
81 */ 81 */
82#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \ 82#ifdef CONFIG_TRACING
83# define DEFINE_RCU_TPS(sname) \
83static char sname##_varname[] = #sname; \ 84static char sname##_varname[] = #sname; \
84static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname; \ 85static const char *tp_##sname##_varname __used __tracepoint_string = sname##_varname;
86# define RCU_STATE_NAME(sname) sname##_varname
87#else
88# define DEFINE_RCU_TPS(sname)
89# define RCU_STATE_NAME(sname) __stringify(sname)
90#endif
91
92#define RCU_STATE_INITIALIZER(sname, sabbr, cr) \
93DEFINE_RCU_TPS(sname) \
85struct rcu_state sname##_state = { \ 94struct rcu_state sname##_state = { \
86 .level = { &sname##_state.node[0] }, \ 95 .level = { &sname##_state.node[0] }, \
87 .call = cr, \ 96 .call = cr, \
@@ -93,7 +102,7 @@ struct rcu_state sname##_state = { \
93 .orphan_donetail = &sname##_state.orphan_donelist, \ 102 .orphan_donetail = &sname##_state.orphan_donelist, \
94 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \ 103 .barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
95 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \ 104 .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
96 .name = sname##_varname, \ 105 .name = RCU_STATE_NAME(sname), \
97 .abbr = sabbr, \ 106 .abbr = sabbr, \
98}; \ 107}; \
99DEFINE_PER_CPU(struct rcu_data, sname##_data) 108DEFINE_PER_CPU(struct rcu_data, sname##_data)
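
The RCU_STATE_NAME()/DEFINE_RCU_TPS() split above emits the __tracepoint_string copy of the flavour name only when CONFIG_TRACING is enabled, falling back to __stringify() otherwise. A standalone sketch of that fallback logic, with CONFIG_TRACING treated as a plain preprocessor symbol and a one-level stringify for illustration:

    #include <stdio.h>

    #define my_stringify(x) #x

    #ifdef CONFIG_TRACING
    static char rcu_sched_varname[] = "rcu_sched";
    # define RCU_STATE_NAME(sname) sname##_varname
    #else
    # define RCU_STATE_NAME(sname) my_stringify(sname)
    #endif

    int main(void)
    {
            printf("state name: %s\n", RCU_STATE_NAME(rcu_sched));
            return 0;
    }
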
@@ -188,22 +197,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
188 * one since the start of the grace period, this just sets a flag. 197 * one since the start of the grace period, this just sets a flag.
189 * The caller must have disabled preemption. 198 * The caller must have disabled preemption.
190 */ 199 */
191void rcu_sched_qs(int cpu) 200void rcu_sched_qs(void)
192{ 201{
193 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 202 if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) {
194 203 trace_rcu_grace_period(TPS("rcu_sched"),
195 if (rdp->passed_quiesce == 0) 204 __this_cpu_read(rcu_sched_data.gpnum),
196 trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); 205 TPS("cpuqs"));
197 rdp->passed_quiesce = 1; 206 __this_cpu_write(rcu_sched_data.passed_quiesce, 1);
207 }
198} 208}
199 209
200void rcu_bh_qs(int cpu) 210void rcu_bh_qs(void)
201{ 211{
202 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 212 if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) {
203 213 trace_rcu_grace_period(TPS("rcu_bh"),
204 if (rdp->passed_quiesce == 0) 214 __this_cpu_read(rcu_bh_data.gpnum),
205 trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); 215 TPS("cpuqs"));
206 rdp->passed_quiesce = 1; 216 __this_cpu_write(rcu_bh_data.passed_quiesce, 1);
217 }
207} 218}
208 219
209static DEFINE_PER_CPU(int, rcu_sched_qs_mask); 220static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
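
rcu_sched_qs() and rcu_bh_qs() above lose their cpu argument and instead update the invoking CPU's state with __this_cpu_read()/__this_cpu_write(). A kernel-style sketch of that idiom on a hypothetical per-CPU flag:

    #include <linux/percpu.h>

    static DEFINE_PER_CPU(int, demo_passed_qs);

    /* Record a quiescent state for the CPU this runs on (preemption disabled). */
    static void demo_note_qs(void)
    {
            if (!__this_cpu_read(demo_passed_qs))
                    __this_cpu_write(demo_passed_qs, 1);
    }
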
@@ -278,7 +289,7 @@ static void rcu_momentary_dyntick_idle(void)
278void rcu_note_context_switch(int cpu) 289void rcu_note_context_switch(int cpu)
279{ 290{
280 trace_rcu_utilization(TPS("Start context switch")); 291 trace_rcu_utilization(TPS("Start context switch"));
281 rcu_sched_qs(cpu); 292 rcu_sched_qs();
282 rcu_preempt_note_context_switch(cpu); 293 rcu_preempt_note_context_switch(cpu);
283 if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) 294 if (unlikely(raw_cpu_read(rcu_sched_qs_mask)))
284 rcu_momentary_dyntick_idle(); 295 rcu_momentary_dyntick_idle();
@@ -526,6 +537,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval,
526 atomic_inc(&rdtp->dynticks); 537 atomic_inc(&rdtp->dynticks);
527 smp_mb__after_atomic(); /* Force ordering with next sojourn. */ 538 smp_mb__after_atomic(); /* Force ordering with next sojourn. */
528 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 539 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
540 rcu_dynticks_task_enter();
529 541
530 /* 542 /*
531 * It is illegal to enter an extended quiescent state while 543 * It is illegal to enter an extended quiescent state while
@@ -642,6 +654,7 @@ void rcu_irq_exit(void)
642static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, 654static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval,
643 int user) 655 int user)
644{ 656{
657 rcu_dynticks_task_exit();
645 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ 658 smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */
646 atomic_inc(&rdtp->dynticks); 659 atomic_inc(&rdtp->dynticks);
647 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 660 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
@@ -819,7 +832,7 @@ bool notrace __rcu_is_watching(void)
819 */ 832 */
820bool notrace rcu_is_watching(void) 833bool notrace rcu_is_watching(void)
821{ 834{
822 int ret; 835 bool ret;
823 836
824 preempt_disable(); 837 preempt_disable();
825 ret = __rcu_is_watching(); 838 ret = __rcu_is_watching();
@@ -1647,7 +1660,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
1647 rnp->level, rnp->grplo, 1660 rnp->level, rnp->grplo,
1648 rnp->grphi, rnp->qsmask); 1661 rnp->grphi, rnp->qsmask);
1649 raw_spin_unlock_irq(&rnp->lock); 1662 raw_spin_unlock_irq(&rnp->lock);
1650 cond_resched(); 1663 cond_resched_rcu_qs();
1651 } 1664 }
1652 1665
1653 mutex_unlock(&rsp->onoff_mutex); 1666 mutex_unlock(&rsp->onoff_mutex);
@@ -1668,7 +1681,7 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1668 if (fqs_state == RCU_SAVE_DYNTICK) { 1681 if (fqs_state == RCU_SAVE_DYNTICK) {
1669 /* Collect dyntick-idle snapshots. */ 1682 /* Collect dyntick-idle snapshots. */
1670 if (is_sysidle_rcu_state(rsp)) { 1683 if (is_sysidle_rcu_state(rsp)) {
1671 isidle = 1; 1684 isidle = true;
1672 maxj = jiffies - ULONG_MAX / 4; 1685 maxj = jiffies - ULONG_MAX / 4;
1673 } 1686 }
1674 force_qs_rnp(rsp, dyntick_save_progress_counter, 1687 force_qs_rnp(rsp, dyntick_save_progress_counter,
@@ -1677,14 +1690,15 @@ static int rcu_gp_fqs(struct rcu_state *rsp, int fqs_state_in)
1677 fqs_state = RCU_FORCE_QS; 1690 fqs_state = RCU_FORCE_QS;
1678 } else { 1691 } else {
1679 /* Handle dyntick-idle and offline CPUs. */ 1692 /* Handle dyntick-idle and offline CPUs. */
1680 isidle = 0; 1693 isidle = false;
1681 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj); 1694 force_qs_rnp(rsp, rcu_implicit_dynticks_qs, &isidle, &maxj);
1682 } 1695 }
1683 /* Clear flag to prevent immediate re-entry. */ 1696 /* Clear flag to prevent immediate re-entry. */
1684 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) { 1697 if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
1685 raw_spin_lock_irq(&rnp->lock); 1698 raw_spin_lock_irq(&rnp->lock);
1686 smp_mb__after_unlock_lock(); 1699 smp_mb__after_unlock_lock();
1687 ACCESS_ONCE(rsp->gp_flags) &= ~RCU_GP_FLAG_FQS; 1700 ACCESS_ONCE(rsp->gp_flags) =
1701 ACCESS_ONCE(rsp->gp_flags) & ~RCU_GP_FLAG_FQS;
1688 raw_spin_unlock_irq(&rnp->lock); 1702 raw_spin_unlock_irq(&rnp->lock);
1689 } 1703 }
1690 return fqs_state; 1704 return fqs_state;
@@ -1736,7 +1750,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp)
1736 /* smp_mb() provided by prior unlock-lock pair. */ 1750 /* smp_mb() provided by prior unlock-lock pair. */
1737 nocb += rcu_future_gp_cleanup(rsp, rnp); 1751 nocb += rcu_future_gp_cleanup(rsp, rnp);
1738 raw_spin_unlock_irq(&rnp->lock); 1752 raw_spin_unlock_irq(&rnp->lock);
1739 cond_resched(); 1753 cond_resched_rcu_qs();
1740 } 1754 }
1741 rnp = rcu_get_root(rsp); 1755 rnp = rcu_get_root(rsp);
1742 raw_spin_lock_irq(&rnp->lock); 1756 raw_spin_lock_irq(&rnp->lock);
@@ -1785,8 +1799,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
1785 /* Locking provides needed memory barrier. */ 1799 /* Locking provides needed memory barrier. */
1786 if (rcu_gp_init(rsp)) 1800 if (rcu_gp_init(rsp))
1787 break; 1801 break;
1788 cond_resched(); 1802 cond_resched_rcu_qs();
1789 flush_signals(current); 1803 WARN_ON(signal_pending(current));
1790 trace_rcu_grace_period(rsp->name, 1804 trace_rcu_grace_period(rsp->name,
1791 ACCESS_ONCE(rsp->gpnum), 1805 ACCESS_ONCE(rsp->gpnum),
1792 TPS("reqwaitsig")); 1806 TPS("reqwaitsig"));
@@ -1828,11 +1842,11 @@ static int __noreturn rcu_gp_kthread(void *arg)
1828 trace_rcu_grace_period(rsp->name, 1842 trace_rcu_grace_period(rsp->name,
1829 ACCESS_ONCE(rsp->gpnum), 1843 ACCESS_ONCE(rsp->gpnum),
1830 TPS("fqsend")); 1844 TPS("fqsend"));
1831 cond_resched(); 1845 cond_resched_rcu_qs();
1832 } else { 1846 } else {
1833 /* Deal with stray signal. */ 1847 /* Deal with stray signal. */
1834 cond_resched(); 1848 cond_resched_rcu_qs();
1835 flush_signals(current); 1849 WARN_ON(signal_pending(current));
1836 trace_rcu_grace_period(rsp->name, 1850 trace_rcu_grace_period(rsp->name,
1837 ACCESS_ONCE(rsp->gpnum), 1851 ACCESS_ONCE(rsp->gpnum),
1838 TPS("fqswaitsig")); 1852 TPS("fqswaitsig"));
@@ -1928,7 +1942,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
1928{ 1942{
1929 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 1943 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
1930 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags); 1944 raw_spin_unlock_irqrestore(&rcu_get_root(rsp)->lock, flags);
1931 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 1945 rcu_gp_kthread_wake(rsp);
1932} 1946}
1933 1947
1934/* 1948/*
@@ -2210,8 +2224,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
2210 /* Adjust any no-longer-needed kthreads. */ 2224 /* Adjust any no-longer-needed kthreads. */
2211 rcu_boost_kthread_setaffinity(rnp, -1); 2225 rcu_boost_kthread_setaffinity(rnp, -1);
2212 2226
2213 /* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
2214
2215 /* Exclude any attempts to start a new grace period. */ 2227 /* Exclude any attempts to start a new grace period. */
2216 mutex_lock(&rsp->onoff_mutex); 2228 mutex_lock(&rsp->onoff_mutex);
2217 raw_spin_lock_irqsave(&rsp->orphan_lock, flags); 2229 raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
@@ -2393,8 +2405,8 @@ void rcu_check_callbacks(int cpu, int user)
2393 * at least not while the corresponding CPU is online. 2405 * at least not while the corresponding CPU is online.
2394 */ 2406 */
2395 2407
2396 rcu_sched_qs(cpu); 2408 rcu_sched_qs();
2397 rcu_bh_qs(cpu); 2409 rcu_bh_qs();
2398 2410
2399 } else if (!in_softirq()) { 2411 } else if (!in_softirq()) {
2400 2412
@@ -2405,11 +2417,13 @@ void rcu_check_callbacks(int cpu, int user)
2405 * critical section, so note it. 2417 * critical section, so note it.
2406 */ 2418 */
2407 2419
2408 rcu_bh_qs(cpu); 2420 rcu_bh_qs();
2409 } 2421 }
2410 rcu_preempt_check_callbacks(cpu); 2422 rcu_preempt_check_callbacks(cpu);
2411 if (rcu_pending(cpu)) 2423 if (rcu_pending(cpu))
2412 invoke_rcu_core(); 2424 invoke_rcu_core();
2425 if (user)
2426 rcu_note_voluntary_context_switch(current);
2413 trace_rcu_utilization(TPS("End scheduler-tick")); 2427 trace_rcu_utilization(TPS("End scheduler-tick"));
2414} 2428}
2415 2429
@@ -2432,7 +2446,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2432 struct rcu_node *rnp; 2446 struct rcu_node *rnp;
2433 2447
2434 rcu_for_each_leaf_node(rsp, rnp) { 2448 rcu_for_each_leaf_node(rsp, rnp) {
2435 cond_resched(); 2449 cond_resched_rcu_qs();
2436 mask = 0; 2450 mask = 0;
2437 raw_spin_lock_irqsave(&rnp->lock, flags); 2451 raw_spin_lock_irqsave(&rnp->lock, flags);
2438 smp_mb__after_unlock_lock(); 2452 smp_mb__after_unlock_lock();
@@ -2449,7 +2463,7 @@ static void force_qs_rnp(struct rcu_state *rsp,
2449 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) { 2463 for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
2450 if ((rnp->qsmask & bit) != 0) { 2464 if ((rnp->qsmask & bit) != 0) {
2451 if ((rnp->qsmaskinit & bit) != 0) 2465 if ((rnp->qsmaskinit & bit) != 0)
2452 *isidle = 0; 2466 *isidle = false;
2453 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj)) 2467 if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
2454 mask |= bit; 2468 mask |= bit;
2455 } 2469 }
@@ -2505,9 +2519,10 @@ static void force_quiescent_state(struct rcu_state *rsp)
2505 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2519 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2506 return; /* Someone beat us to it. */ 2520 return; /* Someone beat us to it. */
2507 } 2521 }
2508 ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS; 2522 ACCESS_ONCE(rsp->gp_flags) =
2523 ACCESS_ONCE(rsp->gp_flags) | RCU_GP_FLAG_FQS;
2509 raw_spin_unlock_irqrestore(&rnp_old->lock, flags); 2524 raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
2510 wake_up(&rsp->gp_wq); /* Memory barrier implied by wake_up() path. */ 2525 rcu_gp_kthread_wake(rsp);
2511} 2526}
2512 2527
2513/* 2528/*
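Two changes recur in the hunks above: wakeups of the grace-period kthread are funneled through rcu_gp_kthread_wake(), and the compound update "ACCESS_ONCE(rsp->gp_flags) |= RCU_GP_FLAG_FQS" is opened up into an explicit volatile load followed by an explicit volatile store. The standalone sketch below is not kernel code; ACCESS_ONCE() is redefined locally purely to show the open-coded form, which leaves no read-modify-write for the compiler to implement however it likes.

#include <stdio.h>

#define ACCESS_ONCE(x)  (*(volatile __typeof__(x) *)&(x))
#define GP_FLAG_FQS     0x2UL

static unsigned long gp_flags;

static void request_force_quiescent_state(void)
{
        /* One explicit volatile load ... */
        unsigned long flags = ACCESS_ONCE(gp_flags);

        /* ... and one explicit volatile store, with no compound |= in between. */
        ACCESS_ONCE(gp_flags) = flags | GP_FLAG_FQS;
}

int main(void)
{
        request_force_quiescent_state();
        printf("gp_flags = %#lx\n", gp_flags);
        return 0;
}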
@@ -2925,11 +2940,6 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
2925 * restructure your code to batch your updates, and then use a single 2940 * restructure your code to batch your updates, and then use a single
2926 * synchronize_sched() instead. 2941 * synchronize_sched() instead.
2927 * 2942 *
2928 * Note that it is illegal to call this function while holding any lock
2929 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
2930 * to call this function from a CPU-hotplug notifier. Failing to observe
2931 * these restriction will result in deadlock.
2932 *
2933 * This implementation can be thought of as an application of ticket 2943 * This implementation can be thought of as an application of ticket
2934 * locking to RCU, with sync_sched_expedited_started and 2944 * locking to RCU, with sync_sched_expedited_started and
2935 * sync_sched_expedited_done taking on the roles of the halves 2945 * sync_sched_expedited_done taking on the roles of the halves
@@ -2979,7 +2989,12 @@ void synchronize_sched_expedited(void)
2979 */ 2989 */
2980 snap = atomic_long_inc_return(&rsp->expedited_start); 2990 snap = atomic_long_inc_return(&rsp->expedited_start);
2981 firstsnap = snap; 2991 firstsnap = snap;
2982 get_online_cpus(); 2992 if (!try_get_online_cpus()) {
2993 /* CPU hotplug operation in flight, fall back to normal GP. */
2994 wait_rcu_gp(call_rcu_sched);
2995 atomic_long_inc(&rsp->expedited_normal);
2996 return;
2997 }
2983 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id())); 2998 WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
2984 2999
2985 /* 3000 /*
@@ -3026,7 +3041,12 @@ void synchronize_sched_expedited(void)
3026 * and they started after our first try, so their grace 3041 * and they started after our first try, so their grace
3027 * period works for us. 3042 * period works for us.
3028 */ 3043 */
3029 get_online_cpus(); 3044 if (!try_get_online_cpus()) {
3045 /* CPU hotplug operation in flight, use normal GP. */
3046 wait_rcu_gp(call_rcu_sched);
3047 atomic_long_inc(&rsp->expedited_normal);
3048 return;
3049 }
3030 snap = atomic_long_read(&rsp->expedited_start); 3050 snap = atomic_long_read(&rsp->expedited_start);
3031 smp_mb(); /* ensure read is before try_stop_cpus(). */ 3051 smp_mb(); /* ensure read is before try_stop_cpus(). */
3032 } 3052 }
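Both get_online_cpus() calls in synchronize_sched_expedited() now use try_get_online_cpus() and fall back to a normal grace period whenever a CPU-hotplug operation is in flight, which is what removes the documented deadlock. The toy program below models that pattern with a plain pthread mutex; try_get_online_cpus(), wait_rcu_gp() and the expedited_normal counter are represented only by local stand-ins.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long expedited_normal;  /* times we fell back to a normal GP */

static void normal_grace_period(void)
{
        puts("waiting for a normal grace period");      /* wait_rcu_gp() stand-in */
}

static void expedited_grace_period(void)
{
        if (pthread_mutex_trylock(&hotplug_lock)) {
                /* "Hotplug" in flight: take the slower but deadlock-free path. */
                normal_grace_period();
                expedited_normal++;
                return;
        }
        puts("running the expedited machinery");
        pthread_mutex_unlock(&hotplug_lock);
}

int main(void)
{
        expedited_grace_period();               /* lock free: expedited path */

        pthread_mutex_lock(&hotplug_lock);      /* simulate a hotplug operation */
        expedited_grace_period();               /* falls back to the normal path */
        pthread_mutex_unlock(&hotplug_lock);

        printf("expedited_normal = %lu\n", expedited_normal);
        return 0;
}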
@@ -3442,6 +3462,7 @@ static int rcu_cpu_notify(struct notifier_block *self,
3442 case CPU_UP_PREPARE_FROZEN: 3462 case CPU_UP_PREPARE_FROZEN:
3443 rcu_prepare_cpu(cpu); 3463 rcu_prepare_cpu(cpu);
3444 rcu_prepare_kthreads(cpu); 3464 rcu_prepare_kthreads(cpu);
3465 rcu_spawn_all_nocb_kthreads(cpu);
3445 break; 3466 break;
3446 case CPU_ONLINE: 3467 case CPU_ONLINE:
3447 case CPU_DOWN_FAILED: 3468 case CPU_DOWN_FAILED:
@@ -3489,7 +3510,7 @@ static int rcu_pm_notify(struct notifier_block *self,
3489} 3510}
3490 3511
3491/* 3512/*
3492 * Spawn the kthread that handles this RCU flavor's grace periods. 3513 * Spawn the kthreads that handle each RCU flavor's grace periods.
3493 */ 3514 */
3494static int __init rcu_spawn_gp_kthread(void) 3515static int __init rcu_spawn_gp_kthread(void)
3495{ 3516{
@@ -3498,6 +3519,7 @@ static int __init rcu_spawn_gp_kthread(void)
3498 struct rcu_state *rsp; 3519 struct rcu_state *rsp;
3499 struct task_struct *t; 3520 struct task_struct *t;
3500 3521
3522 rcu_scheduler_fully_active = 1;
3501 for_each_rcu_flavor(rsp) { 3523 for_each_rcu_flavor(rsp) {
3502 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name); 3524 t = kthread_run(rcu_gp_kthread, rsp, "%s", rsp->name);
3503 BUG_ON(IS_ERR(t)); 3525 BUG_ON(IS_ERR(t));
@@ -3505,8 +3527,9 @@ static int __init rcu_spawn_gp_kthread(void)
3505 raw_spin_lock_irqsave(&rnp->lock, flags); 3527 raw_spin_lock_irqsave(&rnp->lock, flags);
3506 rsp->gp_kthread = t; 3528 rsp->gp_kthread = t;
3507 raw_spin_unlock_irqrestore(&rnp->lock, flags); 3529 raw_spin_unlock_irqrestore(&rnp->lock, flags);
3508 rcu_spawn_nocb_kthreads(rsp);
3509 } 3530 }
3531 rcu_spawn_nocb_kthreads();
3532 rcu_spawn_boost_kthreads();
3510 return 0; 3533 return 0;
3511} 3534}
3512early_initcall(rcu_spawn_gp_kthread); 3535early_initcall(rcu_spawn_gp_kthread);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 6a86eb7bac45..d03764652d91 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -350,7 +350,7 @@ struct rcu_data {
350 int nocb_p_count_lazy; /* (approximate). */ 350 int nocb_p_count_lazy; /* (approximate). */
351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */ 351 wait_queue_head_t nocb_wq; /* For nocb kthreads to sleep on. */
352 struct task_struct *nocb_kthread; 352 struct task_struct *nocb_kthread;
353 bool nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */ 353 int nocb_defer_wakeup; /* Defer wakeup of nocb_kthread. */
354 354
355 /* The following fields are used by the leader, hence own cacheline. */ 355 /* The following fields are used by the leader, hence own cacheline. */
356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp; 356 struct rcu_head *nocb_gp_head ____cacheline_internodealigned_in_smp;
@@ -383,6 +383,11 @@ struct rcu_data {
383#define RCU_FORCE_QS 3 /* Need to force quiescent state. */ 383#define RCU_FORCE_QS 3 /* Need to force quiescent state. */
384#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK 384#define RCU_SIGNAL_INIT RCU_SAVE_DYNTICK
385 385
386/* Values for nocb_defer_wakeup field in struct rcu_data. */
387#define RCU_NOGP_WAKE_NOT 0
388#define RCU_NOGP_WAKE 1
389#define RCU_NOGP_WAKE_FORCE 2
390
386#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500)) 391#define RCU_JIFFIES_TILL_FORCE_QS (1 + (HZ > 250) + (HZ > 500))
387 /* For jiffies_till_first_fqs and */ 392 /* For jiffies_till_first_fqs and */
388 /* and jiffies_till_next_fqs. */ 393 /* and jiffies_till_next_fqs. */
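The new RCU_NOGP_WAKE_* values turn ->nocb_defer_wakeup from a yes/no flag into a record of how strongly the deferred wakeup must eventually be delivered. A small userspace model of that state machine follows; the helper names are local to the example and only mirror the shape of do_nocb_deferred_wakeup() shown later in this patch.

#include <stdbool.h>
#include <stdio.h>

enum nocb_defer_wakeup {
        NOGP_WAKE_NOT,          /* no wakeup owed */
        NOGP_WAKE,              /* ordinary wakeup owed */
        NOGP_WAKE_FORCE,        /* wakeup owed even if the leader is sleeping */
};

static enum nocb_defer_wakeup defer_wakeup;

static void wake_leader(bool force)
{
        printf("waking leader, force=%d\n", force);
}

/* Runs later, from a context where issuing the wakeup is safe. */
static void do_deferred_wakeup(void)
{
        enum nocb_defer_wakeup ndw = defer_wakeup;

        if (ndw == NOGP_WAKE_NOT)
                return;                         /* nothing deferred */
        defer_wakeup = NOGP_WAKE_NOT;           /* consume the request */
        wake_leader(ndw == NOGP_WAKE_FORCE);    /* strength depends on the value */
}

int main(void)
{
        defer_wakeup = NOGP_WAKE_FORCE; /* e.g. many callbacks queued with irqs off */
        do_deferred_wakeup();
        return 0;
}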
@@ -572,6 +577,7 @@ static void rcu_preempt_do_callbacks(void);
572static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp, 577static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
573 struct rcu_node *rnp); 578 struct rcu_node *rnp);
574#endif /* #ifdef CONFIG_RCU_BOOST */ 579#endif /* #ifdef CONFIG_RCU_BOOST */
580static void __init rcu_spawn_boost_kthreads(void);
575static void rcu_prepare_kthreads(int cpu); 581static void rcu_prepare_kthreads(int cpu);
576static void rcu_cleanup_after_idle(int cpu); 582static void rcu_cleanup_after_idle(int cpu);
577static void rcu_prepare_for_idle(int cpu); 583static void rcu_prepare_for_idle(int cpu);
@@ -589,10 +595,14 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
589static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 595static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
590 struct rcu_data *rdp, 596 struct rcu_data *rdp,
591 unsigned long flags); 597 unsigned long flags);
592static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); 598static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp);
593static void do_nocb_deferred_wakeup(struct rcu_data *rdp); 599static void do_nocb_deferred_wakeup(struct rcu_data *rdp);
594static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); 600static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
595static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp); 601static void rcu_spawn_all_nocb_kthreads(int cpu);
602static void __init rcu_spawn_nocb_kthreads(void);
603#ifdef CONFIG_RCU_NOCB_CPU
604static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp);
605#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
596static void __maybe_unused rcu_kick_nohz_cpu(int cpu); 606static void __maybe_unused rcu_kick_nohz_cpu(int cpu);
597static bool init_nocb_callback_list(struct rcu_data *rdp); 607static bool init_nocb_callback_list(struct rcu_data *rdp);
598static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq); 608static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq);
@@ -605,6 +615,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
605static void rcu_bind_gp_kthread(void); 615static void rcu_bind_gp_kthread(void);
606static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); 616static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp);
607static bool rcu_nohz_full_cpu(struct rcu_state *rsp); 617static bool rcu_nohz_full_cpu(struct rcu_state *rsp);
618static void rcu_dynticks_task_enter(void);
619static void rcu_dynticks_task_exit(void);
608 620
609#endif /* #ifndef RCU_TREE_NONCORE */ 621#endif /* #ifndef RCU_TREE_NONCORE */
610 622
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index a7997e272564..387dd4599344 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -85,33 +85,6 @@ static void __init rcu_bootup_announce_oddness(void)
85 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf); 85 pr_info("\tBoot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
86 if (nr_cpu_ids != NR_CPUS) 86 if (nr_cpu_ids != NR_CPUS)
87 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids); 87 pr_info("\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
88#ifdef CONFIG_RCU_NOCB_CPU
89#ifndef CONFIG_RCU_NOCB_CPU_NONE
90 if (!have_rcu_nocb_mask) {
91 zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL);
92 have_rcu_nocb_mask = true;
93 }
94#ifdef CONFIG_RCU_NOCB_CPU_ZERO
95 pr_info("\tOffload RCU callbacks from CPU 0\n");
96 cpumask_set_cpu(0, rcu_nocb_mask);
97#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
98#ifdef CONFIG_RCU_NOCB_CPU_ALL
99 pr_info("\tOffload RCU callbacks from all CPUs\n");
100 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
101#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
102#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
103 if (have_rcu_nocb_mask) {
104 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
105 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
106 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
107 rcu_nocb_mask);
108 }
109 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
110 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
111 if (rcu_nocb_poll)
112 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
113 }
114#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
115} 88}
116 89
117#ifdef CONFIG_TREE_PREEMPT_RCU 90#ifdef CONFIG_TREE_PREEMPT_RCU
@@ -134,7 +107,7 @@ static void __init rcu_bootup_announce(void)
134 * Return the number of RCU-preempt batches processed thus far 107 * Return the number of RCU-preempt batches processed thus far
135 * for debug and statistics. 108 * for debug and statistics.
136 */ 109 */
137long rcu_batches_completed_preempt(void) 110static long rcu_batches_completed_preempt(void)
138{ 111{
139 return rcu_preempt_state.completed; 112 return rcu_preempt_state.completed;
140} 113}
@@ -155,18 +128,19 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed);
155 * not in a quiescent state. There might be any number of tasks blocked 128 * not in a quiescent state. There might be any number of tasks blocked
156 * while in an RCU read-side critical section. 129 * while in an RCU read-side critical section.
157 * 130 *
158 * Unlike the other rcu_*_qs() functions, callers to this function 131 * As with the other rcu_*_qs() functions, callers to this function
159 * must disable irqs in order to protect the assignment to 132 * must disable preemption.
160 * ->rcu_read_unlock_special. 133 */
161 */ 134static void rcu_preempt_qs(void)
162static void rcu_preempt_qs(int cpu) 135{
163{ 136 if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) {
164 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 137 trace_rcu_grace_period(TPS("rcu_preempt"),
165 138 __this_cpu_read(rcu_preempt_data.gpnum),
166 if (rdp->passed_quiesce == 0) 139 TPS("cpuqs"));
167 trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); 140 __this_cpu_write(rcu_preempt_data.passed_quiesce, 1);
168 rdp->passed_quiesce = 1; 141 barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */
169 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 142 current->rcu_read_unlock_special.b.need_qs = false;
143 }
170} 144}
171 145
172/* 146/*
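The rcu_preempt_qs() rewrite above is part of converting ->rcu_read_unlock_special from RCU_READ_UNLOCK_* bit flags into a union of named booleans that can still be tested as a single word through its .s member. The standalone model below mirrors that shape for illustration; it is compiled on its own and is not the kernel's union rcu_special definition.

#include <stdbool.h>
#include <stdio.h>

union rcu_special {
        struct {
                bool blocked;   /* was the RCU_READ_UNLOCK_BLOCKED bit */
                bool need_qs;   /* was the RCU_READ_UNLOCK_NEED_QS bit */
        } b;
        short s;                /* all of the flags viewed as one scalar */
};

int main(void)
{
        union rcu_special special = { .s = 0 };

        special.b.blocked = true;       /* task blocked inside a read-side section */
        if (special.s)                  /* any special unlock handling needed at all? */
                puts("slow path: special handling required");

        special.b.blocked = false;      /* handled */
        if (!special.s)
                puts("fast path: nothing special left");
        return 0;
}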
@@ -190,14 +164,14 @@ static void rcu_preempt_note_context_switch(int cpu)
190 struct rcu_node *rnp; 164 struct rcu_node *rnp;
191 165
192 if (t->rcu_read_lock_nesting > 0 && 166 if (t->rcu_read_lock_nesting > 0 &&
193 (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { 167 !t->rcu_read_unlock_special.b.blocked) {
194 168
195 /* Possibly blocking in an RCU read-side critical section. */ 169 /* Possibly blocking in an RCU read-side critical section. */
196 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); 170 rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu);
197 rnp = rdp->mynode; 171 rnp = rdp->mynode;
198 raw_spin_lock_irqsave(&rnp->lock, flags); 172 raw_spin_lock_irqsave(&rnp->lock, flags);
199 smp_mb__after_unlock_lock(); 173 smp_mb__after_unlock_lock();
200 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; 174 t->rcu_read_unlock_special.b.blocked = true;
201 t->rcu_blocked_node = rnp; 175 t->rcu_blocked_node = rnp;
202 176
203 /* 177 /*
@@ -239,7 +213,7 @@ static void rcu_preempt_note_context_switch(int cpu)
239 : rnp->gpnum + 1); 213 : rnp->gpnum + 1);
240 raw_spin_unlock_irqrestore(&rnp->lock, flags); 214 raw_spin_unlock_irqrestore(&rnp->lock, flags);
241 } else if (t->rcu_read_lock_nesting < 0 && 215 } else if (t->rcu_read_lock_nesting < 0 &&
242 t->rcu_read_unlock_special) { 216 t->rcu_read_unlock_special.s) {
243 217
244 /* 218 /*
245 * Complete exit from RCU read-side critical section on 219 * Complete exit from RCU read-side critical section on
@@ -257,9 +231,7 @@ static void rcu_preempt_note_context_switch(int cpu)
257 * grace period, then the fact that the task has been enqueued 231 * grace period, then the fact that the task has been enqueued
258 * means that we continue to block the current grace period. 232 * means that we continue to block the current grace period.
259 */ 233 */
260 local_irq_save(flags); 234 rcu_preempt_qs();
261 rcu_preempt_qs(cpu);
262 local_irq_restore(flags);
263} 235}
264 236
265/* 237/*
@@ -340,7 +312,7 @@ void rcu_read_unlock_special(struct task_struct *t)
340 bool drop_boost_mutex = false; 312 bool drop_boost_mutex = false;
341#endif /* #ifdef CONFIG_RCU_BOOST */ 313#endif /* #ifdef CONFIG_RCU_BOOST */
342 struct rcu_node *rnp; 314 struct rcu_node *rnp;
343 int special; 315 union rcu_special special;
344 316
345 /* NMI handlers cannot block and cannot safely manipulate state. */ 317 /* NMI handlers cannot block and cannot safely manipulate state. */
346 if (in_nmi()) 318 if (in_nmi())
@@ -350,12 +322,13 @@ void rcu_read_unlock_special(struct task_struct *t)
350 322
351 /* 323 /*
352 * If RCU core is waiting for this CPU to exit critical section, 324 * If RCU core is waiting for this CPU to exit critical section,
353 * let it know that we have done so. 325 * let it know that we have done so. Because irqs are disabled,
326 * t->rcu_read_unlock_special cannot change.
354 */ 327 */
355 special = t->rcu_read_unlock_special; 328 special = t->rcu_read_unlock_special;
356 if (special & RCU_READ_UNLOCK_NEED_QS) { 329 if (special.b.need_qs) {
357 rcu_preempt_qs(smp_processor_id()); 330 rcu_preempt_qs();
358 if (!t->rcu_read_unlock_special) { 331 if (!t->rcu_read_unlock_special.s) {
359 local_irq_restore(flags); 332 local_irq_restore(flags);
360 return; 333 return;
361 } 334 }
@@ -368,8 +341,8 @@ void rcu_read_unlock_special(struct task_struct *t)
368 } 341 }
369 342
370 /* Clean up if blocked during RCU read-side critical section. */ 343 /* Clean up if blocked during RCU read-side critical section. */
371 if (special & RCU_READ_UNLOCK_BLOCKED) { 344 if (special.b.blocked) {
372 t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; 345 t->rcu_read_unlock_special.b.blocked = false;
373 346
374 /* 347 /*
375 * Remove this task from the list it blocked on. The 348 * Remove this task from the list it blocked on. The
@@ -653,12 +626,13 @@ static void rcu_preempt_check_callbacks(int cpu)
653 struct task_struct *t = current; 626 struct task_struct *t = current;
654 627
655 if (t->rcu_read_lock_nesting == 0) { 628 if (t->rcu_read_lock_nesting == 0) {
656 rcu_preempt_qs(cpu); 629 rcu_preempt_qs();
657 return; 630 return;
658 } 631 }
659 if (t->rcu_read_lock_nesting > 0 && 632 if (t->rcu_read_lock_nesting > 0 &&
660 per_cpu(rcu_preempt_data, cpu).qs_pending) 633 per_cpu(rcu_preempt_data, cpu).qs_pending &&
661 t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; 634 !per_cpu(rcu_preempt_data, cpu).passed_quiesce)
635 t->rcu_read_unlock_special.b.need_qs = true;
662} 636}
663 637
664#ifdef CONFIG_RCU_BOOST 638#ifdef CONFIG_RCU_BOOST
@@ -819,11 +793,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
819 * In fact, if you are using synchronize_rcu_expedited() in a loop, 793 * In fact, if you are using synchronize_rcu_expedited() in a loop,
820 * please restructure your code to batch your updates, and then Use a 794 * please restructure your code to batch your updates, and then Use a
821 * single synchronize_rcu() instead. 795 * single synchronize_rcu() instead.
822 *
823 * Note that it is illegal to call this function while holding any lock
824 * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
825 * to call this function from a CPU-hotplug notifier. Failing to observe
826 * these restriction will result in deadlock.
827 */ 796 */
828void synchronize_rcu_expedited(void) 797void synchronize_rcu_expedited(void)
829{ 798{
@@ -845,7 +814,11 @@ void synchronize_rcu_expedited(void)
845 * being boosted. This simplifies the process of moving tasks 814 * being boosted. This simplifies the process of moving tasks
846 * from leaf to root rcu_node structures. 815 * from leaf to root rcu_node structures.
847 */ 816 */
848 get_online_cpus(); 817 if (!try_get_online_cpus()) {
818 /* CPU-hotplug operation in flight, fall back to normal GP. */
819 wait_rcu_gp(call_rcu);
820 return;
821 }
849 822
850 /* 823 /*
851 * Acquire lock, falling back to synchronize_rcu() if too many 824 * Acquire lock, falling back to synchronize_rcu() if too many
@@ -897,7 +870,8 @@ void synchronize_rcu_expedited(void)
897 870
898 /* Clean up and exit. */ 871 /* Clean up and exit. */
899 smp_mb(); /* ensure expedited GP seen before counter increment. */ 872 smp_mb(); /* ensure expedited GP seen before counter increment. */
900 ACCESS_ONCE(sync_rcu_preempt_exp_count)++; 873 ACCESS_ONCE(sync_rcu_preempt_exp_count) =
874 sync_rcu_preempt_exp_count + 1;
901unlock_mb_ret: 875unlock_mb_ret:
902 mutex_unlock(&sync_rcu_preempt_exp_mutex); 876 mutex_unlock(&sync_rcu_preempt_exp_mutex);
903mb_ret: 877mb_ret:
@@ -941,7 +915,7 @@ void exit_rcu(void)
941 return; 915 return;
942 t->rcu_read_lock_nesting = 1; 916 t->rcu_read_lock_nesting = 1;
943 barrier(); 917 barrier();
944 t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; 918 t->rcu_read_unlock_special.b.blocked = true;
945 __rcu_read_unlock(); 919 __rcu_read_unlock();
946} 920}
947 921
@@ -1462,14 +1436,13 @@ static struct smp_hotplug_thread rcu_cpu_thread_spec = {
1462}; 1436};
1463 1437
1464/* 1438/*
1465 * Spawn all kthreads -- called as soon as the scheduler is running. 1439 * Spawn boost kthreads -- called as soon as the scheduler is running.
1466 */ 1440 */
1467static int __init rcu_spawn_kthreads(void) 1441static void __init rcu_spawn_boost_kthreads(void)
1468{ 1442{
1469 struct rcu_node *rnp; 1443 struct rcu_node *rnp;
1470 int cpu; 1444 int cpu;
1471 1445
1472 rcu_scheduler_fully_active = 1;
1473 for_each_possible_cpu(cpu) 1446 for_each_possible_cpu(cpu)
1474 per_cpu(rcu_cpu_has_work, cpu) = 0; 1447 per_cpu(rcu_cpu_has_work, cpu) = 0;
1475 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); 1448 BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
@@ -1479,9 +1452,7 @@ static int __init rcu_spawn_kthreads(void)
1479 rcu_for_each_leaf_node(rcu_state_p, rnp) 1452 rcu_for_each_leaf_node(rcu_state_p, rnp)
1480 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp); 1453 (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
1481 } 1454 }
1482 return 0;
1483} 1455}
1484early_initcall(rcu_spawn_kthreads);
1485 1456
1486static void rcu_prepare_kthreads(int cpu) 1457static void rcu_prepare_kthreads(int cpu)
1487{ 1458{
@@ -1519,12 +1490,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
1519{ 1490{
1520} 1491}
1521 1492
1522static int __init rcu_scheduler_really_started(void) 1493static void __init rcu_spawn_boost_kthreads(void)
1523{ 1494{
1524 rcu_scheduler_fully_active = 1;
1525 return 0;
1526} 1495}
1527early_initcall(rcu_scheduler_really_started);
1528 1496
1529static void rcu_prepare_kthreads(int cpu) 1497static void rcu_prepare_kthreads(int cpu)
1530{ 1498{
@@ -1625,7 +1593,7 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void)
1625 1593
1626 /* Exit early if we advanced recently. */ 1594 /* Exit early if we advanced recently. */
1627 if (jiffies == rdtp->last_advance_all) 1595 if (jiffies == rdtp->last_advance_all)
1628 return 0; 1596 return false;
1629 rdtp->last_advance_all = jiffies; 1597 rdtp->last_advance_all = jiffies;
1630 1598
1631 for_each_rcu_flavor(rsp) { 1599 for_each_rcu_flavor(rsp) {
@@ -1848,7 +1816,7 @@ static int rcu_oom_notify(struct notifier_block *self,
1848 get_online_cpus(); 1816 get_online_cpus();
1849 for_each_online_cpu(cpu) { 1817 for_each_online_cpu(cpu) {
1850 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); 1818 smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
1851 cond_resched(); 1819 cond_resched_rcu_qs();
1852 } 1820 }
1853 put_online_cpus(); 1821 put_online_cpus();
1854 1822
@@ -2075,7 +2043,7 @@ static void wake_nocb_leader(struct rcu_data *rdp, bool force)
2075 if (!ACCESS_ONCE(rdp_leader->nocb_kthread)) 2043 if (!ACCESS_ONCE(rdp_leader->nocb_kthread))
2076 return; 2044 return;
2077 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) { 2045 if (ACCESS_ONCE(rdp_leader->nocb_leader_sleep) || force) {
2078 /* Prior xchg orders against prior callback enqueue. */ 2046 /* Prior smp_mb__after_atomic() orders against prior enqueue. */
2079 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false; 2047 ACCESS_ONCE(rdp_leader->nocb_leader_sleep) = false;
2080 wake_up(&rdp_leader->nocb_wq); 2048 wake_up(&rdp_leader->nocb_wq);
2081 } 2049 }
@@ -2104,6 +2072,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2104 ACCESS_ONCE(*old_rhpp) = rhp; 2072 ACCESS_ONCE(*old_rhpp) = rhp;
2105 atomic_long_add(rhcount, &rdp->nocb_q_count); 2073 atomic_long_add(rhcount, &rdp->nocb_q_count);
2106 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy); 2074 atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
2075 smp_mb__after_atomic(); /* Store *old_rhpp before _wake test. */
2107 2076
2108 /* If we are not being polled and there is a kthread, awaken it ... */ 2077 /* If we are not being polled and there is a kthread, awaken it ... */
2109 t = ACCESS_ONCE(rdp->nocb_kthread); 2078 t = ACCESS_ONCE(rdp->nocb_kthread);
@@ -2120,16 +2089,23 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
2120 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2089 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2121 TPS("WakeEmpty")); 2090 TPS("WakeEmpty"));
2122 } else { 2091 } else {
2123 rdp->nocb_defer_wakeup = true; 2092 rdp->nocb_defer_wakeup = RCU_NOGP_WAKE;
2124 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2093 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2125 TPS("WakeEmptyIsDeferred")); 2094 TPS("WakeEmptyIsDeferred"));
2126 } 2095 }
2127 rdp->qlen_last_fqs_check = 0; 2096 rdp->qlen_last_fqs_check = 0;
2128 } else if (len > rdp->qlen_last_fqs_check + qhimark) { 2097 } else if (len > rdp->qlen_last_fqs_check + qhimark) {
2129 /* ... or if many callbacks queued. */ 2098 /* ... or if many callbacks queued. */
2130 wake_nocb_leader(rdp, true); 2099 if (!irqs_disabled_flags(flags)) {
2100 wake_nocb_leader(rdp, true);
2101 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2102 TPS("WakeOvf"));
2103 } else {
2104 rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE;
2105 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2106 TPS("WakeOvfIsDeferred"));
2107 }
2131 rdp->qlen_last_fqs_check = LONG_MAX / 2; 2108 rdp->qlen_last_fqs_check = LONG_MAX / 2;
2132 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeOvf"));
2133 } else { 2109 } else {
2134 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot")); 2110 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("WakeNot"));
2135 } 2111 }
@@ -2150,7 +2126,7 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2150{ 2126{
2151 2127
2152 if (!rcu_is_nocb_cpu(rdp->cpu)) 2128 if (!rcu_is_nocb_cpu(rdp->cpu))
2153 return 0; 2129 return false;
2154 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags); 2130 __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy, flags);
2155 if (__is_kfree_rcu_offset((unsigned long)rhp->func)) 2131 if (__is_kfree_rcu_offset((unsigned long)rhp->func))
2156 trace_rcu_kfree_callback(rdp->rsp->name, rhp, 2132 trace_rcu_kfree_callback(rdp->rsp->name, rhp,
@@ -2161,7 +2137,18 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2161 trace_rcu_callback(rdp->rsp->name, rhp, 2137 trace_rcu_callback(rdp->rsp->name, rhp,
2162 -atomic_long_read(&rdp->nocb_q_count_lazy), 2138 -atomic_long_read(&rdp->nocb_q_count_lazy),
2163 -atomic_long_read(&rdp->nocb_q_count)); 2139 -atomic_long_read(&rdp->nocb_q_count));
2164 return 1; 2140
2141 /*
2142 * If called from an extended quiescent state with interrupts
2143 * disabled, invoke the RCU core in order to allow the idle-entry
2144 * deferred-wakeup check to function.
2145 */
2146 if (irqs_disabled_flags(flags) &&
2147 !rcu_is_watching() &&
2148 cpu_online(smp_processor_id()))
2149 invoke_rcu_core();
2150
2151 return true;
2165} 2152}
2166 2153
2167/* 2154/*
@@ -2177,7 +2164,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2177 2164
2178 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */ 2165 /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
2179 if (!rcu_is_nocb_cpu(smp_processor_id())) 2166 if (!rcu_is_nocb_cpu(smp_processor_id()))
2180 return 0; 2167 return false;
2181 rsp->qlen = 0; 2168 rsp->qlen = 0;
2182 rsp->qlen_lazy = 0; 2169 rsp->qlen_lazy = 0;
2183 2170
@@ -2196,7 +2183,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2196 rsp->orphan_nxtlist = NULL; 2183 rsp->orphan_nxtlist = NULL;
2197 rsp->orphan_nxttail = &rsp->orphan_nxtlist; 2184 rsp->orphan_nxttail = &rsp->orphan_nxtlist;
2198 } 2185 }
2199 return 1; 2186 return true;
2200} 2187}
2201 2188
2202/* 2189/*
@@ -2229,7 +2216,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
2229 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c))); 2216 (d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
2230 if (likely(d)) 2217 if (likely(d))
2231 break; 2218 break;
2232 flush_signals(current); 2219 WARN_ON(signal_pending(current));
2233 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait")); 2220 trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
2234 } 2221 }
2235 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait")); 2222 trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
@@ -2288,7 +2275,7 @@ wait_again:
2288 if (!rcu_nocb_poll) 2275 if (!rcu_nocb_poll)
2289 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu, 2276 trace_rcu_nocb_wake(my_rdp->rsp->name, my_rdp->cpu,
2290 "WokeEmpty"); 2277 "WokeEmpty");
2291 flush_signals(current); 2278 WARN_ON(signal_pending(current));
2292 schedule_timeout_interruptible(1); 2279 schedule_timeout_interruptible(1);
2293 2280
2294 /* Rescan in case we were a victim of memory ordering. */ 2281 /* Rescan in case we were a victim of memory ordering. */
@@ -2327,6 +2314,7 @@ wait_again:
2327 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count); 2314 atomic_long_add(rdp->nocb_gp_count, &rdp->nocb_follower_count);
2328 atomic_long_add(rdp->nocb_gp_count_lazy, 2315 atomic_long_add(rdp->nocb_gp_count_lazy,
2329 &rdp->nocb_follower_count_lazy); 2316 &rdp->nocb_follower_count_lazy);
2317 smp_mb__after_atomic(); /* Store *tail before wakeup. */
2330 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) { 2318 if (rdp != my_rdp && tail == &rdp->nocb_follower_head) {
2331 /* 2319 /*
2332 * List was empty, wake up the follower. 2320 * List was empty, wake up the follower.
@@ -2367,7 +2355,7 @@ static void nocb_follower_wait(struct rcu_data *rdp)
2367 if (!rcu_nocb_poll) 2355 if (!rcu_nocb_poll)
2368 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, 2356 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
2369 "WokeEmpty"); 2357 "WokeEmpty");
2370 flush_signals(current); 2358 WARN_ON(signal_pending(current));
2371 schedule_timeout_interruptible(1); 2359 schedule_timeout_interruptible(1);
2372 } 2360 }
2373} 2361}
@@ -2428,15 +2416,16 @@ static int rcu_nocb_kthread(void *arg)
2428 list = next; 2416 list = next;
2429 } 2417 }
2430 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1); 2418 trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
2431 ACCESS_ONCE(rdp->nocb_p_count) -= c; 2419 ACCESS_ONCE(rdp->nocb_p_count) = rdp->nocb_p_count - c;
2432 ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl; 2420 ACCESS_ONCE(rdp->nocb_p_count_lazy) =
2421 rdp->nocb_p_count_lazy - cl;
2433 rdp->n_nocbs_invoked += c; 2422 rdp->n_nocbs_invoked += c;
2434 } 2423 }
2435 return 0; 2424 return 0;
2436} 2425}
2437 2426
2438/* Is a deferred wakeup of rcu_nocb_kthread() required? */ 2427/* Is a deferred wakeup of rcu_nocb_kthread() required? */
2439static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2428static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2440{ 2429{
2441 return ACCESS_ONCE(rdp->nocb_defer_wakeup); 2430 return ACCESS_ONCE(rdp->nocb_defer_wakeup);
2442} 2431}
@@ -2444,11 +2433,79 @@ static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2444/* Do a deferred wakeup of rcu_nocb_kthread(). */ 2433/* Do a deferred wakeup of rcu_nocb_kthread(). */
2445static void do_nocb_deferred_wakeup(struct rcu_data *rdp) 2434static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2446{ 2435{
2436 int ndw;
2437
2447 if (!rcu_nocb_need_deferred_wakeup(rdp)) 2438 if (!rcu_nocb_need_deferred_wakeup(rdp))
2448 return; 2439 return;
2449 ACCESS_ONCE(rdp->nocb_defer_wakeup) = false; 2440 ndw = ACCESS_ONCE(rdp->nocb_defer_wakeup);
2450 wake_nocb_leader(rdp, false); 2441 ACCESS_ONCE(rdp->nocb_defer_wakeup) = RCU_NOGP_WAKE_NOT;
2451 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWakeEmpty")); 2442 wake_nocb_leader(rdp, ndw == RCU_NOGP_WAKE_FORCE);
2443 trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, TPS("DeferredWake"));
2444}
2445
2446void __init rcu_init_nohz(void)
2447{
2448 int cpu;
2449 bool need_rcu_nocb_mask = true;
2450 struct rcu_state *rsp;
2451
2452#ifdef CONFIG_RCU_NOCB_CPU_NONE
2453 need_rcu_nocb_mask = false;
 2454#endif /* #ifdef CONFIG_RCU_NOCB_CPU_NONE */
2455
2456#if defined(CONFIG_NO_HZ_FULL)
2457 if (tick_nohz_full_running && cpumask_weight(tick_nohz_full_mask))
2458 need_rcu_nocb_mask = true;
2459#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2460
2461 if (!have_rcu_nocb_mask && need_rcu_nocb_mask) {
2462 if (!zalloc_cpumask_var(&rcu_nocb_mask, GFP_KERNEL)) {
2463 pr_info("rcu_nocb_mask allocation failed, callback offloading disabled.\n");
2464 return;
2465 }
2466 have_rcu_nocb_mask = true;
2467 }
2468 if (!have_rcu_nocb_mask)
2469 return;
2470
2471#ifdef CONFIG_RCU_NOCB_CPU_ZERO
2472 pr_info("\tOffload RCU callbacks from CPU 0\n");
2473 cpumask_set_cpu(0, rcu_nocb_mask);
2474#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
2475#ifdef CONFIG_RCU_NOCB_CPU_ALL
2476 pr_info("\tOffload RCU callbacks from all CPUs\n");
2477 cpumask_copy(rcu_nocb_mask, cpu_possible_mask);
2478#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
2479#if defined(CONFIG_NO_HZ_FULL)
2480 if (tick_nohz_full_running)
2481 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2482#endif /* #if defined(CONFIG_NO_HZ_FULL) */
2483
2484 if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
2485 pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
2486 cpumask_and(rcu_nocb_mask, cpu_possible_mask,
2487 rcu_nocb_mask);
2488 }
2489 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
2490 pr_info("\tOffload RCU callbacks from CPUs: %s.\n", nocb_buf);
2491 if (rcu_nocb_poll)
2492 pr_info("\tPoll for callbacks from no-CBs CPUs.\n");
2493
2494 for_each_rcu_flavor(rsp) {
2495 for_each_cpu(cpu, rcu_nocb_mask) {
2496 struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
2497
2498 /*
2499 * If there are early callbacks, they will need
2500 * to be moved to the nocb lists.
2501 */
2502 WARN_ON_ONCE(rdp->nxttail[RCU_NEXT_TAIL] !=
2503 &rdp->nxtlist &&
2504 rdp->nxttail[RCU_NEXT_TAIL] != NULL);
2505 init_nocb_callback_list(rdp);
2506 }
2507 rcu_organize_nocb_kthreads(rsp);
2508 }
2452} 2509}
2453 2510
2454/* Initialize per-rcu_data variables for no-CBs CPUs. */ 2511/* Initialize per-rcu_data variables for no-CBs CPUs. */
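rcu_init_nohz() now builds the no-CBs mask in one place: start from the Kconfig choice and the rcu_nocbs= boot parameter, fold in the nohz_full set, and clamp the result to CPUs that actually exist. The toy program below models that mask arithmetic with plain bitmaps; the values and names are invented for the example.

#include <stdio.h>

int main(void)
{
        unsigned long possible_mask  = 0x0f;    /* CPUs 0-3 exist */
        unsigned long nocb_mask      = 0x11;    /* e.g. rcu_nocbs=0,4 (CPU 4 absent) */
        unsigned long nohz_full_mask = 0x0c;    /* e.g. nohz_full=2-3 */

        /* nohz_full CPUs must also have their callbacks offloaded. */
        nocb_mask |= nohz_full_mask;

        /* Complain about, then drop, CPUs that do not exist. */
        if (nocb_mask & ~possible_mask)
                puts("note: 'rcu_nocbs=' contains nonexistent CPUs");
        nocb_mask &= possible_mask;

        printf("offload RCU callbacks from CPUs: %#lx\n", nocb_mask);
        return 0;
}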
@@ -2459,15 +2516,85 @@ static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2459 rdp->nocb_follower_tail = &rdp->nocb_follower_head; 2516 rdp->nocb_follower_tail = &rdp->nocb_follower_head;
2460} 2517}
2461 2518
2519/*
2520 * If the specified CPU is a no-CBs CPU that does not already have its
2521 * rcuo kthread for the specified RCU flavor, spawn it. If the CPUs are
2522 * brought online out of order, this can require re-organizing the
2523 * leader-follower relationships.
2524 */
2525static void rcu_spawn_one_nocb_kthread(struct rcu_state *rsp, int cpu)
2526{
2527 struct rcu_data *rdp;
2528 struct rcu_data *rdp_last;
2529 struct rcu_data *rdp_old_leader;
2530 struct rcu_data *rdp_spawn = per_cpu_ptr(rsp->rda, cpu);
2531 struct task_struct *t;
2532
2533 /*
2534 * If this isn't a no-CBs CPU or if it already has an rcuo kthread,
2535 * then nothing to do.
2536 */
2537 if (!rcu_is_nocb_cpu(cpu) || rdp_spawn->nocb_kthread)
2538 return;
2539
2540 /* If we didn't spawn the leader first, reorganize! */
2541 rdp_old_leader = rdp_spawn->nocb_leader;
2542 if (rdp_old_leader != rdp_spawn && !rdp_old_leader->nocb_kthread) {
2543 rdp_last = NULL;
2544 rdp = rdp_old_leader;
2545 do {
2546 rdp->nocb_leader = rdp_spawn;
2547 if (rdp_last && rdp != rdp_spawn)
2548 rdp_last->nocb_next_follower = rdp;
2549 rdp_last = rdp;
2550 rdp = rdp->nocb_next_follower;
2551 rdp_last->nocb_next_follower = NULL;
2552 } while (rdp);
2553 rdp_spawn->nocb_next_follower = rdp_old_leader;
2554 }
2555
2556 /* Spawn the kthread for this CPU and RCU flavor. */
2557 t = kthread_run(rcu_nocb_kthread, rdp_spawn,
2558 "rcuo%c/%d", rsp->abbr, cpu);
2559 BUG_ON(IS_ERR(t));
2560 ACCESS_ONCE(rdp_spawn->nocb_kthread) = t;
2561}
2562
2563/*
2564 * If the specified CPU is a no-CBs CPU that does not already have its
2565 * rcuo kthreads, spawn them.
2566 */
2567static void rcu_spawn_all_nocb_kthreads(int cpu)
2568{
2569 struct rcu_state *rsp;
2570
2571 if (rcu_scheduler_fully_active)
2572 for_each_rcu_flavor(rsp)
2573 rcu_spawn_one_nocb_kthread(rsp, cpu);
2574}
2575
2576/*
2577 * Once the scheduler is running, spawn rcuo kthreads for all online
2578 * no-CBs CPUs. This assumes that the early_initcall()s happen before
2579 * non-boot CPUs come online -- if this changes, we will need to add
2580 * some mutual exclusion.
2581 */
2582static void __init rcu_spawn_nocb_kthreads(void)
2583{
2584 int cpu;
2585
2586 for_each_online_cpu(cpu)
2587 rcu_spawn_all_nocb_kthreads(cpu);
2588}
2589
2462/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ 2590/* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */
2463static int rcu_nocb_leader_stride = -1; 2591static int rcu_nocb_leader_stride = -1;
2464module_param(rcu_nocb_leader_stride, int, 0444); 2592module_param(rcu_nocb_leader_stride, int, 0444);
2465 2593
2466/* 2594/*
2467 * Create a kthread for each RCU flavor for each no-CBs CPU. 2595 * Initialize leader-follower relationships for all no-CBs CPUs.
2468 * Also initialize leader-follower relationships.
2469 */ 2596 */
2470static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2597static void __init rcu_organize_nocb_kthreads(struct rcu_state *rsp)
2471{ 2598{
2472 int cpu; 2599 int cpu;
2473 int ls = rcu_nocb_leader_stride; 2600 int ls = rcu_nocb_leader_stride;
@@ -2475,14 +2602,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2475 struct rcu_data *rdp; 2602 struct rcu_data *rdp;
2476 struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */ 2603 struct rcu_data *rdp_leader = NULL; /* Suppress misguided gcc warn. */
2477 struct rcu_data *rdp_prev = NULL; 2604 struct rcu_data *rdp_prev = NULL;
2478 struct task_struct *t;
2479 2605
2480 if (rcu_nocb_mask == NULL) 2606 if (!have_rcu_nocb_mask)
2481 return; 2607 return;
2482#if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL)
2483 if (tick_nohz_full_running)
2484 cpumask_or(rcu_nocb_mask, rcu_nocb_mask, tick_nohz_full_mask);
2485#endif /* #if defined(CONFIG_NO_HZ_FULL) && !defined(CONFIG_NO_HZ_FULL_ALL) */
2486 if (ls == -1) { 2608 if (ls == -1) {
2487 ls = int_sqrt(nr_cpu_ids); 2609 ls = int_sqrt(nr_cpu_ids);
2488 rcu_nocb_leader_stride = ls; 2610 rcu_nocb_leader_stride = ls;
@@ -2505,21 +2627,15 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
2505 rdp_prev->nocb_next_follower = rdp; 2627 rdp_prev->nocb_next_follower = rdp;
2506 } 2628 }
2507 rdp_prev = rdp; 2629 rdp_prev = rdp;
2508
2509 /* Spawn the kthread for this CPU. */
2510 t = kthread_run(rcu_nocb_kthread, rdp,
2511 "rcuo%c/%d", rsp->abbr, cpu);
2512 BUG_ON(IS_ERR(t));
2513 ACCESS_ONCE(rdp->nocb_kthread) = t;
2514 } 2630 }
2515} 2631}
2516 2632
2517/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */ 2633/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
2518static bool init_nocb_callback_list(struct rcu_data *rdp) 2634static bool init_nocb_callback_list(struct rcu_data *rdp)
2519{ 2635{
2520 if (rcu_nocb_mask == NULL || 2636 if (!rcu_is_nocb_cpu(rdp->cpu))
2521 !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
2522 return false; 2637 return false;
2638
2523 rdp->nxttail[RCU_NEXT_TAIL] = NULL; 2639 rdp->nxttail[RCU_NEXT_TAIL] = NULL;
2524 return true; 2640 return true;
2525} 2641}
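rcu_spawn_one_nocb_kthread() above copes with CPUs coming online out of order: if a follower gets its rcuo kthread before its designated leader has one, the group is reorganized around the CPU that is actually running a kthread. The program below is a deliberately simplified, standalone model of that takeover (re-pointing the leader pointers and re-chaining the followers), not the kernel's exact list manipulation.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct nocb_cpu {
        int cpu;
        bool has_kthread;
        struct nocb_cpu *leader;
        struct nocb_cpu *next_follower;
};

/* The CPU whose kthread exists first takes over as group leader. */
static void take_over_leadership(struct nocb_cpu *new_leader)
{
        struct nocb_cpu *old_leader = new_leader->leader;
        struct nocb_cpu *p;

        if (old_leader == new_leader || old_leader->has_kthread)
                return;                 /* designated leader is already fine */

        /* Every member of the old group now reports to new_leader. */
        for (p = old_leader; p; p = p->next_follower)
                p->leader = new_leader;

        /* Unlink new_leader from the old chain, then chain the rest behind it. */
        for (p = old_leader; p; p = p->next_follower) {
                if (p->next_follower == new_leader)
                        p->next_follower = new_leader->next_follower;
        }
        new_leader->next_follower = old_leader;
}

int main(void)
{
        struct nocb_cpu c0 = { .cpu = 0 }, c1 = { .cpu = 1 }, c2 = { .cpu = 2 };
        struct nocb_cpu *p;

        /* Planned group: CPU 0 leads CPUs 1 and 2, but CPU 1 came online first. */
        c0.leader = &c0; c0.next_follower = &c1;
        c1.leader = &c0; c1.next_follower = &c2;
        c2.leader = &c0; c2.next_follower = NULL;
        c1.has_kthread = true;

        take_over_leadership(&c1);
        for (p = &c1; p; p = p->next_follower)
                printf("cpu %d now led by cpu %d\n", p->cpu, p->leader->cpu);
        return 0;
}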
@@ -2541,21 +2657,21 @@ static void rcu_init_one_nocb(struct rcu_node *rnp)
2541static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, 2657static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
2542 bool lazy, unsigned long flags) 2658 bool lazy, unsigned long flags)
2543{ 2659{
2544 return 0; 2660 return false;
2545} 2661}
2546 2662
2547static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, 2663static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
2548 struct rcu_data *rdp, 2664 struct rcu_data *rdp,
2549 unsigned long flags) 2665 unsigned long flags)
2550{ 2666{
2551 return 0; 2667 return false;
2552} 2668}
2553 2669
2554static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp) 2670static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
2555{ 2671{
2556} 2672}
2557 2673
2558static bool rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) 2674static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp)
2559{ 2675{
2560 return false; 2676 return false;
2561} 2677}
@@ -2564,7 +2680,11 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
2564{ 2680{
2565} 2681}
2566 2682
2567static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp) 2683static void rcu_spawn_all_nocb_kthreads(int cpu)
2684{
2685}
2686
2687static void __init rcu_spawn_nocb_kthreads(void)
2568{ 2688{
2569} 2689}
2570 2690
@@ -2595,16 +2715,6 @@ static void __maybe_unused rcu_kick_nohz_cpu(int cpu)
2595 2715
2596#ifdef CONFIG_NO_HZ_FULL_SYSIDLE 2716#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
2597 2717
2598/*
2599 * Define RCU flavor that holds sysidle state. This needs to be the
2600 * most active flavor of RCU.
2601 */
2602#ifdef CONFIG_PREEMPT_RCU
2603static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
2604#else /* #ifdef CONFIG_PREEMPT_RCU */
2605static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
2606#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
2607
2608static int full_sysidle_state; /* Current system-idle state. */ 2718static int full_sysidle_state; /* Current system-idle state. */
2609#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */ 2719#define RCU_SYSIDLE_NOT 0 /* Some CPU is not idle. */
2610#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */ 2720#define RCU_SYSIDLE_SHORT 1 /* All CPUs idle for brief period. */
@@ -2622,6 +2732,10 @@ static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
2622{ 2732{
2623 unsigned long j; 2733 unsigned long j;
2624 2734
2735 /* If there are no nohz_full= CPUs, no need to track this. */
2736 if (!tick_nohz_full_enabled())
2737 return;
2738
2625 /* Adjust nesting, check for fully idle. */ 2739 /* Adjust nesting, check for fully idle. */
2626 if (irq) { 2740 if (irq) {
2627 rdtp->dynticks_idle_nesting--; 2741 rdtp->dynticks_idle_nesting--;
@@ -2687,6 +2801,10 @@ void rcu_sysidle_force_exit(void)
2687 */ 2801 */
2688static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq) 2802static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
2689{ 2803{
2804 /* If there are no nohz_full= CPUs, no need to track this. */
2805 if (!tick_nohz_full_enabled())
2806 return;
2807
2690 /* Adjust nesting, check for already non-idle. */ 2808 /* Adjust nesting, check for already non-idle. */
2691 if (irq) { 2809 if (irq) {
2692 rdtp->dynticks_idle_nesting++; 2810 rdtp->dynticks_idle_nesting++;
@@ -2741,12 +2859,16 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2741 unsigned long j; 2859 unsigned long j;
2742 struct rcu_dynticks *rdtp = rdp->dynticks; 2860 struct rcu_dynticks *rdtp = rdp->dynticks;
2743 2861
2862 /* If there are no nohz_full= CPUs, don't check system-wide idleness. */
2863 if (!tick_nohz_full_enabled())
2864 return;
2865
2744 /* 2866 /*
2745 * If some other CPU has already reported non-idle, if this is 2867 * If some other CPU has already reported non-idle, if this is
2746 * not the flavor of RCU that tracks sysidle state, or if this 2868 * not the flavor of RCU that tracks sysidle state, or if this
2747 * is an offline or the timekeeping CPU, nothing to do. 2869 * is an offline or the timekeeping CPU, nothing to do.
2748 */ 2870 */
2749 if (!*isidle || rdp->rsp != rcu_sysidle_state || 2871 if (!*isidle || rdp->rsp != rcu_state_p ||
2750 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu) 2872 cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
2751 return; 2873 return;
2752 if (rcu_gp_in_progress(rdp->rsp)) 2874 if (rcu_gp_in_progress(rdp->rsp))
@@ -2772,7 +2894,7 @@ static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
2772 */ 2894 */
2773static bool is_sysidle_rcu_state(struct rcu_state *rsp) 2895static bool is_sysidle_rcu_state(struct rcu_state *rsp)
2774{ 2896{
2775 return rsp == rcu_sysidle_state; 2897 return rsp == rcu_state_p;
2776} 2898}
2777 2899
2778/* 2900/*
@@ -2850,7 +2972,7 @@ static void rcu_sysidle_cancel(void)
2850static void rcu_sysidle_report(struct rcu_state *rsp, int isidle, 2972static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2851 unsigned long maxj, bool gpkt) 2973 unsigned long maxj, bool gpkt)
2852{ 2974{
2853 if (rsp != rcu_sysidle_state) 2975 if (rsp != rcu_state_p)
2854 return; /* Wrong flavor, ignore. */ 2976 return; /* Wrong flavor, ignore. */
2855 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) 2977 if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
2856 return; /* Running state machine from timekeeping CPU. */ 2978 return; /* Running state machine from timekeeping CPU. */
@@ -2867,6 +2989,10 @@ static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
2867static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, 2989static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
2868 unsigned long maxj) 2990 unsigned long maxj)
2869{ 2991{
2992 /* If there are no nohz_full= CPUs, no need to track this. */
2993 if (!tick_nohz_full_enabled())
2994 return;
2995
2870 rcu_sysidle_report(rsp, isidle, maxj, true); 2996 rcu_sysidle_report(rsp, isidle, maxj, true);
2871} 2997}
2872 2998
@@ -2893,7 +3019,8 @@ static void rcu_sysidle_cb(struct rcu_head *rhp)
2893 3019
2894/* 3020/*
2895 * Check to see if the system is fully idle, other than the timekeeping CPU. 3021 * Check to see if the system is fully idle, other than the timekeeping CPU.
2896 * The caller must have disabled interrupts. 3022 * The caller must have disabled interrupts. This is not intended to be
3023 * called unless tick_nohz_full_enabled().
2897 */ 3024 */
2898bool rcu_sys_is_idle(void) 3025bool rcu_sys_is_idle(void)
2899{ 3026{
@@ -2919,13 +3046,12 @@ bool rcu_sys_is_idle(void)
2919 3046
2920 /* Scan all the CPUs looking for nonidle CPUs. */ 3047 /* Scan all the CPUs looking for nonidle CPUs. */
2921 for_each_possible_cpu(cpu) { 3048 for_each_possible_cpu(cpu) {
2922 rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu); 3049 rdp = per_cpu_ptr(rcu_state_p->rda, cpu);
2923 rcu_sysidle_check_cpu(rdp, &isidle, &maxj); 3050 rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
2924 if (!isidle) 3051 if (!isidle)
2925 break; 3052 break;
2926 } 3053 }
2927 rcu_sysidle_report(rcu_sysidle_state, 3054 rcu_sysidle_report(rcu_state_p, isidle, maxj, false);
2928 isidle, maxj, false);
2929 oldrss = rss; 3055 oldrss = rss;
2930 rss = ACCESS_ONCE(full_sysidle_state); 3056 rss = ACCESS_ONCE(full_sysidle_state);
2931 } 3057 }
@@ -2952,7 +3078,7 @@ bool rcu_sys_is_idle(void)
2952 * provided by the memory allocator. 3078 * provided by the memory allocator.
2953 */ 3079 */
2954 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL && 3080 if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
2955 !rcu_gp_in_progress(rcu_sysidle_state) && 3081 !rcu_gp_in_progress(rcu_state_p) &&
2956 !rsh.inuse && xchg(&rsh.inuse, 1) == 0) 3082 !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
2957 call_rcu(&rsh.rh, rcu_sysidle_cb); 3083 call_rcu(&rsh.rh, rcu_sysidle_cb);
2958 return false; 3084 return false;
@@ -3036,3 +3162,19 @@ static void rcu_bind_gp_kthread(void)
3036 housekeeping_affine(current); 3162 housekeeping_affine(current);
3037#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ 3163#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
3038} 3164}
3165
3166/* Record the current task on dyntick-idle entry. */
3167static void rcu_dynticks_task_enter(void)
3168{
3169#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3170 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id();
3171#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3172}
3173
3174/* Record no current task on dyntick-idle exit. */
3175static void rcu_dynticks_task_exit(void)
3176{
3177#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL)
3178 ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1;
3179#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */
3180}
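rcu_dynticks_task_enter() and rcu_dynticks_task_exit() let RCU-tasks treat a task idling on a nohz_full CPU as quiescent, by recording the CPU number on dyntick-idle entry and -1 on exit. A tiny standalone model of that bookkeeping follows; the task structure here is a stand-in, not task_struct.

#include <stdio.h>

struct toy_task {
        int rcu_tasks_idle_cpu;         /* -1 while not in dyntick-idle */
};

static void dynticks_task_enter(struct toy_task *t, int cpu)
{
        t->rcu_tasks_idle_cpu = cpu;    /* idle on this CPU from now on */
}

static void dynticks_task_exit(struct toy_task *t)
{
        t->rcu_tasks_idle_cpu = -1;     /* back to running kernel code */
}

int main(void)
{
        struct toy_task t = { .rcu_tasks_idle_cpu = -1 };

        dynticks_task_enter(&t, 3);
        printf("task idling on CPU %d\n", t.rcu_tasks_idle_cpu);
        dynticks_task_exit(&t);
        printf("idle_cpu is now %d\n", t.rcu_tasks_idle_cpu);
        return 0;
}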
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 4056d7992a6c..3ef8ba58694e 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -47,6 +47,8 @@
47#include <linux/hardirq.h> 47#include <linux/hardirq.h>
48#include <linux/delay.h> 48#include <linux/delay.h>
49#include <linux/module.h> 49#include <linux/module.h>
50#include <linux/kthread.h>
51#include <linux/tick.h>
50 52
51#define CREATE_TRACE_POINTS 53#define CREATE_TRACE_POINTS
52 54
@@ -91,7 +93,7 @@ void __rcu_read_unlock(void)
91 barrier(); /* critical section before exit code. */ 93 barrier(); /* critical section before exit code. */
92 t->rcu_read_lock_nesting = INT_MIN; 94 t->rcu_read_lock_nesting = INT_MIN;
93 barrier(); /* assign before ->rcu_read_unlock_special load */ 95 barrier(); /* assign before ->rcu_read_unlock_special load */
94 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 96 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s)))
95 rcu_read_unlock_special(t); 97 rcu_read_unlock_special(t);
96 barrier(); /* ->rcu_read_unlock_special load before assign */ 98 barrier(); /* ->rcu_read_unlock_special load before assign */
97 t->rcu_read_lock_nesting = 0; 99 t->rcu_read_lock_nesting = 0;
@@ -137,6 +139,38 @@ int notrace debug_lockdep_rcu_enabled(void)
137EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); 139EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled);
138 140
139/** 141/**
142 * rcu_read_lock_held() - might we be in RCU read-side critical section?
143 *
144 * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
145 * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
146 * this assumes we are in an RCU read-side critical section unless it can
147 * prove otherwise. This is useful for debug checks in functions that
148 * require that they be called within an RCU read-side critical section.
149 *
150 * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
151 * and while lockdep is disabled.
152 *
153 * Note that rcu_read_lock() and the matching rcu_read_unlock() must
154 * occur in the same context, for example, it is illegal to invoke
155 * rcu_read_unlock() in process context if the matching rcu_read_lock()
156 * was invoked from within an irq handler.
157 *
158 * Note that rcu_read_lock() is disallowed if the CPU is either idle or
159 * offline from an RCU perspective, so check for those as well.
160 */
161int rcu_read_lock_held(void)
162{
163 if (!debug_lockdep_rcu_enabled())
164 return 1;
165 if (!rcu_is_watching())
166 return 0;
167 if (!rcu_lockdep_current_cpu_online())
168 return 0;
169 return lock_is_held(&rcu_lock_map);
170}
171EXPORT_SYMBOL_GPL(rcu_read_lock_held);
172
173/**
140 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? 174 * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section?
141 * 175 *
142 * Check for bottom half being disabled, which covers both the 176 * Check for bottom half being disabled, which covers both the
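rcu_read_lock_held(), added above, exists mainly to feed lockdep-based debug checks. A typical use is inside rcu_dereference_check(), so that a pointer which may legally be read either under rcu_read_lock() or under some lock documents both cases; the sketch below is kernel-style illustration only, with placeholder structure and lock names, and is not buildable on its own.

/*
 * Kernel-style illustration only; struct box, struct conf and ->lock are
 * placeholders for this example.
 */
struct box {
        spinlock_t lock;
        struct conf __rcu *active_conf;
};

static struct conf *get_active_conf(struct box *b)
{
        /* Legal when called under rcu_read_lock() or with b->lock held. */
        return rcu_dereference_check(b->active_conf,
                                     rcu_read_lock_held() ||
                                     lockdep_is_held(&b->lock));
}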
@@ -347,3 +381,312 @@ static int __init check_cpu_stall_init(void)
347early_initcall(check_cpu_stall_init); 381early_initcall(check_cpu_stall_init);
348 382
349#endif /* #ifdef CONFIG_RCU_STALL_COMMON */ 383#endif /* #ifdef CONFIG_RCU_STALL_COMMON */
384
385#ifdef CONFIG_TASKS_RCU
386
387/*
388 * Simple variant of RCU whose quiescent states are voluntary context switch,
389 * user-space execution, and idle. As such, grace periods can take one good
390 * long time. There are no read-side primitives similar to rcu_read_lock()
391 * and rcu_read_unlock() because this implementation is intended to get
392 * the system into a safe state for some of the manipulations involved in
393 * tracing and the like. Finally, this implementation does not support
394 * high call_rcu_tasks() rates from multiple CPUs. If this is required,
395 * per-CPU callback lists will be needed.
396 */
397
398/* Global list of callbacks and associated lock. */
399static struct rcu_head *rcu_tasks_cbs_head;
400static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
401static DECLARE_WAIT_QUEUE_HEAD(rcu_tasks_cbs_wq);
402static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock);
403
404/* Track exiting tasks in order to allow them to be waited for. */
405DEFINE_SRCU(tasks_rcu_exit_srcu);
406
407/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */
408static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 10;
409module_param(rcu_task_stall_timeout, int, 0644);
410
411static void rcu_spawn_tasks_kthread(void);
412
413/*
414 * Post an RCU-tasks callback. First call must be from process context
 415 * after the scheduler is fully operational.
416 */
417void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp))
418{
419 unsigned long flags;
420 bool needwake;
421
422 rhp->next = NULL;
423 rhp->func = func;
424 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
425 needwake = !rcu_tasks_cbs_head;
426 *rcu_tasks_cbs_tail = rhp;
427 rcu_tasks_cbs_tail = &rhp->next;
428 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
429 if (needwake) {
430 rcu_spawn_tasks_kthread();
431 wake_up(&rcu_tasks_cbs_wq);
432 }
433}
434EXPORT_SYMBOL_GPL(call_rcu_tasks);
435
436/**
437 * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed.
438 *
439 * Control will return to the caller some time after a full rcu-tasks
440 * grace period has elapsed, in other words after all currently
441 * executing rcu-tasks read-side critical sections have elapsed. These
442 * read-side critical sections are delimited by calls to schedule(),
443 * cond_resched_rcu_qs(), idle execution, userspace execution, calls
444 * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
445 *
446 * This is a very specialized primitive, intended only for a few uses in
447 * tracing and other situations requiring manipulation of function
448 * preambles and profiling hooks. The synchronize_rcu_tasks() function
449 * is not (yet) intended for heavy use from multiple CPUs.
450 *
451 * Note that this guarantee implies further memory-ordering guarantees.
452 * On systems with more than one CPU, when synchronize_rcu_tasks() returns,
453 * each CPU is guaranteed to have executed a full memory barrier since the
454 * end of its last RCU-tasks read-side critical section whose beginning
455 * preceded the call to synchronize_rcu_tasks(). In addition, each CPU
456 * having an RCU-tasks read-side critical section that extends beyond
457 * the return from synchronize_rcu_tasks() is guaranteed to have executed
458 * a full memory barrier after the beginning of synchronize_rcu_tasks()
459 * and before the beginning of that RCU-tasks read-side critical section.
460 * Note that these guarantees include CPUs that are offline, idle, or
461 * executing in user mode, as well as CPUs that are executing in the kernel.
462 *
463 * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned
464 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
465 * to have executed a full memory barrier during the execution of
466 * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU
467 * (but again only if the system has more than one CPU).
468 */
469void synchronize_rcu_tasks(void)
470{
471 /* Complain if the scheduler has not started. */
472 rcu_lockdep_assert(!rcu_scheduler_active,
473 "synchronize_rcu_tasks called too soon");
474
475 /* Wait for the grace period. */
476 wait_rcu_gp(call_rcu_tasks);
477}
478EXPORT_SYMBOL_GPL(synchronize_rcu_tasks);
479
480/**
481 * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks.
482 *
483 * Although the current implementation is guaranteed to wait, it is not
484 * obligated to, for example, if there are no pending callbacks.
485 */
486void rcu_barrier_tasks(void)
487{
488 /* There is only one callback queue, so this is easy. ;-) */
489 synchronize_rcu_tasks();
490}
491EXPORT_SYMBOL_GPL(rcu_barrier_tasks);
492
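A typical intended use of the RCU-tasks API above is retiring code, such as a tracing trampoline, that some task might still be executing: the memory is reclaimed only after every task has passed through a voluntary context switch, user space, or idle. The kernel-style sketch below is illustrative only; struct tramp and the retire helpers are placeholders, while call_rcu_tasks() and synchronize_rcu_tasks() are the primitives defined above.

/*
 * Kernel-style sketch, illustrative only: struct tramp and the helpers
 * are placeholders for this example.
 */
struct tramp {
        struct rcu_head rh;
        /* executable code would follow in the real thing */
};

static void tramp_free_cb(struct rcu_head *rhp)
{
        kfree(container_of(rhp, struct tramp, rh));
}

/* Asynchronous retirement: free once an RCU-tasks grace period elapses. */
static void retire_trampoline(struct tramp *tp)
{
        call_rcu_tasks(&tp->rh, tramp_free_cb);
}

/* Synchronous variant: wait for the grace period, then free directly. */
static void retire_trampoline_sync(struct tramp *tp)
{
        synchronize_rcu_tasks();
        kfree(tp);
}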
493/* See if tasks are still holding out, complain if so. */
494static void check_holdout_task(struct task_struct *t,
495 bool needreport, bool *firstreport)
496{
497 int cpu;
498
499 if (!ACCESS_ONCE(t->rcu_tasks_holdout) ||
500 t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) ||
501 !ACCESS_ONCE(t->on_rq) ||
502 (IS_ENABLED(CONFIG_NO_HZ_FULL) &&
503 !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
504 ACCESS_ONCE(t->rcu_tasks_holdout) = false;
505 list_del_init(&t->rcu_tasks_holdout_list);
506 put_task_struct(t);
507 return;
508 }
509 if (!needreport)
510 return;
511 if (*firstreport) {
512 pr_err("INFO: rcu_tasks detected stalls on tasks:\n");
513 *firstreport = false;
514 }
515 cpu = task_cpu(t);
516 pr_alert("%p: %c%c nvcsw: %lu/%lu holdout: %d idle_cpu: %d/%d\n",
517 t, ".I"[is_idle_task(t)],
518 "N."[cpu < 0 || !tick_nohz_full_cpu(cpu)],
519 t->rcu_tasks_nvcsw, t->nvcsw, t->rcu_tasks_holdout,
520 t->rcu_tasks_idle_cpu, cpu);
521 sched_show_task(t);
522}
523
524/* RCU-tasks kthread that detects grace periods and invokes callbacks. */
525static int __noreturn rcu_tasks_kthread(void *arg)
526{
527 unsigned long flags;
528 struct task_struct *g, *t;
529 unsigned long lastreport;
530 struct rcu_head *list;
531 struct rcu_head *next;
532 LIST_HEAD(rcu_tasks_holdouts);
533
534 /* FIXME: Add housekeeping affinity. */
535
536 /*
537 * Each pass through the following loop makes one check for
538 * newly arrived callbacks, and, if there are some, waits for
539 * one RCU-tasks grace period and then invokes the callbacks.
540 * This loop is terminated by the system going down. ;-)
541 */
542 for (;;) {
543
544 /* Pick up any new callbacks. */
545 raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags);
546 list = rcu_tasks_cbs_head;
547 rcu_tasks_cbs_head = NULL;
548 rcu_tasks_cbs_tail = &rcu_tasks_cbs_head;
549 raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags);
550
551 /* If there were none, wait a bit and start over. */
552 if (!list) {
553 wait_event_interruptible(rcu_tasks_cbs_wq,
554 rcu_tasks_cbs_head);
555 if (!rcu_tasks_cbs_head) {
556 WARN_ON(signal_pending(current));
557 schedule_timeout_interruptible(HZ/10);
558 }
559 continue;
560 }
561
562 /*
563 * Wait for all pre-existing t->on_rq and t->nvcsw
564 * transitions to complete. Invoking synchronize_sched()
565 * suffices because all these transitions occur with
566 * interrupts disabled. Without this synchronize_sched(),
567 * a read-side critical section that started before the
568 * grace period might be incorrectly seen as having started
569 * after the grace period.
570 *
571 * This synchronize_sched() also dispenses with the
572 * need for a memory barrier on the first store to
573 * ->rcu_tasks_holdout, as it forces the store to happen
574 * after the beginning of the grace period.
575 */
576 synchronize_sched();
577
578 /*
579 * There were callbacks, so we need to wait for an
580 * RCU-tasks grace period. Start off by scanning
581 * the task list for tasks that are not already
582 * voluntarily blocked. Mark these tasks and make
583 * a list of them in rcu_tasks_holdouts.
584 */
585 rcu_read_lock();
586 for_each_process_thread(g, t) {
587 if (t != current && ACCESS_ONCE(t->on_rq) &&
588 !is_idle_task(t)) {
589 get_task_struct(t);
590 t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw);
591 ACCESS_ONCE(t->rcu_tasks_holdout) = true;
592 list_add(&t->rcu_tasks_holdout_list,
593 &rcu_tasks_holdouts);
594 }
595 }
596 rcu_read_unlock();
597
598 /*
599 * Wait for tasks that are in the process of exiting.
600 * This does only part of the job, ensuring that all
601 * tasks that were previously exiting reach the point
602 * where they have disabled preemption, allowing the
603 * later synchronize_sched() to finish the job.
604 */
605 synchronize_srcu(&tasks_rcu_exit_srcu);
606
607 /*
608 * Each pass through the following loop scans the list
609 * of holdout tasks, removing any that are no longer
610 * holdouts. When the list is empty, we are done.
611 */
612 lastreport = jiffies;
613 while (!list_empty(&rcu_tasks_holdouts)) {
614 bool firstreport;
615 bool needreport;
616 int rtst;
617 struct task_struct *t1;
618
619 schedule_timeout_interruptible(HZ);
620 rtst = ACCESS_ONCE(rcu_task_stall_timeout);
621 needreport = rtst > 0 &&
622 time_after(jiffies, lastreport + rtst);
623 if (needreport)
624 lastreport = jiffies;
625 firstreport = true;
626 WARN_ON(signal_pending(current));
627 list_for_each_entry_safe(t, t1, &rcu_tasks_holdouts,
628 rcu_tasks_holdout_list) {
629 check_holdout_task(t, needreport, &firstreport);
630 cond_resched();
631 }
632 }
633
634 /*
635 * Because ->on_rq and ->nvcsw are not guaranteed
636 * to have full memory barriers prior to them in the
637 * schedule() path, memory reordering on other CPUs could
638 * cause their RCU-tasks read-side critical sections to
639 * extend past the end of the grace period. However,
640 * because these ->nvcsw updates are carried out with
641 * interrupts disabled, we can use synchronize_sched()
642 * to force the needed ordering on all such CPUs.
643 *
644 * This synchronize_sched() also confines all
645 * ->rcu_tasks_holdout accesses to be within the grace
646 * period, avoiding the need for memory barriers for
647 * ->rcu_tasks_holdout accesses.
648 *
649 * In addition, this synchronize_sched() waits for exiting
650 * tasks to complete their final preempt_disable() region
651 * of execution, cleaning up after the synchronize_srcu()
652 * above.
653 */
654 synchronize_sched();
655
656 /* Invoke the callbacks. */
657 while (list) {
658 next = list->next;
659 local_bh_disable();
660 list->func(list);
661 local_bh_enable();
662 list = next;
663 cond_resched();
664 }
665 schedule_timeout_uninterruptible(HZ/10);
666 }
667}
668
669/* Spawn rcu_tasks_kthread() at first call to call_rcu_tasks(). */
670static void rcu_spawn_tasks_kthread(void)
671{
672 static DEFINE_MUTEX(rcu_tasks_kthread_mutex);
673 static struct task_struct *rcu_tasks_kthread_ptr;
674 struct task_struct *t;
675
676 if (ACCESS_ONCE(rcu_tasks_kthread_ptr)) {
677 smp_mb(); /* Ensure caller sees full kthread. */
678 return;
679 }
680 mutex_lock(&rcu_tasks_kthread_mutex);
681 if (rcu_tasks_kthread_ptr) {
682 mutex_unlock(&rcu_tasks_kthread_mutex);
683 return;
684 }
685 t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread");
686 BUG_ON(IS_ERR(t));
687 smp_mb(); /* Ensure others see full kthread. */
688 ACCESS_ONCE(rcu_tasks_kthread_ptr) = t;
689 mutex_unlock(&rcu_tasks_kthread_mutex);
690}
691
692#endif /* #ifdef CONFIG_TASKS_RCU */
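
A brief illustration of the new RCU-tasks interfaces defined above: the block comments describe their intended use for reclaiming dynamically generated code (such as tracing trampolines) that tasks may be executing without any rcu_read_lock() protection. The sketch below is hypothetical; struct trampoline, its fields, and the helper names are invented for this example, while call_rcu_tasks() and synchronize_rcu_tasks() are the interfaces added in kernel/rcu/update.c above.

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

/* Hypothetical container for generated code, invented for illustration. */
struct trampoline {
	struct rcu_head rh;	/* passed to call_rcu_tasks() */
	void *insns;		/* generated instructions */
};

/*
 * Runs only after every task has passed through a voluntary context
 * switch, user-mode execution, or the idle loop, so no task can still
 * be executing in tramp->insns.
 */
static void free_trampoline_cb(struct rcu_head *rhp)
{
	struct trampoline *tramp = container_of(rhp, struct trampoline, rh);

	kfree(tramp->insns);
	kfree(tramp);
}

static void retire_trampoline_async(struct trampoline *tramp)
{
	/* Returns immediately; the callback frees after the grace period. */
	call_rcu_tasks(&tramp->rh, free_trampoline_cb);
}

static void retire_trampoline_sync(struct trampoline *tramp)
{
	/* Blocks until a full RCU-tasks grace period has elapsed. */
	synchronize_rcu_tasks();
	kfree(tramp->insns);
	kfree(tramp);
}

The asynchronous form follows the same callback pattern as call_rcu(); the synchronous form simply waits via wait_rcu_gp(call_rcu_tasks), as shown in synchronize_rcu_tasks() above.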
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 5918d227730f..348ec763b104 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -278,7 +278,7 @@ restart:
278 pending >>= softirq_bit; 278 pending >>= softirq_bit;
279 } 279 }
280 280
281 rcu_bh_qs(smp_processor_id()); 281 rcu_bh_qs();
282 local_irq_disable(); 282 local_irq_disable();
283 283
284 pending = local_softirq_pending(); 284 pending = local_softirq_pending();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 75875a741b5e..ab456664609d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1055,15 +1055,6 @@ static struct ctl_table kern_table[] = {
1055 .child = key_sysctls, 1055 .child = key_sysctls,
1056 }, 1056 },
1057#endif 1057#endif
1058#ifdef CONFIG_RCU_TORTURE_TEST
1059 {
1060 .procname = "rcutorture_runnable",
1061 .data = &rcutorture_runnable,
1062 .maxlen = sizeof(int),
1063 .mode = 0644,
1064 .proc_handler = proc_dointvec,
1065 },
1066#endif
1067#ifdef CONFIG_PERF_EVENTS 1058#ifdef CONFIG_PERF_EVENTS
1068 /* 1059 /*
1069 * User-space scripts rely on the existence of this file 1060 * User-space scripts rely on the existence of this file
diff --git a/kernel/torture.c b/kernel/torture.c
index d600af21f022..dd70993c266c 100644
--- a/kernel/torture.c
+++ b/kernel/torture.c
@@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup);
211/* 211/*
212 * Print online/offline testing statistics. 212 * Print online/offline testing statistics.
213 */ 213 */
214char *torture_onoff_stats(char *page) 214void torture_onoff_stats(void)
215{ 215{
216#ifdef CONFIG_HOTPLUG_CPU 216#ifdef CONFIG_HOTPLUG_CPU
217 page += sprintf(page, 217 pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ",
218 "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", 218 n_online_successes, n_online_attempts,
219 n_online_successes, n_online_attempts, 219 n_offline_successes, n_offline_attempts,
220 n_offline_successes, n_offline_attempts, 220 min_online, max_online,
221 min_online, max_online, 221 min_offline, max_offline,
222 min_offline, max_offline, 222 sum_online, sum_offline, HZ);
223 sum_online, sum_offline, HZ);
224#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 223#endif /* #ifdef CONFIG_HOTPLUG_CPU */
225 return page;
226} 224}
227EXPORT_SYMBOL_GPL(torture_onoff_stats); 225EXPORT_SYMBOL_GPL(torture_onoff_stats);
228 226
@@ -635,8 +633,13 @@ EXPORT_SYMBOL_GPL(torture_init_end);
635 * 633 *
636 * This must be called before the caller starts shutting down its own 634 * This must be called before the caller starts shutting down its own
637 * kthreads. 635 * kthreads.
636 *
 637 * Calls to torture_cleanup_begin() and torture_cleanup_end() must be
 638 * paired in order to perform the cleanup correctly. They are separate
 639 * because other threads may still need to reference torture_type, which
 640 * is therefore set to NULL only after all other cleanup calls complete.
638 */ 641 */
639bool torture_cleanup(void) 642bool torture_cleanup_begin(void)
640{ 643{
641 mutex_lock(&fullstop_mutex); 644 mutex_lock(&fullstop_mutex);
642 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) { 645 if (ACCESS_ONCE(fullstop) == FULLSTOP_SHUTDOWN) {
@@ -651,12 +654,17 @@ bool torture_cleanup(void)
651 torture_shuffle_cleanup(); 654 torture_shuffle_cleanup();
652 torture_stutter_cleanup(); 655 torture_stutter_cleanup();
653 torture_onoff_cleanup(); 656 torture_onoff_cleanup();
657 return false;
658}
659EXPORT_SYMBOL_GPL(torture_cleanup_begin);
660
661void torture_cleanup_end(void)
662{
654 mutex_lock(&fullstop_mutex); 663 mutex_lock(&fullstop_mutex);
655 torture_type = NULL; 664 torture_type = NULL;
656 mutex_unlock(&fullstop_mutex); 665 mutex_unlock(&fullstop_mutex);
657 return false;
658} 666}
659EXPORT_SYMBOL_GPL(torture_cleanup); 667EXPORT_SYMBOL_GPL(torture_cleanup_end);
660 668
661/* 669/*
662 * Is it time for the current torture test to stop? 670 * Is it time for the current torture test to stop?
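
To make the begin/end split above concrete, here is a hedged sketch of how a torture module's exit path might pair the two new calls; my_stop_kthreads() and my_print_stats() are invented placeholders for the module's own teardown work, while torture_cleanup_begin() and torture_cleanup_end() are the functions introduced in this hunk.

static void my_torture_cleanup(void)
{
	if (torture_cleanup_begin())
		return;			/* System is already shutting down. */

	my_stop_kthreads();		/* Module teardown may still read	*/
	my_print_stats();		/* torture_type at this point.		*/

	torture_cleanup_end();		/* Only now may torture_type go NULL.	*/
}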
diff --git a/mm/mlock.c b/mm/mlock.c
index ce84cb0b83ef..ab3150c26711 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -789,7 +789,7 @@ static int do_mlockall(int flags)
789 789
790 /* Ignore errors */ 790 /* Ignore errors */
791 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); 791 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
792 cond_resched(); 792 cond_resched_rcu_qs();
793 } 793 }
794out: 794out:
795 return 0; 795 return 0;
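
The one-line change above swaps cond_resched() for cond_resched_rcu_qs(), which offers to reschedule and, even when rescheduling is not needed, notes a voluntary context switch so that RCU-tasks sees a quiescent state. A minimal sketch of the same pattern in another long-running loop follows; struct my_item and my_process_all() are invented for illustration.

#include <linux/list.h>
#include <linux/rcupdate.h>

struct my_item {			/* invented for illustration */
	struct list_head node;
};

static void my_process_all(struct list_head *head)
{
	struct my_item *item;

	list_for_each_entry(item, head, node) {
		/* ... per-item work would go here ... */
		cond_resched_rcu_qs();	/* reschedule offer + RCU-tasks quiescent state */
	}
}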
diff --git a/tools/testing/selftests/rcutorture/bin/config2frag.sh b/tools/testing/selftests/rcutorture/bin/config2frag.sh
index 9f9ffcd427d3..56f51ae13d73 100644..100755
--- a/tools/testing/selftests/rcutorture/bin/config2frag.sh
+++ b/tools/testing/selftests/rcutorture/bin/config2frag.sh
@@ -1,5 +1,5 @@
1#!/bin/sh 1#!/bin/bash
2# Usage: sh config2frag.sh < .config > configfrag 2# Usage: config2frag.sh < .config > configfrag
3# 3#
4# Converts the "# CONFIG_XXX is not set" to "CONFIG_XXX=n" so that the 4# Converts the "# CONFIG_XXX is not set" to "CONFIG_XXX=n" so that the
5# resulting file becomes a legitimate Kconfig fragment. 5# resulting file becomes a legitimate Kconfig fragment.
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index d686537dd55c..eee31e261bf7 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -1,5 +1,5 @@
1#!/bin/sh 1#!/bin/bash
2# Usage: sh configcheck.sh .config .config-template 2# Usage: configcheck.sh .config .config-template
3# 3#
4# This program is free software; you can redistribute it and/or modify 4# This program is free software; you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by 5# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 9c3f3d39b934..15f1a17ca96e 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -1,6 +1,6 @@
1#!/bin/sh 1#!/bin/bash
2# 2#
3# sh configinit.sh config-spec-file [ build output dir ] 3# Usage: configinit.sh config-spec-file [ build output dir ]
4# 4#
5# Create a .config file from the spec file. Run from the kernel source tree. 5# Create a .config file from the spec file. Run from the kernel source tree.
6# Exits with 0 if all went well, with 1 if all went well but the config 6# Exits with 0 if all went well, with 1 if all went well but the config
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index d01b865bb100..b325470c01b3 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -64,6 +64,26 @@ configfrag_boot_params () {
64 fi 64 fi
65} 65}
66 66
67# configfrag_boot_cpus bootparam-string config-fragment-file config-cpus
68#
69# Decreases number of CPUs based on any maxcpus= boot parameters specified.
70configfrag_boot_cpus () {
71 local bootargs="`configfrag_boot_params "$1" "$2"`"
72 local maxcpus
73 if echo "${bootargs}" | grep -q 'maxcpus=[0-9]'
74 then
75 maxcpus="`echo "${bootargs}" | sed -e 's/^.*maxcpus=\([0-9]*\).*$/\1/'`"
76 if test "$3" -gt "$maxcpus"
77 then
78 echo $maxcpus
79 else
80 echo $3
81 fi
82 else
83 echo $3
84 fi
85}
86
67# configfrag_hotplug_cpu config-fragment-file 87# configfrag_hotplug_cpu config-fragment-file
68# 88#
69# Returns 1 if the config fragment specifies hotplug CPU. 89# Returns 1 if the config fragment specifies hotplug CPU.
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 7c1e56b46de4..00cb0db2643d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -2,7 +2,7 @@
2# 2#
3# Build a kvm-ready Linux kernel from the tree in the current directory. 3# Build a kvm-ready Linux kernel from the tree in the current directory.
4# 4#
5# Usage: sh kvm-build.sh config-template build-dir more-configs 5# Usage: kvm-build.sh config-template build-dir more-configs
6# 6#
7# This program is free software; you can redistribute it and/or modify 7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by 8# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
index 7f1ff1a8fc4b..43f764098e50 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -2,7 +2,7 @@
2# 2#
3# Analyze a given results directory for locktorture progress. 3# Analyze a given results directory for locktorture progress.
4# 4#
5# Usage: sh kvm-recheck-lock.sh resdir 5# Usage: kvm-recheck-lock.sh resdir
6# 6#
7# This program is free software; you can redistribute it and/or modify 7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by 8# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 307c4b95f325..d6cc07fc137f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -2,7 +2,7 @@
2# 2#
3# Analyze a given results directory for rcutorture progress. 3# Analyze a given results directory for rcutorture progress.
4# 4#
5# Usage: sh kvm-recheck-rcu.sh resdir 5# Usage: kvm-recheck-rcu.sh resdir
6# 6#
7# This program is free software; you can redistribute it and/or modify 7# This program is free software; you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by 8# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 3f6c9b78d177..4f5b20f367a9 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -4,7 +4,7 @@
4# check the build and console output for errors. Given a directory 4# check the build and console output for errors. Given a directory
5# containing results directories, this recursively checks them all. 5# containing results directories, this recursively checks them all.
6# 6#
7# Usage: sh kvm-recheck.sh resdir ... 7# Usage: kvm-recheck.sh resdir ...
8# 8#
9# This program is free software; you can redistribute it and/or modify 9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by 10# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 0f69dcbf9def..f6b2b4771b78 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -6,7 +6,7 @@
6# Execute this in the source tree. Do not run it as a background task 6# Execute this in the source tree. Do not run it as a background task
7# because qemu does not seem to like that much. 7# because qemu does not seem to like that much.
8# 8#
9# Usage: sh kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args 9# Usage: kvm-test-1-run.sh config builddir resdir minutes qemu-args boot_args
10# 10#
11# qemu-args defaults to "-nographic", along with arguments specifying the 11# qemu-args defaults to "-nographic", along with arguments specifying the
12# number of CPUs and other options generated from 12# number of CPUs and other options generated from
@@ -140,6 +140,7 @@ fi
140# Generate -smp qemu argument. 140# Generate -smp qemu argument.
141qemu_args="-nographic $qemu_args" 141qemu_args="-nographic $qemu_args"
142cpu_count=`configNR_CPUS.sh $config_template` 142cpu_count=`configNR_CPUS.sh $config_template`
143cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
143vcpus=`identify_qemu_vcpus` 144vcpus=`identify_qemu_vcpus`
144if test $cpu_count -gt $vcpus 145if test $cpu_count -gt $vcpus
145then 146then
@@ -214,7 +215,7 @@ then
214 fi 215 fi
215 if test $kruntime -ge $((seconds + grace)) 216 if test $kruntime -ge $((seconds + grace))
216 then 217 then
217 echo "!!! Hang at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1 218 echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
218 kill -KILL $qemu_pid 219 kill -KILL $qemu_pid
219 break 220 break
220 fi 221 fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 589e9c38413b..e527dc952eb0 100644..100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -7,7 +7,7 @@
7# Edit the definitions below to set the locations of the various directories, 7# Edit the definitions below to set the locations of the various directories,
8# as well as the test duration. 8# as well as the test duration.
9# 9#
10# Usage: sh kvm.sh [ options ] 10# Usage: kvm.sh [ options ]
11# 11#
12# This program is free software; you can redistribute it and/or modify 12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by 13# it under the terms of the GNU General Public License as published by
@@ -188,7 +188,9 @@ for CF in $configs
188do 188do
189 if test -f "$CONFIGFRAG/$kversion/$CF" 189 if test -f "$CONFIGFRAG/$kversion/$CF"
190 then 190 then
191 echo $CF `configNR_CPUS.sh $CONFIGFRAG/$kversion/$CF` >> $T/cfgcpu 191 cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$kversion/$CF`
192 cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$kversion/$CF" "$cpu_count"`
193 echo $CF $cpu_count >> $T/cfgcpu
192 else 194 else
193 echo "The --configs file $CF does not exist, terminating." 195 echo "The --configs file $CF does not exist, terminating."
194 exit 1 196 exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 543230951c38..499d1e598e42 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -1,4 +1,4 @@
1#!/bin/sh 1#!/bin/bash
2# 2#
3# Check the build output from an rcutorture run for goodness. 3# Check the build output from an rcutorture run for goodness.
4# The "file" is a pathname on the local system, and "title" is 4# The "file" is a pathname on the local system, and "title" is
@@ -6,8 +6,7 @@
6# 6#
7# The file must contain kernel build output. 7# The file must contain kernel build output.
8# 8#
9# Usage: 9# Usage: parse-build.sh file title
10# sh parse-build.sh file title
11# 10#
12# This program is free software; you can redistribute it and/or modify 11# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by 12# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 4185d4cab32e..f962ba4cf68b 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -1,11 +1,10 @@
1#!/bin/sh 1#!/bin/bash
2# 2#
3# Check the console output from an rcutorture run for oopses. 3# Check the console output from an rcutorture run for oopses.
4# The "file" is a pathname on the local system, and "title" is 4# The "file" is a pathname on the local system, and "title" is
5# a text string for error-message purposes. 5# a text string for error-message purposes.
6# 6#
7# Usage: 7# Usage: parse-console.sh file title
8# sh parse-console.sh file title
9# 8#
10# This program is free software; you can redistribute it and/or modify 9# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by 10# it under the terms of the GNU General Public License as published by
@@ -33,6 +32,10 @@ title="$2"
33 32
34. functions.sh 33. functions.sh
35 34
35if grep -Pq '\x00' < $file
36then
37 print_warning Console output contains nul bytes, old qemu still running?
38fi
36egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T 39egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $T
37if test -s $T 40if test -s $T
38then 41then
diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
index 3455560ab4e4..e3c5f0705696 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-torture.sh
@@ -1,4 +1,4 @@
1#!/bin/sh 1#!/bin/bash
2# 2#
3# Check the console output from a torture run for goodness. 3# Check the console output from a torture run for goodness.
4# The "file" is a pathname on the local system, and "title" is 4# The "file" is a pathname on the local system, and "title" is
@@ -7,8 +7,7 @@
7# The file must contain torture output, but can be interspersed 7# The file must contain torture output, but can be interspersed
8# with other dmesg text, as in console-log output. 8# with other dmesg text, as in console-log output.
9# 9#
10# Usage: 10# Usage: parse-torture.sh file title
11# sh parse-torture.sh file title
12# 11#
13# This program is free software; you can redistribute it and/or modify 12# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by 13# it under the terms of the GNU General Public License as published by
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
index a061b22d1892..6108137da770 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -1 +1,3 @@
1LOCK01 1LOCK01
2LOCK02
3LOCK03
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02 b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
@@ -0,0 +1,6 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=4
3CONFIG_HOTPLUG_CPU=y
4CONFIG_PREEMPT_NONE=n
5CONFIG_PREEMPT_VOLUNTARY=n
6CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
new file mode 100644
index 000000000000..5aa44b4f1b51
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
@@ -0,0 +1 @@
locktorture.torture_type=mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03 b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
@@ -0,0 +1,6 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=4
3CONFIG_HOTPLUG_CPU=y
4CONFIG_PREEMPT_NONE=n
5CONFIG_PREEMPT_VOLUNTARY=n
6CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
new file mode 100644
index 000000000000..a67bbe0245c9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
@@ -0,0 +1 @@
locktorture.torture_type=rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
index 9746ea1cd6c7..252aae618984 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -38,6 +38,6 @@ per_version_boot_params () {
38 echo $1 `locktorture_param_onoff "$1" "$2"` \ 38 echo $1 `locktorture_param_onoff "$1" "$2"` \
39 locktorture.stat_interval=15 \ 39 locktorture.stat_interval=15 \
40 locktorture.shutdown_secs=$3 \ 40 locktorture.shutdown_secs=$3 \
41 locktorture.locktorture_runnable=1 \ 41 locktorture.torture_runnable=1 \
42 locktorture.verbose=1 42 locktorture.verbose=1
43} 43}
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index cd3d29cb0a47..a3a1a05a2b5c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -11,3 +11,6 @@ SRCU-N
11SRCU-P 11SRCU-P
12TINY01 12TINY01
13TINY02 13TINY02
14TASKS01
15TASKS02
16TASKS03
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
new file mode 100644
index 000000000000..97f0a0b27ef7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -0,0 +1,9 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=2
3CONFIG_HOTPLUG_CPU=y
4CONFIG_PREEMPT_NONE=n
5CONFIG_PREEMPT_VOLUNTARY=n
6CONFIG_PREEMPT=y
7CONFIG_DEBUG_LOCK_ALLOC=y
8CONFIG_PROVE_RCU=y
9CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
new file mode 100644
index 000000000000..cd2a188eeb6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -0,0 +1 @@
rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
new file mode 100644
index 000000000000..696d2ea74d13
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -0,0 +1,5 @@
1CONFIG_SMP=n
2CONFIG_PREEMPT_NONE=y
3CONFIG_PREEMPT_VOLUNTARY=n
4CONFIG_PREEMPT=n
5CONFIG_TASKS_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
new file mode 100644
index 000000000000..cd2a188eeb6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -0,0 +1 @@
rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
new file mode 100644
index 000000000000..9c60da5b5d1d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -0,0 +1,13 @@
1CONFIG_SMP=y
2CONFIG_NR_CPUS=2
3CONFIG_HOTPLUG_CPU=n
4CONFIG_SUSPEND=n
5CONFIG_HIBERNATION=n
6CONFIG_PREEMPT_NONE=n
7CONFIG_PREEMPT_VOLUNTARY=n
8CONFIG_PREEMPT=y
9CONFIG_TASKS_RCU=y
10CONFIG_HZ_PERIODIC=n
11CONFIG_NO_HZ_IDLE=n
12CONFIG_NO_HZ_FULL=y
13CONFIG_NO_HZ_FULL_ALL=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
new file mode 100644
index 000000000000..cd2a188eeb6d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
@@ -0,0 +1 @@
rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index 063b7079c621..38e3895759dd 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -1,5 +1,4 @@
1CONFIG_SMP=y 1CONFIG_SMP=y
2CONFIG_NR_CPUS=8
3CONFIG_PREEMPT_NONE=n 2CONFIG_PREEMPT_NONE=n
4CONFIG_PREEMPT_VOLUNTARY=n 3CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=y 4CONFIG_PREEMPT=y
@@ -10,8 +9,7 @@ CONFIG_NO_HZ_FULL=n
10CONFIG_RCU_FAST_NO_HZ=y 9CONFIG_RCU_FAST_NO_HZ=y
11CONFIG_RCU_TRACE=y 10CONFIG_RCU_TRACE=y
12CONFIG_HOTPLUG_CPU=y 11CONFIG_HOTPLUG_CPU=y
13CONFIG_RCU_FANOUT=8 12CONFIG_MAXSMP=y
14CONFIG_RCU_FANOUT_EXACT=n
15CONFIG_RCU_NOCB_CPU=y 13CONFIG_RCU_NOCB_CPU=y
16CONFIG_RCU_NOCB_CPU_ZERO=y 14CONFIG_RCU_NOCB_CPU_ZERO=y
17CONFIG_DEBUG_LOCK_ALLOC=n 15CONFIG_DEBUG_LOCK_ALLOC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index 0fc8a3428938..adc3abc82fb8 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -1 +1 @@
rcutorture.torture_type=rcu_bh rcutorture.torture_type=rcu_bh maxcpus=8
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index ab6225506909..8f1017666aa7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -1,5 +1,6 @@
1CONFIG_SMP=y 1CONFIG_SMP=y
2CONFIG_NR_CPUS=16 2CONFIG_NR_CPUS=16
3CONFIG_CPUMASK_OFFSTACK=y
3CONFIG_PREEMPT_NONE=y 4CONFIG_PREEMPT_NONE=y
4CONFIG_PREEMPT_VOLUNTARY=n 5CONFIG_PREEMPT_VOLUNTARY=n
5CONFIG_PREEMPT=n 6CONFIG_PREEMPT=n
@@ -7,7 +8,7 @@ CONFIG_PREEMPT=n
7CONFIG_HZ_PERIODIC=n 8CONFIG_HZ_PERIODIC=n
8CONFIG_NO_HZ_IDLE=n 9CONFIG_NO_HZ_IDLE=n
9CONFIG_NO_HZ_FULL=y 10CONFIG_NO_HZ_FULL=y
10CONFIG_NO_HZ_FULL_ALL=y 11CONFIG_NO_HZ_FULL_ALL=n
11CONFIG_NO_HZ_FULL_SYSIDLE=y 12CONFIG_NO_HZ_FULL_SYSIDLE=y
12CONFIG_RCU_FAST_NO_HZ=n 13CONFIG_RCU_FAST_NO_HZ=n
13CONFIG_RCU_TRACE=y 14CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
new file mode 100644
index 000000000000..d44609937503
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -0,0 +1 @@
nohz_full=2-9
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index 8977d8d31b19..ffb85ed786fa 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -51,7 +51,7 @@ per_version_boot_params () {
51 `rcutorture_param_n_barrier_cbs "$1"` \ 51 `rcutorture_param_n_barrier_cbs "$1"` \
52 rcutorture.stat_interval=15 \ 52 rcutorture.stat_interval=15 \
53 rcutorture.shutdown_secs=$3 \ 53 rcutorture.shutdown_secs=$3 \
54 rcutorture.rcutorture_runnable=1 \ 54 rcutorture.torture_runnable=1 \
55 rcutorture.test_no_idle_hz=1 \ 55 rcutorture.test_no_idle_hz=1 \
56 rcutorture.verbose=1 56 rcutorture.verbose=1
57} 57}
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
index 49d134c25c04..4170e714f044 100644
--- a/tools/testing/selftests/rcutorture/doc/initrd.txt
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -6,6 +6,7 @@ this case. There are probably much better ways of doing this.
6That said, here are the commands: 6That said, here are the commands:
7 7
8------------------------------------------------------------------------ 8------------------------------------------------------------------------
9cd tools/testing/selftests/rcutorture
9zcat /initrd.img > /tmp/initrd.img.zcat 10zcat /initrd.img > /tmp/initrd.img.zcat
10mkdir initrd 11mkdir initrd
11cd initrd 12cd initrd